1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Copyright IBM Corp. 2012
4 *
5 * Author(s):
6 * Jan Glauber <jang@linux.vnet.ibm.com>
7 */
8
9 #define KMSG_COMPONENT "zpci"
10 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
11
12 #include <linux/kernel.h>
13 #include <linux/pci.h>
14 #include <asm/pci_debug.h>
15 #include <asm/pci_dma.h>
16 #include <asm/sclp.h>
17
18 #include "pci_bus.h"
19 #include "pci_report.h"
20
21 /* Content Code Description for PCI Function Error */
22 struct zpci_ccdf_err {
23 u32 reserved1;
24 u32 fh; /* function handle */
25 u32 fid; /* function id */
26 u32 ett : 4; /* expected table type */
27 u32 mvn : 12; /* MSI vector number */
28 u32 dmaas : 8; /* DMA address space */
29 u32 : 6;
30 u32 q : 1; /* event qualifier */
31 u32 rw : 1; /* read/write */
32 u64 faddr; /* failing address */
33 u32 reserved3;
34 u16 reserved4;
35 u16 pec; /* PCI event code */
36 } __packed;
37
38 /* Content Code Description for PCI Function Availability */
39 struct zpci_ccdf_avail {
40 u32 reserved1;
41 u32 fh; /* function handle */
42 u32 fid; /* function id */
43 u32 reserved2;
44 u32 reserved3;
45 u32 reserved4;
46 u32 reserved5;
47 u16 reserved6;
48 u16 pec; /* PCI event code */
49 } __packed;
50
/*
 * ers_result_indicates_abort - Check whether a result ends error recovery
 * @ers_res: result to classify
 *
 * Returns false for PCI_ERS_RESULT_CAN_RECOVER, PCI_ERS_RESULT_RECOVERED,
 * PCI_ERS_RESULT_NEED_RESET and PCI_ERS_RESULT_NONE, true for any other value.
 */
static inline bool ers_result_indicates_abort(pci_ers_result_t ers_res)
{
	bool may_continue = ers_res == PCI_ERS_RESULT_CAN_RECOVER ||
			    ers_res == PCI_ERS_RESULT_RECOVERED ||
			    ers_res == PCI_ERS_RESULT_NEED_RESET ||
			    ers_res == PCI_ERS_RESULT_NONE;

	return !may_continue;
}
63
is_passed_through(struct pci_dev * pdev)64 static bool is_passed_through(struct pci_dev *pdev)
65 {
66 struct zpci_dev *zdev = to_zpci(pdev);
67 bool ret;
68
69 mutex_lock(&zdev->kzdev_lock);
70 ret = !!zdev->kzdev;
71 mutex_unlock(&zdev->kzdev_lock);
72
73 return ret;
74 }
75
is_driver_supported(struct pci_driver * driver)76 static bool is_driver_supported(struct pci_driver *driver)
77 {
78 if (!driver || !driver->err_handler)
79 return false;
80 if (!driver->err_handler->error_detected)
81 return false;
82 return true;
83 }
84
zpci_event_notify_error_detected(struct pci_dev * pdev,struct pci_driver * driver)85 static pci_ers_result_t zpci_event_notify_error_detected(struct pci_dev *pdev,
86 struct pci_driver *driver)
87 {
88 pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT;
89
90 ers_res = driver->err_handler->error_detected(pdev, pdev->error_state);
91 pci_uevent_ers(pdev, ers_res);
92 if (ers_result_indicates_abort(ers_res))
93 pr_info("%s: Automatic recovery failed after initial reporting\n", pci_name(pdev));
94 else if (ers_res == PCI_ERS_RESULT_NEED_RESET)
95 pr_debug("%s: Driver needs reset to recover\n", pci_name(pdev));
96
97 return ers_res;
98 }
99
zpci_event_do_error_state_clear(struct pci_dev * pdev,struct pci_driver * driver)100 static pci_ers_result_t zpci_event_do_error_state_clear(struct pci_dev *pdev,
101 struct pci_driver *driver)
102 {
103 pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT;
104 struct zpci_dev *zdev = to_zpci(pdev);
105 int rc;
106
107 /* The underlying device may have been disabled by the event */
108 if (!zdev_enabled(zdev))
109 return PCI_ERS_RESULT_NEED_RESET;
110
111 pr_info("%s: Unblocking device access for examination\n", pci_name(pdev));
112 rc = zpci_reset_load_store_blocked(zdev);
113 if (rc) {
114 pr_err("%s: Unblocking device access failed\n", pci_name(pdev));
115 /* Let's try a full reset instead */
116 return PCI_ERS_RESULT_NEED_RESET;
117 }
118
119 if (driver->err_handler->mmio_enabled)
120 ers_res = driver->err_handler->mmio_enabled(pdev);
121 else
122 ers_res = PCI_ERS_RESULT_NONE;
123
124 if (ers_result_indicates_abort(ers_res)) {
125 pr_info("%s: Automatic recovery failed after MMIO re-enable\n",
126 pci_name(pdev));
127 return ers_res;
128 } else if (ers_res == PCI_ERS_RESULT_NEED_RESET) {
129 pr_debug("%s: Driver needs reset to recover\n", pci_name(pdev));
130 return ers_res;
131 }
132
133 pr_debug("%s: Unblocking DMA\n", pci_name(pdev));
134 rc = zpci_clear_error_state(zdev);
135 if (!rc) {
136 pdev->error_state = pci_channel_io_normal;
137 } else {
138 pr_err("%s: Unblocking DMA failed\n", pci_name(pdev));
139 /* Let's try a full reset instead */
140 return PCI_ERS_RESULT_NEED_RESET;
141 }
142
143 return ers_res;
144 }
145
zpci_event_do_reset(struct pci_dev * pdev,struct pci_driver * driver)146 static pci_ers_result_t zpci_event_do_reset(struct pci_dev *pdev,
147 struct pci_driver *driver)
148 {
149 pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT;
150
151 pr_info("%s: Initiating reset\n", pci_name(pdev));
152 if (zpci_hot_reset_device(to_zpci(pdev))) {
153 pr_err("%s: The reset request failed\n", pci_name(pdev));
154 return ers_res;
155 }
156 pdev->error_state = pci_channel_io_normal;
157
158 if (driver->err_handler->slot_reset)
159 ers_res = driver->err_handler->slot_reset(pdev);
160 else
161 ers_res = PCI_ERS_RESULT_NONE;
162
163 if (ers_result_indicates_abort(ers_res)) {
164 pr_info("%s: Automatic recovery failed after slot reset\n", pci_name(pdev));
165 return ers_res;
166 }
167
168 return ers_res;
169 }
170
171 /* zpci_event_attempt_error_recovery - Try to recover the given PCI function
172 * @pdev: PCI function to recover currently in the error state
173 *
174 * We follow the scheme outlined in Documentation/PCI/pci-error-recovery.rst.
175 * With the simplification that recovery always happens per function
176 * and the platform determines which functions are affected for
177 * multi-function devices.
178 */
zpci_event_attempt_error_recovery(struct pci_dev * pdev)179 static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev)
180 {
181 pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT;
182 struct zpci_dev *zdev = to_zpci(pdev);
183 char *status_str = "success";
184 struct pci_driver *driver;
185
186 /*
187 * Ensure that the PCI function is not removed concurrently, no driver
188 * is unbound or probed and that userspace can't access its
189 * configuration space while we perform recovery.
190 */
191 pci_dev_lock(pdev);
192 if (pdev->error_state == pci_channel_io_perm_failure) {
193 ers_res = PCI_ERS_RESULT_DISCONNECT;
194 goto out_unlock;
195 }
196 pdev->error_state = pci_channel_io_frozen;
197
198 if (is_passed_through(pdev)) {
199 pr_info("%s: Cannot be recovered in the host because it is a pass-through device\n",
200 pci_name(pdev));
201 status_str = "failed (pass-through)";
202 goto out_unlock;
203 }
204
205 driver = to_pci_driver(pdev->dev.driver);
206 if (!is_driver_supported(driver)) {
207 if (!driver) {
208 pr_info("%s: Cannot be recovered because no driver is bound to the device\n",
209 pci_name(pdev));
210 status_str = "failed (no driver)";
211 } else {
212 pr_info("%s: The %s driver bound to the device does not support error recovery\n",
213 pci_name(pdev),
214 driver->name);
215 status_str = "failed (no driver support)";
216 }
217 goto out_unlock;
218 }
219
220 ers_res = zpci_event_notify_error_detected(pdev, driver);
221 if (ers_result_indicates_abort(ers_res)) {
222 status_str = "failed (abort on detection)";
223 goto out_unlock;
224 }
225
226 if (ers_res != PCI_ERS_RESULT_NEED_RESET) {
227 ers_res = zpci_event_do_error_state_clear(pdev, driver);
228 if (ers_result_indicates_abort(ers_res)) {
229 status_str = "failed (abort on MMIO enable)";
230 goto out_unlock;
231 }
232 }
233
234 if (ers_res == PCI_ERS_RESULT_NEED_RESET)
235 ers_res = zpci_event_do_reset(pdev, driver);
236
237 /*
238 * ers_res can be PCI_ERS_RESULT_NONE either because the driver
239 * decided to return it, indicating that it abstains from voting
240 * on how to recover, or because it didn't implement the callback.
241 * Both cases assume, that if there is nothing else causing a
242 * disconnect, we recovered successfully.
243 */
244 if (ers_res == PCI_ERS_RESULT_NONE)
245 ers_res = PCI_ERS_RESULT_RECOVERED;
246
247 if (ers_res != PCI_ERS_RESULT_RECOVERED) {
248 pci_uevent_ers(pdev, PCI_ERS_RESULT_DISCONNECT);
249 pr_err("%s: Automatic recovery failed; operator intervention is required\n",
250 pci_name(pdev));
251 status_str = "failed (driver can't recover)";
252 goto out_unlock;
253 }
254
255 pr_info("%s: The device is ready to resume operations\n", pci_name(pdev));
256 if (driver->err_handler->resume)
257 driver->err_handler->resume(pdev);
258 pci_uevent_ers(pdev, PCI_ERS_RESULT_RECOVERED);
259 out_unlock:
260 pci_dev_unlock(pdev);
261 zpci_report_status(zdev, "recovery", status_str);
262
263 return ers_res;
264 }
265
266 /* zpci_event_io_failure - Report PCI channel failure state to driver
267 * @pdev: PCI function for which to report
268 * @es: PCI channel failure state to report
269 */
zpci_event_io_failure(struct pci_dev * pdev,pci_channel_state_t es)270 static void zpci_event_io_failure(struct pci_dev *pdev, pci_channel_state_t es)
271 {
272 struct pci_driver *driver;
273
274 pci_dev_lock(pdev);
275 pdev->error_state = es;
276 /**
277 * While vfio-pci's error_detected callback notifies user-space QEMU
278 * reacts to this by freezing the guest. In an s390 environment PCI
279 * errors are rarely fatal so this is overkill. Instead in the future
280 * we will inject the error event and let the guest recover the device
281 * itself.
282 */
283 if (is_passed_through(pdev))
284 goto out;
285 driver = to_pci_driver(pdev->dev.driver);
286 if (driver && driver->err_handler && driver->err_handler->error_detected)
287 driver->err_handler->error_detected(pdev, pdev->error_state);
288 out:
289 pci_dev_unlock(pdev);
290 }
291
__zpci_event_error(struct zpci_ccdf_err * ccdf)292 static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
293 {
294 struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
295 struct pci_dev *pdev = NULL;
296 pci_ers_result_t ers_res;
297 u32 fh = 0;
298 int rc;
299
300 zpci_dbg(3, "err fid:%x, fh:%x, pec:%x\n",
301 ccdf->fid, ccdf->fh, ccdf->pec);
302 zpci_err("error CCDF:\n");
303 zpci_err_hex(ccdf, sizeof(*ccdf));
304
305 if (zdev) {
306 mutex_lock(&zdev->state_lock);
307 rc = clp_refresh_fh(zdev->fid, &fh);
308 if (rc)
309 goto no_pdev;
310 if (!fh || ccdf->fh != fh) {
311 /* Ignore events with stale handles */
312 zpci_dbg(3, "err fid:%x, fh:%x (stale %x)\n",
313 ccdf->fid, fh, ccdf->fh);
314 goto no_pdev;
315 }
316 zpci_update_fh(zdev, ccdf->fh);
317 if (zdev->zbus->bus)
318 pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn);
319 }
320
321 pr_err("%s: Event 0x%x reports an error for PCI function 0x%x\n",
322 pdev ? pci_name(pdev) : "n/a", ccdf->pec, ccdf->fid);
323
324 if (!pdev)
325 goto no_pdev;
326
327 switch (ccdf->pec) {
328 case 0x002a: /* Error event concerns FMB */
329 case 0x002b:
330 case 0x002c:
331 break;
332 case 0x0040: /* Service Action or Error Recovery Failed */
333 case 0x003b:
334 zpci_event_io_failure(pdev, pci_channel_io_perm_failure);
335 break;
336 default: /* PCI function left in the error state attempt to recover */
337 ers_res = zpci_event_attempt_error_recovery(pdev);
338 if (ers_res != PCI_ERS_RESULT_RECOVERED)
339 zpci_event_io_failure(pdev, pci_channel_io_perm_failure);
340 break;
341 }
342 pci_dev_put(pdev);
343 no_pdev:
344 if (zdev)
345 mutex_unlock(&zdev->state_lock);
346 zpci_zdev_put(zdev);
347 }
348
/*
 * zpci_event_error - Entry point for PCI function error events
 * @data: event payload, handed to __zpci_event_error() as a
 *	  struct zpci_ccdf_err
 *
 * The event is dropped if zpci_is_enabled() returns false.
 */
void zpci_event_error(void *data)
{
	if (!zpci_is_enabled())
		return;

	__zpci_event_error(data);
}
354
/*
 * zpci_event_hard_deconfigured - Handle a function that was taken away
 * @zdev: affected zPCI function
 * @fh: function handle delivered with the event
 *
 * Stores the new handle, removes the PCI device, disables the function if
 * it is still enabled and leaves @zdev in the standby state.
 */
static void zpci_event_hard_deconfigured(struct zpci_dev *zdev, u32 fh)
{
	bool still_enabled;

	zpci_update_fh(zdev, fh);

	/*
	 * Passing true hints to the driver that the function is already
	 * unusable.
	 */
	zpci_bus_remove_device(zdev, true);

	/*
	 * The device is gone already, but the zPCI resources still have to
	 * be freed, which happens as part of the disable.
	 */
	still_enabled = zdev_enabled(zdev);
	if (still_enabled)
		zpci_disable_device(zdev);

	zdev->state = ZPCI_FN_STATE_STANDBY;
}
369
zpci_event_reappear(struct zpci_dev * zdev)370 static void zpci_event_reappear(struct zpci_dev *zdev)
371 {
372 lockdep_assert_held(&zdev->state_lock);
373 /*
374 * The zdev is in the reserved state. This means that it was presumed to
375 * go away but there are still undropped references. Now, the platform
376 * announced its availability again. Bring back the lingering zdev
377 * to standby. This is safe because we hold a temporary reference
378 * now so that it won't go away. Account for the re-appearance of the
379 * underlying device by incrementing the reference count.
380 */
381 zdev->state = ZPCI_FN_STATE_STANDBY;
382 zpci_zdev_get(zdev);
383 zpci_dbg(1, "rea fid:%x, fh:%x\n", zdev->fid, zdev->fh);
384 }
385
__zpci_event_availability(struct zpci_ccdf_avail * ccdf)386 static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
387 {
388 struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
389 bool existing_zdev = !!zdev;
390 enum zpci_state state;
391
392 zpci_dbg(3, "avl fid:%x, fh:%x, pec:%x\n",
393 ccdf->fid, ccdf->fh, ccdf->pec);
394
395 if (existing_zdev)
396 mutex_lock(&zdev->state_lock);
397
398 switch (ccdf->pec) {
399 case 0x0301: /* Reserved|Standby -> Configured */
400 if (!zdev) {
401 zdev = zpci_create_device(ccdf->fid, ccdf->fh, ZPCI_FN_STATE_CONFIGURED);
402 if (IS_ERR(zdev))
403 break;
404 if (zpci_add_device(zdev)) {
405 kfree(zdev);
406 break;
407 }
408 } else {
409 if (zdev->state == ZPCI_FN_STATE_RESERVED)
410 zpci_event_reappear(zdev);
411 /* the configuration request may be stale */
412 else if (zdev->state != ZPCI_FN_STATE_STANDBY)
413 break;
414 zdev->state = ZPCI_FN_STATE_CONFIGURED;
415 }
416 zpci_scan_configured_device(zdev, ccdf->fh);
417 break;
418 case 0x0302: /* Reserved -> Standby */
419 if (!zdev) {
420 zdev = zpci_create_device(ccdf->fid, ccdf->fh, ZPCI_FN_STATE_STANDBY);
421 if (IS_ERR(zdev))
422 break;
423 if (zpci_add_device(zdev)) {
424 kfree(zdev);
425 break;
426 }
427 } else {
428 if (zdev->state == ZPCI_FN_STATE_RESERVED)
429 zpci_event_reappear(zdev);
430 zpci_update_fh(zdev, ccdf->fh);
431 }
432 break;
433 case 0x0303: /* Deconfiguration requested */
434 if (zdev) {
435 /* The event may have been queued before we configured
436 * the device.
437 */
438 if (zdev->state != ZPCI_FN_STATE_CONFIGURED)
439 break;
440 zpci_update_fh(zdev, ccdf->fh);
441 zpci_deconfigure_device(zdev);
442 }
443 break;
444 case 0x0304: /* Configured -> Standby|Reserved */
445 if (zdev) {
446 /* The event may have been queued before we configured
447 * the device.:
448 */
449 if (zdev->state == ZPCI_FN_STATE_CONFIGURED)
450 zpci_event_hard_deconfigured(zdev, ccdf->fh);
451 /* The 0x0304 event may immediately reserve the device */
452 if (!clp_get_state(zdev->fid, &state) &&
453 state == ZPCI_FN_STATE_RESERVED) {
454 zpci_device_reserved(zdev);
455 }
456 }
457 break;
458 case 0x0306: /* 0x308 or 0x302 for multiple devices */
459 zpci_remove_reserved_devices();
460 zpci_scan_devices();
461 break;
462 case 0x0308: /* Standby -> Reserved */
463 if (!zdev)
464 break;
465 zpci_device_reserved(zdev);
466 break;
467 default:
468 break;
469 }
470 if (existing_zdev) {
471 mutex_unlock(&zdev->state_lock);
472 zpci_zdev_put(zdev);
473 }
474 }
475
/*
 * zpci_event_availability - Entry point for PCI function availability events
 * @data: event payload, handed to __zpci_event_availability() as a
 *	  struct zpci_ccdf_avail
 *
 * The event is dropped if zpci_is_enabled() returns false.
 */
void zpci_event_availability(void *data)
{
	if (!zpci_is_enabled())
		return;

	__zpci_event_availability(data);
}
481