xref: /linux/drivers/pci/hotplug/pciehp_hpc.c (revision 69050f8d6d075dc01af7a5f2f550a8067510366f)
1 // SPDX-License-Identifier: GPL-2.0+
2 /*
3  * PCI Express PCI Hot Plug Driver
4  *
5  * Copyright (C) 1995,2001 Compaq Computer Corporation
6  * Copyright (C) 2001 Greg Kroah-Hartman (greg@kroah.com)
7  * Copyright (C) 2001 IBM Corp.
8  * Copyright (C) 2003-2004 Intel Corporation
9  *
10  * All rights reserved.
11  *
12  * Send feedback to <greg@kroah.com>,<kristen.c.accardi@intel.com>
13  */
14 
15 #define dev_fmt(fmt) "pciehp: " fmt
16 
17 #include <linux/bitfield.h>
18 #include <linux/dmi.h>
19 #include <linux/kernel.h>
20 #include <linux/types.h>
21 #include <linux/jiffies.h>
22 #include <linux/kthread.h>
23 #include <linux/pci.h>
24 #include <linux/pm_runtime.h>
25 #include <linux/interrupt.h>
26 #include <linux/slab.h>
27 
28 #include "../pci.h"
29 #include "pciehp.h"
30 
/*
 * DMI quirk table: on systems matching an entry, pcie_init() marks the
 * controller as having in-band presence detect disabled, whether or not
 * the slot itself advertises that.
 */
static const struct dmi_system_id inband_presence_disabled_dmi_table[] = {
	/*
	 * Match all Dell systems, as some Dell systems have inband
	 * presence disabled on NVMe slots (but don't support the bit to
	 * report it). Setting inband presence disabled should have no
	 * negative effect, except on broken hotplug slots that never
	 * assert presence detect--and those will still work, they will
	 * just have a bit of extra delay before being probed.
	 */
	{
		.ident = "Dell System",
		.matches = {
			DMI_MATCH(DMI_OEM_STRING, "Dell System"),
		},
	},
	{}	/* empty terminating entry */
};
48 
49 static inline struct pci_dev *ctrl_dev(struct controller *ctrl)
50 {
51 	return ctrl->pcie->port;
52 }
53 
/*
 * Forward declarations: pciehp_request_irq() below refers to these
 * handlers before they are defined.
 */
static irqreturn_t pciehp_isr(int irq, void *dev_id);
static irqreturn_t pciehp_ist(int irq, void *dev_id);
static int pciehp_poll(void *data);
57 
58 static inline int pciehp_request_irq(struct controller *ctrl)
59 {
60 	int retval, irq = ctrl->pcie->irq;
61 
62 	if (pciehp_poll_mode) {
63 		ctrl->poll_thread = kthread_run(&pciehp_poll, ctrl,
64 						"pciehp_poll-%s",
65 						slot_name(ctrl));
66 		return PTR_ERR_OR_ZERO(ctrl->poll_thread);
67 	}
68 
69 	/* Installs the interrupt handler */
70 	retval = request_threaded_irq(irq, pciehp_isr, pciehp_ist,
71 				      IRQF_SHARED, "pciehp", ctrl);
72 	if (retval)
73 		ctrl_err(ctrl, "Cannot get irq %d for the hotplug controller\n",
74 			 irq);
75 	return retval;
76 }
77 
78 static inline void pciehp_free_irq(struct controller *ctrl)
79 {
80 	if (pciehp_poll_mode)
81 		kthread_stop(ctrl->poll_thread);
82 	else
83 		free_irq(ctrl->pcie->irq, ctrl);
84 }
85 
86 static int pcie_poll_cmd(struct controller *ctrl, int timeout)
87 {
88 	struct pci_dev *pdev = ctrl_dev(ctrl);
89 	u16 slot_status;
90 
91 	do {
92 		pcie_capability_read_word(pdev, PCI_EXP_SLTSTA, &slot_status);
93 		if (PCI_POSSIBLE_ERROR(slot_status)) {
94 			ctrl_info(ctrl, "%s: no response from device\n",
95 				  __func__);
96 			return 0;
97 		}
98 
99 		if (slot_status & PCI_EXP_SLTSTA_CC) {
100 			pcie_capability_write_word(pdev, PCI_EXP_SLTSTA,
101 						   PCI_EXP_SLTSTA_CC);
102 			ctrl->cmd_busy = 0;
103 			smp_mb();
104 			return 1;
105 		}
106 		msleep(10);
107 		timeout -= 10;
108 	} while (timeout >= 0);
109 	return 0;	/* timeout */
110 }
111 
/*
 * Wait for the most recently issued Slot Control command to complete.
 *
 * Returns immediately if the controller does not signal command completion
 * or if no command is marked busy.  Otherwise waits until at most 1000 msec
 * (2500 msec in poll mode) after ctrl->cmd_started, either by sleeping on
 * ctrl->queue or by polling Slot Status, and logs a message if the command
 * did not complete within that time.
 */
static void pcie_wait_cmd(struct controller *ctrl)
{
	unsigned int msecs = pciehp_poll_mode ? 2500 : 1000;
	unsigned long duration = msecs_to_jiffies(msecs);
	unsigned long cmd_timeout = ctrl->cmd_started + duration;
	unsigned long now, timeout;
	int rc;

	/*
	 * If the controller does not generate notifications for command
	 * completions, we never need to wait between writes.
	 */
	if (NO_CMD_CMPL(ctrl))
		return;

	if (!ctrl->cmd_busy)
		return;

	/*
	 * Even if the command has already timed out, we want to call
	 * pcie_poll_cmd() so it can clear PCI_EXP_SLTSTA_CC.
	 */
	now = jiffies;
	if (time_before_eq(cmd_timeout, now))
		timeout = 1;
	else
		timeout = cmd_timeout - now;

	/*
	 * Sleep on the wait queue only if the cached Slot Control value has
	 * both the hotplug and command-completed interrupt enables set;
	 * otherwise poll Slot Status.
	 */
	if (ctrl->slot_ctrl & PCI_EXP_SLTCTL_HPIE &&
	    ctrl->slot_ctrl & PCI_EXP_SLTCTL_CCIE)
		rc = wait_event_timeout(ctrl->queue, !ctrl->cmd_busy, timeout);
	else
		rc = pcie_poll_cmd(ctrl, jiffies_to_msecs(timeout));

	/* rc == 0 means the wait or poll ran out of time */
	if (!rc)
		ctrl_info(ctrl, "Timeout on hotplug command %#06x (issued %u msec ago)\n",
			  ctrl->slot_ctrl,
			  jiffies_to_msecs(jiffies - ctrl->cmd_started));
}
151 
/*
 * Slot Control "Control" fields (power controller, power indicator,
 * attention indicator, electromechanical interlock).  pcie_do_write_cmd()
 * compares these bits before and after a write on ports flagged with
 * broken_cmd_compl.
 */
#define CC_ERRATUM_MASK		(PCI_EXP_SLTCTL_PCC |	\
				 PCI_EXP_SLTCTL_PIC |	\
				 PCI_EXP_SLTCTL_AIC |	\
				 PCI_EXP_SLTCTL_EIC)
156 
/*
 * Read-modify-write the Slot Control register while holding ctrl->ctrl_lock.
 *
 * @ctrl: controller to which the command is issued
 * @cmd:  new values for the bits selected by @mask
 * @mask: bits of Slot Control to replace; all other bits are preserved
 * @wait: if true, wait for the command to complete before returning
 *
 * Nothing is written if the register read indicates the device is not
 * responding.
 */
static void pcie_do_write_cmd(struct controller *ctrl, u16 cmd,
			      u16 mask, bool wait)
{
	struct pci_dev *pdev = ctrl_dev(ctrl);
	u16 slot_ctrl_orig, slot_ctrl;

	mutex_lock(&ctrl->ctrl_lock);

	/*
	 * Always wait for any previous command that might still be in progress
	 */
	pcie_wait_cmd(ctrl);

	pcie_capability_read_word(pdev, PCI_EXP_SLTCTL, &slot_ctrl);
	if (PCI_POSSIBLE_ERROR(slot_ctrl)) {
		ctrl_info(ctrl, "%s: no response from device\n", __func__);
		goto out;
	}

	slot_ctrl_orig = slot_ctrl;
	slot_ctrl &= ~mask;
	slot_ctrl |= (cmd & mask);
	/* Mark the command busy before it is written to the register */
	ctrl->cmd_busy = 1;
	smp_mb();
	/* Cache the value written; pcie_wait_cmd() and pciehp_isr() read it */
	ctrl->slot_ctrl = slot_ctrl;
	pcie_capability_write_word(pdev, PCI_EXP_SLTCTL, slot_ctrl);
	/* Timestamp used by pcie_wait_cmd() to compute the deadline */
	ctrl->cmd_started = jiffies;

	/*
	 * Controllers with the Intel CF118 and similar errata advertise
	 * Command Completed support, but they only set Command Completed
	 * if we change the "Control" bits for power, power indicator,
	 * attention indicator, or interlock.  If we only change the
	 * "Enable" bits, they never set the Command Completed bit.
	 */
	if (pdev->broken_cmd_compl &&
	    (slot_ctrl_orig & CC_ERRATUM_MASK) == (slot_ctrl & CC_ERRATUM_MASK))
		ctrl->cmd_busy = 0;

	/*
	 * Optionally wait for the hardware to be ready for a new command,
	 * indicating completion of the above issued command.
	 */
	if (wait)
		pcie_wait_cmd(ctrl);

out:
	mutex_unlock(&ctrl->ctrl_lock);
}
206 
207 /**
208  * pcie_write_cmd - Issue controller command
209  * @ctrl: controller to which the command is issued
210  * @cmd:  command value written to slot control register
211  * @mask: bitmask of slot control register to be modified
212  */
213 static void pcie_write_cmd(struct controller *ctrl, u16 cmd, u16 mask)
214 {
215 	pcie_do_write_cmd(ctrl, cmd, mask, true);
216 }
217 
218 /* Same as above without waiting for the hardware to latch */
219 static void pcie_write_cmd_nowait(struct controller *ctrl, u16 cmd, u16 mask)
220 {
221 	pcie_do_write_cmd(ctrl, cmd, mask, false);
222 }
223 
224 /**
225  * pciehp_check_link_active() - Is the link active
226  * @ctrl: PCIe hotplug controller
227  *
228  * Check whether the downstream link is currently active. Note it is
229  * possible that the card is removed immediately after this so the
230  * caller may need to take it into account.
231  *
232  * If the hotplug controller itself is not available anymore returns
233  * %-ENODEV.
234  */
235 int pciehp_check_link_active(struct controller *ctrl)
236 {
237 	struct pci_dev *pdev = ctrl_dev(ctrl);
238 	u16 lnk_status;
239 	int ret;
240 
241 	ret = pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &lnk_status);
242 	if (ret == PCIBIOS_DEVICE_NOT_FOUND || PCI_POSSIBLE_ERROR(lnk_status))
243 		return -ENODEV;
244 
245 	ret = !!(lnk_status & PCI_EXP_LNKSTA_DLLLA);
246 	ctrl_dbg(ctrl, "%s: lnk_status = %x\n", __func__, lnk_status);
247 
248 	return ret;
249 }
250 
251 static bool pci_bus_check_dev(struct pci_bus *bus, int devfn)
252 {
253 	u32 l;
254 	int count = 0;
255 	int delay = 1000, step = 20;
256 	bool found = false;
257 
258 	do {
259 		found = pci_bus_read_dev_vendor_id(bus, devfn, &l, 0);
260 		count++;
261 
262 		if (found)
263 			break;
264 
265 		msleep(step);
266 		delay -= step;
267 	} while (delay > 0);
268 
269 	if (count > 1)
270 		pr_debug("pci %04x:%02x:%02x.%d id reading try %d times with interval %d ms to get %08x\n",
271 			pci_domain_nr(bus), bus->number, PCI_SLOT(devfn),
272 			PCI_FUNC(devfn), count, step, l);
273 
274 	return found;
275 }
276 
277 static void pcie_wait_for_presence(struct pci_dev *pdev)
278 {
279 	int timeout = 1250;
280 	u16 slot_status;
281 
282 	do {
283 		pcie_capability_read_word(pdev, PCI_EXP_SLTSTA, &slot_status);
284 		if (slot_status & PCI_EXP_SLTSTA_PDS)
285 			return;
286 		msleep(10);
287 		timeout -= 10;
288 	} while (timeout > 0);
289 }
290 
291 int pciehp_check_link_status(struct controller *ctrl)
292 {
293 	struct pci_dev *pdev = ctrl_dev(ctrl);
294 	bool found;
295 	u16 lnk_status, linksta2;
296 
297 	if (!pcie_wait_for_link(pdev, true)) {
298 		ctrl_info(ctrl, "Slot(%s): No link\n", slot_name(ctrl));
299 		return -1;
300 	}
301 
302 	if (ctrl->inband_presence_disabled)
303 		pcie_wait_for_presence(pdev);
304 
305 	found = pci_bus_check_dev(ctrl->pcie->port->subordinate,
306 					PCI_DEVFN(0, 0));
307 
308 	/* ignore link or presence changes up to this point */
309 	if (found)
310 		atomic_and(~(PCI_EXP_SLTSTA_DLLSC | PCI_EXP_SLTSTA_PDC),
311 			   &ctrl->pending_events);
312 
313 	pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &lnk_status);
314 	ctrl_dbg(ctrl, "%s: lnk_status = %x\n", __func__, lnk_status);
315 	if ((lnk_status & PCI_EXP_LNKSTA_LT) ||
316 	    !(lnk_status & PCI_EXP_LNKSTA_NLW)) {
317 		ctrl_info(ctrl, "Slot(%s): Cannot train link: status %#06x\n",
318 			  slot_name(ctrl), lnk_status);
319 		return -1;
320 	}
321 
322 	pcie_capability_read_word(pdev, PCI_EXP_LNKSTA2, &linksta2);
323 	__pcie_update_link_speed(ctrl->pcie->port->subordinate, PCIE_HOTPLUG,
324 				 lnk_status, linksta2);
325 
326 	if (!found) {
327 		ctrl_info(ctrl, "Slot(%s): No device found\n",
328 			  slot_name(ctrl));
329 		return -1;
330 	}
331 
332 	return 0;
333 }
334 
335 static int __pciehp_link_set(struct controller *ctrl, bool enable)
336 {
337 	struct pci_dev *pdev = ctrl_dev(ctrl);
338 
339 	pcie_capability_clear_and_set_word(pdev, PCI_EXP_LNKCTL,
340 					   PCI_EXP_LNKCTL_LD,
341 					   enable ? 0 : PCI_EXP_LNKCTL_LD);
342 
343 	return 0;
344 }
345 
346 static int pciehp_link_enable(struct controller *ctrl)
347 {
348 	return __pciehp_link_set(ctrl, true);
349 }
350 
351 int pciehp_get_raw_indicator_status(struct hotplug_slot *hotplug_slot,
352 				    u8 *status)
353 {
354 	struct controller *ctrl = to_ctrl(hotplug_slot);
355 	struct pci_dev *pdev = ctrl_dev(ctrl);
356 	u16 slot_ctrl;
357 
358 	pci_config_pm_runtime_get(pdev);
359 	pcie_capability_read_word(pdev, PCI_EXP_SLTCTL, &slot_ctrl);
360 	pci_config_pm_runtime_put(pdev);
361 	*status = (slot_ctrl & (PCI_EXP_SLTCTL_AIC | PCI_EXP_SLTCTL_PIC)) >> 6;
362 	return 0;
363 }
364 
365 int pciehp_get_attention_status(struct hotplug_slot *hotplug_slot, u8 *status)
366 {
367 	struct controller *ctrl = to_ctrl(hotplug_slot);
368 	struct pci_dev *pdev = ctrl_dev(ctrl);
369 	u16 slot_ctrl;
370 
371 	pci_config_pm_runtime_get(pdev);
372 	pcie_capability_read_word(pdev, PCI_EXP_SLTCTL, &slot_ctrl);
373 	pci_config_pm_runtime_put(pdev);
374 	ctrl_dbg(ctrl, "%s: SLOTCTRL %x, value read %x\n", __func__,
375 		 pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL, slot_ctrl);
376 
377 	switch (slot_ctrl & PCI_EXP_SLTCTL_AIC) {
378 	case PCI_EXP_SLTCTL_ATTN_IND_ON:
379 		*status = 1;	/* On */
380 		break;
381 	case PCI_EXP_SLTCTL_ATTN_IND_BLINK:
382 		*status = 2;	/* Blink */
383 		break;
384 	case PCI_EXP_SLTCTL_ATTN_IND_OFF:
385 		*status = 0;	/* Off */
386 		break;
387 	default:
388 		*status = 0xFF;
389 		break;
390 	}
391 
392 	return 0;
393 }
394 
395 void pciehp_get_power_status(struct controller *ctrl, u8 *status)
396 {
397 	struct pci_dev *pdev = ctrl_dev(ctrl);
398 	u16 slot_ctrl;
399 
400 	pcie_capability_read_word(pdev, PCI_EXP_SLTCTL, &slot_ctrl);
401 	ctrl_dbg(ctrl, "%s: SLOTCTRL %x value read %x\n", __func__,
402 		 pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL, slot_ctrl);
403 
404 	switch (slot_ctrl & PCI_EXP_SLTCTL_PCC) {
405 	case PCI_EXP_SLTCTL_PWR_ON:
406 		*status = 1;	/* On */
407 		break;
408 	case PCI_EXP_SLTCTL_PWR_OFF:
409 		*status = 0;	/* Off */
410 		break;
411 	default:
412 		*status = 0xFF;
413 		break;
414 	}
415 }
416 
417 void pciehp_get_latch_status(struct controller *ctrl, u8 *status)
418 {
419 	struct pci_dev *pdev = ctrl_dev(ctrl);
420 	u16 slot_status;
421 
422 	pcie_capability_read_word(pdev, PCI_EXP_SLTSTA, &slot_status);
423 	*status = !!(slot_status & PCI_EXP_SLTSTA_MRLSS);
424 }
425 
426 /**
427  * pciehp_card_present() - Is the card present
428  * @ctrl: PCIe hotplug controller
429  *
430  * Function checks whether the card is currently present in the slot and
431  * in that case returns true. Note it is possible that the card is
432  * removed immediately after the check so the caller may need to take
433  * this into account.
434  *
435  * If the hotplug controller itself is not available anymore returns
436  * %-ENODEV.
437  */
438 int pciehp_card_present(struct controller *ctrl)
439 {
440 	struct pci_dev *pdev = ctrl_dev(ctrl);
441 	u16 slot_status;
442 	int ret;
443 
444 	ret = pcie_capability_read_word(pdev, PCI_EXP_SLTSTA, &slot_status);
445 	if (ret == PCIBIOS_DEVICE_NOT_FOUND || PCI_POSSIBLE_ERROR(slot_status))
446 		return -ENODEV;
447 
448 	return !!(slot_status & PCI_EXP_SLTSTA_PDS);
449 }
450 
/**
 * pciehp_card_present_or_link_active() - whether given slot is occupied
 * @ctrl: PCIe hotplug controller
 *
 * Unlike pciehp_card_present(), which looks only at the Presence Detect
 * State bit, this helper also treats a set Link Active bit as "occupied".
 * This is a concession to broken hotplug ports which hardwire Presence
 * Detect State to zero, such as Wilocity's [1ae9:0200].
 *
 * Returns: %1 if the slot is occupied and %0 if it is not. If the hotplug
 *	    port is not present anymore returns %-ENODEV.
 */
int pciehp_card_present_or_link_active(struct controller *ctrl)
{
	int present = pciehp_card_present(ctrl);

	/* Either "present" or an error: no need to consult the link */
	return present ? present : pciehp_check_link_active(ctrl);
}
473 
474 int pciehp_query_power_fault(struct controller *ctrl)
475 {
476 	struct pci_dev *pdev = ctrl_dev(ctrl);
477 	u16 slot_status;
478 
479 	pcie_capability_read_word(pdev, PCI_EXP_SLTSTA, &slot_status);
480 	return !!(slot_status & PCI_EXP_SLTSTA_PFD);
481 }
482 
483 int pciehp_set_raw_indicator_status(struct hotplug_slot *hotplug_slot,
484 				    u8 status)
485 {
486 	struct controller *ctrl = to_ctrl(hotplug_slot);
487 	struct pci_dev *pdev = ctrl_dev(ctrl);
488 
489 	pci_config_pm_runtime_get(pdev);
490 
491 	/* Attention and Power Indicator Control bits are supported */
492 	pcie_write_cmd_nowait(ctrl, FIELD_PREP(PCI_EXP_SLTCTL_AIC | PCI_EXP_SLTCTL_PIC, status),
493 			      PCI_EXP_SLTCTL_AIC | PCI_EXP_SLTCTL_PIC);
494 	pci_config_pm_runtime_put(pdev);
495 	return 0;
496 }
497 
498 /**
499  * pciehp_set_indicators() - set attention indicator, power indicator, or both
500  * @ctrl: PCIe hotplug controller
501  * @pwr: one of:
502  *	PCI_EXP_SLTCTL_PWR_IND_ON
503  *	PCI_EXP_SLTCTL_PWR_IND_BLINK
504  *	PCI_EXP_SLTCTL_PWR_IND_OFF
505  * @attn: one of:
506  *	PCI_EXP_SLTCTL_ATTN_IND_ON
507  *	PCI_EXP_SLTCTL_ATTN_IND_BLINK
508  *	PCI_EXP_SLTCTL_ATTN_IND_OFF
509  *
510  * Either @pwr or @attn can also be INDICATOR_NOOP to leave that indicator
511  * unchanged.
512  */
513 void pciehp_set_indicators(struct controller *ctrl, int pwr, int attn)
514 {
515 	u16 cmd = 0, mask = 0;
516 
517 	if (PWR_LED(ctrl) && pwr != INDICATOR_NOOP) {
518 		cmd |= (pwr & PCI_EXP_SLTCTL_PIC);
519 		mask |= PCI_EXP_SLTCTL_PIC;
520 	}
521 
522 	if (ATTN_LED(ctrl) && attn != INDICATOR_NOOP) {
523 		cmd |= (attn & PCI_EXP_SLTCTL_AIC);
524 		mask |= PCI_EXP_SLTCTL_AIC;
525 	}
526 
527 	if (cmd) {
528 		pcie_write_cmd_nowait(ctrl, cmd, mask);
529 		ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__,
530 			 pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL, cmd);
531 	}
532 }
533 
534 int pciehp_power_on_slot(struct controller *ctrl)
535 {
536 	struct pci_dev *pdev = ctrl_dev(ctrl);
537 	u16 slot_status;
538 	int retval;
539 
540 	/* Clear power-fault bit from previous power failures */
541 	pcie_capability_read_word(pdev, PCI_EXP_SLTSTA, &slot_status);
542 	if (slot_status & PCI_EXP_SLTSTA_PFD)
543 		pcie_capability_write_word(pdev, PCI_EXP_SLTSTA,
544 					   PCI_EXP_SLTSTA_PFD);
545 	ctrl->power_fault_detected = 0;
546 
547 	pcie_write_cmd(ctrl, PCI_EXP_SLTCTL_PWR_ON, PCI_EXP_SLTCTL_PCC);
548 	ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__,
549 		 pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL,
550 		 PCI_EXP_SLTCTL_PWR_ON);
551 
552 	retval = pciehp_link_enable(ctrl);
553 	if (retval)
554 		ctrl_err(ctrl, "%s: Can not enable the link!\n", __func__);
555 
556 	return retval;
557 }
558 
559 void pciehp_power_off_slot(struct controller *ctrl)
560 {
561 	pcie_write_cmd(ctrl, PCI_EXP_SLTCTL_PWR_OFF, PCI_EXP_SLTCTL_PCC);
562 	ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__,
563 		 pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL,
564 		 PCI_EXP_SLTCTL_PWR_OFF);
565 }
566 
567 bool pciehp_device_replaced(struct controller *ctrl)
568 {
569 	struct pci_dev *pdev __free(pci_dev_put) = NULL;
570 	u32 reg;
571 
572 	if (pci_dev_is_disconnected(ctrl->pcie->port))
573 		return false;
574 
575 	pdev = pci_get_slot(ctrl->pcie->port->subordinate, PCI_DEVFN(0, 0));
576 	if (!pdev)
577 		return true;
578 
579 	if (pci_read_config_dword(pdev, PCI_VENDOR_ID, &reg) ||
580 	    reg != (pdev->vendor | (pdev->device << 16)) ||
581 	    pci_read_config_dword(pdev, PCI_CLASS_REVISION, &reg) ||
582 	    reg != (pdev->revision | (pdev->class << 8)))
583 		return true;
584 
585 	if (pdev->hdr_type == PCI_HEADER_TYPE_NORMAL &&
586 	    (pci_read_config_dword(pdev, PCI_SUBSYSTEM_VENDOR_ID, &reg) ||
587 	     reg != (pdev->subsystem_vendor | (pdev->subsystem_device << 16))))
588 		return true;
589 
590 	if (pci_get_dsn(pdev) != ctrl->dsn)
591 		return true;
592 
593 	return false;
594 }
595 
/*
 * Discard @ignored_events from the controller's pending events and, in
 * poll mode, clear them in Slot Status.  Afterwards re-request the same
 * events if the link is not active or the device appears to have been
 * replaced.
 */
static void pciehp_ignore_link_change(struct controller *ctrl,
				      struct pci_dev *pdev, int irq,
				      u16 ignored_events)
{
	/*
	 * Ignore link changes which occurred while waiting for DPC recovery.
	 * Could be several if DPC triggered multiple times consecutively.
	 * Also ignore link changes caused by Secondary Bus Reset, etc.
	 */
	synchronize_hardirq(irq);
	atomic_and(~ignored_events, &ctrl->pending_events);
	if (pciehp_poll_mode)
		pcie_capability_write_word(pdev, PCI_EXP_SLTSTA,
					   ignored_events);
	ctrl_info(ctrl, "Slot(%s): Link Down/Up ignored\n", slot_name(ctrl));

	/*
	 * If the link is unexpectedly down after successful recovery,
	 * the corresponding link change may have been ignored above.
	 * Synthesize it to ensure that it is acted on.
	 */
	down_read_nested(&ctrl->reset_lock, ctrl->depth);
	if (!pciehp_check_link_active(ctrl) || pciehp_device_replaced(ctrl))
		pciehp_request(ctrl, ignored_events);
	up_read(&ctrl->reset_lock);
}
622 
/*
 * Hard interrupt handler; also called directly by pciehp_ist() and
 * pciehp_poll().
 *
 * Reads and clears the event bits in Slot Status, handles Command Completed
 * by waking waiters on ctrl->queue, and stores the remaining events in
 * ctrl->pending_events for the IRQ thread.
 *
 * Returns IRQ_NONE if there is nothing to handle or the device cannot be
 * read, IRQ_HANDLED if everything was dealt with here, and IRQ_WAKE_THREAD
 * if the IRQ thread has work to do.
 */
static irqreturn_t pciehp_isr(int irq, void *dev_id)
{
	struct controller *ctrl = (struct controller *)dev_id;
	struct pci_dev *pdev = ctrl_dev(ctrl);
	struct device *parent = pdev->dev.parent;
	u16 status, events = 0;

	/*
	 * Interrupts only occur in D3hot or shallower and only if enabled
	 * in the Slot Control register (PCIe r4.0, sec 6.7.3.4).
	 */
	if (pdev->current_state == PCI_D3cold ||
	    (!(ctrl->slot_ctrl & PCI_EXP_SLTCTL_HPIE) && !pciehp_poll_mode))
		return IRQ_NONE;

	/*
	 * Keep the port accessible by holding a runtime PM ref on its parent.
	 * Defer resume of the parent to the IRQ thread if it's suspended.
	 * Mask the interrupt until then.
	 */
	if (parent) {
		pm_runtime_get_noresume(parent);
		if (!pm_runtime_active(parent)) {
			pm_runtime_put(parent);
			disable_irq_nosync(irq);
			/* Tell pciehp_ist() to call this handler again */
			atomic_or(RERUN_ISR, &ctrl->pending_events);
			return IRQ_WAKE_THREAD;
		}
	}

read_status:
	pcie_capability_read_word(pdev, PCI_EXP_SLTSTA, &status);
	if (PCI_POSSIBLE_ERROR(status)) {
		ctrl_info(ctrl, "%s: no response from device\n", __func__);
		if (parent)
			pm_runtime_put(parent);
		return IRQ_NONE;
	}

	/*
	 * Slot Status contains plain status bits as well as event
	 * notification bits; right now we only want the event bits.
	 */
	status &= PCI_EXP_SLTSTA_ABP | PCI_EXP_SLTSTA_PFD |
		  PCI_EXP_SLTSTA_PDC | PCI_EXP_SLTSTA_CC |
		  PCI_EXP_SLTSTA_DLLSC;

	/*
	 * If we've already reported a power fault, don't report it again
	 * until we've done something to handle it.
	 */
	if (ctrl->power_fault_detected)
		status &= ~PCI_EXP_SLTSTA_PFD;
	else if (status & PCI_EXP_SLTSTA_PFD)
		ctrl->power_fault_detected = true;

	/* Accumulate events across iterations of the read_status loop */
	events |= status;
	if (!events) {
		if (parent)
			pm_runtime_put(parent);
		return IRQ_NONE;
	}

	if (status) {
		/* Clear the event bits just observed */
		pcie_capability_write_word(pdev, PCI_EXP_SLTSTA, status);

		/*
		 * In MSI mode, all event bits must be zero before the port
		 * will send a new interrupt (PCIe Base Spec r5.0 sec 6.7.3.4).
		 * So re-read the Slot Status register in case a bit was set
		 * between read and write.
		 */
		if (pci_dev_msi_enabled(pdev) && !pciehp_poll_mode)
			goto read_status;
	}

	ctrl_dbg(ctrl, "pending interrupts %#06x from Slot Status\n", events);
	if (parent)
		pm_runtime_put(parent);

	/*
	 * Command Completed notifications are not deferred to the
	 * IRQ thread because it may be waiting for their arrival.
	 */
	if (events & PCI_EXP_SLTSTA_CC) {
		ctrl->cmd_busy = 0;
		smp_mb();
		wake_up(&ctrl->queue);

		/* Nothing besides Command Completed: no need for the thread */
		if (events == PCI_EXP_SLTSTA_CC)
			return IRQ_HANDLED;

		events &= ~PCI_EXP_SLTSTA_CC;
	}

	if (pdev->ignore_hotplug) {
		ctrl_dbg(ctrl, "ignoring hotplug event %#06x\n", events);
		return IRQ_HANDLED;
	}

	/* Save pending events for consumption by IRQ thread. */
	atomic_or(events, &ctrl->pending_events);
	return IRQ_WAKE_THREAD;
}
727 
/*
 * Threaded interrupt handler; also called directly by pciehp_poll().
 *
 * Consumes the events accumulated in ctrl->pending_events and dispatches
 * them: attention button presses, power faults, disable requests and
 * presence or link changes.  ctrl->ist_running is set for the duration of
 * the call and waiters on ctrl->requester are woken on exit.
 *
 * Returns IRQ_HANDLED if events were processed, IRQ_NONE if none were
 * pending, or the result of the re-run hard handler if that did not ask
 * for the thread.
 */
static irqreturn_t pciehp_ist(int irq, void *dev_id)
{
	struct controller *ctrl = (struct controller *)dev_id;
	struct pci_dev *pdev = ctrl_dev(ctrl);
	irqreturn_t ret;
	u32 events;

	ctrl->ist_running = true;
	pci_config_pm_runtime_get(pdev);

	/* rerun pciehp_isr() if the port was inaccessible on interrupt */
	if (atomic_fetch_and(~RERUN_ISR, &ctrl->pending_events) & RERUN_ISR) {
		ret = pciehp_isr(irq, dev_id);
		/* Balances the disable_irq_nosync() done in pciehp_isr() */
		enable_irq(irq);
		if (ret != IRQ_WAKE_THREAD)
			goto out;
	}

	synchronize_hardirq(irq);
	/* Atomically fetch and reset the pending events */
	events = atomic_xchg(&ctrl->pending_events, 0);
	if (!events) {
		ret = IRQ_NONE;
		goto out;
	}

	/* Check Attention Button Pressed */
	if (events & PCI_EXP_SLTSTA_ABP)
		pciehp_handle_button_press(ctrl);

	/* Check Power Fault Detected */
	if (events & PCI_EXP_SLTSTA_PFD) {
		ctrl_err(ctrl, "Slot(%s): Power fault\n", slot_name(ctrl));
		pciehp_set_indicators(ctrl, PCI_EXP_SLTCTL_PWR_IND_OFF,
				      PCI_EXP_SLTCTL_ATTN_IND_ON);
	}

	/*
	 * Ignore Link Down/Up events caused by Downstream Port Containment
	 * if recovery succeeded, or caused by Secondary Bus Reset,
	 * suspend to D3cold, firmware update, FPGA reconfiguration, etc.
	 */
	if ((events & (PCI_EXP_SLTSTA_PDC | PCI_EXP_SLTSTA_DLLSC)) &&
	    (pci_dpc_recovered(pdev) || pci_hp_spurious_link_change(pdev)) &&
	    ctrl->state == ON_STATE) {
		u16 ignored_events = PCI_EXP_SLTSTA_DLLSC;

		/*
		 * Presence changes are only ignored when in-band presence
		 * detect is in use.
		 */
		if (!ctrl->inband_presence_disabled)
			ignored_events |= PCI_EXP_SLTSTA_PDC;

		events &= ~ignored_events;
		pciehp_ignore_link_change(ctrl, pdev, irq, ignored_events);
	}

	/*
	 * Disable requests have higher priority than Presence Detect Changed
	 * or Data Link Layer State Changed events.
	 */
	down_read_nested(&ctrl->reset_lock, ctrl->depth);
	if (events & DISABLE_SLOT)
		pciehp_handle_disable_request(ctrl);
	else if (events & (PCI_EXP_SLTSTA_PDC | PCI_EXP_SLTSTA_DLLSC))
		pciehp_handle_presence_or_link_change(ctrl, events);
	up_read(&ctrl->reset_lock);

	ret = IRQ_HANDLED;
out:
	pci_config_pm_runtime_put(pdev);
	ctrl->ist_running = false;
	wake_up(&ctrl->requester);
	return ret;
}
799 
800 static int pciehp_poll(void *data)
801 {
802 	struct controller *ctrl = data;
803 
804 	schedule_timeout_idle(10 * HZ); /* start with 10 sec delay */
805 
806 	while (!kthread_should_stop()) {
807 		/* poll for interrupt events or user requests */
808 		while (pciehp_isr(IRQ_NOTCONNECTED, ctrl) == IRQ_WAKE_THREAD ||
809 		       atomic_read(&ctrl->pending_events))
810 			pciehp_ist(IRQ_NOTCONNECTED, ctrl);
811 
812 		if (pciehp_poll_time <= 0 || pciehp_poll_time > 60)
813 			pciehp_poll_time = 2; /* clamp to sane value */
814 
815 		schedule_timeout_idle(pciehp_poll_time * HZ);
816 	}
817 
818 	return 0;
819 }
820 
821 static void pcie_enable_notification(struct controller *ctrl)
822 {
823 	u16 cmd, mask;
824 
825 	/*
826 	 * TBD: Power fault detected software notification support.
827 	 *
828 	 * Power fault detected software notification is not enabled
829 	 * now, because it caused power fault detected interrupt storm
830 	 * on some machines. On those machines, power fault detected
831 	 * bit in the slot status register was set again immediately
832 	 * when it is cleared in the interrupt service routine, and
833 	 * next power fault detected interrupt was notified again.
834 	 */
835 
836 	/*
837 	 * Always enable link events: thus link-up and link-down shall
838 	 * always be treated as hotplug and unplug respectively. Enable
839 	 * presence detect only if Attention Button is not present.
840 	 */
841 	cmd = PCI_EXP_SLTCTL_DLLSCE;
842 	if (ATTN_BUTTN(ctrl))
843 		cmd |= PCI_EXP_SLTCTL_ABPE;
844 	else
845 		cmd |= PCI_EXP_SLTCTL_PDCE;
846 	if (!pciehp_poll_mode)
847 		cmd |= PCI_EXP_SLTCTL_HPIE;
848 	if (!pciehp_poll_mode && !NO_CMD_CMPL(ctrl))
849 		cmd |= PCI_EXP_SLTCTL_CCIE;
850 
851 	mask = (PCI_EXP_SLTCTL_PDCE | PCI_EXP_SLTCTL_ABPE |
852 		PCI_EXP_SLTCTL_PFDE |
853 		PCI_EXP_SLTCTL_HPIE | PCI_EXP_SLTCTL_CCIE |
854 		PCI_EXP_SLTCTL_DLLSCE);
855 
856 	pcie_write_cmd_nowait(ctrl, cmd, mask);
857 	ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__,
858 		 pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL, cmd);
859 }
860 
861 static void pcie_disable_notification(struct controller *ctrl)
862 {
863 	u16 mask;
864 
865 	mask = (PCI_EXP_SLTCTL_PDCE | PCI_EXP_SLTCTL_ABPE |
866 		PCI_EXP_SLTCTL_MRLSCE | PCI_EXP_SLTCTL_PFDE |
867 		PCI_EXP_SLTCTL_HPIE | PCI_EXP_SLTCTL_CCIE |
868 		PCI_EXP_SLTCTL_DLLSCE);
869 	pcie_write_cmd(ctrl, 0, mask);
870 	ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__,
871 		 pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL, 0);
872 }
873 
874 void pcie_clear_hotplug_events(struct controller *ctrl)
875 {
876 	pcie_capability_write_word(ctrl_dev(ctrl), PCI_EXP_SLTSTA,
877 				   PCI_EXP_SLTSTA_PDC | PCI_EXP_SLTSTA_DLLSC);
878 }
879 
880 void pcie_enable_interrupt(struct controller *ctrl)
881 {
882 	u16 mask;
883 
884 	mask = PCI_EXP_SLTCTL_DLLSCE;
885 	if (!pciehp_poll_mode)
886 		mask |= PCI_EXP_SLTCTL_HPIE;
887 	pcie_write_cmd(ctrl, mask, mask);
888 }
889 
890 void pcie_disable_interrupt(struct controller *ctrl)
891 {
892 	u16 mask;
893 
894 	/*
895 	 * Mask hot-plug interrupt to prevent it triggering immediately
896 	 * when the link goes inactive (we still get PME when any of the
897 	 * enabled events is detected). Same goes with Link Layer State
898 	 * changed event which generates PME immediately when the link goes
899 	 * inactive so mask it as well.
900 	 */
901 	mask = PCI_EXP_SLTCTL_HPIE | PCI_EXP_SLTCTL_DLLSCE;
902 	pcie_write_cmd(ctrl, 0, mask);
903 }
904 
905 /**
906  * pciehp_slot_reset() - ignore link event caused by error-induced hot reset
907  * @dev: PCI Express port service device
908  *
909  * Called from pcie_portdrv_slot_reset() after AER or DPC initiated a reset
910  * further up in the hierarchy to recover from an error.  The reset was
911  * propagated down to this hotplug port.  Ignore the resulting link flap.
912  * If the link failed to retrain successfully, synthesize the ignored event.
913  * Surprise removal during reset is detected through Presence Detect Changed.
914  */
915 int pciehp_slot_reset(struct pcie_device *dev)
916 {
917 	struct controller *ctrl = get_service_data(dev);
918 
919 	if (ctrl->state != ON_STATE)
920 		return 0;
921 
922 	pcie_capability_write_word(dev->port, PCI_EXP_SLTSTA,
923 				   PCI_EXP_SLTSTA_DLLSC);
924 
925 	if (!pciehp_check_link_active(ctrl))
926 		pciehp_request(ctrl, PCI_EXP_SLTSTA_DLLSC);
927 
928 	return 0;
929 }
930 
931 /*
932  * pciehp has a 1:1 bus:slot relationship so we ultimately want a secondary
933  * bus reset of the bridge, but at the same time we want to ensure that it is
934  * not seen as a hot-unplug, followed by the hot-plug of the device. Thus,
935  * disable link state notification and presence detection change notification
936  * momentarily, if we see that they could interfere. Also, clear any spurious
937  * events after.
938  */
939 int pciehp_reset_slot(struct hotplug_slot *hotplug_slot, bool probe)
940 {
941 	struct controller *ctrl = to_ctrl(hotplug_slot);
942 	struct pci_dev *pdev = ctrl_dev(ctrl);
943 	int rc;
944 
945 	if (probe)
946 		return 0;
947 
948 	down_write_nested(&ctrl->reset_lock, ctrl->depth);
949 
950 	pci_hp_ignore_link_change(pdev);
951 
952 	rc = pci_bridge_secondary_bus_reset(ctrl->pcie->port);
953 
954 	pci_hp_unignore_link_change(pdev);
955 
956 	up_write(&ctrl->reset_lock);
957 	return rc;
958 }
959 
960 int pcie_init_notification(struct controller *ctrl)
961 {
962 	if (pciehp_request_irq(ctrl))
963 		return -1;
964 	pcie_enable_notification(ctrl);
965 	ctrl->notification_enabled = 1;
966 	return 0;
967 }
968 
969 void pcie_shutdown_notification(struct controller *ctrl)
970 {
971 	if (ctrl->notification_enabled) {
972 		pcie_disable_notification(ctrl);
973 		pciehp_free_irq(ctrl);
974 		ctrl->notification_enabled = 0;
975 	}
976 }
977 
978 static inline void dbg_ctrl(struct controller *ctrl)
979 {
980 	struct pci_dev *pdev = ctrl->pcie->port;
981 	u16 reg16;
982 
983 	ctrl_dbg(ctrl, "Slot Capabilities      : 0x%08x\n", ctrl->slot_cap);
984 	pcie_capability_read_word(pdev, PCI_EXP_SLTSTA, &reg16);
985 	ctrl_dbg(ctrl, "Slot Status            : 0x%04x\n", reg16);
986 	pcie_capability_read_word(pdev, PCI_EXP_SLTCTL, &reg16);
987 	ctrl_dbg(ctrl, "Slot Control           : 0x%04x\n", reg16);
988 }
989 
/* '+' if any bit of @y is set in @x, '-' otherwise */
#define FLAG(x, y)	(!((x) & (y)) ? '-' : '+')
991 
992 static inline int pcie_hotplug_depth(struct pci_dev *dev)
993 {
994 	struct pci_bus *bus = dev->bus;
995 	int depth = 0;
996 
997 	while (bus->parent) {
998 		bus = bus->parent;
999 		if (bus->self && bus->self->is_pciehp)
1000 			depth++;
1001 	}
1002 
1003 	return depth;
1004 }
1005 
1006 struct controller *pcie_init(struct pcie_device *dev)
1007 {
1008 	struct controller *ctrl;
1009 	u32 slot_cap, slot_cap2;
1010 	u8 poweron;
1011 	struct pci_dev *pdev = dev->port;
1012 	struct pci_bus *subordinate = pdev->subordinate;
1013 
1014 	ctrl = kzalloc_obj(*ctrl, GFP_KERNEL);
1015 	if (!ctrl)
1016 		return NULL;
1017 
1018 	ctrl->pcie = dev;
1019 	ctrl->depth = pcie_hotplug_depth(dev->port);
1020 	pcie_capability_read_dword(pdev, PCI_EXP_SLTCAP, &slot_cap);
1021 
1022 	if (pdev->hotplug_user_indicators)
1023 		slot_cap &= ~(PCI_EXP_SLTCAP_AIP | PCI_EXP_SLTCAP_PIP);
1024 
1025 	/*
1026 	 * We assume no Thunderbolt controllers support Command Complete events,
1027 	 * but some controllers falsely claim they do.
1028 	 */
1029 	if (pdev->is_thunderbolt)
1030 		slot_cap |= PCI_EXP_SLTCAP_NCCS;
1031 
1032 	ctrl->slot_cap = slot_cap;
1033 	mutex_init(&ctrl->ctrl_lock);
1034 	mutex_init(&ctrl->state_lock);
1035 	init_rwsem(&ctrl->reset_lock);
1036 	init_waitqueue_head(&ctrl->requester);
1037 	init_waitqueue_head(&ctrl->queue);
1038 	INIT_DELAYED_WORK(&ctrl->button_work, pciehp_queue_pushbutton_work);
1039 	dbg_ctrl(ctrl);
1040 
1041 	down_read(&pci_bus_sem);
1042 	ctrl->state = list_empty(&subordinate->devices) ? OFF_STATE : ON_STATE;
1043 	up_read(&pci_bus_sem);
1044 
1045 	pcie_capability_read_dword(pdev, PCI_EXP_SLTCAP2, &slot_cap2);
1046 	if (slot_cap2 & PCI_EXP_SLTCAP2_IBPD) {
1047 		pcie_write_cmd_nowait(ctrl, PCI_EXP_SLTCTL_IBPD_DISABLE,
1048 				      PCI_EXP_SLTCTL_IBPD_DISABLE);
1049 		ctrl->inband_presence_disabled = 1;
1050 	}
1051 
1052 	if (dmi_first_match(inband_presence_disabled_dmi_table))
1053 		ctrl->inband_presence_disabled = 1;
1054 
1055 	/* Clear all remaining event bits in Slot Status register. */
1056 	pcie_capability_write_word(pdev, PCI_EXP_SLTSTA,
1057 		PCI_EXP_SLTSTA_ABP | PCI_EXP_SLTSTA_PFD |
1058 		PCI_EXP_SLTSTA_MRLSC | PCI_EXP_SLTSTA_CC |
1059 		PCI_EXP_SLTSTA_DLLSC | PCI_EXP_SLTSTA_PDC);
1060 
1061 	ctrl_info(ctrl, "Slot #%d AttnBtn%c PwrCtrl%c MRL%c AttnInd%c PwrInd%c HotPlug%c Surprise%c Interlock%c NoCompl%c IbPresDis%c LLActRep%c%s\n",
1062 		FIELD_GET(PCI_EXP_SLTCAP_PSN, slot_cap),
1063 		FLAG(slot_cap, PCI_EXP_SLTCAP_ABP),
1064 		FLAG(slot_cap, PCI_EXP_SLTCAP_PCP),
1065 		FLAG(slot_cap, PCI_EXP_SLTCAP_MRLSP),
1066 		FLAG(slot_cap, PCI_EXP_SLTCAP_AIP),
1067 		FLAG(slot_cap, PCI_EXP_SLTCAP_PIP),
1068 		FLAG(slot_cap, PCI_EXP_SLTCAP_HPC),
1069 		FLAG(slot_cap, PCI_EXP_SLTCAP_HPS),
1070 		FLAG(slot_cap, PCI_EXP_SLTCAP_EIP),
1071 		FLAG(slot_cap, PCI_EXP_SLTCAP_NCCS),
1072 		FLAG(slot_cap2, PCI_EXP_SLTCAP2_IBPD),
1073 		FLAG(pdev->link_active_reporting, true),
1074 		pdev->broken_cmd_compl ? " (with Cmd Compl erratum)" : "");
1075 
1076 	/*
1077 	 * If empty slot's power status is on, turn power off.  The IRQ isn't
1078 	 * requested yet, so avoid triggering a notification with this command.
1079 	 */
1080 	if (POWER_CTRL(ctrl)) {
1081 		pciehp_get_power_status(ctrl, &poweron);
1082 		if (!pciehp_card_present_or_link_active(ctrl) && poweron) {
1083 			pcie_disable_notification(ctrl);
1084 			pciehp_power_off_slot(ctrl);
1085 		}
1086 	}
1087 
1088 	pdev = pci_get_slot(subordinate, PCI_DEVFN(0, 0));
1089 	if (pdev)
1090 		ctrl->dsn = pci_get_dsn(pdev);
1091 	pci_dev_put(pdev);
1092 
1093 	return ctrl;
1094 }
1095 
1096 void pciehp_release_ctrl(struct controller *ctrl)
1097 {
1098 	cancel_delayed_work_sync(&ctrl->button_work);
1099 	kfree(ctrl);
1100 }
1101 
1102 static void quirk_cmd_compl(struct pci_dev *pdev)
1103 {
1104 	u32 slot_cap;
1105 
1106 	if (pci_is_pcie(pdev)) {
1107 		pcie_capability_read_dword(pdev, PCI_EXP_SLTCAP, &slot_cap);
1108 		if (slot_cap & PCI_EXP_SLTCAP_HPC &&
1109 		    !(slot_cap & PCI_EXP_SLTCAP_NCCS))
1110 			pdev->broken_cmd_compl = 1;
1111 	}
1112 }
1113 DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, PCI_ANY_ID,
1114 			      PCI_CLASS_BRIDGE_PCI, 8, quirk_cmd_compl);
1115 DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_QCOM, 0x010e,
1116 			      PCI_CLASS_BRIDGE_PCI, 8, quirk_cmd_compl);
1117 DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_QCOM, 0x0110,
1118 			      PCI_CLASS_BRIDGE_PCI, 8, quirk_cmd_compl);
1119 DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_QCOM, 0x0400,
1120 			      PCI_CLASS_BRIDGE_PCI, 8, quirk_cmd_compl);
1121 DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_QCOM, 0x0401,
1122 			      PCI_CLASS_BRIDGE_PCI, 8, quirk_cmd_compl);
1123 DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_HXT, 0x0401,
1124 			      PCI_CLASS_BRIDGE_PCI, 8, quirk_cmd_compl);
1125