xref: /linux/drivers/pci/hotplug/pnv_php.c (revision c4cac4a15c6e7a6f9517a2ddc9dc8d7d0d1aa11c)
1 // SPDX-License-Identifier: GPL-2.0+
2 /*
3  * PCI Hotplug Driver for PowerPC PowerNV platform.
4  *
5  * Copyright Gavin Shan, IBM Corporation 2016.
6  * Copyright (C) 2025 Raptor Engineering, LLC
7  * Copyright (C) 2025 Raptor Computing Systems, LLC
8  */
9 
10 #include <linux/bitfield.h>
11 #include <linux/libfdt.h>
12 #include <linux/module.h>
13 #include <linux/pci.h>
14 #include <linux/delay.h>
15 #include <linux/pci_hotplug.h>
16 #include <linux/of_fdt.h>
17 
18 #include <asm/opal.h>
19 #include <asm/pnv-pci.h>
20 #include <asm/ppc-pci.h>
21 
22 #define DRIVER_VERSION	"0.1"
23 #define DRIVER_AUTHOR	"Gavin Shan, IBM Corporation"
24 #define DRIVER_DESC	"PowerPC PowerNV PCI Hotplug Driver"
25 
/*
 * Emit a warning on behalf of a slot. The message is attributed to the
 * slot's bridge PCI device when the slot has one, and to the device of the
 * slot's bus otherwise.
 */
#define SLOT_WARN(sl, x...) \
	dev_warn((sl)->pdev ? &(sl)->pdev->dev : &(sl)->bus->dev, x)
28 
29 struct pnv_php_event {
30 	bool			added;
31 	struct pnv_php_slot	*php_slot;
32 	struct work_struct	work;
33 };
34 
/* Protects pnv_php_slot_list and the per-slot ->children / ->link lists */
static DEFINE_SPINLOCK(pnv_php_lock);
/* Slots that have no parent slot; child slots hang off parent->children */
static LIST_HEAD(pnv_php_slot_list);

/* Forward declarations for helpers referenced ahead of their definitions */
static void pnv_php_enable_irq(struct pnv_php_slot *php_slot);

static void pnv_php_register(struct device_node *dn);
static void pnv_php_unregister_one(struct device_node *dn);
static void pnv_php_unregister(struct device_node *dn);
43 
44 static void pnv_php_disable_irq(struct pnv_php_slot *php_slot,
45 				bool disable_device, bool disable_msi)
46 {
47 	struct pci_dev *pdev = php_slot->pdev;
48 	u16 ctrl;
49 
50 	if (php_slot->irq > 0) {
51 		pcie_capability_read_word(pdev, PCI_EXP_SLTCTL, &ctrl);
52 		ctrl &= ~(PCI_EXP_SLTCTL_HPIE |
53 			  PCI_EXP_SLTCTL_PDCE |
54 			  PCI_EXP_SLTCTL_DLLSCE);
55 		pcie_capability_write_word(pdev, PCI_EXP_SLTCTL, ctrl);
56 
57 		free_irq(php_slot->irq, php_slot);
58 		php_slot->irq = 0;
59 	}
60 
61 	if (disable_device || disable_msi) {
62 		if (pdev->msix_enabled)
63 			pci_disable_msix(pdev);
64 		else if (pdev->msi_enabled)
65 			pci_disable_msi(pdev);
66 	}
67 
68 	if (disable_device)
69 		pci_disable_device(pdev);
70 }
71 
72 static void pnv_php_free_slot(struct kref *kref)
73 {
74 	struct pnv_php_slot *php_slot = container_of(kref,
75 					struct pnv_php_slot, kref);
76 
77 	WARN_ON(!list_empty(&php_slot->children));
78 	pnv_php_disable_irq(php_slot, false, false);
79 	destroy_workqueue(php_slot->wq);
80 	kfree(php_slot->name);
81 	kfree(php_slot);
82 }
83 
84 static inline void pnv_php_put_slot(struct pnv_php_slot *php_slot)
85 {
86 
87 	if (!php_slot)
88 		return;
89 
90 	kref_put(&php_slot->kref, pnv_php_free_slot);
91 }
92 
93 static struct pnv_php_slot *pnv_php_match(struct device_node *dn,
94 					  struct pnv_php_slot *php_slot)
95 {
96 	struct pnv_php_slot *target, *tmp;
97 
98 	if (php_slot->dn == dn) {
99 		kref_get(&php_slot->kref);
100 		return php_slot;
101 	}
102 
103 	list_for_each_entry(tmp, &php_slot->children, link) {
104 		target = pnv_php_match(dn, tmp);
105 		if (target)
106 			return target;
107 	}
108 
109 	return NULL;
110 }
111 
112 struct pnv_php_slot *pnv_php_find_slot(struct device_node *dn)
113 {
114 	struct pnv_php_slot *php_slot, *tmp;
115 	unsigned long flags;
116 
117 	spin_lock_irqsave(&pnv_php_lock, flags);
118 	list_for_each_entry(tmp, &pnv_php_slot_list, link) {
119 		php_slot = pnv_php_match(dn, tmp);
120 		if (php_slot) {
121 			spin_unlock_irqrestore(&pnv_php_lock, flags);
122 			return php_slot;
123 		}
124 	}
125 	spin_unlock_irqrestore(&pnv_php_lock, flags);
126 
127 	return NULL;
128 }
129 EXPORT_SYMBOL_GPL(pnv_php_find_slot);
130 
131 /*
132  * Remove pdn for all children of the indicated device node.
133  * The function should remove pdn in a depth-first manner.
134  */
135 static void pnv_php_rmv_pdns(struct device_node *dn)
136 {
137 	struct device_node *child;
138 
139 	for_each_child_of_node(dn, child) {
140 		pnv_php_rmv_pdns(child);
141 
142 		pci_remove_device_node_info(child);
143 	}
144 }
145 
146 /*
147  * Detach all child nodes of the indicated device nodes. The
148  * function should handle device nodes in depth-first manner.
149  *
150  * We should not invoke of_node_release() as the memory for
151  * individual device node is part of large memory block. The
152  * large block is allocated from memblock (system bootup) or
153  * kmalloc() when unflattening the device tree by OF changeset.
154  * We can not free the large block allocated from memblock. For
155  * later case, it should be released at once.
156  */
157 static void pnv_php_detach_device_nodes(struct device_node *parent)
158 {
159 	struct device_node *dn;
160 
161 	for_each_child_of_node(parent, dn) {
162 		pnv_php_detach_device_nodes(dn);
163 
164 		of_node_put(dn);
165 		of_detach_node(dn);
166 	}
167 }
168 
169 static void pnv_php_rmv_devtree(struct pnv_php_slot *php_slot)
170 {
171 	pnv_php_rmv_pdns(php_slot->dn);
172 
173 	/*
174 	 * Decrease the refcount if the device nodes were created
175 	 * through OF changeset before detaching them.
176 	 */
177 	if (php_slot->fdt)
178 		of_changeset_destroy(&php_slot->ocs);
179 	pnv_php_detach_device_nodes(php_slot->dn);
180 
181 	if (php_slot->fdt) {
182 		kfree(php_slot->dt);
183 		kfree(php_slot->fdt);
184 		php_slot->dt        = NULL;
185 		php_slot->dn->child = NULL;
186 		php_slot->fdt       = NULL;
187 	}
188 }
189 
190 /*
191  * As the nodes in OF changeset are applied in reverse order, we
192  * need revert the nodes in advance so that we have correct node
193  * order after the changeset is applied.
194  */
195 static void pnv_php_reverse_nodes(struct device_node *parent)
196 {
197 	struct device_node *child, *next;
198 
199 	/* In-depth first */
200 	for_each_child_of_node(parent, child)
201 		pnv_php_reverse_nodes(child);
202 
203 	/* Reverse the nodes in the child list */
204 	child = parent->child;
205 	parent->child = NULL;
206 	while (child) {
207 		next = child->sibling;
208 
209 		child->sibling = parent->child;
210 		parent->child = child;
211 		child = next;
212 	}
213 }
214 
215 static int pnv_php_populate_changeset(struct of_changeset *ocs,
216 				      struct device_node *dn)
217 {
218 	int ret;
219 
220 	for_each_child_of_node_scoped(dn, child) {
221 		ret = of_changeset_attach_node(ocs, child);
222 		if (ret)
223 			return ret;
224 
225 		ret = pnv_php_populate_changeset(ocs, child);
226 		if (ret)
227 			return ret;
228 	}
229 
230 	return 0;
231 }
232 
233 static void *pnv_php_add_one_pdn(struct device_node *dn, void *data)
234 {
235 	struct pci_controller *hose = (struct pci_controller *)data;
236 	struct pci_dn *pdn;
237 
238 	pdn = pci_add_device_node_info(hose, dn);
239 	if (!pdn)
240 		return ERR_PTR(-ENOMEM);
241 
242 	return NULL;
243 }
244 
245 static void pnv_php_add_pdns(struct pnv_php_slot *slot)
246 {
247 	struct pci_controller *hose = pci_bus_to_host(slot->bus);
248 
249 	pci_traverse_device_nodes(slot->dn, pnv_php_add_one_pdn, hose);
250 }
251 
252 static int pnv_php_add_devtree(struct pnv_php_slot *php_slot)
253 {
254 	void *fdt, *fdt1, *dt;
255 	int ret;
256 
257 	/* We don't know the FDT blob size. We try to get it through
258 	 * maximal memory chunk and then copy it to another chunk that
259 	 * fits the real size.
260 	 */
261 	fdt1 = kzalloc(0x10000, GFP_KERNEL);
262 	if (!fdt1) {
263 		ret = -ENOMEM;
264 		goto out;
265 	}
266 
267 	ret = pnv_pci_get_device_tree(php_slot->dn->phandle, fdt1, 0x10000);
268 	if (ret) {
269 		SLOT_WARN(php_slot, "Error %d getting FDT blob\n", ret);
270 		goto free_fdt1;
271 	}
272 
273 	fdt = kmemdup(fdt1, fdt_totalsize(fdt1), GFP_KERNEL);
274 	if (!fdt) {
275 		ret = -ENOMEM;
276 		goto free_fdt1;
277 	}
278 
279 	/* Unflatten device tree blob */
280 	dt = of_fdt_unflatten_tree(fdt, php_slot->dn, NULL);
281 	if (!dt) {
282 		ret = -EINVAL;
283 		SLOT_WARN(php_slot, "Cannot unflatten FDT\n");
284 		goto free_fdt;
285 	}
286 
287 	/* Initialize and apply the changeset */
288 	of_changeset_init(&php_slot->ocs);
289 	pnv_php_reverse_nodes(php_slot->dn);
290 	ret = pnv_php_populate_changeset(&php_slot->ocs, php_slot->dn);
291 	if (ret) {
292 		pnv_php_reverse_nodes(php_slot->dn);
293 		SLOT_WARN(php_slot, "Error %d populating changeset\n",
294 			  ret);
295 		goto free_dt;
296 	}
297 
298 	php_slot->dn->child = NULL;
299 	ret = of_changeset_apply(&php_slot->ocs);
300 	if (ret) {
301 		SLOT_WARN(php_slot, "Error %d applying changeset\n", ret);
302 		goto destroy_changeset;
303 	}
304 
305 	/* Add device node firmware data */
306 	pnv_php_add_pdns(php_slot);
307 	php_slot->fdt = fdt;
308 	php_slot->dt  = dt;
309 	kfree(fdt1);
310 	goto out;
311 
312 destroy_changeset:
313 	of_changeset_destroy(&php_slot->ocs);
314 free_dt:
315 	kfree(dt);
316 	php_slot->dn->child = NULL;
317 free_fdt:
318 	kfree(fdt);
319 free_fdt1:
320 	kfree(fdt1);
321 out:
322 	return ret;
323 }
324 
325 static inline struct pnv_php_slot *to_pnv_php_slot(struct hotplug_slot *slot)
326 {
327 	return container_of(slot, struct pnv_php_slot, slot);
328 }
329 
330 int pnv_php_set_slot_power_state(struct hotplug_slot *slot,
331 				 uint8_t state)
332 {
333 	struct pnv_php_slot *php_slot = to_pnv_php_slot(slot);
334 	struct opal_msg msg;
335 	int ret;
336 
337 	ret = pnv_pci_set_power_state(php_slot->id, state, &msg);
338 	if (ret > 0) {
339 		if (be64_to_cpu(msg.params[1]) != php_slot->dn->phandle	||
340 		    be64_to_cpu(msg.params[2]) != state) {
341 			SLOT_WARN(php_slot, "Wrong msg (%lld, %lld, %lld)\n",
342 				  be64_to_cpu(msg.params[1]),
343 				  be64_to_cpu(msg.params[2]),
344 				  be64_to_cpu(msg.params[3]));
345 			return -ENOMSG;
346 		}
347 		if (be64_to_cpu(msg.params[3]) != OPAL_SUCCESS) {
348 			ret = -ENODEV;
349 			goto error;
350 		}
351 	} else if (ret < 0) {
352 		goto error;
353 	}
354 
355 	if (state == OPAL_PCI_SLOT_POWER_OFF || state == OPAL_PCI_SLOT_OFFLINE)
356 		pnv_php_rmv_devtree(php_slot);
357 	else
358 		ret = pnv_php_add_devtree(php_slot);
359 
360 	return ret;
361 
362 error:
363 	SLOT_WARN(php_slot, "Error %d powering %s\n",
364 		  ret, (state == OPAL_PCI_SLOT_POWER_ON) ? "on" : "off");
365 	return ret;
366 }
367 EXPORT_SYMBOL_GPL(pnv_php_set_slot_power_state);
368 
369 static int pnv_php_get_power_state(struct hotplug_slot *slot, u8 *state)
370 {
371 	struct pnv_php_slot *php_slot = to_pnv_php_slot(slot);
372 	uint8_t power_state = OPAL_PCI_SLOT_POWER_ON;
373 	int ret;
374 
375 	/*
376 	 * Retrieve power status from firmware. If we fail
377 	 * getting that, the power status fails back to
378 	 * be on.
379 	 */
380 	ret = pnv_pci_get_power_state(php_slot->id, &power_state);
381 	if (ret) {
382 		SLOT_WARN(php_slot, "Error %d getting power status\n",
383 			  ret);
384 	} else {
385 		*state = power_state;
386 	}
387 
388 	return 0;
389 }
390 
391 static int pcie_check_link_active(struct pci_dev *pdev)
392 {
393 	u16 lnk_status;
394 	int ret;
395 
396 	ret = pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &lnk_status);
397 	if (ret == PCIBIOS_DEVICE_NOT_FOUND || PCI_POSSIBLE_ERROR(lnk_status))
398 		return -ENODEV;
399 
400 	ret = !!(lnk_status & PCI_EXP_LNKSTA_DLLLA);
401 
402 	return ret;
403 }
404 
405 static int pnv_php_get_adapter_state(struct hotplug_slot *slot, u8 *state)
406 {
407 	struct pnv_php_slot *php_slot = to_pnv_php_slot(slot);
408 	uint8_t presence = OPAL_PCI_SLOT_EMPTY;
409 	int ret;
410 
411 	/*
412 	 * Retrieve presence status from firmware. If we can't
413 	 * get that, it will fail back to be empty.
414 	 */
415 	ret = pnv_pci_get_presence_state(php_slot->id, &presence);
416 	if (ret >= 0) {
417 		if (pci_pcie_type(php_slot->pdev) == PCI_EXP_TYPE_DOWNSTREAM &&
418 			presence == OPAL_PCI_SLOT_EMPTY) {
419 			/*
420 			 * Similar to pciehp_hpc, check whether the Link Active
421 			 * bit is set to account for broken downstream bridges
422 			 * that don't properly assert Presence Detect State, as
423 			 * was observed on the Microsemi Switchtec PM8533 PFX
424 			 * [11f8:8533].
425 			 */
426 			if (pcie_check_link_active(php_slot->pdev) > 0)
427 				presence = OPAL_PCI_SLOT_PRESENT;
428 		}
429 
430 		*state = presence;
431 		ret = 0;
432 	} else {
433 		SLOT_WARN(php_slot, "Error %d getting presence\n", ret);
434 	}
435 
436 	return ret;
437 }
438 
439 static int pnv_php_get_raw_indicator_status(struct hotplug_slot *slot, u8 *state)
440 {
441 	struct pnv_php_slot *php_slot = to_pnv_php_slot(slot);
442 	struct pci_dev *bridge = php_slot->pdev;
443 	u16 status;
444 
445 	pcie_capability_read_word(bridge, PCI_EXP_SLTCTL, &status);
446 	*state = (status & (PCI_EXP_SLTCTL_AIC | PCI_EXP_SLTCTL_PIC)) >> 6;
447 	return 0;
448 }
449 
450 
451 static int pnv_php_get_attention_state(struct hotplug_slot *slot, u8 *state)
452 {
453 	struct pnv_php_slot *php_slot = to_pnv_php_slot(slot);
454 
455 	pnv_php_get_raw_indicator_status(slot, &php_slot->attention_state);
456 	*state = php_slot->attention_state;
457 	return 0;
458 }
459 
460 static int pnv_php_set_attention_state(struct hotplug_slot *slot, u8 state)
461 {
462 	struct pnv_php_slot *php_slot = to_pnv_php_slot(slot);
463 	struct pci_dev *bridge = php_slot->pdev;
464 	u16 new, mask;
465 
466 	php_slot->attention_state = state;
467 	if (!bridge)
468 		return 0;
469 
470 	mask = PCI_EXP_SLTCTL_AIC;
471 
472 	if (state)
473 		new = FIELD_PREP(PCI_EXP_SLTCTL_AIC, state);
474 	else
475 		new = PCI_EXP_SLTCTL_ATTN_IND_OFF;
476 
477 	pcie_capability_clear_and_set_word(bridge, PCI_EXP_SLTCTL, mask, new);
478 
479 	return 0;
480 }
481 
482 static int pnv_php_activate_slot(struct pnv_php_slot *php_slot,
483 				 struct hotplug_slot *slot)
484 {
485 	int ret, i;
486 
487 	/*
488 	 * Issue initial slot activation command to firmware
489 	 *
490 	 * Firmware will power slot on, attempt to train the link, and
491 	 * discover any downstream devices. If this process fails, firmware
492 	 * will return an error code and an invalid device tree. Failure
493 	 * can be caused for multiple reasons, including a faulty
494 	 * downstream device, poor connection to the downstream device, or
495 	 * a previously latched PHB fence.  On failure, issue fundamental
496 	 * reset up to three times before aborting.
497 	 */
498 	ret = pnv_php_set_slot_power_state(slot, OPAL_PCI_SLOT_POWER_ON);
499 	if (ret) {
500 		SLOT_WARN(
501 			php_slot,
502 			"PCI slot activation failed with error code %d, possible frozen PHB",
503 			ret);
504 		SLOT_WARN(
505 			php_slot,
506 			"Attempting complete PHB reset before retrying slot activation\n");
507 		for (i = 0; i < 3; i++) {
508 			/*
509 			 * Slot activation failed, PHB may be fenced from a
510 			 * prior device failure.
511 			 *
512 			 * Use the OPAL fundamental reset call to both try a
513 			 * device reset and clear any potentially active PHB
514 			 * fence / freeze.
515 			 */
516 			SLOT_WARN(php_slot, "Try %d...\n", i + 1);
517 			pci_set_pcie_reset_state(php_slot->pdev,
518 						 pcie_warm_reset);
519 			msleep(250);
520 			pci_set_pcie_reset_state(php_slot->pdev,
521 						 pcie_deassert_reset);
522 
523 			ret = pnv_php_set_slot_power_state(
524 				slot, OPAL_PCI_SLOT_POWER_ON);
525 			if (!ret)
526 				break;
527 		}
528 
529 		if (i >= 3)
530 			SLOT_WARN(php_slot,
531 				  "Failed to bring slot online, aborting!\n");
532 	}
533 
534 	return ret;
535 }
536 
537 static int pnv_php_enable(struct pnv_php_slot *php_slot, bool rescan)
538 {
539 	struct hotplug_slot *slot = &php_slot->slot;
540 	uint8_t presence = OPAL_PCI_SLOT_EMPTY;
541 	uint8_t power_status = OPAL_PCI_SLOT_POWER_ON;
542 	int ret;
543 
544 	/* Check if the slot has been configured */
545 	if (php_slot->state != PNV_PHP_STATE_REGISTERED)
546 		return 0;
547 
548 	/* Retrieve slot presence status */
549 	ret = pnv_php_get_adapter_state(slot, &presence);
550 	if (ret)
551 		return ret;
552 
553 	/*
554 	 * Proceed if there have nothing behind the slot. However,
555 	 * we should leave the slot in registered state at the
556 	 * beginning. Otherwise, the PCI devices inserted afterwards
557 	 * won't be probed and populated.
558 	 */
559 	if (presence == OPAL_PCI_SLOT_EMPTY) {
560 		if (!php_slot->power_state_check) {
561 			php_slot->power_state_check = true;
562 
563 			return 0;
564 		}
565 
566 		goto scan;
567 	}
568 
569 	/*
570 	 * If the power supply to the slot is off, we can't detect
571 	 * adapter presence state. That means we have to turn the
572 	 * slot on before going to probe slot's presence state.
573 	 *
574 	 * On the first time, we don't change the power status to
575 	 * boost system boot with assumption that the firmware
576 	 * supplies consistent slot power status: empty slot always
577 	 * has its power off and non-empty slot has its power on.
578 	 */
579 	if (!php_slot->power_state_check) {
580 		php_slot->power_state_check = true;
581 
582 		ret = pnv_php_get_power_state(slot, &power_status);
583 		if (ret)
584 			return ret;
585 
586 		if (power_status != OPAL_PCI_SLOT_POWER_ON)
587 			return 0;
588 	}
589 
590 	/* Check the power status. Scan the slot if it is already on */
591 	ret = pnv_php_get_power_state(slot, &power_status);
592 	if (ret)
593 		return ret;
594 
595 	if (power_status == OPAL_PCI_SLOT_POWER_ON)
596 		goto scan;
597 
598 	/* Power is off, turn it on and then scan the slot */
599 	ret = pnv_php_activate_slot(php_slot, slot);
600 	if (ret)
601 		return ret;
602 
603 scan:
604 	if (presence == OPAL_PCI_SLOT_PRESENT) {
605 		if (rescan) {
606 			pci_lock_rescan_remove();
607 			pci_hp_add_devices(php_slot->bus);
608 			pci_unlock_rescan_remove();
609 		}
610 
611 		/* Rescan for child hotpluggable slots */
612 		php_slot->state = PNV_PHP_STATE_POPULATED;
613 		if (rescan)
614 			pnv_php_register(php_slot->dn);
615 	} else {
616 		php_slot->state = PNV_PHP_STATE_POPULATED;
617 	}
618 
619 	return 0;
620 }
621 
622 static int pnv_php_reset_slot(struct hotplug_slot *slot, bool probe)
623 {
624 	struct pnv_php_slot *php_slot = to_pnv_php_slot(slot);
625 	struct pci_dev *bridge = php_slot->pdev;
626 	uint16_t sts;
627 
628 	/*
629 	 * The CAPI folks want pnv_php to drive OpenCAPI slots
630 	 * which don't have a bridge. Only claim to support
631 	 * reset_slot() if we have a bridge device (for now...)
632 	 */
633 	if (probe)
634 		return !bridge;
635 
636 	/* mask our interrupt while resetting the bridge */
637 	if (php_slot->irq > 0)
638 		disable_irq(php_slot->irq);
639 
640 	pci_bridge_secondary_bus_reset(bridge);
641 
642 	/* clear any state changes that happened due to the reset */
643 	pcie_capability_read_word(php_slot->pdev, PCI_EXP_SLTSTA, &sts);
644 	sts &= (PCI_EXP_SLTSTA_PDC | PCI_EXP_SLTSTA_DLLSC);
645 	pcie_capability_write_word(php_slot->pdev, PCI_EXP_SLTSTA, sts);
646 
647 	if (php_slot->irq > 0)
648 		enable_irq(php_slot->irq);
649 
650 	return 0;
651 }
652 
653 static int pnv_php_enable_slot(struct hotplug_slot *slot)
654 {
655 	struct pnv_php_slot *php_slot = to_pnv_php_slot(slot);
656 	u32 prop32;
657 	int ret;
658 
659 	ret = pnv_php_enable(php_slot, true);
660 	if (ret)
661 		return ret;
662 
663 	/* (Re-)enable interrupt if the slot supports surprise hotplug */
664 	ret = of_property_read_u32(php_slot->dn, "ibm,slot-surprise-pluggable",
665 				   &prop32);
666 	if (!ret && prop32)
667 		pnv_php_enable_irq(php_slot);
668 
669 	return 0;
670 }
671 
672 /*
673  * Disable any hotplug interrupts for all slots on the provided bus, as well as
674  * all downstream slots in preparation for a hot unplug.
675  */
676 static int pnv_php_disable_all_irqs(struct pci_bus *bus)
677 {
678 	struct pci_bus *child_bus;
679 	struct pci_slot *slot;
680 
681 	/* First go down child buses */
682 	list_for_each_entry(child_bus, &bus->children, node)
683 		pnv_php_disable_all_irqs(child_bus);
684 
685 	/* Disable IRQs for all pnv_php slots on this bus */
686 	list_for_each_entry(slot, &bus->slots, list) {
687 		struct pnv_php_slot *php_slot = to_pnv_php_slot(slot->hotplug);
688 
689 		pnv_php_disable_irq(php_slot, false, true);
690 	}
691 
692 	return 0;
693 }
694 
695 /*
696  * Disable any hotplug interrupts for all downstream slots on the provided
697  * bus in preparation for a hot unplug.
698  */
699 static int pnv_php_disable_all_downstream_irqs(struct pci_bus *bus)
700 {
701 	struct pci_bus *child_bus;
702 
703 	/* Go down child buses, recursively deactivating their IRQs */
704 	list_for_each_entry(child_bus, &bus->children, node)
705 		pnv_php_disable_all_irqs(child_bus);
706 
707 	return 0;
708 }
709 
710 static int pnv_php_disable_slot(struct hotplug_slot *slot)
711 {
712 	struct pnv_php_slot *php_slot = to_pnv_php_slot(slot);
713 	int ret;
714 
715 	/*
716 	 * Allow to disable a slot already in the registered state to
717 	 * cover cases where the slot couldn't be enabled and never
718 	 * reached the populated state
719 	 */
720 	if (php_slot->state != PNV_PHP_STATE_POPULATED &&
721 	    php_slot->state != PNV_PHP_STATE_REGISTERED)
722 		return 0;
723 
724 	/*
725 	 * Free all IRQ resources from all child slots before remove.
726 	 * Note that we do not disable the root slot IRQ here as that
727 	 * would also deactivate the slot hot (re)plug interrupt!
728 	 */
729 	pnv_php_disable_all_downstream_irqs(php_slot->bus);
730 
731 	/* Remove all devices behind the slot */
732 	pci_lock_rescan_remove();
733 	pci_hp_remove_devices(php_slot->bus);
734 	pci_unlock_rescan_remove();
735 
736 	/* Detach the child hotpluggable slots */
737 	pnv_php_unregister(php_slot->dn);
738 
739 	/* Notify firmware and remove device nodes */
740 	ret = pnv_php_set_slot_power_state(slot, OPAL_PCI_SLOT_POWER_OFF);
741 
742 	php_slot->state = PNV_PHP_STATE_REGISTERED;
743 	return ret;
744 }
745 
746 static const struct hotplug_slot_ops php_slot_ops = {
747 	.get_power_status	= pnv_php_get_power_state,
748 	.get_adapter_status	= pnv_php_get_adapter_state,
749 	.get_attention_status	= pnv_php_get_attention_state,
750 	.set_attention_status	= pnv_php_set_attention_state,
751 	.enable_slot		= pnv_php_enable_slot,
752 	.disable_slot		= pnv_php_disable_slot,
753 	.reset_slot		= pnv_php_reset_slot,
754 };
755 
756 static void pnv_php_release(struct pnv_php_slot *php_slot)
757 {
758 	unsigned long flags;
759 
760 	/* Remove from global or child list */
761 	spin_lock_irqsave(&pnv_php_lock, flags);
762 	list_del(&php_slot->link);
763 	spin_unlock_irqrestore(&pnv_php_lock, flags);
764 
765 	/* Detach from parent */
766 	pnv_php_put_slot(php_slot);
767 	pnv_php_put_slot(php_slot->parent);
768 }
769 
770 static struct pnv_php_slot *pnv_php_alloc_slot(struct device_node *dn)
771 {
772 	struct pnv_php_slot *php_slot;
773 	struct pci_bus *bus;
774 	const char *label;
775 	uint64_t id;
776 	int ret;
777 
778 	ret = of_property_read_string(dn, "ibm,slot-label", &label);
779 	if (ret)
780 		return NULL;
781 
782 	if (pnv_pci_get_slot_id(dn, &id))
783 		return NULL;
784 
785 	bus = pci_find_bus_by_node(dn);
786 	if (!bus)
787 		return NULL;
788 
789 	php_slot = kzalloc_obj(*php_slot);
790 	if (!php_slot)
791 		return NULL;
792 
793 	php_slot->name = kstrdup(label, GFP_KERNEL);
794 	if (!php_slot->name) {
795 		kfree(php_slot);
796 		return NULL;
797 	}
798 
799 	/* Allocate workqueue for this slot's interrupt handling */
800 	php_slot->wq = alloc_workqueue("pciehp-%s", WQ_PERCPU, 0, php_slot->name);
801 	if (!php_slot->wq) {
802 		SLOT_WARN(php_slot, "Cannot alloc workqueue\n");
803 		kfree(php_slot->name);
804 		kfree(php_slot);
805 		return NULL;
806 	}
807 
808 	if (dn->child && PCI_DN(dn->child))
809 		php_slot->slot_no = PCI_SLOT(PCI_DN(dn->child)->devfn);
810 	else
811 		php_slot->slot_no = -1;   /* Placeholder slot */
812 
813 	kref_init(&php_slot->kref);
814 	php_slot->state	                = PNV_PHP_STATE_INITIALIZED;
815 	php_slot->dn	                = dn;
816 	php_slot->pdev	                = bus->self;
817 	php_slot->bus	                = bus;
818 	php_slot->id	                = id;
819 	php_slot->power_state_check     = false;
820 	php_slot->slot.ops              = &php_slot_ops;
821 
822 	INIT_LIST_HEAD(&php_slot->children);
823 	INIT_LIST_HEAD(&php_slot->link);
824 
825 	return php_slot;
826 }
827 
828 static int pnv_php_register_slot(struct pnv_php_slot *php_slot)
829 {
830 	struct pnv_php_slot *parent;
831 	struct device_node *dn = php_slot->dn;
832 	unsigned long flags;
833 	int ret;
834 
835 	/* Check if the slot is registered or not */
836 	parent = pnv_php_find_slot(php_slot->dn);
837 	if (parent) {
838 		pnv_php_put_slot(parent);
839 		return -EEXIST;
840 	}
841 
842 	/* Register PCI slot */
843 	ret = pci_hp_register(&php_slot->slot, php_slot->bus,
844 			      php_slot->slot_no, php_slot->name);
845 	if (ret) {
846 		SLOT_WARN(php_slot, "Error %d registering slot\n", ret);
847 		return ret;
848 	}
849 
850 	/* Attach to the parent's child list or global list */
851 	while ((dn = of_get_parent(dn))) {
852 		if (!PCI_DN(dn)) {
853 			of_node_put(dn);
854 			break;
855 		}
856 
857 		parent = pnv_php_find_slot(dn);
858 		if (parent) {
859 			of_node_put(dn);
860 			break;
861 		}
862 
863 		of_node_put(dn);
864 	}
865 
866 	spin_lock_irqsave(&pnv_php_lock, flags);
867 	php_slot->parent = parent;
868 	if (parent)
869 		list_add_tail(&php_slot->link, &parent->children);
870 	else
871 		list_add_tail(&php_slot->link, &pnv_php_slot_list);
872 	spin_unlock_irqrestore(&pnv_php_lock, flags);
873 
874 	php_slot->state = PNV_PHP_STATE_REGISTERED;
875 	return 0;
876 }
877 
878 static int pnv_php_enable_msix(struct pnv_php_slot *php_slot)
879 {
880 	struct pci_dev *pdev = php_slot->pdev;
881 	struct msix_entry entry;
882 	int nr_entries, ret;
883 	u16 pcie_flag;
884 
885 	/* Get total number of MSIx entries */
886 	nr_entries = pci_msix_vec_count(pdev);
887 	if (nr_entries < 0)
888 		return nr_entries;
889 
890 	/* Check hotplug MSIx entry is in range */
891 	pcie_capability_read_word(pdev, PCI_EXP_FLAGS, &pcie_flag);
892 	entry.entry = FIELD_GET(PCI_EXP_FLAGS_IRQ, pcie_flag);
893 	if (entry.entry >= nr_entries)
894 		return -ERANGE;
895 
896 	/* Enable MSIx */
897 	ret = pci_enable_msix_exact(pdev, &entry, 1);
898 	if (ret) {
899 		SLOT_WARN(php_slot, "Error %d enabling MSIx\n", ret);
900 		return ret;
901 	}
902 
903 	return entry.vector;
904 }
905 
906 static void
907 pnv_php_detect_clear_suprise_removal_freeze(struct pnv_php_slot *php_slot)
908 {
909 	struct pci_dev *pdev = php_slot->pdev;
910 	struct eeh_dev *edev;
911 	struct eeh_pe *pe;
912 	int i, rc;
913 
914 	/*
915 	 * When a device is surprise removed from a downstream bridge slot,
916 	 * the upstream bridge port can still end up frozen due to related EEH
917 	 * events, which will in turn block the MSI interrupts for slot hotplug
918 	 * detection.
919 	 *
920 	 * Detect and thaw any frozen upstream PE after slot deactivation.
921 	 */
922 	edev = pci_dev_to_eeh_dev(pdev);
923 	pe = edev ? edev->pe : NULL;
924 	rc = eeh_pe_get_state(pe);
925 	if ((rc == -ENODEV) || (rc == -ENOENT)) {
926 		SLOT_WARN(
927 			php_slot,
928 			"Upstream bridge PE state unknown, hotplug detect may fail\n");
929 	} else {
930 		if (pe->state & EEH_PE_ISOLATED) {
931 			SLOT_WARN(
932 				php_slot,
933 				"Upstream bridge PE %02x frozen, thawing...\n",
934 				pe->addr);
935 			for (i = 0; i < 3; i++)
936 				if (!eeh_unfreeze_pe(pe))
937 					break;
938 			if (i >= 3)
939 				SLOT_WARN(
940 					php_slot,
941 					"Unable to thaw PE %02x, hotplug detect will fail!\n",
942 					pe->addr);
943 			else
944 				SLOT_WARN(php_slot,
945 					  "PE %02x thawed successfully\n",
946 					  pe->addr);
947 		}
948 	}
949 }
950 
951 static void pnv_php_event_handler(struct work_struct *work)
952 {
953 	struct pnv_php_event *event =
954 		container_of(work, struct pnv_php_event, work);
955 	struct pnv_php_slot *php_slot = event->php_slot;
956 
957 	if (event->added) {
958 		pnv_php_enable_slot(&php_slot->slot);
959 	} else {
960 		pnv_php_disable_slot(&php_slot->slot);
961 		pnv_php_detect_clear_suprise_removal_freeze(php_slot);
962 	}
963 
964 	kfree(event);
965 }
966 
967 static irqreturn_t pnv_php_interrupt(int irq, void *data)
968 {
969 	struct pnv_php_slot *php_slot = data;
970 	struct pci_dev *pchild, *pdev = php_slot->pdev;
971 	struct eeh_dev *edev;
972 	struct eeh_pe *pe;
973 	struct pnv_php_event *event;
974 	u16 sts, lsts;
975 	u8 presence;
976 	bool added;
977 	unsigned long flags;
978 	int ret;
979 
980 	pcie_capability_read_word(pdev, PCI_EXP_SLTSTA, &sts);
981 	sts &= (PCI_EXP_SLTSTA_PDC | PCI_EXP_SLTSTA_DLLSC);
982 	pcie_capability_write_word(pdev, PCI_EXP_SLTSTA, sts);
983 
984 	pci_dbg(pdev, "PCI slot [%s]: HP int! DLAct: %d, PresDet: %d\n",
985 			php_slot->name,
986 			!!(sts & PCI_EXP_SLTSTA_DLLSC),
987 			!!(sts & PCI_EXP_SLTSTA_PDC));
988 
989 	if (sts & PCI_EXP_SLTSTA_DLLSC) {
990 		pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &lsts);
991 		added = !!(lsts & PCI_EXP_LNKSTA_DLLLA);
992 	} else if (!(php_slot->flags & PNV_PHP_FLAG_BROKEN_PDC) &&
993 		   (sts & PCI_EXP_SLTSTA_PDC)) {
994 		ret = pnv_pci_get_presence_state(php_slot->id, &presence);
995 		if (ret) {
996 			SLOT_WARN(php_slot,
997 				  "PCI slot [%s] error %d getting presence (0x%04x), to retry the operation.\n",
998 				  php_slot->name, ret, sts);
999 			return IRQ_HANDLED;
1000 		}
1001 
1002 		added = !!(presence == OPAL_PCI_SLOT_PRESENT);
1003 	} else {
1004 		pci_dbg(pdev, "PCI slot [%s]: Spurious IRQ?\n", php_slot->name);
1005 		return IRQ_NONE;
1006 	}
1007 
1008 	/* Freeze the removed PE to avoid unexpected error reporting */
1009 	if (!added) {
1010 		pchild = list_first_entry_or_null(&php_slot->bus->devices,
1011 						  struct pci_dev, bus_list);
1012 		edev = pchild ? pci_dev_to_eeh_dev(pchild) : NULL;
1013 		pe = edev ? edev->pe : NULL;
1014 		if (pe) {
1015 			eeh_serialize_lock(&flags);
1016 			eeh_pe_mark_isolated(pe);
1017 			eeh_serialize_unlock(flags);
1018 			eeh_pe_set_option(pe, EEH_OPT_FREEZE_PE);
1019 		}
1020 	}
1021 
1022 	/*
1023 	 * The PE is left in frozen state if the event is missed. It's
1024 	 * fine as the PCI devices (PE) aren't functional any more.
1025 	 */
1026 	event = kzalloc_obj(*event, GFP_ATOMIC);
1027 	if (!event) {
1028 		SLOT_WARN(php_slot,
1029 			  "PCI slot [%s] missed hotplug event 0x%04x\n",
1030 			  php_slot->name, sts);
1031 		return IRQ_HANDLED;
1032 	}
1033 
1034 	pci_info(pdev, "PCI slot [%s] %s (IRQ: %d)\n",
1035 		 php_slot->name, added ? "added" : "removed", irq);
1036 	INIT_WORK(&event->work, pnv_php_event_handler);
1037 	event->added = added;
1038 	event->php_slot = php_slot;
1039 	queue_work(php_slot->wq, &event->work);
1040 
1041 	return IRQ_HANDLED;
1042 }
1043 
1044 static void pnv_php_init_irq(struct pnv_php_slot *php_slot, int irq)
1045 {
1046 	struct pci_dev *pdev = php_slot->pdev;
1047 	u32 broken_pdc = 0;
1048 	u16 sts, ctrl;
1049 	int ret;
1050 
1051 	/* Check PDC (Presence Detection Change) is broken or not */
1052 	ret = of_property_read_u32(php_slot->dn, "ibm,slot-broken-pdc",
1053 				   &broken_pdc);
1054 	if (!ret && broken_pdc)
1055 		php_slot->flags |= PNV_PHP_FLAG_BROKEN_PDC;
1056 
1057 	/* Clear pending interrupts */
1058 	pcie_capability_read_word(pdev, PCI_EXP_SLTSTA, &sts);
1059 	if (php_slot->flags & PNV_PHP_FLAG_BROKEN_PDC)
1060 		sts |= PCI_EXP_SLTSTA_DLLSC;
1061 	else
1062 		sts |= (PCI_EXP_SLTSTA_PDC | PCI_EXP_SLTSTA_DLLSC);
1063 	pcie_capability_write_word(pdev, PCI_EXP_SLTSTA, sts);
1064 
1065 	/* Request the interrupt */
1066 	ret = request_irq(irq, pnv_php_interrupt, IRQF_SHARED,
1067 			  php_slot->name, php_slot);
1068 	if (ret) {
1069 		pnv_php_disable_irq(php_slot, true, true);
1070 		SLOT_WARN(php_slot, "Error %d enabling IRQ %d\n", ret, irq);
1071 		return;
1072 	}
1073 
1074 	/* Enable the interrupts */
1075 	pcie_capability_read_word(pdev, PCI_EXP_SLTCTL, &ctrl);
1076 	if (php_slot->flags & PNV_PHP_FLAG_BROKEN_PDC) {
1077 		ctrl &= ~PCI_EXP_SLTCTL_PDCE;
1078 		ctrl |= (PCI_EXP_SLTCTL_HPIE |
1079 			 PCI_EXP_SLTCTL_DLLSCE);
1080 	} else {
1081 		ctrl |= (PCI_EXP_SLTCTL_HPIE |
1082 			 PCI_EXP_SLTCTL_PDCE |
1083 			 PCI_EXP_SLTCTL_DLLSCE);
1084 	}
1085 	pcie_capability_write_word(pdev, PCI_EXP_SLTCTL, ctrl);
1086 
1087 	/* The interrupt is initialized successfully when @irq is valid */
1088 	php_slot->irq = irq;
1089 }
1090 
1091 static void pnv_php_enable_irq(struct pnv_php_slot *php_slot)
1092 {
1093 	struct pci_dev *pdev = php_slot->pdev;
1094 	int irq, ret;
1095 
1096 	/*
1097 	 * The MSI/MSIx interrupt might have been occupied by other
1098 	 * drivers. Don't populate the surprise hotplug capability
1099 	 * in that case.
1100 	 */
1101 	if (pci_dev_msi_enabled(pdev))
1102 		return;
1103 
1104 	ret = pci_enable_device(pdev);
1105 	if (ret) {
1106 		SLOT_WARN(php_slot, "Error %d enabling device\n", ret);
1107 		return;
1108 	}
1109 
1110 	pci_set_master(pdev);
1111 
1112 	/* Enable MSIx interrupt */
1113 	irq = pnv_php_enable_msix(php_slot);
1114 	if (irq > 0) {
1115 		pnv_php_init_irq(php_slot, irq);
1116 		return;
1117 	}
1118 
1119 	/*
1120 	 * Use MSI if MSIx doesn't work. Fail back to legacy INTx
1121 	 * if MSI doesn't work either
1122 	 */
1123 	ret = pci_enable_msi(pdev);
1124 	if (!ret || pdev->irq) {
1125 		irq = pdev->irq;
1126 		pnv_php_init_irq(php_slot, irq);
1127 	}
1128 }
1129 
1130 static int pnv_php_register_one(struct device_node *dn)
1131 {
1132 	struct pnv_php_slot *php_slot;
1133 	u32 prop32;
1134 	int ret;
1135 
1136 	/* Check if it's hotpluggable slot */
1137 	ret = of_property_read_u32(dn, "ibm,slot-pluggable", &prop32);
1138 	if (ret || !prop32)
1139 		return -ENXIO;
1140 
1141 	ret = of_property_read_u32(dn, "ibm,reset-by-firmware", &prop32);
1142 	if (ret || !prop32)
1143 		return -ENXIO;
1144 
1145 	php_slot = pnv_php_alloc_slot(dn);
1146 	if (!php_slot)
1147 		return -ENODEV;
1148 
1149 	ret = pnv_php_register_slot(php_slot);
1150 	if (ret)
1151 		goto free_slot;
1152 
1153 	ret = pnv_php_enable(php_slot, false);
1154 	if (ret)
1155 		goto unregister_slot;
1156 
1157 	/* Enable interrupt if the slot supports surprise hotplug */
1158 	ret = of_property_read_u32(dn, "ibm,slot-surprise-pluggable", &prop32);
1159 	if (!ret && prop32)
1160 		pnv_php_enable_irq(php_slot);
1161 
1162 	return 0;
1163 
1164 unregister_slot:
1165 	pnv_php_unregister_one(php_slot->dn);
1166 free_slot:
1167 	pnv_php_put_slot(php_slot);
1168 	return ret;
1169 }
1170 
1171 static void pnv_php_register(struct device_node *dn)
1172 {
1173 	struct device_node *child;
1174 
1175 	/*
1176 	 * The parent slots should be registered before their
1177 	 * child slots.
1178 	 */
1179 	for_each_child_of_node(dn, child) {
1180 		pnv_php_register_one(child);
1181 		pnv_php_register(child);
1182 	}
1183 }
1184 
1185 static void pnv_php_unregister_one(struct device_node *dn)
1186 {
1187 	struct pnv_php_slot *php_slot;
1188 
1189 	php_slot = pnv_php_find_slot(dn);
1190 	if (!php_slot)
1191 		return;
1192 
1193 	php_slot->state = PNV_PHP_STATE_OFFLINE;
1194 	pci_hp_deregister(&php_slot->slot);
1195 	pnv_php_release(php_slot);
1196 	pnv_php_put_slot(php_slot);
1197 }
1198 
1199 static void pnv_php_unregister(struct device_node *dn)
1200 {
1201 	struct device_node *child;
1202 
1203 	/* The child slots should go before their parent slots */
1204 	for_each_child_of_node(dn, child) {
1205 		pnv_php_unregister(child);
1206 		pnv_php_unregister_one(child);
1207 	}
1208 }
1209 
1210 static int __init pnv_php_init(void)
1211 {
1212 	struct device_node *dn;
1213 
1214 	pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n");
1215 	for_each_compatible_node(dn, NULL, "ibm,ioda2-phb")
1216 		pnv_php_register(dn);
1217 
1218 	for_each_compatible_node(dn, NULL, "ibm,ioda3-phb")
1219 		pnv_php_register(dn);
1220 
1221 	for_each_compatible_node(dn, NULL, "ibm,ioda2-npu2-opencapi-phb")
1222 		pnv_php_register_one(dn); /* slot directly under the PHB */
1223 	return 0;
1224 }
1225 
1226 static void __exit pnv_php_exit(void)
1227 {
1228 	struct device_node *dn;
1229 
1230 	for_each_compatible_node(dn, NULL, "ibm,ioda2-phb")
1231 		pnv_php_unregister(dn);
1232 
1233 	for_each_compatible_node(dn, NULL, "ibm,ioda3-phb")
1234 		pnv_php_unregister(dn);
1235 
1236 	for_each_compatible_node(dn, NULL, "ibm,ioda2-npu2-opencapi-phb")
1237 		pnv_php_unregister_one(dn); /* slot directly under the PHB */
1238 }
1239 
1240 module_init(pnv_php_init);
1241 module_exit(pnv_php_exit);
1242 
1243 MODULE_VERSION(DRIVER_VERSION);
1244 MODULE_LICENSE("GPL v2");
1245 MODULE_AUTHOR(DRIVER_AUTHOR);
1246 MODULE_DESCRIPTION(DRIVER_DESC);
1247