xref: /linux/drivers/pci/probe.c (revision 0da3050bdded5f121aaca6b5247ea50681d7129e)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * PCI detection and setup code
4  */
5 
6 #include <linux/array_size.h>
7 #include <linux/kernel.h>
8 #include <linux/delay.h>
9 #include <linux/init.h>
10 #include <linux/pci.h>
11 #include <linux/msi.h>
12 #include <linux/of_pci.h>
13 #include <linux/of_platform.h>
14 #include <linux/platform_device.h>
15 #include <linux/pci_hotplug.h>
16 #include <linux/slab.h>
17 #include <linux/module.h>
18 #include <linux/cpumask.h>
19 #include <linux/aer.h>
20 #include <linux/acpi.h>
21 #include <linux/hypervisor.h>
22 #include <linux/irqdomain.h>
23 #include <linux/pm_runtime.h>
24 #include <linux/bitfield.h>
25 #include "pci.h"
26 
/* Value used for the secondary latency timer of CardBus bridges */
#define CARDBUS_LATENCY_TIMER	176
/* Bus numbers reserved per CardBus bridge (users are outside this excerpt) */
#define CARDBUS_RESERVE_BUSNR	3
29 
30 static struct resource busn_resource = {
31 	.name	= "PCI busn",
32 	.start	= 0,
33 	.end	= 255,
34 	.flags	= IORESOURCE_BUS,
35 };
36 
37 /* Ugh.  Need to stop exporting this to modules. */
38 LIST_HEAD(pci_root_buses);
39 EXPORT_SYMBOL(pci_root_buses);
40 
41 static LIST_HEAD(pci_domain_busn_res_list);
42 
43 struct pci_domain_busn_res {
44 	struct list_head list;
45 	struct resource res;
46 	int domain_nr;
47 };
48 
49 static struct resource *get_pci_domain_busn_res(int domain_nr)
50 {
51 	struct pci_domain_busn_res *r;
52 
53 	list_for_each_entry(r, &pci_domain_busn_res_list, list)
54 		if (r->domain_nr == domain_nr)
55 			return &r->res;
56 
57 	r = kzalloc(sizeof(*r), GFP_KERNEL);
58 	if (!r)
59 		return NULL;
60 
61 	r->domain_nr = domain_nr;
62 	r->res.start = 0;
63 	r->res.end = 0xff;
64 	r->res.flags = IORESOURCE_BUS | IORESOURCE_PCI_FIXED;
65 
66 	list_add_tail(&r->list, &pci_domain_busn_res_list);
67 
68 	return &r->res;
69 }
70 
71 /*
72  * Some device drivers need know if PCI is initiated.
73  * Basically, we think PCI is not initiated when there
74  * is no device to be found on the pci_bus_type.
75  */
76 int no_pci_devices(void)
77 {
78 	struct device *dev;
79 	int no_devices;
80 
81 	dev = bus_find_next_device(&pci_bus_type, NULL);
82 	no_devices = (dev == NULL);
83 	put_device(dev);
84 	return no_devices;
85 }
86 EXPORT_SYMBOL(no_pci_devices);
87 
88 /*
89  * PCI Bus Class
90  */
91 static void release_pcibus_dev(struct device *dev)
92 {
93 	struct pci_bus *pci_bus = to_pci_bus(dev);
94 
95 	put_device(pci_bus->bridge);
96 	pci_bus_remove_resources(pci_bus);
97 	pci_release_bus_of_node(pci_bus);
98 	kfree(pci_bus);
99 }
100 
101 static const struct class pcibus_class = {
102 	.name		= "pci_bus",
103 	.dev_release	= &release_pcibus_dev,
104 	.dev_groups	= pcibus_groups,
105 };
106 
107 static int __init pcibus_class_init(void)
108 {
109 	return class_register(&pcibus_class);
110 }
111 postcore_initcall(pcibus_class_init);
112 
113 static u64 pci_size(u64 base, u64 maxbase, u64 mask)
114 {
115 	u64 size = mask & maxbase;	/* Find the significant bits */
116 	if (!size)
117 		return 0;
118 
119 	/*
120 	 * Get the lowest of them to find the decode size, and from that
121 	 * the extent.
122 	 */
123 	size = size & ~(size-1);
124 
125 	/*
126 	 * base == maxbase can be valid only if the BAR has already been
127 	 * programmed with all 1s.
128 	 */
129 	if (base == maxbase && ((base | (size - 1)) & mask) != mask)
130 		return 0;
131 
132 	return size;
133 }
134 
135 static inline unsigned long decode_bar(struct pci_dev *dev, u32 bar)
136 {
137 	u32 mem_type;
138 	unsigned long flags;
139 
140 	if ((bar & PCI_BASE_ADDRESS_SPACE) == PCI_BASE_ADDRESS_SPACE_IO) {
141 		flags = bar & ~PCI_BASE_ADDRESS_IO_MASK;
142 		flags |= IORESOURCE_IO;
143 		return flags;
144 	}
145 
146 	flags = bar & ~PCI_BASE_ADDRESS_MEM_MASK;
147 	flags |= IORESOURCE_MEM;
148 	if (flags & PCI_BASE_ADDRESS_MEM_PREFETCH)
149 		flags |= IORESOURCE_PREFETCH;
150 
151 	mem_type = bar & PCI_BASE_ADDRESS_MEM_TYPE_MASK;
152 	switch (mem_type) {
153 	case PCI_BASE_ADDRESS_MEM_TYPE_32:
154 		break;
155 	case PCI_BASE_ADDRESS_MEM_TYPE_1M:
156 		/* 1M mem BAR treated as 32-bit BAR */
157 		break;
158 	case PCI_BASE_ADDRESS_MEM_TYPE_64:
159 		flags |= IORESOURCE_MEM_64;
160 		break;
161 	default:
162 		/* mem unknown type treated as 32-bit BAR */
163 		break;
164 	}
165 	return flags;
166 }
167 
/* PCI_COMMAND bits that enable I/O and memory decoding */
#define PCI_COMMAND_DECODE_ENABLE	(PCI_COMMAND_IO | PCI_COMMAND_MEMORY)
169 
170 /**
171  * __pci_size_bars - Read the raw BAR mask for a range of PCI BARs
172  * @dev: the PCI device
173  * @count: number of BARs to size
174  * @pos: starting config space position
175  * @sizes: array to store mask values
176  * @rom: indicate whether to use ROM mask, which avoids enabling ROM BARs
177  *
178  * Provided @sizes array must be sufficiently sized to store results for
179  * @count u32 BARs.  Caller is responsible for disabling decode to specified
180  * BAR range around calling this function.  This function is intended to avoid
181  * disabling decode around sizing each BAR individually, which can result in
182  * non-trivial overhead in virtualized environments with very large PCI BARs.
183  */
184 static void __pci_size_bars(struct pci_dev *dev, int count,
185 			    unsigned int pos, u32 *sizes, bool rom)
186 {
187 	u32 orig, mask = rom ? PCI_ROM_ADDRESS_MASK : ~0;
188 	int i;
189 
190 	for (i = 0; i < count; i++, pos += 4, sizes++) {
191 		pci_read_config_dword(dev, pos, &orig);
192 		pci_write_config_dword(dev, pos, mask);
193 		pci_read_config_dword(dev, pos, sizes);
194 		pci_write_config_dword(dev, pos, orig);
195 	}
196 }
197 
198 void __pci_size_stdbars(struct pci_dev *dev, int count,
199 			unsigned int pos, u32 *sizes)
200 {
201 	__pci_size_bars(dev, count, pos, sizes, false);
202 }
203 
204 static void __pci_size_rom(struct pci_dev *dev, unsigned int pos, u32 *sizes)
205 {
206 	__pci_size_bars(dev, 1, pos, sizes, true);
207 }
208 
209 /**
210  * __pci_read_base - Read a PCI BAR
211  * @dev: the PCI device
212  * @type: type of the BAR
213  * @res: resource buffer to be filled in
214  * @pos: BAR position in the config space
215  * @sizes: array of one or more pre-read BAR masks
216  *
217  * Returns 1 if the BAR is 64-bit, or 0 if 32-bit.
218  */
219 int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type,
220 		    struct resource *res, unsigned int pos, u32 *sizes)
221 {
222 	u32 l = 0, sz;
223 	u64 l64, sz64, mask64;
224 	struct pci_bus_region region, inverted_region;
225 	const char *res_name = pci_resource_name(dev, res - dev->resource);
226 
227 	res->name = pci_name(dev);
228 
229 	pci_read_config_dword(dev, pos, &l);
230 	sz = sizes[0];
231 
232 	/*
233 	 * All bits set in sz means the device isn't working properly.
234 	 * If the BAR isn't implemented, all bits must be 0.  If it's a
235 	 * memory BAR or a ROM, bit 0 must be clear; if it's an io BAR, bit
236 	 * 1 must be clear.
237 	 */
238 	if (PCI_POSSIBLE_ERROR(sz))
239 		sz = 0;
240 
241 	/*
242 	 * I don't know how l can have all bits set.  Copied from old code.
243 	 * Maybe it fixes a bug on some ancient platform.
244 	 */
245 	if (PCI_POSSIBLE_ERROR(l))
246 		l = 0;
247 
248 	if (type == pci_bar_unknown) {
249 		res->flags = decode_bar(dev, l);
250 		res->flags |= IORESOURCE_SIZEALIGN;
251 		if (res->flags & IORESOURCE_IO) {
252 			l64 = l & PCI_BASE_ADDRESS_IO_MASK;
253 			sz64 = sz & PCI_BASE_ADDRESS_IO_MASK;
254 			mask64 = PCI_BASE_ADDRESS_IO_MASK & (u32)IO_SPACE_LIMIT;
255 		} else {
256 			l64 = l & PCI_BASE_ADDRESS_MEM_MASK;
257 			sz64 = sz & PCI_BASE_ADDRESS_MEM_MASK;
258 			mask64 = (u32)PCI_BASE_ADDRESS_MEM_MASK;
259 		}
260 	} else {
261 		if (l & PCI_ROM_ADDRESS_ENABLE)
262 			res->flags |= IORESOURCE_ROM_ENABLE;
263 		l64 = l & PCI_ROM_ADDRESS_MASK;
264 		sz64 = sz & PCI_ROM_ADDRESS_MASK;
265 		mask64 = PCI_ROM_ADDRESS_MASK;
266 	}
267 
268 	if (res->flags & IORESOURCE_MEM_64) {
269 		pci_read_config_dword(dev, pos + 4, &l);
270 		sz = sizes[1];
271 
272 		l64 |= ((u64)l << 32);
273 		sz64 |= ((u64)sz << 32);
274 		mask64 |= ((u64)~0 << 32);
275 	}
276 
277 	if (!sz64)
278 		goto fail;
279 
280 	sz64 = pci_size(l64, sz64, mask64);
281 	if (!sz64) {
282 		pci_info(dev, FW_BUG "%s: invalid; can't size\n", res_name);
283 		goto fail;
284 	}
285 
286 	if (res->flags & IORESOURCE_MEM_64) {
287 		if ((sizeof(pci_bus_addr_t) < 8 || sizeof(resource_size_t) < 8)
288 		    && sz64 > 0x100000000ULL) {
289 			res->flags |= IORESOURCE_UNSET | IORESOURCE_DISABLED;
290 			res->start = 0;
291 			res->end = 0;
292 			pci_err(dev, "%s: can't handle BAR larger than 4GB (size %#010llx)\n",
293 				res_name, (unsigned long long)sz64);
294 			goto out;
295 		}
296 
297 		if ((sizeof(pci_bus_addr_t) < 8) && l) {
298 			/* Above 32-bit boundary; try to reallocate */
299 			res->flags |= IORESOURCE_UNSET;
300 			res->start = 0;
301 			res->end = sz64 - 1;
302 			pci_info(dev, "%s: can't handle BAR above 4GB (bus address %#010llx)\n",
303 				 res_name, (unsigned long long)l64);
304 			goto out;
305 		}
306 	}
307 
308 	region.start = l64;
309 	region.end = l64 + sz64 - 1;
310 
311 	pcibios_bus_to_resource(dev->bus, res, &region);
312 	pcibios_resource_to_bus(dev->bus, &inverted_region, res);
313 
314 	/*
315 	 * If "A" is a BAR value (a bus address), "bus_to_resource(A)" is
316 	 * the corresponding resource address (the physical address used by
317 	 * the CPU.  Converting that resource address back to a bus address
318 	 * should yield the original BAR value:
319 	 *
320 	 *     resource_to_bus(bus_to_resource(A)) == A
321 	 *
322 	 * If it doesn't, CPU accesses to "bus_to_resource(A)" will not
323 	 * be claimed by the device.
324 	 */
325 	if (inverted_region.start != region.start) {
326 		res->flags |= IORESOURCE_UNSET;
327 		res->start = 0;
328 		res->end = region.end - region.start;
329 		pci_info(dev, "%s: initial BAR value %#010llx invalid\n",
330 			 res_name, (unsigned long long)region.start);
331 	}
332 
333 	goto out;
334 
335 
336 fail:
337 	res->flags = 0;
338 out:
339 	if (res->flags)
340 		pci_info(dev, "%s %pR\n", res_name, res);
341 
342 	return (res->flags & IORESOURCE_MEM_64) ? 1 : 0;
343 }
344 
345 static __always_inline void pci_read_bases(struct pci_dev *dev,
346 					   unsigned int howmany, int rom)
347 {
348 	u32 rombar, stdbars[PCI_STD_NUM_BARS];
349 	unsigned int pos, reg;
350 	u16 orig_cmd;
351 
352 	BUILD_BUG_ON(statically_true(howmany > PCI_STD_NUM_BARS));
353 
354 	if (dev->non_compliant_bars)
355 		return;
356 
357 	/* Per PCIe r4.0, sec 9.3.4.1.11, the VF BARs are all RO Zero */
358 	if (dev->is_virtfn)
359 		return;
360 
361 	/* No printks while decoding is disabled! */
362 	if (!dev->mmio_always_on) {
363 		pci_read_config_word(dev, PCI_COMMAND, &orig_cmd);
364 		if (orig_cmd & PCI_COMMAND_DECODE_ENABLE) {
365 			pci_write_config_word(dev, PCI_COMMAND,
366 				orig_cmd & ~PCI_COMMAND_DECODE_ENABLE);
367 		}
368 	}
369 
370 	__pci_size_stdbars(dev, howmany, PCI_BASE_ADDRESS_0, stdbars);
371 	if (rom)
372 		__pci_size_rom(dev, rom, &rombar);
373 
374 	if (!dev->mmio_always_on &&
375 	    (orig_cmd & PCI_COMMAND_DECODE_ENABLE))
376 		pci_write_config_word(dev, PCI_COMMAND, orig_cmd);
377 
378 	for (pos = 0; pos < howmany; pos++) {
379 		struct resource *res = &dev->resource[pos];
380 		reg = PCI_BASE_ADDRESS_0 + (pos << 2);
381 		pos += __pci_read_base(dev, pci_bar_unknown,
382 				       res, reg, &stdbars[pos]);
383 	}
384 
385 	if (rom) {
386 		struct resource *res = &dev->resource[PCI_ROM_RESOURCE];
387 		dev->rom_base_reg = rom;
388 		res->flags = IORESOURCE_MEM | IORESOURCE_PREFETCH |
389 				IORESOURCE_READONLY | IORESOURCE_SIZEALIGN;
390 		__pci_read_base(dev, pci_bar_mem32, res, rom, &rombar);
391 	}
392 }
393 
394 static void pci_read_bridge_io(struct pci_dev *dev, struct resource *res,
395 			       bool log)
396 {
397 	u8 io_base_lo, io_limit_lo;
398 	unsigned long io_mask, io_granularity, base, limit;
399 	struct pci_bus_region region;
400 
401 	io_mask = PCI_IO_RANGE_MASK;
402 	io_granularity = 0x1000;
403 	if (dev->io_window_1k) {
404 		/* Support 1K I/O space granularity */
405 		io_mask = PCI_IO_1K_RANGE_MASK;
406 		io_granularity = 0x400;
407 	}
408 
409 	pci_read_config_byte(dev, PCI_IO_BASE, &io_base_lo);
410 	pci_read_config_byte(dev, PCI_IO_LIMIT, &io_limit_lo);
411 	base = (io_base_lo & io_mask) << 8;
412 	limit = (io_limit_lo & io_mask) << 8;
413 
414 	if ((io_base_lo & PCI_IO_RANGE_TYPE_MASK) == PCI_IO_RANGE_TYPE_32) {
415 		u16 io_base_hi, io_limit_hi;
416 
417 		pci_read_config_word(dev, PCI_IO_BASE_UPPER16, &io_base_hi);
418 		pci_read_config_word(dev, PCI_IO_LIMIT_UPPER16, &io_limit_hi);
419 		base |= ((unsigned long) io_base_hi << 16);
420 		limit |= ((unsigned long) io_limit_hi << 16);
421 	}
422 
423 	res->flags = (io_base_lo & PCI_IO_RANGE_TYPE_MASK) | IORESOURCE_IO;
424 
425 	if (base <= limit) {
426 		region.start = base;
427 		region.end = limit + io_granularity - 1;
428 		pcibios_bus_to_resource(dev->bus, res, &region);
429 		if (log)
430 			pci_info(dev, "  bridge window %pR\n", res);
431 	} else {
432 		resource_set_range(res, 0, 0);
433 		res->flags |= IORESOURCE_UNSET | IORESOURCE_DISABLED;
434 	}
435 }
436 
437 static void pci_read_bridge_mmio(struct pci_dev *dev, struct resource *res,
438 				 bool log)
439 {
440 	u16 mem_base_lo, mem_limit_lo;
441 	unsigned long base, limit;
442 	struct pci_bus_region region;
443 
444 	pci_read_config_word(dev, PCI_MEMORY_BASE, &mem_base_lo);
445 	pci_read_config_word(dev, PCI_MEMORY_LIMIT, &mem_limit_lo);
446 	base = ((unsigned long) mem_base_lo & PCI_MEMORY_RANGE_MASK) << 16;
447 	limit = ((unsigned long) mem_limit_lo & PCI_MEMORY_RANGE_MASK) << 16;
448 
449 	res->flags = (mem_base_lo & PCI_MEMORY_RANGE_TYPE_MASK) | IORESOURCE_MEM;
450 
451 	if (base <= limit) {
452 		region.start = base;
453 		region.end = limit + 0xfffff;
454 		pcibios_bus_to_resource(dev->bus, res, &region);
455 		if (log)
456 			pci_info(dev, "  bridge window %pR\n", res);
457 	} else {
458 		resource_set_range(res, 0, 0);
459 		res->flags |= IORESOURCE_UNSET | IORESOURCE_DISABLED;
460 	}
461 }
462 
463 static void pci_read_bridge_mmio_pref(struct pci_dev *dev, struct resource *res,
464 				      bool log)
465 {
466 	u16 mem_base_lo, mem_limit_lo;
467 	u64 base64, limit64;
468 	pci_bus_addr_t base, limit;
469 	struct pci_bus_region region;
470 
471 	pci_read_config_word(dev, PCI_PREF_MEMORY_BASE, &mem_base_lo);
472 	pci_read_config_word(dev, PCI_PREF_MEMORY_LIMIT, &mem_limit_lo);
473 	base64 = (mem_base_lo & PCI_PREF_RANGE_MASK) << 16;
474 	limit64 = (mem_limit_lo & PCI_PREF_RANGE_MASK) << 16;
475 
476 	if ((mem_base_lo & PCI_PREF_RANGE_TYPE_MASK) == PCI_PREF_RANGE_TYPE_64) {
477 		u32 mem_base_hi, mem_limit_hi;
478 
479 		pci_read_config_dword(dev, PCI_PREF_BASE_UPPER32, &mem_base_hi);
480 		pci_read_config_dword(dev, PCI_PREF_LIMIT_UPPER32, &mem_limit_hi);
481 
482 		/*
483 		 * Some bridges set the base > limit by default, and some
484 		 * (broken) BIOSes do not initialize them.  If we find
485 		 * this, just assume they are not being used.
486 		 */
487 		if (mem_base_hi <= mem_limit_hi) {
488 			base64 |= (u64) mem_base_hi << 32;
489 			limit64 |= (u64) mem_limit_hi << 32;
490 		}
491 	}
492 
493 	base = (pci_bus_addr_t) base64;
494 	limit = (pci_bus_addr_t) limit64;
495 
496 	if (base != base64) {
497 		pci_err(dev, "can't handle bridge window above 4GB (bus address %#010llx)\n",
498 			(unsigned long long) base64);
499 		return;
500 	}
501 
502 	res->flags = (mem_base_lo & PCI_PREF_RANGE_TYPE_MASK) | IORESOURCE_MEM |
503 		     IORESOURCE_PREFETCH;
504 	if (res->flags & PCI_PREF_RANGE_TYPE_64)
505 		res->flags |= IORESOURCE_MEM_64;
506 
507 	if (base <= limit) {
508 		region.start = base;
509 		region.end = limit + 0xfffff;
510 		pcibios_bus_to_resource(dev->bus, res, &region);
511 		if (log)
512 			pci_info(dev, "  bridge window %pR\n", res);
513 	} else {
514 		resource_set_range(res, 0, 0);
515 		res->flags |= IORESOURCE_UNSET | IORESOURCE_DISABLED;
516 	}
517 }
518 
519 static void pci_read_bridge_windows(struct pci_dev *bridge)
520 {
521 	u32 buses;
522 	u16 io;
523 	u32 pmem, tmp;
524 	struct resource res;
525 
526 	pci_read_config_dword(bridge, PCI_PRIMARY_BUS, &buses);
527 	res.flags = IORESOURCE_BUS;
528 	res.start = (buses >> 8) & 0xff;
529 	res.end = (buses >> 16) & 0xff;
530 	pci_info(bridge, "PCI bridge to %pR%s\n", &res,
531 		 bridge->transparent ? " (subtractive decode)" : "");
532 
533 	pci_read_config_word(bridge, PCI_IO_BASE, &io);
534 	if (!io) {
535 		pci_write_config_word(bridge, PCI_IO_BASE, 0xe0f0);
536 		pci_read_config_word(bridge, PCI_IO_BASE, &io);
537 		pci_write_config_word(bridge, PCI_IO_BASE, 0x0);
538 	}
539 	if (io) {
540 		bridge->io_window = 1;
541 		pci_read_bridge_io(bridge, &res, true);
542 	}
543 
544 	pci_read_bridge_mmio(bridge, &res, true);
545 
546 	/*
547 	 * DECchip 21050 pass 2 errata: the bridge may miss an address
548 	 * disconnect boundary by one PCI data phase.  Workaround: do not
549 	 * use prefetching on this device.
550 	 */
551 	if (bridge->vendor == PCI_VENDOR_ID_DEC && bridge->device == 0x0001)
552 		return;
553 
554 	pci_read_config_dword(bridge, PCI_PREF_MEMORY_BASE, &pmem);
555 	if (!pmem) {
556 		pci_write_config_dword(bridge, PCI_PREF_MEMORY_BASE,
557 					       0xffe0fff0);
558 		pci_read_config_dword(bridge, PCI_PREF_MEMORY_BASE, &pmem);
559 		pci_write_config_dword(bridge, PCI_PREF_MEMORY_BASE, 0x0);
560 	}
561 	if (!pmem)
562 		return;
563 
564 	bridge->pref_window = 1;
565 
566 	if ((pmem & PCI_PREF_RANGE_TYPE_MASK) == PCI_PREF_RANGE_TYPE_64) {
567 
568 		/*
569 		 * Bridge claims to have a 64-bit prefetchable memory
570 		 * window; verify that the upper bits are actually
571 		 * writable.
572 		 */
573 		pci_read_config_dword(bridge, PCI_PREF_BASE_UPPER32, &pmem);
574 		pci_write_config_dword(bridge, PCI_PREF_BASE_UPPER32,
575 				       0xffffffff);
576 		pci_read_config_dword(bridge, PCI_PREF_BASE_UPPER32, &tmp);
577 		pci_write_config_dword(bridge, PCI_PREF_BASE_UPPER32, pmem);
578 		if (tmp)
579 			bridge->pref_64_window = 1;
580 	}
581 
582 	pci_read_bridge_mmio_pref(bridge, &res, true);
583 }
584 
585 void pci_read_bridge_bases(struct pci_bus *child)
586 {
587 	struct pci_dev *dev = child->self;
588 	struct resource *res;
589 	int i;
590 
591 	if (pci_is_root_bus(child))	/* It's a host bus, nothing to read */
592 		return;
593 
594 	pci_info(dev, "PCI bridge to %pR%s\n",
595 		 &child->busn_res,
596 		 dev->transparent ? " (subtractive decode)" : "");
597 
598 	pci_bus_remove_resources(child);
599 	for (i = 0; i < PCI_BRIDGE_RESOURCE_NUM; i++)
600 		child->resource[i] = &dev->resource[PCI_BRIDGE_RESOURCES+i];
601 
602 	pci_read_bridge_io(child->self,
603 			   child->resource[PCI_BUS_BRIDGE_IO_WINDOW], false);
604 	pci_read_bridge_mmio(child->self,
605 			     child->resource[PCI_BUS_BRIDGE_MEM_WINDOW], false);
606 	pci_read_bridge_mmio_pref(child->self,
607 				  child->resource[PCI_BUS_BRIDGE_PREF_MEM_WINDOW],
608 				  false);
609 
610 	if (!dev->transparent)
611 		return;
612 
613 	pci_bus_for_each_resource(child->parent, res) {
614 		if (!res || !res->flags)
615 			continue;
616 
617 		pci_bus_add_resource(child, res);
618 		pci_info(dev, "  bridge window %pR (subtractive decode)\n", res);
619 	}
620 }
621 
622 static struct pci_bus *pci_alloc_bus(struct pci_bus *parent)
623 {
624 	struct pci_bus *b;
625 
626 	b = kzalloc(sizeof(*b), GFP_KERNEL);
627 	if (!b)
628 		return NULL;
629 
630 	INIT_LIST_HEAD(&b->node);
631 	INIT_LIST_HEAD(&b->children);
632 	INIT_LIST_HEAD(&b->devices);
633 	INIT_LIST_HEAD(&b->slots);
634 	INIT_LIST_HEAD(&b->resources);
635 	b->max_bus_speed = PCI_SPEED_UNKNOWN;
636 	b->cur_bus_speed = PCI_SPEED_UNKNOWN;
637 #ifdef CONFIG_PCI_DOMAINS_GENERIC
638 	if (parent)
639 		b->domain_nr = parent->domain_nr;
640 #endif
641 	return b;
642 }
643 
644 static void pci_release_host_bridge_dev(struct device *dev)
645 {
646 	struct pci_host_bridge *bridge = to_pci_host_bridge(dev);
647 
648 	if (bridge->release_fn)
649 		bridge->release_fn(bridge);
650 
651 	pci_free_resource_list(&bridge->windows);
652 	pci_free_resource_list(&bridge->dma_ranges);
653 
654 	/* Host bridges only have domain_nr set in the emulation case */
655 	if (bridge->domain_nr != PCI_DOMAIN_NR_NOT_SET)
656 		pci_bus_release_emul_domain_nr(bridge->domain_nr);
657 
658 	kfree(bridge);
659 }
660 
661 static const struct attribute_group *pci_host_bridge_groups[] = {
662 #ifdef CONFIG_PCI_IDE
663 	&pci_ide_attr_group,
664 #endif
665 	NULL
666 };
667 
668 static const struct device_type pci_host_bridge_type = {
669 	.groups = pci_host_bridge_groups,
670 	.release = pci_release_host_bridge_dev,
671 };
672 
673 static void pci_init_host_bridge(struct pci_host_bridge *bridge)
674 {
675 	INIT_LIST_HEAD(&bridge->windows);
676 	INIT_LIST_HEAD(&bridge->dma_ranges);
677 
678 	/*
679 	 * We assume we can manage these PCIe features.  Some systems may
680 	 * reserve these for use by the platform itself, e.g., an ACPI BIOS
681 	 * may implement its own AER handling and use _OSC to prevent the
682 	 * OS from interfering.
683 	 */
684 	bridge->native_aer = 1;
685 	bridge->native_pcie_hotplug = 1;
686 	bridge->native_shpc_hotplug = 1;
687 	bridge->native_pme = 1;
688 	bridge->native_ltr = 1;
689 	bridge->native_dpc = 1;
690 	bridge->domain_nr = PCI_DOMAIN_NR_NOT_SET;
691 	bridge->native_cxl_error = 1;
692 	bridge->dev.type = &pci_host_bridge_type;
693 	pci_ide_init_host_bridge(bridge);
694 
695 	device_initialize(&bridge->dev);
696 }
697 
698 struct pci_host_bridge *pci_alloc_host_bridge(size_t priv)
699 {
700 	struct pci_host_bridge *bridge;
701 
702 	bridge = kzalloc(sizeof(*bridge) + priv, GFP_KERNEL);
703 	if (!bridge)
704 		return NULL;
705 
706 	pci_init_host_bridge(bridge);
707 
708 	return bridge;
709 }
710 EXPORT_SYMBOL(pci_alloc_host_bridge);
711 
/* devm action: hand the host bridge passed as @data to pci_free_host_bridge() */
static void devm_pci_alloc_host_bridge_release(void *data)
{
	struct pci_host_bridge *hb = data;

	pci_free_host_bridge(hb);
}
716 
717 struct pci_host_bridge *devm_pci_alloc_host_bridge(struct device *dev,
718 						   size_t priv)
719 {
720 	int ret;
721 	struct pci_host_bridge *bridge;
722 
723 	bridge = pci_alloc_host_bridge(priv);
724 	if (!bridge)
725 		return NULL;
726 
727 	bridge->dev.parent = dev;
728 
729 	ret = devm_add_action_or_reset(dev, devm_pci_alloc_host_bridge_release,
730 				       bridge);
731 	if (ret)
732 		return NULL;
733 
734 	ret = devm_of_pci_bridge_init(dev, bridge);
735 	if (ret)
736 		return NULL;
737 
738 	return bridge;
739 }
740 EXPORT_SYMBOL(devm_pci_alloc_host_bridge);
741 
742 void pci_free_host_bridge(struct pci_host_bridge *bridge)
743 {
744 	put_device(&bridge->dev);
745 }
746 EXPORT_SYMBOL(pci_free_host_bridge);
747 
748 /* Indexed by PCI_X_SSTATUS_FREQ (secondary bus mode and frequency) */
749 static const unsigned char pcix_bus_speed[] = {
750 	PCI_SPEED_UNKNOWN,		/* 0 */
751 	PCI_SPEED_66MHz_PCIX,		/* 1 */
752 	PCI_SPEED_100MHz_PCIX,		/* 2 */
753 	PCI_SPEED_133MHz_PCIX,		/* 3 */
754 	PCI_SPEED_UNKNOWN,		/* 4 */
755 	PCI_SPEED_66MHz_PCIX_ECC,	/* 5 */
756 	PCI_SPEED_100MHz_PCIX_ECC,	/* 6 */
757 	PCI_SPEED_133MHz_PCIX_ECC,	/* 7 */
758 	PCI_SPEED_UNKNOWN,		/* 8 */
759 	PCI_SPEED_66MHz_PCIX_266,	/* 9 */
760 	PCI_SPEED_100MHz_PCIX_266,	/* A */
761 	PCI_SPEED_133MHz_PCIX_266,	/* B */
762 	PCI_SPEED_UNKNOWN,		/* C */
763 	PCI_SPEED_66MHz_PCIX_533,	/* D */
764 	PCI_SPEED_100MHz_PCIX_533,	/* E */
765 	PCI_SPEED_133MHz_PCIX_533	/* F */
766 };
767 
768 /* Indexed by PCI_EXP_LNKCAP_SLS, PCI_EXP_LNKSTA_CLS */
769 const unsigned char pcie_link_speed[] = {
770 	PCI_SPEED_UNKNOWN,		/* 0 */
771 	PCIE_SPEED_2_5GT,		/* 1 */
772 	PCIE_SPEED_5_0GT,		/* 2 */
773 	PCIE_SPEED_8_0GT,		/* 3 */
774 	PCIE_SPEED_16_0GT,		/* 4 */
775 	PCIE_SPEED_32_0GT,		/* 5 */
776 	PCIE_SPEED_64_0GT,		/* 6 */
777 	PCI_SPEED_UNKNOWN,		/* 7 */
778 	PCI_SPEED_UNKNOWN,		/* 8 */
779 	PCI_SPEED_UNKNOWN,		/* 9 */
780 	PCI_SPEED_UNKNOWN,		/* A */
781 	PCI_SPEED_UNKNOWN,		/* B */
782 	PCI_SPEED_UNKNOWN,		/* C */
783 	PCI_SPEED_UNKNOWN,		/* D */
784 	PCI_SPEED_UNKNOWN,		/* E */
785 	PCI_SPEED_UNKNOWN		/* F */
786 };
787 EXPORT_SYMBOL_GPL(pcie_link_speed);
788 
789 const char *pci_speed_string(enum pci_bus_speed speed)
790 {
791 	/* Indexed by the pci_bus_speed enum */
792 	static const char *speed_strings[] = {
793 	    "33 MHz PCI",		/* 0x00 */
794 	    "66 MHz PCI",		/* 0x01 */
795 	    "66 MHz PCI-X",		/* 0x02 */
796 	    "100 MHz PCI-X",		/* 0x03 */
797 	    "133 MHz PCI-X",		/* 0x04 */
798 	    NULL,			/* 0x05 */
799 	    NULL,			/* 0x06 */
800 	    NULL,			/* 0x07 */
801 	    NULL,			/* 0x08 */
802 	    "66 MHz PCI-X 266",		/* 0x09 */
803 	    "100 MHz PCI-X 266",	/* 0x0a */
804 	    "133 MHz PCI-X 266",	/* 0x0b */
805 	    "Unknown AGP",		/* 0x0c */
806 	    "1x AGP",			/* 0x0d */
807 	    "2x AGP",			/* 0x0e */
808 	    "4x AGP",			/* 0x0f */
809 	    "8x AGP",			/* 0x10 */
810 	    "66 MHz PCI-X 533",		/* 0x11 */
811 	    "100 MHz PCI-X 533",	/* 0x12 */
812 	    "133 MHz PCI-X 533",	/* 0x13 */
813 	    "2.5 GT/s PCIe",		/* 0x14 */
814 	    "5.0 GT/s PCIe",		/* 0x15 */
815 	    "8.0 GT/s PCIe",		/* 0x16 */
816 	    "16.0 GT/s PCIe",		/* 0x17 */
817 	    "32.0 GT/s PCIe",		/* 0x18 */
818 	    "64.0 GT/s PCIe",		/* 0x19 */
819 	};
820 
821 	if (speed < ARRAY_SIZE(speed_strings))
822 		return speed_strings[speed];
823 	return "Unknown";
824 }
825 EXPORT_SYMBOL_GPL(pci_speed_string);
826 
827 void pcie_update_link_speed(struct pci_bus *bus)
828 {
829 	struct pci_dev *bridge = bus->self;
830 	u16 linksta, linksta2;
831 
832 	pcie_capability_read_word(bridge, PCI_EXP_LNKSTA, &linksta);
833 	pcie_capability_read_word(bridge, PCI_EXP_LNKSTA2, &linksta2);
834 	__pcie_update_link_speed(bus, linksta, linksta2);
835 }
836 EXPORT_SYMBOL_GPL(pcie_update_link_speed);
837 
838 static unsigned char agp_speeds[] = {
839 	AGP_UNKNOWN,
840 	AGP_1X,
841 	AGP_2X,
842 	AGP_4X,
843 	AGP_8X
844 };
845 
846 static enum pci_bus_speed agp_speed(int agp3, int agpstat)
847 {
848 	int index = 0;
849 
850 	if (agpstat & 4)
851 		index = 3;
852 	else if (agpstat & 2)
853 		index = 2;
854 	else if (agpstat & 1)
855 		index = 1;
856 	else
857 		goto out;
858 
859 	if (agp3) {
860 		index += 2;
861 		if (index == 5)
862 			index = 0;
863 	}
864 
865  out:
866 	return agp_speeds[index];
867 }
868 
869 static void pci_set_bus_speed(struct pci_bus *bus)
870 {
871 	struct pci_dev *bridge = bus->self;
872 	int pos;
873 
874 	pos = pci_find_capability(bridge, PCI_CAP_ID_AGP);
875 	if (!pos)
876 		pos = pci_find_capability(bridge, PCI_CAP_ID_AGP3);
877 	if (pos) {
878 		u32 agpstat, agpcmd;
879 
880 		pci_read_config_dword(bridge, pos + PCI_AGP_STATUS, &agpstat);
881 		bus->max_bus_speed = agp_speed(agpstat & 8, agpstat & 7);
882 
883 		pci_read_config_dword(bridge, pos + PCI_AGP_COMMAND, &agpcmd);
884 		bus->cur_bus_speed = agp_speed(agpstat & 8, agpcmd & 7);
885 	}
886 
887 	pos = pci_find_capability(bridge, PCI_CAP_ID_PCIX);
888 	if (pos) {
889 		u16 status;
890 		enum pci_bus_speed max;
891 
892 		pci_read_config_word(bridge, pos + PCI_X_BRIDGE_SSTATUS,
893 				     &status);
894 
895 		if (status & PCI_X_SSTATUS_533MHZ) {
896 			max = PCI_SPEED_133MHz_PCIX_533;
897 		} else if (status & PCI_X_SSTATUS_266MHZ) {
898 			max = PCI_SPEED_133MHz_PCIX_266;
899 		} else if (status & PCI_X_SSTATUS_133MHZ) {
900 			if ((status & PCI_X_SSTATUS_VERS) == PCI_X_SSTATUS_V2)
901 				max = PCI_SPEED_133MHz_PCIX_ECC;
902 			else
903 				max = PCI_SPEED_133MHz_PCIX;
904 		} else {
905 			max = PCI_SPEED_66MHz_PCIX;
906 		}
907 
908 		bus->max_bus_speed = max;
909 		bus->cur_bus_speed =
910 			pcix_bus_speed[FIELD_GET(PCI_X_SSTATUS_FREQ, status)];
911 
912 		return;
913 	}
914 
915 	if (pci_is_pcie(bridge)) {
916 		u32 linkcap;
917 
918 		pcie_capability_read_dword(bridge, PCI_EXP_LNKCAP, &linkcap);
919 		bus->max_bus_speed = pcie_link_speed[linkcap & PCI_EXP_LNKCAP_SLS];
920 
921 		pcie_update_link_speed(bus);
922 	}
923 }
924 
925 static struct irq_domain *pci_host_bridge_msi_domain(struct pci_bus *bus)
926 {
927 	struct irq_domain *d;
928 
929 	/* If the host bridge driver sets a MSI domain of the bridge, use it */
930 	d = dev_get_msi_domain(bus->bridge);
931 
932 	/*
933 	 * Any firmware interface that can resolve the msi_domain
934 	 * should be called from here.
935 	 */
936 	if (!d)
937 		d = pci_host_bridge_of_msi_domain(bus);
938 	if (!d)
939 		d = pci_host_bridge_acpi_msi_domain(bus);
940 
941 	/*
942 	 * If no IRQ domain was found via the OF tree, try looking it up
943 	 * directly through the fwnode_handle.
944 	 */
945 	if (!d) {
946 		struct fwnode_handle *fwnode = pci_root_bus_fwnode(bus);
947 
948 		if (fwnode)
949 			d = irq_find_matching_fwnode(fwnode,
950 						     DOMAIN_BUS_PCI_MSI);
951 	}
952 
953 	return d;
954 }
955 
956 static void pci_set_bus_msi_domain(struct pci_bus *bus)
957 {
958 	struct irq_domain *d;
959 	struct pci_bus *b;
960 
961 	/*
962 	 * The bus can be a root bus, a subordinate bus, or a virtual bus
963 	 * created by an SR-IOV device.  Walk up to the first bridge device
964 	 * found or derive the domain from the host bridge.
965 	 */
966 	for (b = bus, d = NULL; !d && !pci_is_root_bus(b); b = b->parent) {
967 		if (b->self)
968 			d = dev_get_msi_domain(&b->self->dev);
969 	}
970 
971 	if (!d)
972 		d = pci_host_bridge_msi_domain(b);
973 
974 	dev_set_msi_domain(&bus->dev, d);
975 }
976 
977 static bool pci_preserve_config(struct pci_host_bridge *host_bridge)
978 {
979 	if (pci_acpi_preserve_config(host_bridge))
980 		return true;
981 
982 	if (host_bridge->dev.parent && host_bridge->dev.parent->of_node)
983 		return of_pci_preserve_config(host_bridge->dev.parent->of_node);
984 
985 	return false;
986 }
987 
988 static int pci_register_host_bridge(struct pci_host_bridge *bridge)
989 {
990 	struct device *parent = bridge->dev.parent;
991 	struct resource_entry *window, *next, *n;
992 	struct pci_bus *bus, *b;
993 	resource_size_t offset, next_offset;
994 	LIST_HEAD(resources);
995 	struct resource *res, *next_res;
996 	bool bus_registered = false;
997 	char addr[64], *fmt;
998 	const char *name;
999 	int err;
1000 
1001 	bus = pci_alloc_bus(NULL);
1002 	if (!bus)
1003 		return -ENOMEM;
1004 
1005 	bridge->bus = bus;
1006 
1007 	bus->sysdata = bridge->sysdata;
1008 	bus->ops = bridge->ops;
1009 	bus->number = bus->busn_res.start = bridge->busnr;
1010 #ifdef CONFIG_PCI_DOMAINS_GENERIC
1011 	if (bridge->domain_nr == PCI_DOMAIN_NR_NOT_SET)
1012 		bus->domain_nr = pci_bus_find_domain_nr(bus, parent);
1013 	else
1014 		bus->domain_nr = bridge->domain_nr;
1015 	if (bus->domain_nr < 0) {
1016 		err = bus->domain_nr;
1017 		goto free;
1018 	}
1019 #endif
1020 
1021 	b = pci_find_bus(pci_domain_nr(bus), bridge->busnr);
1022 	if (b) {
1023 		/* Ignore it if we already got here via a different bridge */
1024 		dev_dbg(&b->dev, "bus already known\n");
1025 		err = -EEXIST;
1026 		goto free;
1027 	}
1028 
1029 	dev_set_name(&bridge->dev, "pci%04x:%02x", pci_domain_nr(bus),
1030 		     bridge->busnr);
1031 
1032 	err = pcibios_root_bridge_prepare(bridge);
1033 	if (err)
1034 		goto free;
1035 
1036 	/* Temporarily move resources off the list */
1037 	list_splice_init(&bridge->windows, &resources);
1038 	err = device_add(&bridge->dev);
1039 	if (err)
1040 		goto free;
1041 
1042 	bus->bridge = get_device(&bridge->dev);
1043 	device_enable_async_suspend(bus->bridge);
1044 	pci_set_bus_of_node(bus);
1045 	pci_set_bus_msi_domain(bus);
1046 	if (bridge->msi_domain && !dev_get_msi_domain(&bus->dev) &&
1047 	    !pci_host_of_has_msi_map(parent))
1048 		bus->bus_flags |= PCI_BUS_FLAGS_NO_MSI;
1049 
1050 	if (!parent)
1051 		set_dev_node(bus->bridge, pcibus_to_node(bus));
1052 
1053 	bus->dev.class = &pcibus_class;
1054 	bus->dev.parent = bus->bridge;
1055 
1056 	dev_set_name(&bus->dev, "%04x:%02x", pci_domain_nr(bus), bus->number);
1057 	name = dev_name(&bus->dev);
1058 
1059 	err = device_register(&bus->dev);
1060 	bus_registered = true;
1061 	if (err)
1062 		goto unregister;
1063 
1064 	pcibios_add_bus(bus);
1065 
1066 	if (bus->ops->add_bus) {
1067 		err = bus->ops->add_bus(bus);
1068 		if (WARN_ON(err < 0))
1069 			dev_err(&bus->dev, "failed to add bus: %d\n", err);
1070 	}
1071 
1072 	/* Create legacy_io and legacy_mem files for this bus */
1073 	pci_create_legacy_files(bus);
1074 
1075 	if (parent)
1076 		dev_info(parent, "PCI host bridge to bus %s\n", name);
1077 	else
1078 		pr_info("PCI host bridge to bus %s\n", name);
1079 
1080 	if (nr_node_ids > 1 && pcibus_to_node(bus) == NUMA_NO_NODE)
1081 		dev_warn(&bus->dev, "Unknown NUMA node; performance will be reduced\n");
1082 
1083 	/* Check if the boot configuration by FW needs to be preserved */
1084 	bridge->preserve_config = pci_preserve_config(bridge);
1085 
1086 	/* Coalesce contiguous windows */
1087 	resource_list_for_each_entry_safe(window, n, &resources) {
1088 		if (list_is_last(&window->node, &resources))
1089 			break;
1090 
1091 		next = list_next_entry(window, node);
1092 		offset = window->offset;
1093 		res = window->res;
1094 		next_offset = next->offset;
1095 		next_res = next->res;
1096 
1097 		if (res->flags != next_res->flags || offset != next_offset)
1098 			continue;
1099 
1100 		if (res->end + 1 == next_res->start) {
1101 			next_res->start = res->start;
1102 			res->flags = res->start = res->end = 0;
1103 		}
1104 	}
1105 
1106 	/* Add initial resources to the bus */
1107 	resource_list_for_each_entry_safe(window, n, &resources) {
1108 		offset = window->offset;
1109 		res = window->res;
1110 		if (!res->flags && !res->start && !res->end) {
1111 			release_resource(res);
1112 			resource_list_destroy_entry(window);
1113 			continue;
1114 		}
1115 
1116 		list_move_tail(&window->node, &bridge->windows);
1117 
1118 		if (res->flags & IORESOURCE_BUS)
1119 			pci_bus_insert_busn_res(bus, bus->number, res->end);
1120 		else
1121 			pci_bus_add_resource(bus, res);
1122 
1123 		if (offset) {
1124 			if (resource_type(res) == IORESOURCE_IO)
1125 				fmt = " (bus address [%#06llx-%#06llx])";
1126 			else
1127 				fmt = " (bus address [%#010llx-%#010llx])";
1128 
1129 			snprintf(addr, sizeof(addr), fmt,
1130 				 (unsigned long long)(res->start - offset),
1131 				 (unsigned long long)(res->end - offset));
1132 		} else
1133 			addr[0] = '\0';
1134 
1135 		dev_info(&bus->dev, "root bus resource %pR%s\n", res, addr);
1136 	}
1137 
1138 	of_pci_make_host_bridge_node(bridge);
1139 
1140 	down_write(&pci_bus_sem);
1141 	list_add_tail(&bus->node, &pci_root_buses);
1142 	up_write(&pci_bus_sem);
1143 
1144 	return 0;
1145 
1146 unregister:
1147 	put_device(&bridge->dev);
1148 	device_del(&bridge->dev);
1149 free:
1150 #ifdef CONFIG_PCI_DOMAINS_GENERIC
1151 	if (bridge->domain_nr == PCI_DOMAIN_NR_NOT_SET)
1152 		pci_bus_release_domain_nr(parent, bus->domain_nr);
1153 #endif
1154 	if (bus_registered)
1155 		put_device(&bus->dev);
1156 	else
1157 		kfree(bus);
1158 
1159 	return err;
1160 }
1161 
1162 static bool pci_bridge_child_ext_cfg_accessible(struct pci_dev *bridge)
1163 {
1164 	int pos;
1165 	u32 status;
1166 
1167 	/*
1168 	 * If extended config space isn't accessible on a bridge's primary
1169 	 * bus, we certainly can't access it on the secondary bus.
1170 	 */
1171 	if (bridge->bus->bus_flags & PCI_BUS_FLAGS_NO_EXTCFG)
1172 		return false;
1173 
1174 	/*
1175 	 * PCIe Root Ports and switch ports are PCIe on both sides, so if
1176 	 * extended config space is accessible on the primary, it's also
1177 	 * accessible on the secondary.
1178 	 */
1179 	if (pci_is_pcie(bridge) &&
1180 	    (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT ||
1181 	     pci_pcie_type(bridge) == PCI_EXP_TYPE_UPSTREAM ||
1182 	     pci_pcie_type(bridge) == PCI_EXP_TYPE_DOWNSTREAM))
1183 		return true;
1184 
1185 	/*
1186 	 * For the other bridge types:
1187 	 *   - PCI-to-PCI bridges
1188 	 *   - PCIe-to-PCI/PCI-X forward bridges
1189 	 *   - PCI/PCI-X-to-PCIe reverse bridges
1190 	 * extended config space on the secondary side is only accessible
1191 	 * if the bridge supports PCI-X Mode 2.
1192 	 */
1193 	pos = pci_find_capability(bridge, PCI_CAP_ID_PCIX);
1194 	if (!pos)
1195 		return false;
1196 
1197 	pci_read_config_dword(bridge, pos + PCI_X_STATUS, &status);
1198 	return status & (PCI_X_STATUS_266MHZ | PCI_X_STATUS_533MHZ);
1199 }
1200 
1201 static struct pci_bus *pci_alloc_child_bus(struct pci_bus *parent,
1202 					   struct pci_dev *bridge, int busnr)
1203 {
1204 	struct pci_bus *child;
1205 	struct pci_host_bridge *host;
1206 	int i;
1207 	int ret;
1208 
1209 	/* Allocate a new bus and inherit stuff from the parent */
1210 	child = pci_alloc_bus(parent);
1211 	if (!child)
1212 		return NULL;
1213 
1214 	child->parent = parent;
1215 	child->sysdata = parent->sysdata;
1216 	child->bus_flags = parent->bus_flags;
1217 
1218 	host = pci_find_host_bridge(parent);
1219 	if (host->child_ops)
1220 		child->ops = host->child_ops;
1221 	else
1222 		child->ops = parent->ops;
1223 
1224 	/*
1225 	 * Initialize some portions of the bus device, but don't register
1226 	 * it now as the parent is not properly set up yet.
1227 	 */
1228 	child->dev.class = &pcibus_class;
1229 	dev_set_name(&child->dev, "%04x:%02x", pci_domain_nr(child), busnr);
1230 
1231 	/* Set up the primary, secondary and subordinate bus numbers */
1232 	child->number = child->busn_res.start = busnr;
1233 	child->primary = parent->busn_res.start;
1234 	child->busn_res.end = 0xff;
1235 
1236 	if (!bridge) {
1237 		child->dev.parent = parent->bridge;
1238 		goto add_dev;
1239 	}
1240 
1241 	child->self = bridge;
1242 	child->bridge = get_device(&bridge->dev);
1243 	child->dev.parent = child->bridge;
1244 	pci_set_bus_of_node(child);
1245 	pci_set_bus_speed(child);
1246 
1247 	/*
1248 	 * Check whether extended config space is accessible on the child
1249 	 * bus.  Note that we currently assume it is always accessible on
1250 	 * the root bus.
1251 	 */
1252 	if (!pci_bridge_child_ext_cfg_accessible(bridge)) {
1253 		child->bus_flags |= PCI_BUS_FLAGS_NO_EXTCFG;
1254 		pci_info(child, "extended config space not accessible\n");
1255 	}
1256 
1257 	/* Set up default resource pointers and names */
1258 	for (i = 0; i < PCI_BRIDGE_RESOURCE_NUM; i++) {
1259 		child->resource[i] = &bridge->resource[PCI_BRIDGE_RESOURCES+i];
1260 		child->resource[i]->name = child->name;
1261 	}
1262 	bridge->subordinate = child;
1263 
1264 add_dev:
1265 	pci_set_bus_msi_domain(child);
1266 	ret = device_register(&child->dev);
1267 	if (WARN_ON(ret < 0)) {
1268 		put_device(&child->dev);
1269 		return NULL;
1270 	}
1271 
1272 	pcibios_add_bus(child);
1273 
1274 	if (child->ops->add_bus) {
1275 		ret = child->ops->add_bus(child);
1276 		if (WARN_ON(ret < 0))
1277 			dev_err(&child->dev, "failed to add bus: %d\n", ret);
1278 	}
1279 
1280 	/* Create legacy_io and legacy_mem files for this bus */
1281 	pci_create_legacy_files(child);
1282 
1283 	return child;
1284 }
1285 
1286 struct pci_bus *pci_add_new_bus(struct pci_bus *parent, struct pci_dev *dev,
1287 				int busnr)
1288 {
1289 	struct pci_bus *child;
1290 
1291 	child = pci_alloc_child_bus(parent, dev, busnr);
1292 	if (child) {
1293 		down_write(&pci_bus_sem);
1294 		list_add_tail(&child->node, &parent->children);
1295 		up_write(&pci_bus_sem);
1296 	}
1297 	return child;
1298 }
1299 EXPORT_SYMBOL(pci_add_new_bus);
1300 
1301 static void pci_enable_rrs_sv(struct pci_dev *pdev)
1302 {
1303 	u16 root_cap = 0;
1304 
1305 	/* Enable Configuration RRS Software Visibility if supported */
1306 	pcie_capability_read_word(pdev, PCI_EXP_RTCAP, &root_cap);
1307 	if (root_cap & PCI_EXP_RTCAP_RRS_SV) {
1308 		pcie_capability_set_word(pdev, PCI_EXP_RTCTL,
1309 					 PCI_EXP_RTCTL_RRS_SVE);
1310 		pdev->config_rrs_sv = 1;
1311 	}
1312 }
1313 
1314 static unsigned int pci_scan_child_bus_extend(struct pci_bus *bus,
1315 					      unsigned int available_buses);
1316 /**
1317  * pci_ea_fixed_busnrs() - Read fixed Secondary and Subordinate bus
1318  * numbers from EA capability.
1319  * @dev: Bridge
1320  * @sec: updated with secondary bus number from EA
1321  * @sub: updated with subordinate bus number from EA
1322  *
1323  * If @dev is a bridge with EA capability that specifies valid secondary
1324  * and subordinate bus numbers, return true with the bus numbers in @sec
1325  * and @sub.  Otherwise return false.
1326  */
1327 static bool pci_ea_fixed_busnrs(struct pci_dev *dev, u8 *sec, u8 *sub)
1328 {
1329 	int ea, offset;
1330 	u32 dw;
1331 	u8 ea_sec, ea_sub;
1332 
1333 	if (dev->hdr_type != PCI_HEADER_TYPE_BRIDGE)
1334 		return false;
1335 
1336 	/* find PCI EA capability in list */
1337 	ea = pci_find_capability(dev, PCI_CAP_ID_EA);
1338 	if (!ea)
1339 		return false;
1340 
1341 	offset = ea + PCI_EA_FIRST_ENT;
1342 	pci_read_config_dword(dev, offset, &dw);
1343 	ea_sec = FIELD_GET(PCI_EA_SEC_BUS_MASK, dw);
1344 	ea_sub = FIELD_GET(PCI_EA_SUB_BUS_MASK, dw);
1345 	if (ea_sec  == 0 || ea_sub < ea_sec)
1346 		return false;
1347 
1348 	*sec = ea_sec;
1349 	*sub = ea_sub;
1350 	return true;
1351 }
1352 
1353 /*
1354  * pci_scan_bridge_extend() - Scan buses behind a bridge
1355  * @bus: Parent bus the bridge is on
1356  * @dev: Bridge itself
1357  * @max: Starting subordinate number of buses behind this bridge
1358  * @available_buses: Total number of buses available for this bridge and
1359  *		     the devices below. After the minimal bus space has
1360  *		     been allocated the remaining buses will be
1361  *		     distributed equally between hotplug-capable bridges.
1362  * @pass: Either %0 (scan already configured bridges) or %1 (scan bridges
1363  *        that need to be reconfigured.
1364  *
1365  * If it's a bridge, configure it and scan the bus behind it.
1366  * For CardBus bridges, we don't scan behind as the devices will
1367  * be handled by the bridge driver itself.
1368  *
1369  * We need to process bridges in two passes -- first we scan those
1370  * already configured by the BIOS and after we are done with all of
1371  * them, we proceed to assigning numbers to the remaining buses in
1372  * order to avoid overlaps between old and new bus numbers.
1373  *
1374  * Return: New subordinate number covering all buses behind this bridge.
1375  */
1376 static int pci_scan_bridge_extend(struct pci_bus *bus, struct pci_dev *dev,
1377 				  int max, unsigned int available_buses,
1378 				  int pass)
1379 {
1380 	struct pci_bus *child;
1381 	int is_cardbus = (dev->hdr_type == PCI_HEADER_TYPE_CARDBUS);
1382 	u32 buses, i, j = 0;
1383 	u16 bctl;
1384 	u8 primary, secondary, subordinate;
1385 	int broken = 0;
1386 	bool fixed_buses;
1387 	u8 fixed_sec, fixed_sub;
1388 	int next_busnr;
1389 
1390 	/*
1391 	 * Make sure the bridge is powered on to be able to access config
1392 	 * space of devices below it.
1393 	 */
1394 	pm_runtime_get_sync(&dev->dev);
1395 
1396 	pci_read_config_dword(dev, PCI_PRIMARY_BUS, &buses);
1397 	primary = buses & 0xFF;
1398 	secondary = (buses >> 8) & 0xFF;
1399 	subordinate = (buses >> 16) & 0xFF;
1400 
1401 	pci_dbg(dev, "scanning [bus %02x-%02x] behind bridge, pass %d\n",
1402 		secondary, subordinate, pass);
1403 
1404 	if (!primary && (primary != bus->number) && secondary && subordinate) {
1405 		pci_warn(dev, "Primary bus is hard wired to 0\n");
1406 		primary = bus->number;
1407 	}
1408 
1409 	/* Check if setup is sensible at all */
1410 	if (!pass &&
1411 	    (primary != bus->number || secondary <= bus->number ||
1412 	     secondary > subordinate)) {
1413 		pci_info(dev, "bridge configuration invalid ([bus %02x-%02x]), reconfiguring\n",
1414 			 secondary, subordinate);
1415 		broken = 1;
1416 	}
1417 
1418 	/*
1419 	 * Disable Master-Abort Mode during probing to avoid reporting of
1420 	 * bus errors in some architectures.
1421 	 */
1422 	pci_read_config_word(dev, PCI_BRIDGE_CONTROL, &bctl);
1423 	pci_write_config_word(dev, PCI_BRIDGE_CONTROL,
1424 			      bctl & ~PCI_BRIDGE_CTL_MASTER_ABORT);
1425 
1426 	if ((secondary || subordinate) && !pcibios_assign_all_busses() &&
1427 	    !is_cardbus && !broken) {
1428 		unsigned int cmax, buses;
1429 
1430 		/*
1431 		 * Bus already configured by firmware, process it in the
1432 		 * first pass and just note the configuration.
1433 		 */
1434 		if (pass)
1435 			goto out;
1436 
1437 		/*
1438 		 * The bus might already exist for two reasons: Either we
1439 		 * are rescanning the bus or the bus is reachable through
1440 		 * more than one bridge. The second case can happen with
1441 		 * the i450NX chipset.
1442 		 */
1443 		child = pci_find_bus(pci_domain_nr(bus), secondary);
1444 		if (!child) {
1445 			child = pci_add_new_bus(bus, dev, secondary);
1446 			if (!child)
1447 				goto out;
1448 			child->primary = primary;
1449 			pci_bus_insert_busn_res(child, secondary, subordinate);
1450 			child->bridge_ctl = bctl;
1451 		}
1452 
1453 		buses = subordinate - secondary;
1454 		cmax = pci_scan_child_bus_extend(child, buses);
1455 		if (cmax > subordinate)
1456 			pci_warn(dev, "bridge has subordinate %02x but max busn %02x\n",
1457 				 subordinate, cmax);
1458 
1459 		/* Subordinate should equal child->busn_res.end */
1460 		if (subordinate > max)
1461 			max = subordinate;
1462 	} else {
1463 
1464 		/*
1465 		 * We need to assign a number to this bus which we always
1466 		 * do in the second pass.
1467 		 */
1468 		if (!pass) {
1469 			if (pcibios_assign_all_busses() || broken || is_cardbus)
1470 
1471 				/*
1472 				 * Temporarily disable forwarding of the
1473 				 * configuration cycles on all bridges in
1474 				 * this bus segment to avoid possible
1475 				 * conflicts in the second pass between two
1476 				 * bridges programmed with overlapping bus
1477 				 * ranges.
1478 				 */
1479 				pci_write_config_dword(dev, PCI_PRIMARY_BUS,
1480 						       buses & ~0xffffff);
1481 			goto out;
1482 		}
1483 
1484 		/* Clear errors */
1485 		pci_write_config_word(dev, PCI_STATUS, 0xffff);
1486 
1487 		/* Read bus numbers from EA Capability (if present) */
1488 		fixed_buses = pci_ea_fixed_busnrs(dev, &fixed_sec, &fixed_sub);
1489 		if (fixed_buses)
1490 			next_busnr = fixed_sec;
1491 		else
1492 			next_busnr = max + 1;
1493 
1494 		/*
1495 		 * Prevent assigning a bus number that already exists.
1496 		 * This can happen when a bridge is hot-plugged, so in this
1497 		 * case we only re-scan this bus.
1498 		 */
1499 		child = pci_find_bus(pci_domain_nr(bus), next_busnr);
1500 		if (!child) {
1501 			child = pci_add_new_bus(bus, dev, next_busnr);
1502 			if (!child)
1503 				goto out;
1504 			pci_bus_insert_busn_res(child, next_busnr,
1505 						bus->busn_res.end);
1506 		}
1507 		max++;
1508 		if (available_buses)
1509 			available_buses--;
1510 
1511 		buses = (buses & 0xff000000)
1512 		      | ((unsigned int)(child->primary)     <<  0)
1513 		      | ((unsigned int)(child->busn_res.start)   <<  8)
1514 		      | ((unsigned int)(child->busn_res.end) << 16);
1515 
1516 		/*
1517 		 * yenta.c forces a secondary latency timer of 176.
1518 		 * Copy that behaviour here.
1519 		 */
1520 		if (is_cardbus) {
1521 			buses &= ~0xff000000;
1522 			buses |= CARDBUS_LATENCY_TIMER << 24;
1523 		}
1524 
1525 		/* We need to blast all three values with a single write */
1526 		pci_write_config_dword(dev, PCI_PRIMARY_BUS, buses);
1527 
1528 		if (!is_cardbus) {
1529 			child->bridge_ctl = bctl;
1530 			max = pci_scan_child_bus_extend(child, available_buses);
1531 		} else {
1532 
1533 			/*
1534 			 * For CardBus bridges, we leave 4 bus numbers as
1535 			 * cards with a PCI-to-PCI bridge can be inserted
1536 			 * later.
1537 			 */
1538 			for (i = 0; i < CARDBUS_RESERVE_BUSNR; i++) {
1539 				struct pci_bus *parent = bus;
1540 				if (pci_find_bus(pci_domain_nr(bus),
1541 							max+i+1))
1542 					break;
1543 				while (parent->parent) {
1544 					if ((!pcibios_assign_all_busses()) &&
1545 					    (parent->busn_res.end > max) &&
1546 					    (parent->busn_res.end <= max+i)) {
1547 						j = 1;
1548 					}
1549 					parent = parent->parent;
1550 				}
1551 				if (j) {
1552 
1553 					/*
1554 					 * Often, there are two CardBus
1555 					 * bridges -- try to leave one
1556 					 * valid bus number for each one.
1557 					 */
1558 					i /= 2;
1559 					break;
1560 				}
1561 			}
1562 			max += i;
1563 		}
1564 
1565 		/*
1566 		 * Set subordinate bus number to its real value.
1567 		 * If fixed subordinate bus number exists from EA
1568 		 * capability then use it.
1569 		 */
1570 		if (fixed_buses)
1571 			max = fixed_sub;
1572 		pci_bus_update_busn_res_end(child, max);
1573 		pci_write_config_byte(dev, PCI_SUBORDINATE_BUS, max);
1574 	}
1575 
1576 	sprintf(child->name,
1577 		(is_cardbus ? "PCI CardBus %04x:%02x" : "PCI Bus %04x:%02x"),
1578 		pci_domain_nr(bus), child->number);
1579 
1580 	/* Check that all devices are accessible */
1581 	while (bus->parent) {
1582 		if ((child->busn_res.end > bus->busn_res.end) ||
1583 		    (child->number > bus->busn_res.end) ||
1584 		    (child->number < bus->number) ||
1585 		    (child->busn_res.end < bus->number)) {
1586 			dev_info(&dev->dev, "devices behind bridge are unusable because %pR cannot be assigned for them\n",
1587 				 &child->busn_res);
1588 			break;
1589 		}
1590 		bus = bus->parent;
1591 	}
1592 
1593 out:
1594 	/* Clear errors in the Secondary Status Register */
1595 	pci_write_config_word(dev, PCI_SEC_STATUS, 0xffff);
1596 
1597 	pci_write_config_word(dev, PCI_BRIDGE_CONTROL, bctl);
1598 
1599 	pm_runtime_put(&dev->dev);
1600 
1601 	return max;
1602 }
1603 
/*
 * pci_scan_bridge() - Scan buses behind a bridge
 * @bus: Parent bus the bridge is on
 * @dev: Bridge itself
 * @max: Starting subordinate number of buses behind this bridge
 * @pass: Either %0 (scan already configured bridges) or %1 (scan bridges
 *        that need to be reconfigured).
 *
 * Same as pci_scan_bridge_extend() with no extra buses made available
 * for distribution below the bridge.
 *
 * Bridges are handled in two passes: those already set up by the BIOS
 * first, then the ones that still need bus numbers, so that old and new
 * bus numbers do not overlap.  CardBus bridges are configured but not
 * scanned behind, as their devices are handled by the bridge driver.
 *
 * Return: New subordinate number covering all buses behind this bridge.
 */
int pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, int max, int pass)
{
	const unsigned int no_extra_buses = 0;

	return pci_scan_bridge_extend(bus, dev, max, no_extra_buses, pass);
}
EXPORT_SYMBOL(pci_scan_bridge);
1628 
1629 /*
1630  * Read interrupt line and base address registers.
1631  * The architecture-dependent code can tweak these, of course.
1632  */
1633 static void pci_read_irq(struct pci_dev *dev)
1634 {
1635 	unsigned char irq;
1636 
1637 	/* VFs are not allowed to use INTx, so skip the config reads */
1638 	if (dev->is_virtfn) {
1639 		dev->pin = 0;
1640 		dev->irq = 0;
1641 		return;
1642 	}
1643 
1644 	pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &irq);
1645 	dev->pin = irq;
1646 	if (irq)
1647 		pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &irq);
1648 	dev->irq = irq;
1649 }
1650 
1651 void set_pcie_port_type(struct pci_dev *pdev)
1652 {
1653 	int pos;
1654 	u16 reg16;
1655 	u32 reg32;
1656 	int type;
1657 	struct pci_dev *parent;
1658 
1659 	pos = pci_find_capability(pdev, PCI_CAP_ID_EXP);
1660 	if (!pos)
1661 		return;
1662 
1663 	pdev->pcie_cap = pos;
1664 	pci_read_config_word(pdev, pos + PCI_EXP_FLAGS, &reg16);
1665 	pdev->pcie_flags_reg = reg16;
1666 
1667 	type = pci_pcie_type(pdev);
1668 	if (type == PCI_EXP_TYPE_ROOT_PORT)
1669 		pci_enable_rrs_sv(pdev);
1670 
1671 	pci_read_config_dword(pdev, pos + PCI_EXP_DEVCAP, &pdev->devcap);
1672 	pdev->pcie_mpss = FIELD_GET(PCI_EXP_DEVCAP_PAYLOAD, pdev->devcap);
1673 
1674 	pcie_capability_read_dword(pdev, PCI_EXP_LNKCAP, &reg32);
1675 	if (reg32 & PCI_EXP_LNKCAP_DLLLARC)
1676 		pdev->link_active_reporting = 1;
1677 
1678 #ifdef CONFIG_PCIEASPM
1679 	if (reg32 & PCI_EXP_LNKCAP_ASPM_L0S)
1680 		pdev->aspm_l0s_support = 1;
1681 	if (reg32 & PCI_EXP_LNKCAP_ASPM_L1)
1682 		pdev->aspm_l1_support = 1;
1683 #endif
1684 
1685 	parent = pci_upstream_bridge(pdev);
1686 	if (!parent)
1687 		return;
1688 
1689 	/*
1690 	 * Some systems do not identify their upstream/downstream ports
1691 	 * correctly so detect impossible configurations here and correct
1692 	 * the port type accordingly.
1693 	 */
1694 	if (type == PCI_EXP_TYPE_DOWNSTREAM) {
1695 		/*
1696 		 * If pdev claims to be downstream port but the parent
1697 		 * device is also downstream port assume pdev is actually
1698 		 * upstream port.
1699 		 */
1700 		if (pcie_downstream_port(parent)) {
1701 			pci_info(pdev, "claims to be downstream port but is acting as upstream port, correcting type\n");
1702 			pdev->pcie_flags_reg &= ~PCI_EXP_FLAGS_TYPE;
1703 			pdev->pcie_flags_reg |= PCI_EXP_TYPE_UPSTREAM;
1704 		}
1705 	} else if (type == PCI_EXP_TYPE_UPSTREAM) {
1706 		/*
1707 		 * If pdev claims to be upstream port but the parent
1708 		 * device is also upstream port assume pdev is actually
1709 		 * downstream port.
1710 		 */
1711 		if (pci_pcie_type(parent) == PCI_EXP_TYPE_UPSTREAM) {
1712 			pci_info(pdev, "claims to be upstream port but is acting as downstream port, correcting type\n");
1713 			pdev->pcie_flags_reg &= ~PCI_EXP_FLAGS_TYPE;
1714 			pdev->pcie_flags_reg |= PCI_EXP_TYPE_DOWNSTREAM;
1715 		}
1716 	}
1717 }
1718 
1719 void set_pcie_hotplug_bridge(struct pci_dev *pdev)
1720 {
1721 	u32 reg32;
1722 
1723 	pcie_capability_read_dword(pdev, PCI_EXP_SLTCAP, &reg32);
1724 	if (reg32 & PCI_EXP_SLTCAP_HPC)
1725 		pdev->is_hotplug_bridge = pdev->is_pciehp = 1;
1726 }
1727 
1728 static void set_pcie_thunderbolt(struct pci_dev *dev)
1729 {
1730 	u16 vsec;
1731 
1732 	/* Is the device part of a Thunderbolt controller? */
1733 	vsec = pci_find_vsec_capability(dev, PCI_VENDOR_ID_INTEL, PCI_VSEC_ID_INTEL_TBT);
1734 	if (vsec)
1735 		dev->is_thunderbolt = 1;
1736 }
1737 
1738 static void set_pcie_cxl(struct pci_dev *dev)
1739 {
1740 	struct pci_dev *bridge;
1741 	u16 dvsec, cap;
1742 
1743 	if (!pci_is_pcie(dev))
1744 		return;
1745 
1746 	/*
1747 	 * Update parent's CXL state because alternate protocol training
1748 	 * may have changed
1749 	 */
1750 	bridge = pci_upstream_bridge(dev);
1751 	if (bridge)
1752 		set_pcie_cxl(bridge);
1753 
1754 	dvsec = pci_find_dvsec_capability(dev, PCI_VENDOR_ID_CXL,
1755 					  PCI_DVSEC_CXL_FLEXBUS_PORT);
1756 	if (!dvsec)
1757 		return;
1758 
1759 	pci_read_config_word(dev, dvsec + PCI_DVSEC_CXL_FLEXBUS_PORT_STATUS,
1760 			     &cap);
1761 
1762 	dev->is_cxl = FIELD_GET(PCI_DVSEC_CXL_FLEXBUS_PORT_STATUS_CACHE, cap) ||
1763 		FIELD_GET(PCI_DVSEC_CXL_FLEXBUS_PORT_STATUS_MEM, cap);
1764 
1765 }
1766 
1767 static void set_pcie_untrusted(struct pci_dev *dev)
1768 {
1769 	struct pci_dev *parent = pci_upstream_bridge(dev);
1770 
1771 	if (!parent)
1772 		return;
1773 	/*
1774 	 * If the upstream bridge is untrusted we treat this device as
1775 	 * untrusted as well.
1776 	 */
1777 	if (parent->untrusted) {
1778 		dev->untrusted = true;
1779 		return;
1780 	}
1781 
1782 	if (arch_pci_dev_is_removable(dev)) {
1783 		pci_dbg(dev, "marking as untrusted\n");
1784 		dev->untrusted = true;
1785 	}
1786 }
1787 
1788 static void pci_set_removable(struct pci_dev *dev)
1789 {
1790 	struct pci_dev *parent = pci_upstream_bridge(dev);
1791 
1792 	if (!parent)
1793 		return;
1794 	/*
1795 	 * We (only) consider everything tunneled below an external_facing
1796 	 * device to be removable by the user. We're mainly concerned with
1797 	 * consumer platforms with user accessible thunderbolt ports that are
1798 	 * vulnerable to DMA attacks, and we expect those ports to be marked by
1799 	 * the firmware as external_facing. Devices in traditional hotplug
1800 	 * slots can technically be removed, but the expectation is that unless
1801 	 * the port is marked with external_facing, such devices are less
1802 	 * accessible to user / may not be removed by end user, and thus not
1803 	 * exposed as "removable" to userspace.
1804 	 */
1805 	if (dev_is_removable(&parent->dev)) {
1806 		dev_set_removable(&dev->dev, DEVICE_REMOVABLE);
1807 		return;
1808 	}
1809 
1810 	if (arch_pci_dev_is_removable(dev)) {
1811 		pci_dbg(dev, "marking as removable\n");
1812 		dev_set_removable(&dev->dev, DEVICE_REMOVABLE);
1813 	}
1814 }
1815 
1816 /**
1817  * pci_ext_cfg_is_aliased - Is ext config space just an alias of std config?
1818  * @dev: PCI device
1819  *
1820  * PCI Express to PCI/PCI-X Bridge Specification, rev 1.0, 4.1.4 says that
1821  * when forwarding a type1 configuration request the bridge must check that
1822  * the extended register address field is zero.  The bridge is not permitted
1823  * to forward the transactions and must handle it as an Unsupported Request.
1824  * Some bridges do not follow this rule and simply drop the extended register
1825  * bits, resulting in the standard config space being aliased, every 256
1826  * bytes across the entire configuration space.  Test for this condition by
1827  * comparing the first dword of each potential alias to the vendor/device ID.
1828  * Known offenders:
1829  *   ASM1083/1085 PCIe-to-PCI Reversible Bridge (1b21:1080, rev 01 & 03)
1830  *   AMD/ATI SBx00 PCI to PCI Bridge (1002:4384, rev 40)
1831  */
1832 static bool pci_ext_cfg_is_aliased(struct pci_dev *dev)
1833 {
1834 #ifdef CONFIG_PCI_QUIRKS
1835 	int pos, ret;
1836 	u32 header, tmp;
1837 
1838 	pci_read_config_dword(dev, PCI_VENDOR_ID, &header);
1839 
1840 	for (pos = PCI_CFG_SPACE_SIZE;
1841 	     pos < PCI_CFG_SPACE_EXP_SIZE; pos += PCI_CFG_SPACE_SIZE) {
1842 		ret = pci_read_config_dword(dev, pos, &tmp);
1843 		if ((ret != PCIBIOS_SUCCESSFUL) || (header != tmp))
1844 			return false;
1845 	}
1846 
1847 	return true;
1848 #else
1849 	return false;
1850 #endif
1851 }
1852 
1853 /**
1854  * pci_cfg_space_size_ext - Get the configuration space size of the PCI device
1855  * @dev: PCI device
1856  *
1857  * Regular PCI devices have 256 bytes, but PCI-X 2 and PCI Express devices
1858  * have 4096 bytes.  Even if the device is capable, that doesn't mean we can
1859  * access it.  Maybe we don't have a way to generate extended config space
1860  * accesses, or the device is behind a reverse Express bridge.  So we try
1861  * reading the dword at 0x100 which must either be 0 or a valid extended
1862  * capability header.
1863  */
1864 static int pci_cfg_space_size_ext(struct pci_dev *dev)
1865 {
1866 	u32 status;
1867 	int pos = PCI_CFG_SPACE_SIZE;
1868 
1869 	if (pci_read_config_dword(dev, pos, &status) != PCIBIOS_SUCCESSFUL)
1870 		return PCI_CFG_SPACE_SIZE;
1871 	if (PCI_POSSIBLE_ERROR(status) || pci_ext_cfg_is_aliased(dev))
1872 		return PCI_CFG_SPACE_SIZE;
1873 
1874 	return PCI_CFG_SPACE_EXP_SIZE;
1875 }
1876 
1877 int pci_cfg_space_size(struct pci_dev *dev)
1878 {
1879 	int pos;
1880 	u32 status;
1881 	u16 class;
1882 
1883 #ifdef CONFIG_PCI_IOV
1884 	/*
1885 	 * Per the SR-IOV specification (rev 1.1, sec 3.5), VFs are required to
1886 	 * implement a PCIe capability and therefore must implement extended
1887 	 * config space.  We can skip the NO_EXTCFG test below and the
1888 	 * reachability/aliasing test in pci_cfg_space_size_ext() by virtue of
1889 	 * the fact that the SR-IOV capability on the PF resides in extended
1890 	 * config space and must be accessible and non-aliased to have enabled
1891 	 * support for this VF.  This is a micro performance optimization for
1892 	 * systems supporting many VFs.
1893 	 */
1894 	if (dev->is_virtfn)
1895 		return PCI_CFG_SPACE_EXP_SIZE;
1896 #endif
1897 
1898 	if (dev->bus->bus_flags & PCI_BUS_FLAGS_NO_EXTCFG)
1899 		return PCI_CFG_SPACE_SIZE;
1900 
1901 	class = dev->class >> 8;
1902 	if (class == PCI_CLASS_BRIDGE_HOST)
1903 		return pci_cfg_space_size_ext(dev);
1904 
1905 	if (pci_is_pcie(dev))
1906 		return pci_cfg_space_size_ext(dev);
1907 
1908 	pos = pci_find_capability(dev, PCI_CAP_ID_PCIX);
1909 	if (!pos)
1910 		return PCI_CFG_SPACE_SIZE;
1911 
1912 	pci_read_config_dword(dev, pos + PCI_X_STATUS, &status);
1913 	if (status & (PCI_X_STATUS_266MHZ | PCI_X_STATUS_533MHZ))
1914 		return pci_cfg_space_size_ext(dev);
1915 
1916 	return PCI_CFG_SPACE_SIZE;
1917 }
1918 
1919 static u32 pci_class(struct pci_dev *dev)
1920 {
1921 	u32 class;
1922 
1923 #ifdef CONFIG_PCI_IOV
1924 	if (dev->is_virtfn)
1925 		return dev->physfn->sriov->class;
1926 #endif
1927 	pci_read_config_dword(dev, PCI_CLASS_REVISION, &class);
1928 	return class;
1929 }
1930 
1931 static void pci_subsystem_ids(struct pci_dev *dev, u16 *vendor, u16 *device)
1932 {
1933 #ifdef CONFIG_PCI_IOV
1934 	if (dev->is_virtfn) {
1935 		*vendor = dev->physfn->sriov->subsystem_vendor;
1936 		*device = dev->physfn->sriov->subsystem_device;
1937 		return;
1938 	}
1939 #endif
1940 	pci_read_config_word(dev, PCI_SUBSYSTEM_VENDOR_ID, vendor);
1941 	pci_read_config_word(dev, PCI_SUBSYSTEM_ID, device);
1942 }
1943 
1944 static u8 pci_hdr_type(struct pci_dev *dev)
1945 {
1946 	u8 hdr_type;
1947 
1948 #ifdef CONFIG_PCI_IOV
1949 	if (dev->is_virtfn)
1950 		return dev->physfn->sriov->hdr_type;
1951 #endif
1952 	pci_read_config_byte(dev, PCI_HEADER_TYPE, &hdr_type);
1953 	return hdr_type;
1954 }
1955 
1956 #define LEGACY_IO_RESOURCE	(IORESOURCE_IO | IORESOURCE_PCI_FIXED)
1957 
1958 /**
1959  * pci_intx_mask_broken - Test PCI_COMMAND_INTX_DISABLE writability
1960  * @dev: PCI device
1961  *
1962  * Test whether PCI_COMMAND_INTX_DISABLE is writable for @dev.  Check this
1963  * at enumeration-time to avoid modifying PCI_COMMAND at run-time.
1964  */
1965 static int pci_intx_mask_broken(struct pci_dev *dev)
1966 {
1967 	u16 orig, toggle, new;
1968 
1969 	pci_read_config_word(dev, PCI_COMMAND, &orig);
1970 	toggle = orig ^ PCI_COMMAND_INTX_DISABLE;
1971 	pci_write_config_word(dev, PCI_COMMAND, toggle);
1972 	pci_read_config_word(dev, PCI_COMMAND, &new);
1973 
1974 	pci_write_config_word(dev, PCI_COMMAND, orig);
1975 
1976 	/*
1977 	 * PCI_COMMAND_INTX_DISABLE was reserved and read-only prior to PCI
1978 	 * r2.3, so strictly speaking, a device is not *broken* if it's not
1979 	 * writable.  But we'll live with the misnomer for now.
1980 	 */
1981 	if (new != toggle)
1982 		return 1;
1983 	return 0;
1984 }
1985 
1986 static void early_dump_pci_device(struct pci_dev *pdev)
1987 {
1988 	u32 value[PCI_CFG_SPACE_SIZE / sizeof(u32)];
1989 	int i;
1990 
1991 	pci_info(pdev, "config space:\n");
1992 
1993 	for (i = 0; i < ARRAY_SIZE(value); i++)
1994 		pci_read_config_dword(pdev, i * sizeof(u32), &value[i]);
1995 
1996 	print_hex_dump(KERN_INFO, "", DUMP_PREFIX_OFFSET, 16, 1,
1997 		       value, ARRAY_SIZE(value) * sizeof(u32), false);
1998 }
1999 
2000 static const char *pci_type_str(struct pci_dev *dev)
2001 {
2002 	static const char * const str[] = {
2003 		"PCIe Endpoint",
2004 		"PCIe Legacy Endpoint",
2005 		"PCIe unknown",
2006 		"PCIe unknown",
2007 		"PCIe Root Port",
2008 		"PCIe Switch Upstream Port",
2009 		"PCIe Switch Downstream Port",
2010 		"PCIe to PCI/PCI-X bridge",
2011 		"PCI/PCI-X to PCIe bridge",
2012 		"PCIe Root Complex Integrated Endpoint",
2013 		"PCIe Root Complex Event Collector",
2014 	};
2015 	int type;
2016 
2017 	if (pci_is_pcie(dev)) {
2018 		type = pci_pcie_type(dev);
2019 		if (type < ARRAY_SIZE(str))
2020 			return str[type];
2021 
2022 		return "PCIe unknown";
2023 	}
2024 
2025 	switch (dev->hdr_type) {
2026 	case PCI_HEADER_TYPE_NORMAL:
2027 		return "conventional PCI endpoint";
2028 	case PCI_HEADER_TYPE_BRIDGE:
2029 		return "conventional PCI bridge";
2030 	case PCI_HEADER_TYPE_CARDBUS:
2031 		return "CardBus bridge";
2032 	default:
2033 		return "conventional PCI";
2034 	}
2035 }
2036 
2037 /**
2038  * pci_setup_device - Fill in class and map information of a device
2039  * @dev: the device structure to fill
2040  *
2041  * Initialize the device structure with information about the device's
2042  * vendor,class,memory and IO-space addresses, IRQ lines etc.
2043  * Called at initialisation of the PCI subsystem and by CardBus services.
2044  * Returns 0 on success and negative if unknown type of device (not normal,
2045  * bridge or CardBus).
2046  */
2047 int pci_setup_device(struct pci_dev *dev)
2048 {
2049 	u32 class;
2050 	u16 cmd;
2051 	u8 hdr_type;
2052 	int err, pos = 0;
2053 	struct pci_bus_region region;
2054 	struct resource *res;
2055 
2056 	hdr_type = pci_hdr_type(dev);
2057 
2058 	dev->sysdata = dev->bus->sysdata;
2059 	dev->dev.parent = dev->bus->bridge;
2060 	dev->dev.bus = &pci_bus_type;
2061 	dev->hdr_type = FIELD_GET(PCI_HEADER_TYPE_MASK, hdr_type);
2062 	dev->multifunction = FIELD_GET(PCI_HEADER_TYPE_MFD, hdr_type);
2063 	dev->error_state = pci_channel_io_normal;
2064 	set_pcie_port_type(dev);
2065 
2066 	err = pci_set_of_node(dev);
2067 	if (err)
2068 		return err;
2069 	pci_set_acpi_fwnode(dev);
2070 
2071 	pci_dev_assign_slot(dev);
2072 
2073 	/*
2074 	 * Assume 32-bit PCI; let 64-bit PCI cards (which are far rarer)
2075 	 * set this higher, assuming the system even supports it.
2076 	 */
2077 	dev->dma_mask = 0xffffffff;
2078 
2079 	dev_set_name(&dev->dev, "%04x:%02x:%02x.%d", pci_domain_nr(dev->bus),
2080 		     dev->bus->number, PCI_SLOT(dev->devfn),
2081 		     PCI_FUNC(dev->devfn));
2082 
2083 	class = pci_class(dev);
2084 
2085 	dev->revision = class & 0xff;
2086 	dev->class = class >> 8;		    /* upper 3 bytes */
2087 
2088 	if (pci_early_dump)
2089 		early_dump_pci_device(dev);
2090 
2091 	/* Need to have dev->class ready */
2092 	dev->cfg_size = pci_cfg_space_size(dev);
2093 
2094 	/* Need to have dev->cfg_size ready */
2095 	set_pcie_thunderbolt(dev);
2096 
2097 	set_pcie_cxl(dev);
2098 
2099 	set_pcie_untrusted(dev);
2100 
2101 	if (pci_is_pcie(dev))
2102 		dev->supported_speeds = pcie_get_supported_speeds(dev);
2103 
2104 	/* "Unknown power state" */
2105 	dev->current_state = PCI_UNKNOWN;
2106 
2107 	/* Early fixups, before probing the BARs */
2108 	pci_fixup_device(pci_fixup_early, dev);
2109 
2110 	pci_set_removable(dev);
2111 
2112 	pci_info(dev, "[%04x:%04x] type %02x class %#08x %s\n",
2113 		 dev->vendor, dev->device, dev->hdr_type, dev->class,
2114 		 pci_type_str(dev));
2115 
2116 	/* Device class may be changed after fixup */
2117 	class = dev->class >> 8;
2118 
2119 	if (dev->non_compliant_bars && !dev->mmio_always_on) {
2120 		pci_read_config_word(dev, PCI_COMMAND, &cmd);
2121 		if (cmd & (PCI_COMMAND_IO | PCI_COMMAND_MEMORY)) {
2122 			pci_info(dev, "device has non-compliant BARs; disabling IO/MEM decoding\n");
2123 			cmd &= ~PCI_COMMAND_IO;
2124 			cmd &= ~PCI_COMMAND_MEMORY;
2125 			pci_write_config_word(dev, PCI_COMMAND, cmd);
2126 		}
2127 	}
2128 
2129 	dev->broken_intx_masking = pci_intx_mask_broken(dev);
2130 
2131 	switch (dev->hdr_type) {		    /* header type */
2132 	case PCI_HEADER_TYPE_NORMAL:		    /* standard header */
2133 		if (class == PCI_CLASS_BRIDGE_PCI)
2134 			goto bad;
2135 		pci_read_irq(dev);
2136 		pci_read_bases(dev, PCI_STD_NUM_BARS, PCI_ROM_ADDRESS);
2137 
2138 		pci_subsystem_ids(dev, &dev->subsystem_vendor, &dev->subsystem_device);
2139 
2140 		/*
2141 		 * Do the ugly legacy mode stuff here rather than broken chip
2142 		 * quirk code. Legacy mode ATA controllers have fixed
2143 		 * addresses. These are not always echoed in BAR0-3, and
2144 		 * BAR0-3 in a few cases contain junk!
2145 		 */
2146 		if (class == PCI_CLASS_STORAGE_IDE) {
2147 			u8 progif;
2148 			pci_read_config_byte(dev, PCI_CLASS_PROG, &progif);
2149 			if ((progif & 1) == 0) {
2150 				region.start = 0x1F0;
2151 				region.end = 0x1F7;
2152 				res = &dev->resource[0];
2153 				res->flags = LEGACY_IO_RESOURCE;
2154 				pcibios_bus_to_resource(dev->bus, res, &region);
2155 				pci_info(dev, "BAR 0 %pR: legacy IDE quirk\n",
2156 					 res);
2157 				region.start = 0x3F6;
2158 				region.end = 0x3F6;
2159 				res = &dev->resource[1];
2160 				res->flags = LEGACY_IO_RESOURCE;
2161 				pcibios_bus_to_resource(dev->bus, res, &region);
2162 				pci_info(dev, "BAR 1 %pR: legacy IDE quirk\n",
2163 					 res);
2164 			}
2165 			if ((progif & 4) == 0) {
2166 				region.start = 0x170;
2167 				region.end = 0x177;
2168 				res = &dev->resource[2];
2169 				res->flags = LEGACY_IO_RESOURCE;
2170 				pcibios_bus_to_resource(dev->bus, res, &region);
2171 				pci_info(dev, "BAR 2 %pR: legacy IDE quirk\n",
2172 					 res);
2173 				region.start = 0x376;
2174 				region.end = 0x376;
2175 				res = &dev->resource[3];
2176 				res->flags = LEGACY_IO_RESOURCE;
2177 				pcibios_bus_to_resource(dev->bus, res, &region);
2178 				pci_info(dev, "BAR 3 %pR: legacy IDE quirk\n",
2179 					 res);
2180 			}
2181 		}
2182 		break;
2183 
2184 	case PCI_HEADER_TYPE_BRIDGE:		    /* bridge header */
2185 		/*
2186 		 * The PCI-to-PCI bridge spec requires that subtractive
2187 		 * decoding (i.e. transparent) bridge must have programming
2188 		 * interface code of 0x01.
2189 		 */
2190 		pci_read_irq(dev);
2191 		dev->transparent = ((dev->class & 0xff) == 1);
2192 		pci_read_bases(dev, 2, PCI_ROM_ADDRESS1);
2193 		pci_read_bridge_windows(dev);
2194 		set_pcie_hotplug_bridge(dev);
2195 		pos = pci_find_capability(dev, PCI_CAP_ID_SSVID);
2196 		if (pos) {
2197 			pci_read_config_word(dev, pos + PCI_SSVID_VENDOR_ID, &dev->subsystem_vendor);
2198 			pci_read_config_word(dev, pos + PCI_SSVID_DEVICE_ID, &dev->subsystem_device);
2199 		}
2200 		break;
2201 
2202 	case PCI_HEADER_TYPE_CARDBUS:		    /* CardBus bridge header */
2203 		if (class != PCI_CLASS_BRIDGE_CARDBUS)
2204 			goto bad;
2205 		pci_read_irq(dev);
2206 		pci_read_bases(dev, 1, 0);
2207 		pci_read_config_word(dev, PCI_CB_SUBSYSTEM_VENDOR_ID, &dev->subsystem_vendor);
2208 		pci_read_config_word(dev, PCI_CB_SUBSYSTEM_ID, &dev->subsystem_device);
2209 		break;
2210 
2211 	default:				    /* unknown header */
2212 		pci_err(dev, "unknown header type %02x, ignoring device\n",
2213 			dev->hdr_type);
2214 		pci_release_of_node(dev);
2215 		return -EIO;
2216 
2217 	bad:
2218 		pci_err(dev, "ignoring class %#08x (doesn't match header type %02x)\n",
2219 			dev->class, dev->hdr_type);
2220 		dev->class = PCI_CLASS_NOT_DEFINED << 8;
2221 	}
2222 
2223 	/* We found a fine healthy device, go go go... */
2224 	return 0;
2225 }
2226 
2227 static void pci_configure_mps(struct pci_dev *dev)
2228 {
2229 	struct pci_dev *bridge = pci_upstream_bridge(dev);
2230 	int mps, mpss, p_mps, rc;
2231 
2232 	if (!pci_is_pcie(dev))
2233 		return;
2234 
2235 	/* MPS and MRRS fields are of type 'RsvdP' for VFs, short-circuit out */
2236 	if (dev->is_virtfn)
2237 		return;
2238 
2239 	/*
2240 	 * For Root Complex Integrated Endpoints, program the maximum
2241 	 * supported value unless limited by the PCIE_BUS_PEER2PEER case.
2242 	 */
2243 	if (pci_pcie_type(dev) == PCI_EXP_TYPE_RC_END) {
2244 		if (pcie_bus_config == PCIE_BUS_PEER2PEER)
2245 			mps = 128;
2246 		else
2247 			mps = 128 << dev->pcie_mpss;
2248 		rc = pcie_set_mps(dev, mps);
2249 		if (rc) {
2250 			pci_warn(dev, "can't set Max Payload Size to %d; if necessary, use \"pci=pcie_bus_safe\" and report a bug\n",
2251 				 mps);
2252 		}
2253 		return;
2254 	}
2255 
2256 	if (!bridge || !pci_is_pcie(bridge))
2257 		return;
2258 
2259 	mps = pcie_get_mps(dev);
2260 	p_mps = pcie_get_mps(bridge);
2261 
2262 	if (mps == p_mps)
2263 		return;
2264 
2265 	if (pcie_bus_config == PCIE_BUS_TUNE_OFF) {
2266 		pci_warn(dev, "Max Payload Size %d, but upstream %s set to %d; if necessary, use \"pci=pcie_bus_safe\" and report a bug\n",
2267 			 mps, pci_name(bridge), p_mps);
2268 		return;
2269 	}
2270 
2271 	/*
2272 	 * Fancier MPS configuration is done later by
2273 	 * pcie_bus_configure_settings()
2274 	 */
2275 	if (pcie_bus_config != PCIE_BUS_DEFAULT)
2276 		return;
2277 
2278 	mpss = 128 << dev->pcie_mpss;
2279 	if (mpss < p_mps && pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT) {
2280 		pcie_set_mps(bridge, mpss);
2281 		pci_info(dev, "Upstream bridge's Max Payload Size set to %d (was %d, max %d)\n",
2282 			 mpss, p_mps, 128 << bridge->pcie_mpss);
2283 		p_mps = pcie_get_mps(bridge);
2284 	}
2285 
2286 	rc = pcie_set_mps(dev, p_mps);
2287 	if (rc) {
2288 		pci_warn(dev, "can't set Max Payload Size to %d; if necessary, use \"pci=pcie_bus_safe\" and report a bug\n",
2289 			 p_mps);
2290 		return;
2291 	}
2292 
2293 	pci_info(dev, "Max Payload Size set to %d (was %d, max %d)\n",
2294 		 p_mps, mps, mpss);
2295 }
2296 
2297 int pci_configure_extended_tags(struct pci_dev *dev, void *ign)
2298 {
2299 	struct pci_host_bridge *host;
2300 	u32 cap;
2301 	u16 ctl;
2302 	int ret;
2303 
2304 	if (!pci_is_pcie(dev))
2305 		return 0;
2306 
2307 	ret = pcie_capability_read_dword(dev, PCI_EXP_DEVCAP, &cap);
2308 	if (ret)
2309 		return 0;
2310 
2311 	if (!(cap & PCI_EXP_DEVCAP_EXT_TAG))
2312 		return 0;
2313 
2314 	ret = pcie_capability_read_word(dev, PCI_EXP_DEVCTL, &ctl);
2315 	if (ret)
2316 		return 0;
2317 
2318 	host = pci_find_host_bridge(dev->bus);
2319 	if (!host)
2320 		return 0;
2321 
2322 	/*
2323 	 * If some device in the hierarchy doesn't handle Extended Tags
2324 	 * correctly, make sure they're disabled.
2325 	 */
2326 	if (host->no_ext_tags) {
2327 		if (ctl & PCI_EXP_DEVCTL_EXT_TAG) {
2328 			pci_info(dev, "disabling Extended Tags\n");
2329 			pcie_capability_clear_word(dev, PCI_EXP_DEVCTL,
2330 						   PCI_EXP_DEVCTL_EXT_TAG);
2331 		}
2332 		return 0;
2333 	}
2334 
2335 	if (!(ctl & PCI_EXP_DEVCTL_EXT_TAG)) {
2336 		pci_info(dev, "enabling Extended Tags\n");
2337 		pcie_capability_set_word(dev, PCI_EXP_DEVCTL,
2338 					 PCI_EXP_DEVCTL_EXT_TAG);
2339 	}
2340 	return 0;
2341 }
2342 
2343 static void pci_dev3_init(struct pci_dev *pdev)
2344 {
2345 	u16 cap = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_DEV3);
2346 	u32 val = 0;
2347 
2348 	if (!cap)
2349 		return;
2350 	pci_read_config_dword(pdev, cap + PCI_DEV3_STA, &val);
2351 	pdev->fm_enabled = !!(val & PCI_DEV3_STA_SEGMENT);
2352 }
2353 
2354 /**
2355  * pcie_relaxed_ordering_enabled - Probe for PCIe relaxed ordering enable
2356  * @dev: PCI device to query
2357  *
2358  * Returns true if the device has enabled relaxed ordering attribute.
2359  */
2360 bool pcie_relaxed_ordering_enabled(struct pci_dev *dev)
2361 {
2362 	u16 v;
2363 
2364 	pcie_capability_read_word(dev, PCI_EXP_DEVCTL, &v);
2365 
2366 	return !!(v & PCI_EXP_DEVCTL_RELAX_EN);
2367 }
2368 EXPORT_SYMBOL(pcie_relaxed_ordering_enabled);
2369 
2370 static void pci_configure_relaxed_ordering(struct pci_dev *dev)
2371 {
2372 	struct pci_dev *root;
2373 
2374 	/* PCI_EXP_DEVCTL_RELAX_EN is RsvdP in VFs */
2375 	if (dev->is_virtfn)
2376 		return;
2377 
2378 	if (!pcie_relaxed_ordering_enabled(dev))
2379 		return;
2380 
2381 	/*
2382 	 * For now, we only deal with Relaxed Ordering issues with Root
2383 	 * Ports. Peer-to-Peer DMA is another can of worms.
2384 	 */
2385 	root = pcie_find_root_port(dev);
2386 	if (!root)
2387 		return;
2388 
2389 	if (root->dev_flags & PCI_DEV_FLAGS_NO_RELAXED_ORDERING) {
2390 		pcie_capability_clear_word(dev, PCI_EXP_DEVCTL,
2391 					   PCI_EXP_DEVCTL_RELAX_EN);
2392 		pci_info(dev, "Relaxed Ordering disabled because the Root Port didn't support it\n");
2393 	}
2394 }
2395 
2396 static void pci_configure_eetlp_prefix(struct pci_dev *dev)
2397 {
2398 	struct pci_dev *bridge;
2399 	unsigned int eetlp_max;
2400 	int pcie_type;
2401 	u32 cap;
2402 
2403 	if (!pci_is_pcie(dev))
2404 		return;
2405 
2406 	pcie_capability_read_dword(dev, PCI_EXP_DEVCAP2, &cap);
2407 	if (!(cap & PCI_EXP_DEVCAP2_EE_PREFIX))
2408 		return;
2409 
2410 	pcie_type = pci_pcie_type(dev);
2411 
2412 	eetlp_max = FIELD_GET(PCI_EXP_DEVCAP2_EE_PREFIX_MAX, cap);
2413 	/* 00b means 4 */
2414 	eetlp_max = eetlp_max ?: 4;
2415 
2416 	if (pcie_type == PCI_EXP_TYPE_ROOT_PORT ||
2417 	    pcie_type == PCI_EXP_TYPE_RC_END)
2418 		dev->eetlp_prefix_max = eetlp_max;
2419 	else {
2420 		bridge = pci_upstream_bridge(dev);
2421 		if (bridge && bridge->eetlp_prefix_max)
2422 			dev->eetlp_prefix_max = eetlp_max;
2423 	}
2424 }
2425 
2426 static void pci_configure_serr(struct pci_dev *dev)
2427 {
2428 	u16 control;
2429 
2430 	if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
2431 
2432 		/*
2433 		 * A bridge will not forward ERR_ messages coming from an
2434 		 * endpoint unless SERR# forwarding is enabled.
2435 		 */
2436 		pci_read_config_word(dev, PCI_BRIDGE_CONTROL, &control);
2437 		if (!(control & PCI_BRIDGE_CTL_SERR)) {
2438 			control |= PCI_BRIDGE_CTL_SERR;
2439 			pci_write_config_word(dev, PCI_BRIDGE_CONTROL, control);
2440 		}
2441 	}
2442 }
2443 
/*
 * Apply the per-device configuration steps to @dev: MPS, Extended Tags,
 * Relaxed Ordering, LTR, ASPM L1SS, End-End TLP Prefix and SERR#, followed
 * by pci_acpi_program_hp_params().  pci_device_add() calls this first,
 * before the struct device is initialized.
 */
static void pci_configure_device(struct pci_dev *dev)
{
	pci_configure_mps(dev);
	pci_configure_extended_tags(dev, NULL);
	pci_configure_relaxed_ordering(dev);
	pci_configure_ltr(dev);
	pci_configure_aspm_l1ss(dev);
	pci_configure_eetlp_prefix(dev);
	pci_configure_serr(dev);

	pci_acpi_program_hp_params(dev);
}
2456 
/*
 * Tear down the capability state of @dev by calling the AER, RCEC and IOV
 * exit/release helpers and freeing the capability save buffers.  Called
 * from pci_release_dev() just before the device structure is freed.
 */
static void pci_release_capabilities(struct pci_dev *dev)
{
	pci_aer_exit(dev);
	pci_rcec_exit(dev);
	pci_iov_release(dev);
	pci_free_cap_save_buffers(dev);
}
2464 
2465 /**
2466  * pci_release_dev - Free a PCI device structure when all users of it are
2467  *		     finished
2468  * @dev: device that's been disconnected
2469  *
2470  * Will be called only by the device core when all users of this PCI device are
2471  * done.
2472  */
2473 static void pci_release_dev(struct device *dev)
2474 {
2475 	struct pci_dev *pci_dev;
2476 
2477 	pci_dev = to_pci_dev(dev);
2478 	pci_release_capabilities(pci_dev);
2479 	pci_release_of_node(pci_dev);
2480 	pcibios_release_device(pci_dev);
2481 	pci_bus_put(pci_dev->bus);
2482 	kfree(pci_dev->driver_override);
2483 	bitmap_free(pci_dev->dma_alias_mask);
2484 	dev_dbg(dev, "device released\n");
2485 	kfree(pci_dev);
2486 }
2487 
/*
 * Device type assigned to every pci_dev by pci_alloc_dev(); it supplies the
 * PCI device attribute groups.
 */
static const struct device_type pci_dev_type = {
	.groups = pci_dev_attr_groups,
};
2491 
2492 struct pci_dev *pci_alloc_dev(struct pci_bus *bus)
2493 {
2494 	struct pci_dev *dev;
2495 
2496 	dev = kzalloc(sizeof(struct pci_dev), GFP_KERNEL);
2497 	if (!dev)
2498 		return NULL;
2499 
2500 	INIT_LIST_HEAD(&dev->bus_list);
2501 	dev->dev.type = &pci_dev_type;
2502 	dev->bus = pci_bus_get(bus);
2503 	dev->driver_exclusive_resource = (struct resource) {
2504 		.name = "PCI Exclusive",
2505 		.start = 0,
2506 		.end = -1,
2507 	};
2508 
2509 	spin_lock_init(&dev->pcie_cap_lock);
2510 #ifdef CONFIG_PCI_MSI
2511 	raw_spin_lock_init(&dev->msi_lock);
2512 #endif
2513 	return dev;
2514 }
2515 EXPORT_SYMBOL(pci_alloc_dev);
2516 
2517 static bool pci_bus_wait_rrs(struct pci_bus *bus, int devfn, u32 *l,
2518 			     int timeout)
2519 {
2520 	int delay = 1;
2521 
2522 	if (!pci_bus_rrs_vendor_id(*l))
2523 		return true;	/* not a Configuration RRS completion */
2524 
2525 	if (!timeout)
2526 		return false;	/* RRS, but caller doesn't want to wait */
2527 
2528 	/*
2529 	 * We got the reserved Vendor ID that indicates a completion with
2530 	 * Configuration Request Retry Status (RRS).  Retry until we get a
2531 	 * valid Vendor ID or we time out.
2532 	 */
2533 	while (pci_bus_rrs_vendor_id(*l)) {
2534 		if (delay > timeout) {
2535 			pr_warn("pci %04x:%02x:%02x.%d: not ready after %dms; giving up\n",
2536 				pci_domain_nr(bus), bus->number,
2537 				PCI_SLOT(devfn), PCI_FUNC(devfn), delay - 1);
2538 
2539 			return false;
2540 		}
2541 		if (delay >= 1000)
2542 			pr_info("pci %04x:%02x:%02x.%d: not ready after %dms; waiting\n",
2543 				pci_domain_nr(bus), bus->number,
2544 				PCI_SLOT(devfn), PCI_FUNC(devfn), delay - 1);
2545 
2546 		msleep(delay);
2547 		delay *= 2;
2548 
2549 		if (pci_bus_read_config_dword(bus, devfn, PCI_VENDOR_ID, l))
2550 			return false;
2551 	}
2552 
2553 	if (delay >= 1000)
2554 		pr_info("pci %04x:%02x:%02x.%d: ready after %dms\n",
2555 			pci_domain_nr(bus), bus->number,
2556 			PCI_SLOT(devfn), PCI_FUNC(devfn), delay - 1);
2557 
2558 	return true;
2559 }
2560 
2561 bool pci_bus_generic_read_dev_vendor_id(struct pci_bus *bus, int devfn, u32 *l,
2562 					int timeout)
2563 {
2564 	if (pci_bus_read_config_dword(bus, devfn, PCI_VENDOR_ID, l))
2565 		return false;
2566 
2567 	/* Some broken boards return 0 or ~0 (PCI_ERROR_RESPONSE) if a slot is empty: */
2568 	if (PCI_POSSIBLE_ERROR(*l) || *l == 0x00000000 ||
2569 	    *l == 0x0000ffff || *l == 0xffff0000)
2570 		return false;
2571 
2572 	if (pci_bus_rrs_vendor_id(*l))
2573 		return pci_bus_wait_rrs(bus, devfn, l, timeout);
2574 
2575 	return true;
2576 }
2577 
2578 bool pci_bus_read_dev_vendor_id(struct pci_bus *bus, int devfn, u32 *l,
2579 				int timeout)
2580 {
2581 #ifdef CONFIG_PCI_QUIRKS
2582 	struct pci_dev *bridge = bus->self;
2583 
2584 	/*
2585 	 * Certain IDT switches have an issue where they improperly trigger
2586 	 * ACS Source Validation errors on completions for config reads.
2587 	 */
2588 	if (bridge && bridge->vendor == PCI_VENDOR_ID_IDT &&
2589 	    bridge->device == 0x80b5)
2590 		return pci_idt_bus_quirk(bus, devfn, l, timeout);
2591 #endif
2592 
2593 	return pci_bus_generic_read_dev_vendor_id(bus, devfn, l, timeout);
2594 }
2595 EXPORT_SYMBOL(pci_bus_read_dev_vendor_id);
2596 
2597 #if IS_ENABLED(CONFIG_PCI_PWRCTRL)
2598 static struct platform_device *pci_pwrctrl_create_device(struct pci_bus *bus, int devfn)
2599 {
2600 	struct pci_host_bridge *host = pci_find_host_bridge(bus);
2601 	struct platform_device *pdev;
2602 	struct device_node *np;
2603 
2604 	np = of_pci_find_child_device(dev_of_node(&bus->dev), devfn);
2605 	if (!np)
2606 		return NULL;
2607 
2608 	pdev = of_find_device_by_node(np);
2609 	if (pdev) {
2610 		put_device(&pdev->dev);
2611 		goto err_put_of_node;
2612 	}
2613 
2614 	/*
2615 	 * First check whether the pwrctrl device really needs to be created or
2616 	 * not. This is decided based on at least one of the power supplies
2617 	 * being defined in the devicetree node of the device.
2618 	 */
2619 	if (!of_pci_supply_present(np)) {
2620 		pr_debug("PCI/pwrctrl: Skipping OF node: %s\n", np->name);
2621 		goto err_put_of_node;
2622 	}
2623 
2624 	/* Now create the pwrctrl device */
2625 	pdev = of_platform_device_create(np, NULL, &host->dev);
2626 	if (!pdev) {
2627 		pr_err("PCI/pwrctrl: Failed to create pwrctrl device for node: %s\n", np->name);
2628 		goto err_put_of_node;
2629 	}
2630 
2631 	of_node_put(np);
2632 
2633 	return pdev;
2634 
2635 err_put_of_node:
2636 	of_node_put(np);
2637 
2638 	return NULL;
2639 }
2640 #else
2641 static struct platform_device *pci_pwrctrl_create_device(struct pci_bus *bus, int devfn)
2642 {
2643 	return NULL;
2644 }
2645 #endif
2646 
2647 /*
2648  * Read the config data for a PCI device, sanity-check it,
2649  * and fill in the dev structure.
2650  */
2651 static struct pci_dev *pci_scan_device(struct pci_bus *bus, int devfn)
2652 {
2653 	struct pci_dev *dev;
2654 	u32 l;
2655 
2656 	/*
2657 	 * Create pwrctrl device (if required) for the PCI device to handle the
2658 	 * power state. If the pwrctrl device is created, then skip scanning
2659 	 * further as the pwrctrl core will rescan the bus after powering on
2660 	 * the device.
2661 	 */
2662 	if (pci_pwrctrl_create_device(bus, devfn))
2663 		return NULL;
2664 
2665 	if (!pci_bus_read_dev_vendor_id(bus, devfn, &l, 60*1000))
2666 		return NULL;
2667 
2668 	dev = pci_alloc_dev(bus);
2669 	if (!dev)
2670 		return NULL;
2671 
2672 	dev->devfn = devfn;
2673 	dev->vendor = l & 0xffff;
2674 	dev->device = (l >> 16) & 0xffff;
2675 
2676 	if (pci_setup_device(dev)) {
2677 		pci_bus_put(dev->bus);
2678 		kfree(dev);
2679 		return NULL;
2680 	}
2681 
2682 	return dev;
2683 }
2684 
2685 void pcie_report_downtraining(struct pci_dev *dev)
2686 {
2687 	if (!pci_is_pcie(dev))
2688 		return;
2689 
2690 	/* Look from the device up to avoid downstream ports with no devices */
2691 	if ((pci_pcie_type(dev) != PCI_EXP_TYPE_ENDPOINT) &&
2692 	    (pci_pcie_type(dev) != PCI_EXP_TYPE_LEG_END) &&
2693 	    (pci_pcie_type(dev) != PCI_EXP_TYPE_UPSTREAM))
2694 		return;
2695 
2696 	/* Multi-function PCIe devices share the same link/status */
2697 	if (PCI_FUNC(dev->devfn) != 0 || dev->is_virtfn)
2698 		return;
2699 
2700 	/* Print link status only if the device is constrained by the fabric */
2701 	__pcie_print_link_status(dev, false);
2702 }
2703 
2704 static void pci_imm_ready_init(struct pci_dev *dev)
2705 {
2706 	u16 status;
2707 
2708 	pci_read_config_word(dev, PCI_STATUS, &status);
2709 	if (status & PCI_STATUS_IMM_READY)
2710 		dev->imm_ready = 1;
2711 }
2712 
/*
 * Run the init helper of every supported capability on @dev, then report
 * link downtraining and set up the reset methods.  Called from
 * pci_device_add() before the device is put on the bus list.
 */
static void pci_init_capabilities(struct pci_dev *dev)
{
	pci_ea_init(dev);		/* Enhanced Allocation */
	pci_msi_init(dev);		/* Disable MSI */
	pci_msix_init(dev);		/* Disable MSI-X */

	/* Buffers for saving PCIe and PCI-X capabilities */
	pci_allocate_cap_save_buffers(dev);

	pci_imm_ready_init(dev);	/* Immediate Readiness */
	pci_pm_init(dev);		/* Power Management */
	pci_vpd_init(dev);		/* Vital Product Data */
	pci_configure_ari(dev);		/* Alternative Routing-ID Forwarding */
	pci_iov_init(dev);		/* Single Root I/O Virtualization */
	pci_ats_init(dev);		/* Address Translation Services */
	pci_pri_init(dev);		/* Page Request Interface */
	pci_pasid_init(dev);		/* Process Address Space ID */
	pci_acs_init(dev);		/* Access Control Services */
	pci_ptm_init(dev);		/* Precision Time Measurement */
	pci_aer_init(dev);		/* Advanced Error Reporting */
	pci_dpc_init(dev);		/* Downstream Port Containment */
	pci_rcec_init(dev);		/* Root Complex Event Collector */
	pci_doe_init(dev);		/* Data Object Exchange */
	pci_tph_init(dev);		/* TLP Processing Hints */
	pci_rebar_init(dev);		/* Resizable BAR */
	pci_dev3_init(dev);		/* Device 3 capabilities */
	pci_ide_init(dev);		/* Link Integrity and Data Encryption */

	pcie_report_downtraining(dev);
	pci_init_reset_methods(dev);
}
2744 
2745 /*
2746  * This is the equivalent of pci_host_bridge_msi_domain() that acts on
2747  * devices. Firmware interfaces that can select the MSI domain on a
2748  * per-device basis should be called from here.
2749  */
2750 static struct irq_domain *pci_dev_msi_domain(struct pci_dev *dev)
2751 {
2752 	struct irq_domain *d;
2753 
2754 	/*
2755 	 * If a domain has been set through the pcibios_device_add()
2756 	 * callback, then this is the one (platform code knows best).
2757 	 */
2758 	d = dev_get_msi_domain(&dev->dev);
2759 	if (d)
2760 		return d;
2761 
2762 	/*
2763 	 * Let's see if we have a firmware interface able to provide
2764 	 * the domain.
2765 	 */
2766 	d = pci_msi_get_device_domain(dev);
2767 	if (d)
2768 		return d;
2769 
2770 	return NULL;
2771 }
2772 
2773 static void pci_set_msi_domain(struct pci_dev *dev)
2774 {
2775 	struct irq_domain *d;
2776 
2777 	/*
2778 	 * If the platform or firmware interfaces cannot supply a
2779 	 * device-specific MSI domain, then inherit the default domain
2780 	 * from the host bridge itself.
2781 	 */
2782 	d = pci_dev_msi_domain(dev);
2783 	if (!d)
2784 		d = dev_get_msi_domain(&dev->bus->dev);
2785 
2786 	dev_set_msi_domain(&dev->dev, d);
2787 }
2788 
2789 void pci_device_add(struct pci_dev *dev, struct pci_bus *bus)
2790 {
2791 	int ret;
2792 
2793 	pci_configure_device(dev);
2794 
2795 	device_initialize(&dev->dev);
2796 	dev->dev.release = pci_release_dev;
2797 
2798 	set_dev_node(&dev->dev, pcibus_to_node(bus));
2799 	dev->dev.dma_mask = &dev->dma_mask;
2800 	dev->dev.dma_parms = &dev->dma_parms;
2801 	dev->dev.coherent_dma_mask = 0xffffffffull;
2802 
2803 	dma_set_max_seg_size(&dev->dev, 65536);
2804 	dma_set_seg_boundary(&dev->dev, 0xffffffff);
2805 
2806 	pcie_failed_link_retrain(dev);
2807 
2808 	/* Fix up broken headers */
2809 	pci_fixup_device(pci_fixup_header, dev);
2810 
2811 	pci_reassigndev_resource_alignment(dev);
2812 
2813 	pci_init_capabilities(dev);
2814 
2815 	/*
2816 	 * Add the device to our list of discovered devices
2817 	 * and the bus list for fixup functions, etc.
2818 	 */
2819 	down_write(&pci_bus_sem);
2820 	list_add_tail(&dev->bus_list, &bus->devices);
2821 	up_write(&pci_bus_sem);
2822 
2823 	ret = pcibios_device_add(dev);
2824 	WARN_ON(ret < 0);
2825 
2826 	/* Set up MSI IRQ domain */
2827 	pci_set_msi_domain(dev);
2828 
2829 	/* Notifier could use PCI capabilities */
2830 	ret = device_add(&dev->dev);
2831 	WARN_ON(ret < 0);
2832 
2833 	/* Establish pdev->tsm for newly added (e.g. new SR-IOV VFs) */
2834 	pci_tsm_init(dev);
2835 
2836 	pci_npem_create(dev);
2837 
2838 	pci_doe_sysfs_init(dev);
2839 }
2840 
/*
 * Return the pci_dev for @devfn on @bus.  A device already known on the bus
 * is returned as is; otherwise the function is probed and, if something is
 * there, added to the bus.  Returns NULL if no device was found.
 */
struct pci_dev *pci_scan_single_device(struct pci_bus *bus, int devfn)
{
	struct pci_dev *pdev = pci_get_slot(bus, devfn);

	if (pdev) {
		/* Already known: drop the reference pci_get_slot() took */
		pci_dev_put(pdev);
		return pdev;
	}

	pdev = pci_scan_device(bus, devfn);
	if (pdev)
		pci_device_add(pdev, bus);

	return pdev;
}
EXPORT_SYMBOL(pci_scan_single_device);
2860 
2861 static int next_ari_fn(struct pci_bus *bus, struct pci_dev *dev, int fn)
2862 {
2863 	int pos;
2864 	u16 cap = 0;
2865 	unsigned int next_fn;
2866 
2867 	if (!dev)
2868 		return -ENODEV;
2869 
2870 	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ARI);
2871 	if (!pos)
2872 		return -ENODEV;
2873 
2874 	pci_read_config_word(dev, pos + PCI_ARI_CAP, &cap);
2875 	next_fn = PCI_ARI_CAP_NFN(cap);
2876 	if (next_fn <= fn)
2877 		return -ENODEV;	/* protect against malformed list */
2878 
2879 	return next_fn;
2880 }
2881 
2882 static int next_fn(struct pci_bus *bus, struct pci_dev *dev, int fn)
2883 {
2884 	if (pci_ari_enabled(bus))
2885 		return next_ari_fn(bus, dev, fn);
2886 
2887 	if (fn >= 7)
2888 		return -ENODEV;
2889 	/* only multifunction devices may have more functions */
2890 	if (dev && !dev->multifunction)
2891 		return -ENODEV;
2892 
2893 	return fn + 1;
2894 }
2895 
2896 static int only_one_child(struct pci_bus *bus)
2897 {
2898 	struct pci_dev *bridge = bus->self;
2899 
2900 	/*
2901 	 * Systems with unusual topologies set PCI_SCAN_ALL_PCIE_DEVS so
2902 	 * we scan for all possible devices, not just Device 0.
2903 	 */
2904 	if (pci_has_flag(PCI_SCAN_ALL_PCIE_DEVS))
2905 		return 0;
2906 
2907 	/*
2908 	 * A PCIe Downstream Port normally leads to a Link with only Device
2909 	 * 0 on it (PCIe spec r3.1, sec 7.3.1).  As an optimization, scan
2910 	 * only for Device 0 in that situation.
2911 	 */
2912 	if (bridge && pci_is_pcie(bridge) && pcie_downstream_port(bridge))
2913 		return 1;
2914 
2915 	return 0;
2916 }
2917 
2918 /**
2919  * pci_scan_slot - Scan a PCI slot on a bus for devices
2920  * @bus: PCI bus to scan
2921  * @devfn: slot number to scan (must have zero function)
2922  *
2923  * Scan a PCI slot on the specified PCI bus for devices, adding
2924  * discovered devices to the @bus->devices list.  New devices
2925  * will not have is_added set.
2926  *
2927  * Returns the number of new devices found.
2928  */
2929 int pci_scan_slot(struct pci_bus *bus, int devfn)
2930 {
2931 	struct pci_dev *dev;
2932 	int fn = 0, nr = 0;
2933 
2934 	if (only_one_child(bus) && (devfn > 0))
2935 		return 0; /* Already scanned the entire slot */
2936 
2937 	do {
2938 		dev = pci_scan_single_device(bus, devfn + fn);
2939 		if (dev) {
2940 			if (!pci_dev_is_added(dev))
2941 				nr++;
2942 			if (fn > 0)
2943 				dev->multifunction = 1;
2944 		} else if (fn == 0) {
2945 			/*
2946 			 * Function 0 is required unless we are running on
2947 			 * a hypervisor that passes through individual PCI
2948 			 * functions.
2949 			 */
2950 			if (!hypervisor_isolated_pci_functions())
2951 				break;
2952 		}
2953 		fn = next_fn(bus, dev, fn);
2954 	} while (fn >= 0);
2955 
2956 	/* Only one slot has PCIe device */
2957 	if (bus->self && nr)
2958 		pcie_aspm_init_link_state(bus->self);
2959 
2960 	return nr;
2961 }
2962 EXPORT_SYMBOL(pci_scan_slot);
2963 
2964 static int pcie_find_smpss(struct pci_dev *dev, void *data)
2965 {
2966 	u8 *smpss = data;
2967 
2968 	if (!pci_is_pcie(dev))
2969 		return 0;
2970 
2971 	/*
2972 	 * We don't have a way to change MPS settings on devices that have
2973 	 * drivers attached.  A hot-added device might support only the minimum
2974 	 * MPS setting (MPS=128).  Therefore, if the fabric contains a bridge
2975 	 * where devices may be hot-added, we limit the fabric MPS to 128 so
2976 	 * hot-added devices will work correctly.
2977 	 *
2978 	 * However, if we hot-add a device to a slot directly below a Root
2979 	 * Port, it's impossible for there to be other existing devices below
2980 	 * the port.  We don't limit the MPS in this case because we can
2981 	 * reconfigure MPS on both the Root Port and the hot-added device,
2982 	 * and there are no other devices involved.
2983 	 *
2984 	 * Note that this PCIE_BUS_SAFE path assumes no peer-to-peer DMA.
2985 	 */
2986 	if (dev->is_hotplug_bridge &&
2987 	    pci_pcie_type(dev) != PCI_EXP_TYPE_ROOT_PORT)
2988 		*smpss = 0;
2989 
2990 	if (*smpss > dev->pcie_mpss)
2991 		*smpss = dev->pcie_mpss;
2992 
2993 	return 0;
2994 }
2995 
2996 static void pcie_write_mps(struct pci_dev *dev, int mps)
2997 {
2998 	int rc;
2999 
3000 	if (pcie_bus_config == PCIE_BUS_PERFORMANCE) {
3001 		mps = 128 << dev->pcie_mpss;
3002 
3003 		if (pci_pcie_type(dev) != PCI_EXP_TYPE_ROOT_PORT &&
3004 		    dev->bus->self)
3005 
3006 			/*
3007 			 * For "Performance", the assumption is made that
3008 			 * downstream communication will never be larger than
3009 			 * the MRRS.  So, the MPS only needs to be configured
3010 			 * for the upstream communication.  This being the case,
3011 			 * walk from the top down and set the MPS of the child
3012 			 * to that of the parent bus.
3013 			 *
3014 			 * Configure the device MPS with the smaller of the
3015 			 * device MPSS or the bridge MPS (which is assumed to be
3016 			 * properly configured at this point to the largest
3017 			 * allowable MPS based on its parent bus).
3018 			 */
3019 			mps = min(mps, pcie_get_mps(dev->bus->self));
3020 	}
3021 
3022 	rc = pcie_set_mps(dev, mps);
3023 	if (rc)
3024 		pci_err(dev, "Failed attempting to set the MPS\n");
3025 }
3026 
3027 static void pcie_write_mrrs(struct pci_dev *dev)
3028 {
3029 	int rc, mrrs;
3030 
3031 	/*
3032 	 * In the "safe" case, do not configure the MRRS.  There appear to be
3033 	 * issues with setting MRRS to 0 on a number of devices.
3034 	 */
3035 	if (pcie_bus_config != PCIE_BUS_PERFORMANCE)
3036 		return;
3037 
3038 	/*
3039 	 * For max performance, the MRRS must be set to the largest supported
3040 	 * value.  However, it cannot be configured larger than the MPS the
3041 	 * device or the bus can support.  This should already be properly
3042 	 * configured by a prior call to pcie_write_mps().
3043 	 */
3044 	mrrs = pcie_get_mps(dev);
3045 
3046 	/*
3047 	 * MRRS is a R/W register.  Invalid values can be written, but a
3048 	 * subsequent read will verify if the value is acceptable or not.
3049 	 * If the MRRS value provided is not acceptable (e.g., too large),
3050 	 * shrink the value until it is acceptable to the HW.
3051 	 */
3052 	while (mrrs != pcie_get_readrq(dev) && mrrs >= 128) {
3053 		rc = pcie_set_readrq(dev, mrrs);
3054 		if (!rc)
3055 			break;
3056 
3057 		pci_warn(dev, "Failed attempting to set the MRRS\n");
3058 		mrrs /= 2;
3059 	}
3060 
3061 	if (mrrs < 128)
3062 		pci_err(dev, "MRRS was unable to be configured with a safe value.  If problems are experienced, try running with pci=pcie_bus_safe\n");
3063 }
3064 
3065 static int pcie_bus_configure_set(struct pci_dev *dev, void *data)
3066 {
3067 	int mps, orig_mps;
3068 
3069 	if (!pci_is_pcie(dev))
3070 		return 0;
3071 
3072 	if (pcie_bus_config == PCIE_BUS_TUNE_OFF ||
3073 	    pcie_bus_config == PCIE_BUS_DEFAULT)
3074 		return 0;
3075 
3076 	mps = 128 << *(u8 *)data;
3077 	orig_mps = pcie_get_mps(dev);
3078 
3079 	pcie_write_mps(dev, mps);
3080 	pcie_write_mrrs(dev);
3081 
3082 	pci_info(dev, "Max Payload Size set to %4d/%4d (was %4d), Max Read Rq %4d\n",
3083 		 pcie_get_mps(dev), 128 << dev->pcie_mpss,
3084 		 orig_mps, pcie_get_readrq(dev));
3085 
3086 	return 0;
3087 }
3088 
3089 /*
3090  * pcie_bus_configure_settings() requires that pci_walk_bus work in a top-down,
3091  * parents then children fashion.  If this changes, then this code will not
3092  * work as designed.
3093  */
3094 void pcie_bus_configure_settings(struct pci_bus *bus)
3095 {
3096 	u8 smpss = 0;
3097 
3098 	if (!bus->self)
3099 		return;
3100 
3101 	if (!pci_is_pcie(bus->self))
3102 		return;
3103 
3104 	/*
3105 	 * FIXME - Peer to peer DMA is possible, though the endpoint would need
3106 	 * to be aware of the MPS of the destination.  To work around this,
3107 	 * simply force the MPS of the entire system to the smallest possible.
3108 	 */
3109 	if (pcie_bus_config == PCIE_BUS_PEER2PEER)
3110 		smpss = 0;
3111 
3112 	if (pcie_bus_config == PCIE_BUS_SAFE) {
3113 		smpss = bus->self->pcie_mpss;
3114 
3115 		pcie_find_smpss(bus->self, &smpss);
3116 		pci_walk_bus(bus, pcie_find_smpss, &smpss);
3117 	}
3118 
3119 	pcie_bus_configure_set(bus->self, &smpss);
3120 	pci_walk_bus(bus, pcie_bus_configure_set, &smpss);
3121 }
3122 EXPORT_SYMBOL_GPL(pcie_bus_configure_settings);
3123 
/*
 * Arch hook for per-bus fixups.  Called after each bus is probed, but before
 * its children are examined.  This is marked as __weak because multiple
 * architectures define it; this default implementation does nothing.
 */
void __weak pcibios_fixup_bus(struct pci_bus *bus)
{
	/* Nothing to do; this default is expected to be removed in the future */
}
3132 
3133 /**
3134  * pci_scan_child_bus_extend() - Scan devices below a bus
3135  * @bus: Bus to scan for devices
3136  * @available_buses: Total number of buses available (%0 does not try to
3137  *		     extend beyond the minimal)
3138  *
3139  * Scans devices below @bus including subordinate buses. Returns new
3140  * subordinate number including all the found devices. Passing
3141  * @available_buses causes the remaining bus space to be distributed
3142  * equally between hotplug-capable bridges to allow future extension of the
3143  * hierarchy.
3144  */
3145 static unsigned int pci_scan_child_bus_extend(struct pci_bus *bus,
3146 					      unsigned int available_buses)
3147 {
3148 	unsigned int used_buses, normal_bridges = 0, hotplug_bridges = 0;
3149 	unsigned int start = bus->busn_res.start;
3150 	unsigned int devnr, cmax, max = start;
3151 	struct pci_dev *dev;
3152 
3153 	dev_dbg(&bus->dev, "scanning bus\n");
3154 
3155 	/* Go find them, Rover! */
3156 	for (devnr = 0; devnr < PCI_MAX_NR_DEVS; devnr++)
3157 		pci_scan_slot(bus, PCI_DEVFN(devnr, 0));
3158 
3159 	/* Reserve buses for SR-IOV capability */
3160 	used_buses = pci_iov_bus_range(bus);
3161 	max += used_buses;
3162 
3163 	/*
3164 	 * After performing arch-dependent fixup of the bus, look behind
3165 	 * all PCI-to-PCI bridges on this bus.
3166 	 */
3167 	if (!bus->is_added) {
3168 		dev_dbg(&bus->dev, "fixups for bus\n");
3169 		pcibios_fixup_bus(bus);
3170 		bus->is_added = 1;
3171 	}
3172 
3173 	/*
3174 	 * Calculate how many hotplug bridges and normal bridges there
3175 	 * are on this bus. We will distribute the additional available
3176 	 * buses between hotplug bridges.
3177 	 */
3178 	for_each_pci_bridge(dev, bus) {
3179 		if (dev->is_hotplug_bridge)
3180 			hotplug_bridges++;
3181 		else
3182 			normal_bridges++;
3183 	}
3184 
3185 	/*
3186 	 * Scan bridges that are already configured. We don't touch them
3187 	 * unless they are misconfigured (which will be done in the second
3188 	 * scan below).
3189 	 */
3190 	for_each_pci_bridge(dev, bus) {
3191 		cmax = max;
3192 		max = pci_scan_bridge_extend(bus, dev, max, 0, 0);
3193 
3194 		/*
3195 		 * Reserve one bus for each bridge now to avoid extending
3196 		 * hotplug bridges too much during the second scan below.
3197 		 */
3198 		used_buses++;
3199 		if (max - cmax > 1)
3200 			used_buses += max - cmax - 1;
3201 	}
3202 
3203 	/* Scan bridges that need to be reconfigured */
3204 	for_each_pci_bridge(dev, bus) {
3205 		unsigned int buses = 0;
3206 
3207 		if (!hotplug_bridges && normal_bridges == 1) {
3208 			/*
3209 			 * There is only one bridge on the bus (upstream
3210 			 * port) so it gets all available buses which it
3211 			 * can then distribute to the possible hotplug
3212 			 * bridges below.
3213 			 */
3214 			buses = available_buses;
3215 		} else if (dev->is_hotplug_bridge) {
3216 			/*
3217 			 * Distribute the extra buses between hotplug
3218 			 * bridges if any.
3219 			 */
3220 			buses = available_buses / hotplug_bridges;
3221 			buses = min(buses, available_buses - used_buses + 1);
3222 		}
3223 
3224 		cmax = max;
3225 		max = pci_scan_bridge_extend(bus, dev, cmax, buses, 1);
3226 		/* One bus is already accounted so don't add it again */
3227 		if (max - cmax > 1)
3228 			used_buses += max - cmax - 1;
3229 	}
3230 
3231 	/*
3232 	 * Make sure a hotplug bridge has at least the minimum requested
3233 	 * number of buses but allow it to grow up to the maximum available
3234 	 * bus number if there is room.
3235 	 */
3236 	if (bus->self && bus->self->is_hotplug_bridge) {
3237 		used_buses = max(available_buses, pci_hotplug_bus_size - 1);
3238 		if (max - start < used_buses) {
3239 			max = start + used_buses;
3240 
3241 			/* Do not allocate more buses than we have room left */
3242 			if (max > bus->busn_res.end)
3243 				max = bus->busn_res.end;
3244 
3245 			dev_dbg(&bus->dev, "%pR extended by %#02x\n",
3246 				&bus->busn_res, max - start);
3247 		}
3248 	}
3249 
3250 	/*
3251 	 * We've scanned the bus and so we know all about what's on
3252 	 * the other side of any bridges that may be on this bus plus
3253 	 * any devices.
3254 	 *
3255 	 * Return how far we've got finding sub-buses.
3256 	 */
3257 	dev_dbg(&bus->dev, "bus scan returning with max=%02x\n", max);
3258 	return max;
3259 }
3260 
/**
 * pci_scan_child_bus() - Scan devices below a bus
 * @bus: Bus to scan for devices
 *
 * Scans devices below @bus including subordinate buses, without asking for
 * any bus numbers beyond the minimal.  Returns new subordinate number
 * including all the found devices.
 */
unsigned int pci_scan_child_bus(struct pci_bus *bus)
{
	/* 0 means: do not try to extend beyond the minimal */
	const unsigned int no_extra_buses = 0;

	return pci_scan_child_bus_extend(bus, no_extra_buses);
}
EXPORT_SYMBOL_GPL(pci_scan_child_bus);
3273 
/**
 * pcibios_root_bridge_prepare - Platform-specific host bridge setup
 * @bridge: Host bridge to set up
 *
 * Default empty implementation.  Replace with an architecture-specific setup
 * routine, if necessary.
 *
 * Return: this default implementation always returns 0.
 */
int __weak pcibios_root_bridge_prepare(struct pci_host_bridge *bridge)
{
	return 0;
}
3285 
/*
 * Default empty implementation; __weak so that it can be replaced by an
 * architecture- or platform-specific version.  The callers are not in this
 * part of the file.
 */
void __weak pcibios_add_bus(struct pci_bus *bus)
{
}
3289 
/*
 * Default empty implementation; __weak so that it can be replaced by an
 * architecture- or platform-specific version.  The callers are not in this
 * part of the file.
 */
void __weak pcibios_remove_bus(struct pci_bus *bus)
{
}
3293 
3294 struct pci_bus *pci_create_root_bus(struct device *parent, int bus,
3295 		struct pci_ops *ops, void *sysdata, struct list_head *resources)
3296 {
3297 	int error;
3298 	struct pci_host_bridge *bridge;
3299 
3300 	bridge = pci_alloc_host_bridge(0);
3301 	if (!bridge)
3302 		return NULL;
3303 
3304 	bridge->dev.parent = parent;
3305 
3306 	list_splice_init(resources, &bridge->windows);
3307 	bridge->sysdata = sysdata;
3308 	bridge->busnr = bus;
3309 	bridge->ops = ops;
3310 
3311 	error = pci_register_host_bridge(bridge);
3312 	if (error < 0)
3313 		goto err_out;
3314 
3315 	return bridge->bus;
3316 
3317 err_out:
3318 	put_device(&bridge->dev);
3319 	return NULL;
3320 }
3321 EXPORT_SYMBOL_GPL(pci_create_root_bus);
3322 
3323 int pci_host_probe(struct pci_host_bridge *bridge)
3324 {
3325 	struct pci_bus *bus, *child;
3326 	int ret;
3327 
3328 	pci_lock_rescan_remove();
3329 	ret = pci_scan_root_bus_bridge(bridge);
3330 	pci_unlock_rescan_remove();
3331 	if (ret < 0) {
3332 		dev_err(bridge->dev.parent, "Scanning root bridge failed");
3333 		return ret;
3334 	}
3335 
3336 	bus = bridge->bus;
3337 
3338 	/* If we must preserve the resource configuration, claim now */
3339 	if (bridge->preserve_config)
3340 		pci_bus_claim_resources(bus);
3341 
3342 	/*
3343 	 * Assign whatever was left unassigned. If we didn't claim above,
3344 	 * this will reassign everything.
3345 	 */
3346 	pci_assign_unassigned_root_bus_resources(bus);
3347 
3348 	list_for_each_entry(child, &bus->children, node)
3349 		pcie_bus_configure_settings(child);
3350 
3351 	pci_lock_rescan_remove();
3352 	pci_bus_add_devices(bus);
3353 	pci_unlock_rescan_remove();
3354 
3355 	/*
3356 	 * Ensure pm_runtime_enable() is called for the controller drivers
3357 	 * before calling pci_host_probe(). The PM framework expects that
3358 	 * if the parent device supports runtime PM, it will be enabled
3359 	 * before child runtime PM is enabled.
3360 	 */
3361 	pm_runtime_set_active(&bridge->dev);
3362 	pm_runtime_no_callbacks(&bridge->dev);
3363 	devm_pm_runtime_enable(&bridge->dev);
3364 
3365 	return 0;
3366 }
3367 EXPORT_SYMBOL_GPL(pci_host_probe);
3368 
3369 int pci_bus_insert_busn_res(struct pci_bus *b, int bus, int bus_max)
3370 {
3371 	struct resource *res = &b->busn_res;
3372 	struct resource *parent_res, *conflict;
3373 
3374 	res->start = bus;
3375 	res->end = bus_max;
3376 	res->flags = IORESOURCE_BUS;
3377 
3378 	if (!pci_is_root_bus(b))
3379 		parent_res = &b->parent->busn_res;
3380 	else {
3381 		parent_res = get_pci_domain_busn_res(pci_domain_nr(b));
3382 		res->flags |= IORESOURCE_PCI_FIXED;
3383 	}
3384 
3385 	conflict = request_resource_conflict(parent_res, res);
3386 
3387 	if (conflict)
3388 		dev_info(&b->dev,
3389 			   "busn_res: can not insert %pR under %s%pR (conflicts with %s %pR)\n",
3390 			    res, pci_is_root_bus(b) ? "domain " : "",
3391 			    parent_res, conflict->name, conflict);
3392 
3393 	return conflict == NULL;
3394 }
3395 
3396 int pci_bus_update_busn_res_end(struct pci_bus *b, int bus_max)
3397 {
3398 	struct resource *res = &b->busn_res;
3399 	struct resource old_res = *res;
3400 	resource_size_t size;
3401 	int ret;
3402 
3403 	if (res->start > bus_max)
3404 		return -EINVAL;
3405 
3406 	size = bus_max - res->start + 1;
3407 	ret = adjust_resource(res, res->start, size);
3408 	dev_info(&b->dev, "busn_res: %pR end %s updated to %02x\n",
3409 			&old_res, ret ? "can not be" : "is", bus_max);
3410 
3411 	if (!ret && !res->parent)
3412 		pci_bus_insert_busn_res(b, res->start, res->end);
3413 
3414 	return ret;
3415 }
3416 
3417 void pci_bus_release_busn_res(struct pci_bus *b)
3418 {
3419 	struct resource *res = &b->busn_res;
3420 	int ret;
3421 
3422 	if (!res->flags || !res->parent)
3423 		return;
3424 
3425 	ret = release_resource(res);
3426 	dev_info(&b->dev, "busn_res: %pR %s released\n",
3427 			res, ret ? "can not be" : "is");
3428 }
3429 
3430 int pci_scan_root_bus_bridge(struct pci_host_bridge *bridge)
3431 {
3432 	struct resource_entry *window;
3433 	bool found = false;
3434 	struct pci_bus *b;
3435 	int max, bus, ret;
3436 
3437 	if (!bridge)
3438 		return -EINVAL;
3439 
3440 	resource_list_for_each_entry(window, &bridge->windows)
3441 		if (window->res->flags & IORESOURCE_BUS) {
3442 			bridge->busnr = window->res->start;
3443 			found = true;
3444 			break;
3445 		}
3446 
3447 	ret = pci_register_host_bridge(bridge);
3448 	if (ret < 0)
3449 		return ret;
3450 
3451 	b = bridge->bus;
3452 	bus = bridge->busnr;
3453 
3454 	if (!found) {
3455 		dev_info(&b->dev,
3456 		 "No busn resource found for root bus, will use [bus %02x-ff]\n",
3457 			bus);
3458 		pci_bus_insert_busn_res(b, bus, 255);
3459 	}
3460 
3461 	max = pci_scan_child_bus(b);
3462 
3463 	if (!found)
3464 		pci_bus_update_busn_res_end(b, max);
3465 
3466 	return 0;
3467 }
3468 EXPORT_SYMBOL(pci_scan_root_bus_bridge);
3469 
3470 struct pci_bus *pci_scan_root_bus(struct device *parent, int bus,
3471 		struct pci_ops *ops, void *sysdata, struct list_head *resources)
3472 {
3473 	struct resource_entry *window;
3474 	bool found = false;
3475 	struct pci_bus *b;
3476 	int max;
3477 
3478 	resource_list_for_each_entry(window, resources)
3479 		if (window->res->flags & IORESOURCE_BUS) {
3480 			found = true;
3481 			break;
3482 		}
3483 
3484 	b = pci_create_root_bus(parent, bus, ops, sysdata, resources);
3485 	if (!b)
3486 		return NULL;
3487 
3488 	if (!found) {
3489 		dev_info(&b->dev,
3490 		 "No busn resource found for root bus, will use [bus %02x-ff]\n",
3491 			bus);
3492 		pci_bus_insert_busn_res(b, bus, 255);
3493 	}
3494 
3495 	max = pci_scan_child_bus(b);
3496 
3497 	if (!found)
3498 		pci_bus_update_busn_res_end(b, max);
3499 
3500 	return b;
3501 }
3502 EXPORT_SYMBOL(pci_scan_root_bus);
3503 
3504 struct pci_bus *pci_scan_bus(int bus, struct pci_ops *ops,
3505 					void *sysdata)
3506 {
3507 	LIST_HEAD(resources);
3508 	struct pci_bus *b;
3509 
3510 	pci_add_resource(&resources, &ioport_resource);
3511 	pci_add_resource(&resources, &iomem_resource);
3512 	pci_add_resource(&resources, &busn_resource);
3513 	b = pci_create_root_bus(NULL, bus, ops, sysdata, &resources);
3514 	if (b) {
3515 		pci_scan_child_bus(b);
3516 	} else {
3517 		pci_free_resource_list(&resources);
3518 	}
3519 	return b;
3520 }
3521 EXPORT_SYMBOL(pci_scan_bus);
3522 
3523 /**
3524  * pci_rescan_bus_bridge_resize - Scan a PCI bus for devices
3525  * @bridge: PCI bridge for the bus to scan
3526  *
3527  * Scan a PCI bus and child buses for new devices, add them,
3528  * and enable them, resizing bridge mmio/io resource if necessary
3529  * and possible.  The caller must ensure the child devices are already
3530  * removed for resizing to occur.
3531  *
3532  * Returns the max number of subordinate bus discovered.
3533  */
3534 unsigned int pci_rescan_bus_bridge_resize(struct pci_dev *bridge)
3535 {
3536 	unsigned int max;
3537 	struct pci_bus *bus = bridge->subordinate;
3538 
3539 	max = pci_scan_child_bus(bus);
3540 
3541 	pci_assign_unassigned_bridge_resources(bridge);
3542 
3543 	pci_bus_add_devices(bus);
3544 
3545 	return max;
3546 }
3547 
/**
 * pci_rescan_bus - Scan a PCI bus for devices
 * @bus: PCI bus to scan
 *
 * Scan a PCI bus and child buses for new devices, assign the resources
 * that are still unassigned, then add the devices and enable them.
 *
 * Returns the max number of subordinate bus discovered.
 */
unsigned int pci_rescan_bus(struct pci_bus *bus)
{
	unsigned int last_busnr = pci_scan_child_bus(bus);

	pci_assign_unassigned_bus_resources(bus);
	pci_bus_add_devices(bus);

	return last_busnr;
}
EXPORT_SYMBOL_GPL(pci_rescan_bus);
3568 
/*
 * pci_rescan_bus(), pci_rescan_bus_bridge_resize() and PCI device removal
 * routines should always be executed under this mutex.
 */
DEFINE_MUTEX(pci_rescan_remove_lock);

/* Acquire pci_rescan_remove_lock; pair with pci_unlock_rescan_remove() */
void pci_lock_rescan_remove(void)
{
	mutex_lock(&pci_rescan_remove_lock);
}
EXPORT_SYMBOL_GPL(pci_lock_rescan_remove);

/* Release pci_rescan_remove_lock taken by pci_lock_rescan_remove() */
void pci_unlock_rescan_remove(void)
{
	mutex_unlock(&pci_rescan_remove_lock);
}
EXPORT_SYMBOL_GPL(pci_unlock_rescan_remove);
3586 
3587 static int __init pci_sort_bf_cmp(const struct device *d_a,
3588 				  const struct device *d_b)
3589 {
3590 	const struct pci_dev *a = to_pci_dev(d_a);
3591 	const struct pci_dev *b = to_pci_dev(d_b);
3592 
3593 	if      (pci_domain_nr(a->bus) < pci_domain_nr(b->bus)) return -1;
3594 	else if (pci_domain_nr(a->bus) > pci_domain_nr(b->bus)) return  1;
3595 
3596 	if      (a->bus->number < b->bus->number) return -1;
3597 	else if (a->bus->number > b->bus->number) return  1;
3598 
3599 	if      (a->devfn < b->devfn) return -1;
3600 	else if (a->devfn > b->devfn) return  1;
3601 
3602 	return 0;
3603 }
3604 
3605 void __init pci_sort_breadthfirst(void)
3606 {
3607 	bus_sort_breadthfirst(&pci_bus_type, &pci_sort_bf_cmp);
3608 }
3609 
3610 int pci_hp_add_bridge(struct pci_dev *dev)
3611 {
3612 	struct pci_bus *parent = dev->bus;
3613 	int busnr, start = parent->busn_res.start;
3614 	unsigned int available_buses = 0;
3615 	int end = parent->busn_res.end;
3616 
3617 	for (busnr = start; busnr <= end; busnr++) {
3618 		if (!pci_find_bus(pci_domain_nr(parent), busnr))
3619 			break;
3620 	}
3621 	if (busnr-- > end) {
3622 		pci_err(dev, "No bus number available for hot-added bridge\n");
3623 		return -1;
3624 	}
3625 
3626 	/* Scan bridges that are already configured */
3627 	busnr = pci_scan_bridge(parent, dev, busnr, 0);
3628 
3629 	/*
3630 	 * Distribute the available bus numbers between hotplug-capable
3631 	 * bridges to make extending the chain later possible.
3632 	 */
3633 	available_buses = end - busnr;
3634 
3635 	/* Scan bridges that need to be reconfigured */
3636 	pci_scan_bridge_extend(parent, dev, busnr, available_buses, 1);
3637 
3638 	if (!dev->subordinate)
3639 		return -1;
3640 
3641 	return 0;
3642 }
3643 EXPORT_SYMBOL_GPL(pci_hp_add_bridge);
3644