xref: /linux/drivers/pci/probe.c (revision 40286d6379aacfcc053253ef78dc78b09addffda)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * PCI detection and setup code
4  */
5 
6 #include <linux/array_size.h>
7 #include <linux/kernel.h>
8 #include <linux/delay.h>
9 #include <linux/init.h>
10 #include <linux/pci.h>
11 #include <linux/msi.h>
12 #include <linux/of_pci.h>
13 #include <linux/of_platform.h>
14 #include <linux/platform_device.h>
15 #include <linux/pci_hotplug.h>
16 #include <linux/slab.h>
17 #include <linux/sprintf.h>
18 #include <linux/module.h>
19 #include <linux/cpumask.h>
20 #include <linux/aer.h>
21 #include <linux/acpi.h>
22 #include <linux/hypervisor.h>
23 #include <linux/irqdomain.h>
24 #include <linux/pm_runtime.h>
25 #include <linux/bitfield.h>
26 #include <trace/events/pci.h>
27 #include "pci.h"
28 
/* Default bus-number aperture (busses 00-ff) for the default PCI domain */
static struct resource busn_resource = {
	.name	= "PCI busn",
	.start	= 0,
	.end	= 255,
	.flags	= IORESOURCE_BUS,
};
35 
/* Ugh.  Need to stop exporting this to modules. */
/* Every PCI root bus known to the system is linked on this list */
LIST_HEAD(pci_root_buses);
EXPORT_SYMBOL(pci_root_buses);
39 
/* Bus-number resources, one entry per PCI domain; see get_pci_domain_busn_res() */
static LIST_HEAD(pci_domain_busn_res_list);

/* Per-domain bus-number aperture, linked on pci_domain_busn_res_list */
struct pci_domain_busn_res {
	struct list_head list;
	struct resource res;	/* bus number range for this domain */
	int domain_nr;
};
47 
48 static struct resource *get_pci_domain_busn_res(int domain_nr)
49 {
50 	struct pci_domain_busn_res *r;
51 
52 	list_for_each_entry(r, &pci_domain_busn_res_list, list)
53 		if (r->domain_nr == domain_nr)
54 			return &r->res;
55 
56 	r = kzalloc_obj(*r);
57 	if (!r)
58 		return NULL;
59 
60 	r->domain_nr = domain_nr;
61 	r->res.start = 0;
62 	r->res.end = 0xff;
63 	r->res.flags = IORESOURCE_BUS | IORESOURCE_PCI_FIXED;
64 
65 	list_add_tail(&r->list, &pci_domain_busn_res_list);
66 
67 	return &r->res;
68 }
69 
70 /*
71  * PCI Bus Class
72  */
73 static void release_pcibus_dev(struct device *dev)
74 {
75 	struct pci_bus *pci_bus = to_pci_bus(dev);
76 
77 	put_device(pci_bus->bridge);
78 	pci_bus_remove_resources(pci_bus);
79 	pci_release_bus_of_node(pci_bus);
80 	kfree(pci_bus);
81 }
82 
/* sysfs device class for PCI buses ("pci_bus") */
static const struct class pcibus_class = {
	.name		= "pci_bus",
	.dev_release	= &release_pcibus_dev,
	.dev_groups	= pcibus_groups,
};
88 
/* Register the pci_bus class at postcore initcall time */
static int __init pcibus_class_init(void)
{
	return class_register(&pcibus_class);
}
postcore_initcall(pcibus_class_init);
94 
95 static u64 pci_size(u64 base, u64 maxbase, u64 mask)
96 {
97 	u64 size = mask & maxbase;	/* Find the significant bits */
98 	if (!size)
99 		return 0;
100 
101 	/*
102 	 * Get the lowest of them to find the decode size, and from that
103 	 * the extent.
104 	 */
105 	size = size & ~(size-1);
106 
107 	/*
108 	 * base == maxbase can be valid only if the BAR has already been
109 	 * programmed with all 1s.
110 	 */
111 	if (base == maxbase && ((base | (size - 1)) & mask) != mask)
112 		return 0;
113 
114 	return size;
115 }
116 
117 static inline unsigned long decode_bar(struct pci_dev *dev, u32 bar)
118 {
119 	u32 mem_type;
120 	unsigned long flags;
121 
122 	if ((bar & PCI_BASE_ADDRESS_SPACE) == PCI_BASE_ADDRESS_SPACE_IO) {
123 		flags = bar & ~PCI_BASE_ADDRESS_IO_MASK;
124 		flags |= IORESOURCE_IO;
125 		return flags;
126 	}
127 
128 	flags = bar & ~PCI_BASE_ADDRESS_MEM_MASK;
129 	flags |= IORESOURCE_MEM;
130 	if (flags & PCI_BASE_ADDRESS_MEM_PREFETCH)
131 		flags |= IORESOURCE_PREFETCH;
132 
133 	mem_type = bar & PCI_BASE_ADDRESS_MEM_TYPE_MASK;
134 	switch (mem_type) {
135 	case PCI_BASE_ADDRESS_MEM_TYPE_32:
136 		break;
137 	case PCI_BASE_ADDRESS_MEM_TYPE_1M:
138 		/* 1M mem BAR treated as 32-bit BAR */
139 		break;
140 	case PCI_BASE_ADDRESS_MEM_TYPE_64:
141 		flags |= IORESOURCE_MEM_64;
142 		break;
143 	default:
144 		/* mem unknown type treated as 32-bit BAR */
145 		break;
146 	}
147 	return flags;
148 }
149 
150 #define PCI_COMMAND_DECODE_ENABLE	(PCI_COMMAND_MEMORY | PCI_COMMAND_IO)
151 
152 /**
153  * __pci_size_bars - Read the raw BAR mask for a range of PCI BARs
154  * @dev: the PCI device
155  * @count: number of BARs to size
156  * @pos: starting config space position
157  * @sizes: array to store mask values
158  * @rom: indicate whether to use ROM mask, which avoids enabling ROM BARs
159  *
160  * Provided @sizes array must be sufficiently sized to store results for
161  * @count u32 BARs.  Caller is responsible for disabling decode to specified
162  * BAR range around calling this function.  This function is intended to avoid
163  * disabling decode around sizing each BAR individually, which can result in
164  * non-trivial overhead in virtualized environments with very large PCI BARs.
165  */
166 static void __pci_size_bars(struct pci_dev *dev, int count,
167 			    unsigned int pos, u32 *sizes, bool rom)
168 {
169 	u32 orig, mask = rom ? PCI_ROM_ADDRESS_MASK : ~0;
170 	int i;
171 
172 	for (i = 0; i < count; i++, pos += 4, sizes++) {
173 		pci_read_config_dword(dev, pos, &orig);
174 		pci_write_config_dword(dev, pos, mask);
175 		pci_read_config_dword(dev, pos, sizes);
176 		pci_write_config_dword(dev, pos, orig);
177 	}
178 }
179 
/* Size @count standard BARs starting at config offset @pos (non-ROM mask) */
void __pci_size_stdbars(struct pci_dev *dev, int count,
			unsigned int pos, u32 *sizes)
{
	__pci_size_bars(dev, count, pos, sizes, false);
}
185 
/* Size the expansion ROM BAR at @pos, using the ROM mask so it isn't enabled */
static void __pci_size_rom(struct pci_dev *dev, unsigned int pos, u32 *sizes)
{
	__pci_size_bars(dev, 1, pos, sizes, true);
}
190 
/**
 * __pci_read_base - Read a PCI BAR
 * @dev: the PCI device
 * @type: type of the BAR
 * @res: resource buffer to be filled in
 * @pos: BAR position in the config space
 * @sizes: array of one or more pre-read BAR masks
 *
 * Returns 1 if the BAR is 64-bit, or 0 if 32-bit.
 */
int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type,
		    struct resource *res, unsigned int pos, u32 *sizes)
{
	u32 l = 0, sz;
	u64 l64, sz64, mask64;
	struct pci_bus_region region, inverted_region;
	const char *res_name = pci_resource_name(dev, res - dev->resource);

	res->name = pci_name(dev);

	pci_read_config_dword(dev, pos, &l);
	sz = sizes[0];

	/*
	 * All bits set in sz means the device isn't working properly.
	 * If the BAR isn't implemented, all bits must be 0.  If it's a
	 * memory BAR or a ROM, bit 0 must be clear; if it's an io BAR, bit
	 * 1 must be clear.
	 */
	if (PCI_POSSIBLE_ERROR(sz))
		sz = 0;

	/*
	 * I don't know how l can have all bits set.  Copied from old code.
	 * Maybe it fixes a bug on some ancient platform.
	 */
	if (PCI_POSSIBLE_ERROR(l))
		l = 0;

	if (type == pci_bar_unknown) {
		res->flags = decode_bar(dev, l);
		res->flags |= IORESOURCE_SIZEALIGN;
		if (res->flags & IORESOURCE_IO) {
			l64 = l & PCI_BASE_ADDRESS_IO_MASK;
			sz64 = sz & PCI_BASE_ADDRESS_IO_MASK;
			mask64 = PCI_BASE_ADDRESS_IO_MASK & (u32)IO_SPACE_LIMIT;
		} else {
			l64 = l & PCI_BASE_ADDRESS_MEM_MASK;
			sz64 = sz & PCI_BASE_ADDRESS_MEM_MASK;
			mask64 = (u32)PCI_BASE_ADDRESS_MEM_MASK;
		}
	} else {
		/* Expansion ROM BAR: the low bit is the enable bit */
		if (l & PCI_ROM_ADDRESS_ENABLE)
			res->flags |= IORESOURCE_ROM_ENABLE;
		l64 = l & PCI_ROM_ADDRESS_MASK;
		sz64 = sz & PCI_ROM_ADDRESS_MASK;
		mask64 = PCI_ROM_ADDRESS_MASK;
	}

	if (res->flags & IORESOURCE_MEM_64) {
		/* Fold in the upper dword of a 64-bit memory BAR */
		pci_read_config_dword(dev, pos + 4, &l);
		sz = sizes[1];

		l64 |= ((u64)l << 32);
		sz64 |= ((u64)sz << 32);
		mask64 |= ((u64)~0 << 32);
	}

	if (!sz64)
		goto fail;

	sz64 = pci_size(l64, sz64, mask64);
	if (!sz64) {
		pci_info(dev, FW_BUG "%s: invalid; can't size\n", res_name);
		goto fail;
	}

	if (res->flags & IORESOURCE_MEM_64) {
		if ((sizeof(pci_bus_addr_t) < 8 || sizeof(resource_size_t) < 8)
		    && sz64 > 0x100000000ULL) {
			res->flags |= IORESOURCE_UNSET | IORESOURCE_DISABLED;
			resource_set_range(res, 0, 0);
			pci_err(dev, "%s: can't handle BAR larger than 4GB (size %#010llx)\n",
				res_name, (unsigned long long)sz64);
			goto out;
		}

		if ((sizeof(pci_bus_addr_t) < 8) && l) {
			/* Above 32-bit boundary; try to reallocate */
			res->flags |= IORESOURCE_UNSET;
			resource_set_range(res, 0, sz64);
			pci_info(dev, "%s: can't handle BAR above 4GB (bus address %#010llx)\n",
				 res_name, (unsigned long long)l64);
			goto out;
		}
	}

	region.start = l64;
	region.end = l64 + sz64 - 1;

	pcibios_bus_to_resource(dev->bus, res, &region);
	pcibios_resource_to_bus(dev->bus, &inverted_region, res);

	/*
	 * If "A" is a BAR value (a bus address), "bus_to_resource(A)" is
	 * the corresponding resource address (the physical address used by
	 * the CPU.  Converting that resource address back to a bus address
	 * should yield the original BAR value:
	 *
	 *     resource_to_bus(bus_to_resource(A)) == A
	 *
	 * If it doesn't, CPU accesses to "bus_to_resource(A)" will not
	 * be claimed by the device.
	 */
	if (inverted_region.start != region.start) {
		res->flags |= IORESOURCE_UNSET;
		res->start = 0;
		res->end = region.end - region.start;
		pci_info(dev, "%s: initial BAR value %#010llx invalid\n",
			 res_name, (unsigned long long)region.start);
	}

	goto out;


fail:
	res->flags = 0;
out:
	if (res->flags)
		pci_info(dev, "%s %pR\n", res_name, res);

	return (res->flags & IORESOURCE_MEM_64) ? 1 : 0;
}
324 
/*
 * Size and read the first @howmany standard BARs of @dev and, when @rom
 * is a non-zero config offset, its expansion ROM BAR.  Decode is
 * temporarily disabled around the sizing writes unless the device is
 * marked mmio_always_on.
 */
static __always_inline void pci_read_bases(struct pci_dev *dev,
					   unsigned int howmany, int rom)
{
	u32 rombar, stdbars[PCI_STD_NUM_BARS];
	unsigned int pos, reg;
	u16 orig_cmd;

	BUILD_BUG_ON(statically_true(howmany > PCI_STD_NUM_BARS));

	if (dev->non_compliant_bars)
		return;

	/* Per PCIe r4.0, sec 9.3.4.1.11, the VF BARs are all RO Zero */
	if (dev->is_virtfn)
		return;

	/* No printks while decoding is disabled! */
	if (!dev->mmio_always_on) {
		pci_read_config_word(dev, PCI_COMMAND, &orig_cmd);
		if (orig_cmd & PCI_COMMAND_DECODE_ENABLE) {
			pci_write_config_word(dev, PCI_COMMAND,
				orig_cmd & ~PCI_COMMAND_DECODE_ENABLE);
		}
	}

	__pci_size_stdbars(dev, howmany, PCI_BASE_ADDRESS_0, stdbars);
	if (rom)
		__pci_size_rom(dev, rom, &rombar);

	/* Restore the original command register once sizing is done */
	if (!dev->mmio_always_on &&
	    (orig_cmd & PCI_COMMAND_DECODE_ENABLE))
		pci_write_config_word(dev, PCI_COMMAND, orig_cmd);

	/* __pci_read_base() returns 1 for a 64-bit BAR, which uses two slots */
	for (pos = 0; pos < howmany; pos++) {
		struct resource *res = &dev->resource[pos];
		reg = PCI_BASE_ADDRESS_0 + (pos << 2);
		pos += __pci_read_base(dev, pci_bar_unknown,
				       res, reg, &stdbars[pos]);
	}

	if (rom) {
		struct resource *res = &dev->resource[PCI_ROM_RESOURCE];
		dev->rom_base_reg = rom;
		res->flags = IORESOURCE_MEM | IORESOURCE_PREFETCH |
				IORESOURCE_READONLY | IORESOURCE_SIZEALIGN;
		__pci_read_base(dev, pci_bar_mem32, res, rom, &rombar);
	}
}
373 
/*
 * Read the bridge's I/O window (base/limit registers) into @res.  Does
 * nothing unless dev->io_window was detected earlier.  Logs the window
 * when @log is set.
 */
static void pci_read_bridge_io(struct pci_dev *dev, struct resource *res,
			       bool log)
{
	u8 io_base_lo, io_limit_lo;
	unsigned long io_mask, io_granularity, base, limit;
	struct pci_bus_region region;

	if (!dev->io_window)
		return;

	io_mask = PCI_IO_RANGE_MASK;
	io_granularity = 0x1000;
	if (dev->io_window_1k) {
		/* Support 1K I/O space granularity */
		io_mask = PCI_IO_1K_RANGE_MASK;
		io_granularity = 0x400;
	}

	pci_read_config_byte(dev, PCI_IO_BASE, &io_base_lo);
	pci_read_config_byte(dev, PCI_IO_LIMIT, &io_limit_lo);
	base = (io_base_lo & io_mask) << 8;
	limit = (io_limit_lo & io_mask) << 8;

	/* A 32-bit I/O window keeps the upper address bits in extra registers */
	if ((io_base_lo & PCI_IO_RANGE_TYPE_MASK) == PCI_IO_RANGE_TYPE_32) {
		u16 io_base_hi, io_limit_hi;

		pci_read_config_word(dev, PCI_IO_BASE_UPPER16, &io_base_hi);
		pci_read_config_word(dev, PCI_IO_LIMIT_UPPER16, &io_limit_hi);
		base |= ((unsigned long) io_base_hi << 16);
		limit |= ((unsigned long) io_limit_hi << 16);
	}

	res->flags = (io_base_lo & PCI_IO_RANGE_TYPE_MASK) | IORESOURCE_IO;

	if (base <= limit) {
		region.start = base;
		region.end = limit + io_granularity - 1;
		pcibios_bus_to_resource(dev->bus, res, &region);
		if (log)
			pci_info(dev, "  bridge window %pR\n", res);
	} else {
		/* base > limit: the window is not in use */
		resource_set_range(res, 0, 0);
		res->flags |= IORESOURCE_UNSET | IORESOURCE_DISABLED;
	}
}
419 
420 static void pci_read_bridge_mmio(struct pci_dev *dev, struct resource *res,
421 				 bool log)
422 {
423 	u16 mem_base_lo, mem_limit_lo;
424 	unsigned long base, limit;
425 	struct pci_bus_region region;
426 
427 	pci_read_config_word(dev, PCI_MEMORY_BASE, &mem_base_lo);
428 	pci_read_config_word(dev, PCI_MEMORY_LIMIT, &mem_limit_lo);
429 	base = ((unsigned long) mem_base_lo & PCI_MEMORY_RANGE_MASK) << 16;
430 	limit = ((unsigned long) mem_limit_lo & PCI_MEMORY_RANGE_MASK) << 16;
431 
432 	res->flags = (mem_base_lo & PCI_MEMORY_RANGE_TYPE_MASK) | IORESOURCE_MEM;
433 
434 	if (base <= limit) {
435 		region.start = base;
436 		region.end = limit + 0xfffff;
437 		pcibios_bus_to_resource(dev->bus, res, &region);
438 		if (log)
439 			pci_info(dev, "  bridge window %pR\n", res);
440 	} else {
441 		resource_set_range(res, 0, 0);
442 		res->flags |= IORESOURCE_UNSET | IORESOURCE_DISABLED;
443 	}
444 }
445 
/*
 * Read the bridge's prefetchable memory window into @res, including the
 * upper 32 bits when the window is 64-bit.  Does nothing unless
 * dev->pref_window was detected earlier.  Logs the window when @log is
 * set.
 */
static void pci_read_bridge_mmio_pref(struct pci_dev *dev, struct resource *res,
				      bool log)
{
	u16 mem_base_lo, mem_limit_lo;
	u64 base64, limit64;
	pci_bus_addr_t base, limit;
	struct pci_bus_region region;

	if (!dev->pref_window)
		return;

	pci_read_config_word(dev, PCI_PREF_MEMORY_BASE, &mem_base_lo);
	pci_read_config_word(dev, PCI_PREF_MEMORY_LIMIT, &mem_limit_lo);
	base64 = (mem_base_lo & PCI_PREF_RANGE_MASK) << 16;
	limit64 = (mem_limit_lo & PCI_PREF_RANGE_MASK) << 16;

	if ((mem_base_lo & PCI_PREF_RANGE_TYPE_MASK) == PCI_PREF_RANGE_TYPE_64) {
		u32 mem_base_hi, mem_limit_hi;

		pci_read_config_dword(dev, PCI_PREF_BASE_UPPER32, &mem_base_hi);
		pci_read_config_dword(dev, PCI_PREF_LIMIT_UPPER32, &mem_limit_hi);

		/*
		 * Some bridges set the base > limit by default, and some
		 * (broken) BIOSes do not initialize them.  If we find
		 * this, just assume they are not being used.
		 */
		if (mem_base_hi <= mem_limit_hi) {
			base64 |= (u64) mem_base_hi << 32;
			limit64 |= (u64) mem_limit_hi << 32;
		}
	}

	base = (pci_bus_addr_t) base64;
	limit = (pci_bus_addr_t) limit64;

	/* Truncation means pci_bus_addr_t can't represent this window */
	if (base != base64) {
		pci_err(dev, "can't handle bridge window above 4GB (bus address %#010llx)\n",
			(unsigned long long) base64);
		return;
	}

	res->flags = (mem_base_lo & PCI_PREF_RANGE_TYPE_MASK) | IORESOURCE_MEM |
		     IORESOURCE_PREFETCH;
	if (res->flags & PCI_PREF_RANGE_TYPE_64)
		res->flags |= IORESOURCE_MEM_64;

	if (base <= limit) {
		region.start = base;
		region.end = limit + 0xfffff;
		pcibios_bus_to_resource(dev->bus, res, &region);
		if (log)
			pci_info(dev, "  bridge window %pR\n", res);
	} else {
		/* base > limit: the window is not in use */
		resource_set_range(res, 0, 0);
		res->flags |= IORESOURCE_UNSET | IORESOURCE_DISABLED;
	}
}
504 
/*
 * Probe which optional windows (I/O, prefetchable memory, 64-bit
 * prefetchable) @bridge implements by test-writing the registers, cache
 * the results in the pci_dev, and log the decoded windows.
 */
static void pci_read_bridge_windows(struct pci_dev *bridge)
{
	u32 buses;
	u16 io;
	u32 pmem, tmp;
	struct resource res;

	pci_read_config_dword(bridge, PCI_PRIMARY_BUS, &buses);
	res.flags = IORESOURCE_BUS;
	res.start = FIELD_GET(PCI_SECONDARY_BUS_MASK, buses);
	res.end = FIELD_GET(PCI_SUBORDINATE_BUS_MASK, buses);
	pci_info(bridge, "PCI bridge to %pR%s\n", &res,
		 bridge->transparent ? " (subtractive decode)" : "");

	/* Zero may mean "no window" or "unprogrammed"; probe with a write */
	pci_read_config_word(bridge, PCI_IO_BASE, &io);
	if (!io) {
		pci_write_config_word(bridge, PCI_IO_BASE, 0xe0f0);
		pci_read_config_word(bridge, PCI_IO_BASE, &io);
		pci_write_config_word(bridge, PCI_IO_BASE, 0x0);
	}
	if (io) {
		bridge->io_window = 1;
		pci_read_bridge_io(bridge, &res, true);
	}

	pci_read_bridge_mmio(bridge, &res, true);

	/*
	 * DECchip 21050 pass 2 errata: the bridge may miss an address
	 * disconnect boundary by one PCI data phase.  Workaround: do not
	 * use prefetching on this device.
	 */
	if (bridge->vendor == PCI_VENDOR_ID_DEC && bridge->device == 0x0001)
		return;

	pci_read_config_dword(bridge, PCI_PREF_MEMORY_BASE, &pmem);
	if (!pmem) {
		pci_write_config_dword(bridge, PCI_PREF_MEMORY_BASE,
					       0xffe0fff0);
		pci_read_config_dword(bridge, PCI_PREF_MEMORY_BASE, &pmem);
		pci_write_config_dword(bridge, PCI_PREF_MEMORY_BASE, 0x0);
	}
	if (!pmem)
		return;

	bridge->pref_window = 1;

	if ((pmem & PCI_PREF_RANGE_TYPE_MASK) == PCI_PREF_RANGE_TYPE_64) {

		/*
		 * Bridge claims to have a 64-bit prefetchable memory
		 * window; verify that the upper bits are actually
		 * writable.
		 */
		pci_read_config_dword(bridge, PCI_PREF_BASE_UPPER32, &pmem);
		pci_write_config_dword(bridge, PCI_PREF_BASE_UPPER32,
				       0xffffffff);
		pci_read_config_dword(bridge, PCI_PREF_BASE_UPPER32, &tmp);
		pci_write_config_dword(bridge, PCI_PREF_BASE_UPPER32, pmem);
		if (tmp)
			bridge->pref_64_window = 1;
	}

	pci_read_bridge_mmio_pref(bridge, &res, true);
}
570 
/*
 * Populate the resources of @child with its bridge's window resources
 * read back from config space.  For a transparent (subtractive decode)
 * bridge, additionally mirror all of the parent bus's resources onto
 * the child.
 */
void pci_read_bridge_bases(struct pci_bus *child)
{
	struct pci_dev *dev = child->self;
	struct resource *res;
	int i;

	if (pci_is_root_bus(child))	/* It's a host bus, nothing to read */
		return;

	pci_info(dev, "PCI bridge to %pR%s\n",
		 &child->busn_res,
		 dev->transparent ? " (subtractive decode)" : "");

	/* Replace any existing bus resources with the bridge's windows */
	pci_bus_remove_resources(child);
	for (i = 0; i < PCI_BRIDGE_RESOURCE_NUM; i++)
		child->resource[i] = &dev->resource[PCI_BRIDGE_RESOURCES+i];

	pci_read_bridge_io(child->self,
			   child->resource[PCI_BUS_BRIDGE_IO_WINDOW], false);
	pci_read_bridge_mmio(child->self,
			     child->resource[PCI_BUS_BRIDGE_MEM_WINDOW], false);
	pci_read_bridge_mmio_pref(child->self,
				  child->resource[PCI_BUS_BRIDGE_PREF_MEM_WINDOW],
				  false);

	if (!dev->transparent)
		return;

	pci_bus_for_each_resource(child->parent, res) {
		if (!res || !res->flags)
			continue;

		pci_bus_add_resource(child, res);
		pci_info(dev, "  bridge window %pR (subtractive decode)\n", res);
	}
}
607 
608 static struct pci_bus *pci_alloc_bus(struct pci_bus *parent)
609 {
610 	struct pci_bus *b;
611 
612 	b = kzalloc_obj(*b);
613 	if (!b)
614 		return NULL;
615 
616 	INIT_LIST_HEAD(&b->node);
617 	INIT_LIST_HEAD(&b->children);
618 	INIT_LIST_HEAD(&b->devices);
619 	INIT_LIST_HEAD(&b->slots);
620 	INIT_LIST_HEAD(&b->resources);
621 	b->max_bus_speed = PCI_SPEED_UNKNOWN;
622 	b->cur_bus_speed = PCI_SPEED_UNKNOWN;
623 #ifdef CONFIG_PCI_DOMAINS_GENERIC
624 	if (parent)
625 		b->domain_nr = parent->domain_nr;
626 #endif
627 	return b;
628 }
629 
630 static void pci_release_host_bridge_dev(struct device *dev)
631 {
632 	struct pci_host_bridge *bridge = to_pci_host_bridge(dev);
633 
634 	if (bridge->release_fn)
635 		bridge->release_fn(bridge);
636 
637 	pci_free_resource_list(&bridge->windows);
638 	pci_free_resource_list(&bridge->dma_ranges);
639 
640 	/* Host bridges only have domain_nr set in the emulation case */
641 	if (bridge->domain_nr != PCI_DOMAIN_NR_NOT_SET)
642 		pci_bus_release_emul_domain_nr(bridge->domain_nr);
643 
644 	kfree(bridge);
645 }
646 
/* sysfs attribute groups exposed on every PCI host bridge device */
static const struct attribute_group *pci_host_bridge_groups[] = {
#ifdef CONFIG_PCI_IDE
	&pci_ide_attr_group,
#endif
	NULL
};

/* Device type shared by all host bridges; supplies release and sysfs groups */
static const struct device_type pci_host_bridge_type = {
	.groups = pci_host_bridge_groups,
	.release = pci_release_host_bridge_dev,
};
658 
/* Common initialization for a freshly allocated (zeroed) host bridge */
static void pci_init_host_bridge(struct pci_host_bridge *bridge)
{
	INIT_LIST_HEAD(&bridge->windows);
	INIT_LIST_HEAD(&bridge->dma_ranges);

	/*
	 * We assume we can manage these PCIe features.  Some systems may
	 * reserve these for use by the platform itself, e.g., an ACPI BIOS
	 * may implement its own AER handling and use _OSC to prevent the
	 * OS from interfering.
	 */
	bridge->native_aer = 1;
	bridge->native_pcie_hotplug = 1;
	bridge->native_shpc_hotplug = 1;
	bridge->native_pme = 1;
	bridge->native_ltr = 1;
	bridge->native_dpc = 1;
	bridge->domain_nr = PCI_DOMAIN_NR_NOT_SET;
	bridge->native_cxl_error = 1;
	bridge->dev.type = &pci_host_bridge_type;
	pci_ide_init_host_bridge(bridge);

	/* From here on, freeing goes through put_device() */
	device_initialize(&bridge->dev);
}
683 
684 struct pci_host_bridge *pci_alloc_host_bridge(size_t priv)
685 {
686 	struct pci_host_bridge *bridge;
687 
688 	bridge = kzalloc(sizeof(*bridge) + priv, GFP_KERNEL);
689 	if (!bridge)
690 		return NULL;
691 
692 	pci_init_host_bridge(bridge);
693 
694 	return bridge;
695 }
696 EXPORT_SYMBOL(pci_alloc_host_bridge);
697 
/* devm action callback: drop the devm-managed host bridge reference */
static void devm_pci_alloc_host_bridge_release(void *data)
{
	pci_free_host_bridge(data);
}
702 
/*
 * Device-managed version of pci_alloc_host_bridge(): the bridge is
 * released automatically when @dev is unbound.  Returns NULL on
 * failure.
 */
struct pci_host_bridge *devm_pci_alloc_host_bridge(struct device *dev,
						   size_t priv)
{
	int ret;
	struct pci_host_bridge *bridge;

	bridge = pci_alloc_host_bridge(priv);
	if (!bridge)
		return NULL;

	bridge->dev.parent = dev;

	/* On failure, _or_reset already released the bridge for us */
	ret = devm_add_action_or_reset(dev, devm_pci_alloc_host_bridge_release,
				       bridge);
	if (ret)
		return NULL;

	ret = devm_of_pci_bridge_init(dev, bridge);
	if (ret)
		return NULL;

	return bridge;
}
EXPORT_SYMBOL(devm_pci_alloc_host_bridge);
727 
/* Drop a host bridge reference; freeing happens via the device release */
void pci_free_host_bridge(struct pci_host_bridge *bridge)
{
	put_device(&bridge->dev);
}
EXPORT_SYMBOL(pci_free_host_bridge);
733 
/* Indexed by PCI_X_SSTATUS_FREQ (secondary bus mode and frequency) */
/* Reserved encodings (0, 4, 8, C) map to PCI_SPEED_UNKNOWN */
static const unsigned char pcix_bus_speed[] = {
	PCI_SPEED_UNKNOWN,		/* 0 */
	PCI_SPEED_66MHz_PCIX,		/* 1 */
	PCI_SPEED_100MHz_PCIX,		/* 2 */
	PCI_SPEED_133MHz_PCIX,		/* 3 */
	PCI_SPEED_UNKNOWN,		/* 4 */
	PCI_SPEED_66MHz_PCIX_ECC,	/* 5 */
	PCI_SPEED_100MHz_PCIX_ECC,	/* 6 */
	PCI_SPEED_133MHz_PCIX_ECC,	/* 7 */
	PCI_SPEED_UNKNOWN,		/* 8 */
	PCI_SPEED_66MHz_PCIX_266,	/* 9 */
	PCI_SPEED_100MHz_PCIX_266,	/* A */
	PCI_SPEED_133MHz_PCIX_266,	/* B */
	PCI_SPEED_UNKNOWN,		/* C */
	PCI_SPEED_66MHz_PCIX_533,	/* D */
	PCI_SPEED_100MHz_PCIX_533,	/* E */
	PCI_SPEED_133MHz_PCIX_533	/* F */
};
753 
/* Indexed by PCI_EXP_LNKCAP_SLS, PCI_EXP_LNKSTA_CLS */
/* Entries 7-F are unassigned link speed codes */
const unsigned char pcie_link_speed[] = {
	PCI_SPEED_UNKNOWN,		/* 0 */
	PCIE_SPEED_2_5GT,		/* 1 */
	PCIE_SPEED_5_0GT,		/* 2 */
	PCIE_SPEED_8_0GT,		/* 3 */
	PCIE_SPEED_16_0GT,		/* 4 */
	PCIE_SPEED_32_0GT,		/* 5 */
	PCIE_SPEED_64_0GT,		/* 6 */
	PCI_SPEED_UNKNOWN,		/* 7 */
	PCI_SPEED_UNKNOWN,		/* 8 */
	PCI_SPEED_UNKNOWN,		/* 9 */
	PCI_SPEED_UNKNOWN,		/* A */
	PCI_SPEED_UNKNOWN,		/* B */
	PCI_SPEED_UNKNOWN,		/* C */
	PCI_SPEED_UNKNOWN,		/* D */
	PCI_SPEED_UNKNOWN,		/* E */
	PCI_SPEED_UNKNOWN		/* F */
};
EXPORT_SYMBOL_GPL(pcie_link_speed);
774 
775 /**
776  * pcie_get_link_speed - Get speed value from PCIe generation number
777  * @speed: PCIe speed (1-based: 1 = 2.5GT, 2 = 5GT, ...)
778  *
779  * Returns the speed value (e.g., PCIE_SPEED_2_5GT) if @speed is valid,
780  * otherwise returns PCI_SPEED_UNKNOWN.
781  */
782 unsigned char pcie_get_link_speed(unsigned int speed)
783 {
784 	if (speed >= ARRAY_SIZE(pcie_link_speed))
785 		return PCI_SPEED_UNKNOWN;
786 
787 	return pcie_link_speed[speed];
788 }
789 EXPORT_SYMBOL_GPL(pcie_get_link_speed);
790 
/*
 * Return a human-readable name for a pci_bus_speed value.  In-range
 * reserved encodings yield NULL (the table entry); out-of-range values
 * yield "Unknown".
 */
const char *pci_speed_string(enum pci_bus_speed speed)
{
	/* Indexed by the pci_bus_speed enum */
	static const char *speed_strings[] = {
	    "33 MHz PCI",		/* 0x00 */
	    "66 MHz PCI",		/* 0x01 */
	    "66 MHz PCI-X",		/* 0x02 */
	    "100 MHz PCI-X",		/* 0x03 */
	    "133 MHz PCI-X",		/* 0x04 */
	    NULL,			/* 0x05 */
	    NULL,			/* 0x06 */
	    NULL,			/* 0x07 */
	    NULL,			/* 0x08 */
	    "66 MHz PCI-X 266",		/* 0x09 */
	    "100 MHz PCI-X 266",	/* 0x0a */
	    "133 MHz PCI-X 266",	/* 0x0b */
	    "Unknown AGP",		/* 0x0c */
	    "1x AGP",			/* 0x0d */
	    "2x AGP",			/* 0x0e */
	    "4x AGP",			/* 0x0f */
	    "8x AGP",			/* 0x10 */
	    "66 MHz PCI-X 533",		/* 0x11 */
	    "100 MHz PCI-X 533",	/* 0x12 */
	    "133 MHz PCI-X 533",	/* 0x13 */
	    "2.5 GT/s PCIe",		/* 0x14 */
	    "5.0 GT/s PCIe",		/* 0x15 */
	    "8.0 GT/s PCIe",		/* 0x16 */
	    "16.0 GT/s PCIe",		/* 0x17 */
	    "32.0 GT/s PCIe",		/* 0x18 */
	    "64.0 GT/s PCIe",		/* 0x19 */
	};

	if (speed < ARRAY_SIZE(speed_strings))
		return speed_strings[speed];
	return "Unknown";
}
EXPORT_SYMBOL_GPL(pci_speed_string);
828 
829 void pcie_update_link_speed(struct pci_bus *bus,
830 			    enum pcie_link_change_reason reason)
831 {
832 	struct pci_dev *bridge = bus->self;
833 	u16 linksta, linksta2;
834 
835 	pcie_capability_read_word(bridge, PCI_EXP_LNKSTA, &linksta);
836 	pcie_capability_read_word(bridge, PCI_EXP_LNKSTA2, &linksta2);
837 
838 	__pcie_update_link_speed(bus, reason, linksta, linksta2);
839 }
840 EXPORT_SYMBOL_GPL(pcie_update_link_speed);
841 
/* AGP rates in increasing order; index 0 is the "unknown" fallback */
static unsigned char agp_speeds[] = {
	AGP_UNKNOWN,
	AGP_1X,
	AGP_2X,
	AGP_4X,
	AGP_8X
};
849 
850 static enum pci_bus_speed agp_speed(int agp3, int agpstat)
851 {
852 	int index = 0;
853 
854 	if (agpstat & 4)
855 		index = 3;
856 	else if (agpstat & 2)
857 		index = 2;
858 	else if (agpstat & 1)
859 		index = 1;
860 	else
861 		goto out;
862 
863 	if (agp3) {
864 		index += 2;
865 		if (index == 5)
866 			index = 0;
867 	}
868 
869  out:
870 	return agp_speeds[index];
871 }
872 
/*
 * Determine max_bus_speed and cur_bus_speed for @bus from its upstream
 * bridge's AGP, PCI-X, or PCIe capability (checked in that order).
 */
static void pci_set_bus_speed(struct pci_bus *bus)
{
	struct pci_dev *bridge = bus->self;
	int pos;

	pos = pci_find_capability(bridge, PCI_CAP_ID_AGP);
	if (!pos)
		pos = pci_find_capability(bridge, PCI_CAP_ID_AGP3);
	if (pos) {
		u32 agpstat, agpcmd;

		/* agpstat bit 3 is passed as the AGP3-mode flag */
		pci_read_config_dword(bridge, pos + PCI_AGP_STATUS, &agpstat);
		bus->max_bus_speed = agp_speed(agpstat & 8, agpstat & 7);

		pci_read_config_dword(bridge, pos + PCI_AGP_COMMAND, &agpcmd);
		bus->cur_bus_speed = agp_speed(agpstat & 8, agpcmd & 7);
	}

	pos = pci_find_capability(bridge, PCI_CAP_ID_PCIX);
	if (pos) {
		u16 status;
		enum pci_bus_speed max;

		pci_read_config_word(bridge, pos + PCI_X_BRIDGE_SSTATUS,
				     &status);

		/* Highest advertised capability determines the maximum */
		if (status & PCI_X_SSTATUS_533MHZ) {
			max = PCI_SPEED_133MHz_PCIX_533;
		} else if (status & PCI_X_SSTATUS_266MHZ) {
			max = PCI_SPEED_133MHz_PCIX_266;
		} else if (status & PCI_X_SSTATUS_133MHZ) {
			if ((status & PCI_X_SSTATUS_VERS) == PCI_X_SSTATUS_V2)
				max = PCI_SPEED_133MHz_PCIX_ECC;
			else
				max = PCI_SPEED_133MHz_PCIX;
		} else {
			max = PCI_SPEED_66MHz_PCIX;
		}

		bus->max_bus_speed = max;
		bus->cur_bus_speed =
			pcix_bus_speed[FIELD_GET(PCI_X_SSTATUS_FREQ, status)];

		return;
	}

	if (pci_is_pcie(bridge)) {
		u32 linkcap;

		pcie_capability_read_dword(bridge, PCI_EXP_LNKCAP, &linkcap);
		bus->max_bus_speed = pcie_link_speed[linkcap & PCI_EXP_LNKCAP_SLS];

		pcie_update_link_speed(bus, PCIE_ADD_BUS);
	}
}
928 
929 static struct irq_domain *pci_host_bridge_msi_domain(struct pci_bus *bus)
930 {
931 	struct irq_domain *d;
932 
933 	/* If the host bridge driver sets a MSI domain of the bridge, use it */
934 	d = dev_get_msi_domain(bus->bridge);
935 
936 	/*
937 	 * Any firmware interface that can resolve the msi_domain
938 	 * should be called from here.
939 	 */
940 	if (!d)
941 		d = pci_host_bridge_of_msi_domain(bus);
942 	if (!d)
943 		d = pci_host_bridge_acpi_msi_domain(bus);
944 
945 	/*
946 	 * If no IRQ domain was found via the OF tree, try looking it up
947 	 * directly through the fwnode_handle.
948 	 */
949 	if (!d) {
950 		struct fwnode_handle *fwnode = pci_root_bus_fwnode(bus);
951 
952 		if (fwnode)
953 			d = irq_find_matching_fwnode(fwnode,
954 						     DOMAIN_BUS_PCI_MSI);
955 	}
956 
957 	return d;
958 }
959 
/* Assign @bus the MSI domain of the nearest bridge above it (or the host) */
static void pci_set_bus_msi_domain(struct pci_bus *bus)
{
	struct irq_domain *d;
	struct pci_bus *b;

	/*
	 * The bus can be a root bus, a subordinate bus, or a virtual bus
	 * created by an SR-IOV device.  Walk up to the first bridge device
	 * found or derive the domain from the host bridge.
	 */
	for (b = bus, d = NULL; !d && !pci_is_root_bus(b); b = b->parent) {
		if (b->self)
			d = dev_get_msi_domain(&b->self->dev);
	}

	/* If no bridge supplied one, b is the root bus: ask the host bridge */
	if (!d)
		d = pci_host_bridge_msi_domain(b);

	dev_set_msi_domain(&bus->dev, d);
}
980 
981 static bool pci_preserve_config(struct pci_host_bridge *host_bridge)
982 {
983 	if (pci_acpi_preserve_config(host_bridge))
984 		return true;
985 
986 	if (host_bridge->dev.parent && host_bridge->dev.parent->of_node)
987 		return of_pci_preserve_config(host_bridge->dev.parent->of_node);
988 
989 	return false;
990 }
991 
992 static int pci_register_host_bridge(struct pci_host_bridge *bridge)
993 {
994 	struct device *parent = bridge->dev.parent;
995 	struct resource_entry *window, *next, *n;
996 	struct pci_bus *bus, *b;
997 	resource_size_t offset, next_offset;
998 	LIST_HEAD(resources);
999 	struct resource *res, *next_res;
1000 	bool bus_registered = false;
1001 	char addr[64], *fmt;
1002 	const char *name;
1003 	int err;
1004 
1005 	bus = pci_alloc_bus(NULL);
1006 	if (!bus)
1007 		return -ENOMEM;
1008 
1009 	bridge->bus = bus;
1010 
1011 	bus->sysdata = bridge->sysdata;
1012 	bus->ops = bridge->ops;
1013 	bus->number = bus->busn_res.start = bridge->busnr;
1014 #ifdef CONFIG_PCI_DOMAINS_GENERIC
1015 	if (bridge->domain_nr == PCI_DOMAIN_NR_NOT_SET)
1016 		bus->domain_nr = pci_bus_find_domain_nr(bus, parent);
1017 	else
1018 		bus->domain_nr = bridge->domain_nr;
1019 	if (bus->domain_nr < 0) {
1020 		err = bus->domain_nr;
1021 		goto free;
1022 	}
1023 #endif
1024 
1025 	b = pci_find_bus(pci_domain_nr(bus), bridge->busnr);
1026 	if (b) {
1027 		/* Ignore it if we already got here via a different bridge */
1028 		dev_dbg(&b->dev, "bus already known\n");
1029 		err = -EEXIST;
1030 		goto free;
1031 	}
1032 
1033 	dev_set_name(&bridge->dev, "pci%04x:%02x", pci_domain_nr(bus),
1034 		     bridge->busnr);
1035 
1036 	err = pcibios_root_bridge_prepare(bridge);
1037 	if (err)
1038 		goto free;
1039 
1040 	/* Temporarily move resources off the list */
1041 	list_splice_init(&bridge->windows, &resources);
1042 	err = device_add(&bridge->dev);
1043 	if (err)
1044 		goto free;
1045 
1046 	bus->bridge = get_device(&bridge->dev);
1047 	device_enable_async_suspend(bus->bridge);
1048 	pci_set_bus_of_node(bus);
1049 	pci_set_bus_msi_domain(bus);
1050 	if (bridge->msi_domain && !dev_get_msi_domain(&bus->dev) &&
1051 	    !pci_host_of_has_msi_map(parent))
1052 		bus->bus_flags |= PCI_BUS_FLAGS_NO_MSI;
1053 
1054 	if (!parent)
1055 		set_dev_node(bus->bridge, pcibus_to_node(bus));
1056 
1057 	bus->dev.class = &pcibus_class;
1058 	bus->dev.parent = bus->bridge;
1059 
1060 	dev_set_name(&bus->dev, "%04x:%02x", pci_domain_nr(bus), bus->number);
1061 	name = dev_name(&bus->dev);
1062 
1063 	err = device_register(&bus->dev);
1064 	bus_registered = true;
1065 	if (err)
1066 		goto unregister;
1067 
1068 	pcibios_add_bus(bus);
1069 
1070 	if (bus->ops->add_bus) {
1071 		err = bus->ops->add_bus(bus);
1072 		if (WARN_ON(err < 0))
1073 			dev_err(&bus->dev, "failed to add bus: %d\n", err);
1074 	}
1075 
1076 	/* Create legacy_io and legacy_mem files for this bus */
1077 	pci_create_legacy_files(bus);
1078 
1079 	if (parent)
1080 		dev_info(parent, "PCI host bridge to bus %s\n", name);
1081 	else
1082 		pr_info("PCI host bridge to bus %s\n", name);
1083 
1084 	if (nr_node_ids > 1 && pcibus_to_node(bus) == NUMA_NO_NODE)
1085 		dev_warn(&bus->dev, "Unknown NUMA node; performance will be reduced\n");
1086 
1087 	/* Check if the boot configuration by FW needs to be preserved */
1088 	bridge->preserve_config = pci_preserve_config(bridge);
1089 
1090 	/* Coalesce contiguous windows */
1091 	resource_list_for_each_entry_safe(window, n, &resources) {
1092 		if (list_is_last(&window->node, &resources))
1093 			break;
1094 
1095 		next = list_next_entry(window, node);
1096 		offset = window->offset;
1097 		res = window->res;
1098 		next_offset = next->offset;
1099 		next_res = next->res;
1100 
1101 		if (res->flags != next_res->flags || offset != next_offset)
1102 			continue;
1103 
1104 		if (res->end + 1 == next_res->start) {
1105 			next_res->start = res->start;
1106 			res->flags = res->start = res->end = 0;
1107 		}
1108 	}
1109 
1110 	/* Add initial resources to the bus */
1111 	resource_list_for_each_entry_safe(window, n, &resources) {
1112 		offset = window->offset;
1113 		res = window->res;
1114 		if (!res->flags && !res->start && !res->end) {
1115 			release_resource(res);
1116 			resource_list_destroy_entry(window);
1117 			continue;
1118 		}
1119 
1120 		list_move_tail(&window->node, &bridge->windows);
1121 
1122 		if (res->flags & IORESOURCE_BUS)
1123 			pci_bus_insert_busn_res(bus, bus->number, res->end);
1124 		else
1125 			pci_bus_add_resource(bus, res);
1126 
1127 		if (offset) {
1128 			if (resource_type(res) == IORESOURCE_IO)
1129 				fmt = " (bus address [%#06llx-%#06llx])";
1130 			else
1131 				fmt = " (bus address [%#010llx-%#010llx])";
1132 
1133 			snprintf(addr, sizeof(addr), fmt,
1134 				 (unsigned long long)(res->start - offset),
1135 				 (unsigned long long)(res->end - offset));
1136 		} else
1137 			addr[0] = '\0';
1138 
1139 		dev_info(&bus->dev, "root bus resource %pR%s\n", res, addr);
1140 	}
1141 
1142 	of_pci_make_host_bridge_node(bridge);
1143 
1144 	down_write(&pci_bus_sem);
1145 	list_add_tail(&bus->node, &pci_root_buses);
1146 	up_write(&pci_bus_sem);
1147 
1148 	return 0;
1149 
1150 unregister:
1151 	put_device(&bridge->dev);
1152 	device_del(&bridge->dev);
1153 free:
1154 #ifdef CONFIG_PCI_DOMAINS_GENERIC
1155 	if (bridge->domain_nr == PCI_DOMAIN_NR_NOT_SET)
1156 		pci_bus_release_domain_nr(parent, bus->domain_nr);
1157 #endif
1158 	if (bus_registered)
1159 		put_device(&bus->dev);
1160 	else
1161 		kfree(bus);
1162 
1163 	return err;
1164 }
1165 
1166 static bool pci_bridge_child_ext_cfg_accessible(struct pci_dev *bridge)
1167 {
1168 	int pos;
1169 	u32 status;
1170 
1171 	/*
1172 	 * If extended config space isn't accessible on a bridge's primary
1173 	 * bus, we certainly can't access it on the secondary bus.
1174 	 */
1175 	if (bridge->bus->bus_flags & PCI_BUS_FLAGS_NO_EXTCFG)
1176 		return false;
1177 
1178 	/*
1179 	 * PCIe Root Ports and switch ports are PCIe on both sides, so if
1180 	 * extended config space is accessible on the primary, it's also
1181 	 * accessible on the secondary.
1182 	 */
1183 	if (pci_is_pcie(bridge) &&
1184 	    (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT ||
1185 	     pci_pcie_type(bridge) == PCI_EXP_TYPE_UPSTREAM ||
1186 	     pci_pcie_type(bridge) == PCI_EXP_TYPE_DOWNSTREAM))
1187 		return true;
1188 
1189 	/*
1190 	 * For the other bridge types:
1191 	 *   - PCI-to-PCI bridges
1192 	 *   - PCIe-to-PCI/PCI-X forward bridges
1193 	 *   - PCI/PCI-X-to-PCIe reverse bridges
1194 	 * extended config space on the secondary side is only accessible
1195 	 * if the bridge supports PCI-X Mode 2.
1196 	 */
1197 	pos = pci_find_capability(bridge, PCI_CAP_ID_PCIX);
1198 	if (!pos)
1199 		return false;
1200 
1201 	pci_read_config_dword(bridge, pos + PCI_X_STATUS, &status);
1202 	return status & (PCI_X_STATUS_266MHZ | PCI_X_STATUS_533MHZ);
1203 }
1204 
/*
 * pci_alloc_child_bus - Allocate and register a child bus
 * @parent: Parent bus the new bus hangs off
 * @bridge: Bridge device leading to the new bus, or NULL for a bus with
 *	    no backing bridge device
 * @busnr: Bus number for the new bus
 *
 * Allocate a struct pci_bus, inherit sysdata/flags/config accessors from
 * @parent (or the host bridge's child_ops), register it in the device
 * model and create its legacy_io/legacy_mem sysfs files.
 *
 * Return: the new bus, or NULL on allocation/registration failure.
 */
static struct pci_bus *pci_alloc_child_bus(struct pci_bus *parent,
					   struct pci_dev *bridge, int busnr)
{
	struct pci_bus *child;
	struct pci_host_bridge *host;
	int i;
	int ret;

	/* Allocate a new bus and inherit stuff from the parent */
	child = pci_alloc_bus(parent);
	if (!child)
		return NULL;

	child->parent = parent;
	child->sysdata = parent->sysdata;
	child->bus_flags = parent->bus_flags;

	/* The host bridge may supply dedicated config ops for child buses */
	host = pci_find_host_bridge(parent);
	if (host->child_ops)
		child->ops = host->child_ops;
	else
		child->ops = parent->ops;

	/*
	 * Initialize some portions of the bus device, but don't register
	 * it now as the parent is not properly set up yet.
	 */
	child->dev.class = &pcibus_class;
	dev_set_name(&child->dev, "%04x:%02x", pci_domain_nr(child), busnr);

	/* Set up the primary, secondary and subordinate bus numbers */
	child->number = child->busn_res.start = busnr;
	child->primary = parent->busn_res.start;
	child->busn_res.end = 0xff;	/* trimmed later once the real subordinate is known */

	if (!bridge) {
		/* No bridge device: parent directly to the host bridge */
		child->dev.parent = parent->bridge;
		goto add_dev;
	}

	/* Hold a reference on the bridge for the lifetime of the bus */
	child->self = bridge;
	child->bridge = get_device(&bridge->dev);
	child->dev.parent = child->bridge;
	pci_set_bus_of_node(child);
	pci_set_bus_speed(child);

	/*
	 * Check whether extended config space is accessible on the child
	 * bus.  Note that we currently assume it is always accessible on
	 * the root bus.
	 */
	if (!pci_bridge_child_ext_cfg_accessible(bridge)) {
		child->bus_flags |= PCI_BUS_FLAGS_NO_EXTCFG;
		pci_info(child, "extended config space not accessible\n");
	}

	/* Set up default resource pointers and names */
	for (i = 0; i < PCI_BRIDGE_RESOURCE_NUM; i++) {
		child->resource[i] = &bridge->resource[PCI_BRIDGE_RESOURCES+i];
		child->resource[i]->name = child->name;
	}
	bridge->subordinate = child;

add_dev:
	pci_set_bus_msi_domain(child);
	ret = device_register(&child->dev);
	if (WARN_ON(ret < 0)) {
		/* device_register() failed: drop the initial reference */
		put_device(&child->dev);
		return NULL;
	}

	pcibios_add_bus(child);

	/* Give the host controller a chance to hook the new bus */
	if (child->ops->add_bus) {
		ret = child->ops->add_bus(child);
		if (WARN_ON(ret < 0))
			dev_err(&child->dev, "failed to add bus: %d\n", ret);
	}

	/* Create legacy_io and legacy_mem files for this bus */
	pci_create_legacy_files(child);

	return child;
}
1289 
1290 struct pci_bus *pci_add_new_bus(struct pci_bus *parent, struct pci_dev *dev,
1291 				int busnr)
1292 {
1293 	struct pci_bus *child;
1294 
1295 	child = pci_alloc_child_bus(parent, dev, busnr);
1296 	if (child) {
1297 		down_write(&pci_bus_sem);
1298 		list_add_tail(&child->node, &parent->children);
1299 		up_write(&pci_bus_sem);
1300 	}
1301 	return child;
1302 }
1303 EXPORT_SYMBOL(pci_add_new_bus);
1304 
1305 static void pci_enable_rrs_sv(struct pci_dev *pdev)
1306 {
1307 	u16 root_cap = 0;
1308 
1309 	/* Enable Configuration RRS Software Visibility if supported */
1310 	pcie_capability_read_word(pdev, PCI_EXP_RTCAP, &root_cap);
1311 	if (root_cap & PCI_EXP_RTCAP_RRS_SV) {
1312 		pcie_capability_set_word(pdev, PCI_EXP_RTCTL,
1313 					 PCI_EXP_RTCTL_RRS_SVE);
1314 		pdev->config_rrs_sv = 1;
1315 	}
1316 }
1317 
1318 static unsigned int pci_scan_child_bus_extend(struct pci_bus *bus,
1319 					      unsigned int available_buses);
1320 
/*
 * pbus_validate_busn - Note when a bus number range cannot work
 * @bus: Bus whose bus number resource should be validated
 *
 * Walk up the bus hierarchy and note once if @bus's number or its
 * subordinate range falls outside any upstream bridge's [secondary,
 * subordinate] window; config cycles addressed to such buses would not
 * be forwarded, leaving the devices behind the bridge unreachable.
 *
 * The loop condition skips the topmost (root) bus, whose range is not
 * checked.  NOTE(review): bus->parent is dereferenced unconditionally,
 * so callers must pass a non-root bus -- confirm at the call sites.
 */
void pbus_validate_busn(struct pci_bus *bus)
{
	struct pci_bus *upstream = bus->parent;
	struct pci_dev *bridge = bus->self;

	/* Check that all devices are accessible */
	while (upstream->parent) {
		/* Any part of @bus's range outside the upstream window? */
		if ((bus->busn_res.end > upstream->busn_res.end) ||
		    (bus->number > upstream->busn_res.end) ||
		    (bus->number < upstream->number) ||
		    (bus->busn_res.end < upstream->number)) {
			pci_info(bridge, "devices behind bridge are unusable because %pR cannot be assigned for them\n",
				 &bus->busn_res);
			break;
		}
		upstream = upstream->parent;
	}
}
1339 
1340 /**
1341  * pci_ea_fixed_busnrs() - Read fixed Secondary and Subordinate bus
1342  * numbers from EA capability.
1343  * @dev: Bridge
1344  * @sec: updated with secondary bus number from EA
1345  * @sub: updated with subordinate bus number from EA
1346  *
1347  * If @dev is a bridge with EA capability that specifies valid secondary
1348  * and subordinate bus numbers, return true with the bus numbers in @sec
1349  * and @sub.  Otherwise return false.
1350  */
1351 bool pci_ea_fixed_busnrs(struct pci_dev *dev, u8 *sec, u8 *sub)
1352 {
1353 	int ea, offset;
1354 	u32 dw;
1355 	u8 ea_sec, ea_sub;
1356 
1357 	if (dev->hdr_type != PCI_HEADER_TYPE_BRIDGE)
1358 		return false;
1359 
1360 	/* find PCI EA capability in list */
1361 	ea = pci_find_capability(dev, PCI_CAP_ID_EA);
1362 	if (!ea)
1363 		return false;
1364 
1365 	offset = ea + PCI_EA_FIRST_ENT;
1366 	pci_read_config_dword(dev, offset, &dw);
1367 	ea_sec = FIELD_GET(PCI_EA_SEC_BUS_MASK, dw);
1368 	ea_sub = FIELD_GET(PCI_EA_SUB_BUS_MASK, dw);
1369 	if (ea_sec  == 0 || ea_sub < ea_sec)
1370 		return false;
1371 
1372 	*sec = ea_sec;
1373 	*sub = ea_sub;
1374 	return true;
1375 }
1376 
1377 /*
1378  * pci_scan_bridge_extend() - Scan buses behind a bridge
1379  * @bus: Parent bus the bridge is on
1380  * @dev: Bridge itself
1381  * @max: Starting subordinate number of buses behind this bridge
1382  * @available_buses: Total number of buses available for this bridge and
1383  *		     the devices below. After the minimal bus space has
1384  *		     been allocated the remaining buses will be
1385  *		     distributed equally between hotplug-capable bridges.
 * @pass: Either %0 (scan already configured bridges) or %1 (scan bridges
 *        that need to be reconfigured).
1388  *
1389  * If it's a bridge, configure it and scan the bus behind it.
1390  * For CardBus bridges, we don't scan behind as the devices will
1391  * be handled by the bridge driver itself.
1392  *
1393  * We need to process bridges in two passes -- first we scan those
1394  * already configured by the BIOS and after we are done with all of
1395  * them, we proceed to assigning numbers to the remaining buses in
1396  * order to avoid overlaps between old and new bus numbers.
1397  *
1398  * Return: New subordinate number covering all buses behind this bridge.
1399  */
static int pci_scan_bridge_extend(struct pci_bus *bus, struct pci_dev *dev,
				  int max, unsigned int available_buses,
				  int pass)
{
	struct pci_bus *child;
	u32 buses;
	u16 bctl;
	u8 primary, secondary, subordinate;
	int broken = 0;
	bool fixed_buses;
	u8 fixed_sec, fixed_sub;
	int next_busnr;

	/*
	 * Make sure the bridge is powered on to be able to access config
	 * space of devices below it.
	 */
	pm_runtime_get_sync(&dev->dev);

	/* Bus number registers as programmed by firmware (or a prior scan) */
	pci_read_config_dword(dev, PCI_PRIMARY_BUS, &buses);
	primary = FIELD_GET(PCI_PRIMARY_BUS_MASK, buses);
	secondary = FIELD_GET(PCI_SECONDARY_BUS_MASK, buses);
	subordinate = FIELD_GET(PCI_SUBORDINATE_BUS_MASK, buses);

	pci_dbg(dev, "scanning [bus %02x-%02x] behind bridge, pass %d\n",
		secondary, subordinate, pass);

	/* Some bridges hardwire primary to 0; take it from the parent bus */
	if (!primary && (primary != bus->number) && secondary && subordinate) {
		pci_warn(dev, "Primary bus is hard wired to 0\n");
		primary = bus->number;
	}

	/* Check if setup is sensible at all */
	if (!pass &&
	    (primary != bus->number || secondary <= bus->number ||
	     secondary > subordinate)) {
		pci_info(dev, "bridge configuration invalid ([bus %02x-%02x]), reconfiguring\n",
			 secondary, subordinate);
		broken = 1;
	}

	/*
	 * Disable Master-Abort Mode during probing to avoid reporting of
	 * bus errors in some architectures.
	 */
	pci_read_config_word(dev, PCI_BRIDGE_CONTROL, &bctl);
	pci_write_config_word(dev, PCI_BRIDGE_CONTROL,
			      bctl & ~PCI_BRIDGE_CTL_MASTER_ABORT);

	/* CardBus bridges are handled by their own scan helper */
	if (pci_is_cardbus_bridge(dev)) {
		max = pci_cardbus_scan_bridge_extend(bus, dev, buses, max,
						     available_buses,
						     pass);
		goto out;
	}

	if ((secondary || subordinate) &&
	    !pcibios_assign_all_busses() && !broken) {
		unsigned int cmax, buses;

		/*
		 * Bus already configured by firmware, process it in the
		 * first pass and just note the configuration.
		 */
		if (pass)
			goto out;

		/*
		 * The bus might already exist for two reasons: Either we
		 * are rescanning the bus or the bus is reachable through
		 * more than one bridge. The second case can happen with
		 * the i450NX chipset.
		 */
		child = pci_find_bus(pci_domain_nr(bus), secondary);
		if (!child) {
			child = pci_add_new_bus(bus, dev, secondary);
			if (!child)
				goto out;
			child->primary = primary;
			pci_bus_insert_busn_res(child, secondary, subordinate);
			child->bridge_ctl = bctl;
		}

		/* Scan below with the firmware-assigned range as the budget */
		buses = subordinate - secondary;
		cmax = pci_scan_child_bus_extend(child, buses);
		if (cmax > subordinate)
			pci_warn(dev, "bridge has subordinate %02x but max busn %02x\n",
				 subordinate, cmax);

		/* Subordinate should equal child->busn_res.end */
		if (subordinate > max)
			max = subordinate;
	} else {

		/*
		 * We need to assign a number to this bus which we always
		 * do in the second pass.
		 */
		if (!pass) {
			if (pcibios_assign_all_busses() || broken)

				/*
				 * Temporarily disable forwarding of the
				 * configuration cycles on all bridges in
				 * this bus segment to avoid possible
				 * conflicts in the second pass between two
				 * bridges programmed with overlapping bus
				 * ranges.
				 */
				pci_write_config_dword(dev, PCI_PRIMARY_BUS,
						       buses & PCI_SEC_LATENCY_TIMER_MASK);
			goto out;
		}

		/* Clear errors */
		pci_write_config_word(dev, PCI_STATUS, 0xffff);

		/* Read bus numbers from EA Capability (if present) */
		fixed_buses = pci_ea_fixed_busnrs(dev, &fixed_sec, &fixed_sub);
		if (fixed_buses)
			next_busnr = fixed_sec;
		else
			next_busnr = max + 1;

		/*
		 * Prevent assigning a bus number that already exists.
		 * This can happen when a bridge is hot-plugged, so in this
		 * case we only re-scan this bus.
		 */
		child = pci_find_bus(pci_domain_nr(bus), next_busnr);
		if (!child) {
			child = pci_add_new_bus(bus, dev, next_busnr);
			if (!child)
				goto out;
			pci_bus_insert_busn_res(child, next_busnr,
						bus->busn_res.end);
		}
		max++;
		if (available_buses)
			available_buses--;

		/*
		 * Compose the new primary/secondary/subordinate values,
		 * preserving the secondary latency timer field.
		 */
		buses = (buses & PCI_SEC_LATENCY_TIMER_MASK) |
			FIELD_PREP(PCI_PRIMARY_BUS_MASK, child->primary) |
			FIELD_PREP(PCI_SECONDARY_BUS_MASK, child->busn_res.start) |
			FIELD_PREP(PCI_SUBORDINATE_BUS_MASK, child->busn_res.end);

		/* We need to blast all three values with a single write */
		pci_write_config_dword(dev, PCI_PRIMARY_BUS, buses);

		child->bridge_ctl = bctl;
		max = pci_scan_child_bus_extend(child, available_buses);

		/*
		 * Set subordinate bus number to its real value.
		 * If fixed subordinate bus number exists from EA
		 * capability then use it.
		 */
		if (fixed_buses)
			max = fixed_sub;
		pci_bus_update_busn_res_end(child, max);
		pci_write_config_byte(dev, PCI_SUBORDINATE_BUS, max);
	}
	scnprintf(child->name, sizeof(child->name), "PCI Bus %04x:%02x",
		  pci_domain_nr(bus), child->number);

	/* Note if the range falls outside any upstream bridge's window */
	pbus_validate_busn(child);

out:
	/* Clear errors in the Secondary Status Register */
	pci_write_config_word(dev, PCI_SEC_STATUS, 0xffff);

	/* Restore the original Master-Abort Mode setting */
	pci_write_config_word(dev, PCI_BRIDGE_CONTROL, bctl);

	pm_runtime_put(&dev->dev);

	return max;
}
1577 
1578 /*
1579  * pci_scan_bridge() - Scan buses behind a bridge
1580  * @bus: Parent bus the bridge is on
1581  * @dev: Bridge itself
1582  * @max: Starting subordinate number of buses behind this bridge
 * @pass: Either %0 (scan already configured bridges) or %1 (scan bridges
 *        that need to be reconfigured).
1585  *
1586  * If it's a bridge, configure it and scan the bus behind it.
1587  * For CardBus bridges, we don't scan behind as the devices will
1588  * be handled by the bridge driver itself.
1589  *
1590  * We need to process bridges in two passes -- first we scan those
1591  * already configured by the BIOS and after we are done with all of
1592  * them, we proceed to assigning numbers to the remaining buses in
1593  * order to avoid overlaps between old and new bus numbers.
1594  *
1595  * Return: New subordinate number covering all buses behind this bridge.
1596  */
int pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, int max, int pass)
{
	/* No extra bus numbers available to distribute below this bridge */
	return pci_scan_bridge_extend(bus, dev, max, 0, pass);
}
EXPORT_SYMBOL(pci_scan_bridge);
1602 
1603 /*
1604  * Read interrupt line and base address registers.
1605  * The architecture-dependent code can tweak these, of course.
1606  */
1607 static void pci_read_irq(struct pci_dev *dev)
1608 {
1609 	unsigned char irq;
1610 
1611 	/* VFs are not allowed to use INTx, so skip the config reads */
1612 	if (dev->is_virtfn) {
1613 		dev->pin = 0;
1614 		dev->irq = 0;
1615 		return;
1616 	}
1617 
1618 	pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &irq);
1619 	dev->pin = irq;
1620 	if (irq)
1621 		pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &irq);
1622 	dev->irq = irq;
1623 }
1624 
/*
 * set_pcie_port_type - Cache PCIe capability info and sanitize port type
 * @pdev: PCI device to probe
 *
 * Locate the PCIe capability and cache its offset, the PCIe flags
 * register, Device Capabilities, supported max payload size and
 * link-capability bits in @pdev.  For Root Ports, also enable
 * Configuration RRS Software Visibility.  Finally, correct the port
 * type when the claimed upstream/downstream role contradicts the
 * parent device's.
 */
void set_pcie_port_type(struct pci_dev *pdev)
{
	int pos;
	u16 reg16;
	u32 reg32;
	int type;
	struct pci_dev *parent;

	/* Conventional PCI device: nothing to cache */
	pos = pci_find_capability(pdev, PCI_CAP_ID_EXP);
	if (!pos)
		return;

	/* pci_pcie_type()/pcie_capability_*() need these cached first */
	pdev->pcie_cap = pos;
	pci_read_config_word(pdev, pos + PCI_EXP_FLAGS, &reg16);
	pdev->pcie_flags_reg = reg16;

	type = pci_pcie_type(pdev);
	if (type == PCI_EXP_TYPE_ROOT_PORT)
		pci_enable_rrs_sv(pdev);

	/* Cache Device Capabilities and the supported max payload size */
	pci_read_config_dword(pdev, pos + PCI_EXP_DEVCAP, &pdev->devcap);
	pdev->pcie_mpss = FIELD_GET(PCI_EXP_DEVCAP_PAYLOAD, pdev->devcap);

	/* Data Link Layer Link Active Reporting capability */
	pcie_capability_read_dword(pdev, PCI_EXP_LNKCAP, &reg32);
	if (reg32 & PCI_EXP_LNKCAP_DLLLARC)
		pdev->link_active_reporting = 1;

#ifdef CONFIG_PCIEASPM
	/* Record which ASPM link states the hardware advertises */
	if (reg32 & PCI_EXP_LNKCAP_ASPM_L0S)
		pdev->aspm_l0s_support = 1;
	if (reg32 & PCI_EXP_LNKCAP_ASPM_L1)
		pdev->aspm_l1_support = 1;
#endif

	parent = pci_upstream_bridge(pdev);
	if (!parent)
		return;

	/*
	 * Some systems do not identify their upstream/downstream ports
	 * correctly so detect impossible configurations here and correct
	 * the port type accordingly.
	 */
	if (type == PCI_EXP_TYPE_DOWNSTREAM) {
		/*
		 * If pdev claims to be downstream port but the parent
		 * device is also downstream port assume pdev is actually
		 * upstream port.
		 */
		if (pcie_downstream_port(parent)) {
			pci_info(pdev, "claims to be downstream port but is acting as upstream port, correcting type\n");
			pdev->pcie_flags_reg &= ~PCI_EXP_FLAGS_TYPE;
			pdev->pcie_flags_reg |= PCI_EXP_TYPE_UPSTREAM;
		}
	} else if (type == PCI_EXP_TYPE_UPSTREAM) {
		/*
		 * If pdev claims to be upstream port but the parent
		 * device is also upstream port assume pdev is actually
		 * downstream port.
		 */
		if (pci_pcie_type(parent) == PCI_EXP_TYPE_UPSTREAM) {
			pci_info(pdev, "claims to be upstream port but is acting as downstream port, correcting type\n");
			pdev->pcie_flags_reg &= ~PCI_EXP_FLAGS_TYPE;
			pdev->pcie_flags_reg |= PCI_EXP_TYPE_DOWNSTREAM;
		}
	}
}
1692 
1693 void set_pcie_hotplug_bridge(struct pci_dev *pdev)
1694 {
1695 	u32 reg32;
1696 
1697 	pcie_capability_read_dword(pdev, PCI_EXP_SLTCAP, &reg32);
1698 	if (reg32 & PCI_EXP_SLTCAP_HPC)
1699 		pdev->is_hotplug_bridge = pdev->is_pciehp = 1;
1700 }
1701 
1702 static void set_pcie_thunderbolt(struct pci_dev *dev)
1703 {
1704 	u16 vsec;
1705 
1706 	/* Is the device part of a Thunderbolt controller? */
1707 	vsec = pci_find_vsec_capability(dev, PCI_VENDOR_ID_INTEL, PCI_VSEC_ID_INTEL_TBT);
1708 	if (vsec)
1709 		dev->is_thunderbolt = 1;
1710 }
1711 
1712 static void set_pcie_cxl(struct pci_dev *dev)
1713 {
1714 	struct pci_dev *bridge;
1715 	u16 dvsec, cap;
1716 
1717 	if (!pci_is_pcie(dev))
1718 		return;
1719 
1720 	/*
1721 	 * Update parent's CXL state because alternate protocol training
1722 	 * may have changed
1723 	 */
1724 	bridge = pci_upstream_bridge(dev);
1725 	if (bridge)
1726 		set_pcie_cxl(bridge);
1727 
1728 	dvsec = pci_find_dvsec_capability(dev, PCI_VENDOR_ID_CXL,
1729 					  PCI_DVSEC_CXL_FLEXBUS_PORT);
1730 	if (!dvsec)
1731 		return;
1732 
1733 	pci_read_config_word(dev, dvsec + PCI_DVSEC_CXL_FLEXBUS_PORT_STATUS,
1734 			     &cap);
1735 
1736 	dev->is_cxl = FIELD_GET(PCI_DVSEC_CXL_FLEXBUS_PORT_STATUS_CACHE, cap) ||
1737 		FIELD_GET(PCI_DVSEC_CXL_FLEXBUS_PORT_STATUS_MEM, cap);
1738 
1739 }
1740 
1741 static void set_pcie_untrusted(struct pci_dev *dev)
1742 {
1743 	struct pci_dev *parent = pci_upstream_bridge(dev);
1744 
1745 	if (!parent)
1746 		return;
1747 	/*
1748 	 * If the upstream bridge is untrusted we treat this device as
1749 	 * untrusted as well.
1750 	 */
1751 	if (parent->untrusted) {
1752 		dev->untrusted = true;
1753 		return;
1754 	}
1755 
1756 	if (arch_pci_dev_is_removable(dev)) {
1757 		pci_dbg(dev, "marking as untrusted\n");
1758 		dev->untrusted = true;
1759 	}
1760 }
1761 
1762 static void pci_set_removable(struct pci_dev *dev)
1763 {
1764 	struct pci_dev *parent = pci_upstream_bridge(dev);
1765 
1766 	if (!parent)
1767 		return;
1768 	/*
1769 	 * We (only) consider everything tunneled below an external_facing
1770 	 * device to be removable by the user. We're mainly concerned with
1771 	 * consumer platforms with user accessible thunderbolt ports that are
1772 	 * vulnerable to DMA attacks, and we expect those ports to be marked by
1773 	 * the firmware as external_facing. Devices in traditional hotplug
1774 	 * slots can technically be removed, but the expectation is that unless
1775 	 * the port is marked with external_facing, such devices are less
1776 	 * accessible to user / may not be removed by end user, and thus not
1777 	 * exposed as "removable" to userspace.
1778 	 */
1779 	if (dev_is_removable(&parent->dev)) {
1780 		dev_set_removable(&dev->dev, DEVICE_REMOVABLE);
1781 		return;
1782 	}
1783 
1784 	if (arch_pci_dev_is_removable(dev)) {
1785 		pci_dbg(dev, "marking as removable\n");
1786 		dev_set_removable(&dev->dev, DEVICE_REMOVABLE);
1787 	}
1788 }
1789 
1790 /**
1791  * pci_ext_cfg_is_aliased - Is ext config space just an alias of std config?
1792  * @dev: PCI device
1793  *
1794  * PCI Express to PCI/PCI-X Bridge Specification, rev 1.0, 4.1.4 says that
1795  * when forwarding a type1 configuration request the bridge must check that
1796  * the extended register address field is zero.  The bridge is not permitted
1797  * to forward the transactions and must handle it as an Unsupported Request.
1798  * Some bridges do not follow this rule and simply drop the extended register
1799  * bits, resulting in the standard config space being aliased, every 256
1800  * bytes across the entire configuration space.  Test for this condition by
1801  * comparing the first dword of each potential alias to the vendor/device ID.
1802  * Known offenders:
1803  *   ASM1083/1085 PCIe-to-PCI Reversible Bridge (1b21:1080, rev 01 & 03)
1804  *   AMD/ATI SBx00 PCI to PCI Bridge (1002:4384, rev 40)
1805  */
1806 static bool pci_ext_cfg_is_aliased(struct pci_dev *dev)
1807 {
1808 #ifdef CONFIG_PCI_QUIRKS
1809 	int pos, ret;
1810 	u32 header, tmp;
1811 
1812 	pci_read_config_dword(dev, PCI_VENDOR_ID, &header);
1813 
1814 	for (pos = PCI_CFG_SPACE_SIZE;
1815 	     pos < PCI_CFG_SPACE_EXP_SIZE; pos += PCI_CFG_SPACE_SIZE) {
1816 		ret = pci_read_config_dword(dev, pos, &tmp);
1817 		if ((ret != PCIBIOS_SUCCESSFUL) || (header != tmp))
1818 			return false;
1819 	}
1820 
1821 	return true;
1822 #else
1823 	return false;
1824 #endif
1825 }
1826 
1827 /**
1828  * pci_cfg_space_size_ext - Get the configuration space size of the PCI device
1829  * @dev: PCI device
1830  *
1831  * Regular PCI devices have 256 bytes, but PCI-X 2 and PCI Express devices
1832  * have 4096 bytes.  Even if the device is capable, that doesn't mean we can
1833  * access it.  Maybe we don't have a way to generate extended config space
1834  * accesses, or the device is behind a reverse Express bridge.  So we try
1835  * reading the dword at 0x100 which must either be 0 or a valid extended
1836  * capability header.
1837  */
1838 static int pci_cfg_space_size_ext(struct pci_dev *dev)
1839 {
1840 	u32 status;
1841 	int pos = PCI_CFG_SPACE_SIZE;
1842 
1843 	if (pci_read_config_dword(dev, pos, &status) != PCIBIOS_SUCCESSFUL)
1844 		return PCI_CFG_SPACE_SIZE;
1845 	if (PCI_POSSIBLE_ERROR(status) || pci_ext_cfg_is_aliased(dev))
1846 		return PCI_CFG_SPACE_SIZE;
1847 
1848 	return PCI_CFG_SPACE_EXP_SIZE;
1849 }
1850 
1851 int pci_cfg_space_size(struct pci_dev *dev)
1852 {
1853 	int pos;
1854 	u32 status;
1855 	u16 class;
1856 
1857 #ifdef CONFIG_PCI_IOV
1858 	/*
1859 	 * Per the SR-IOV specification (rev 1.1, sec 3.5), VFs are required to
1860 	 * implement a PCIe capability and therefore must implement extended
1861 	 * config space.  We can skip the NO_EXTCFG test below and the
1862 	 * reachability/aliasing test in pci_cfg_space_size_ext() by virtue of
1863 	 * the fact that the SR-IOV capability on the PF resides in extended
1864 	 * config space and must be accessible and non-aliased to have enabled
1865 	 * support for this VF.  This is a micro performance optimization for
1866 	 * systems supporting many VFs.
1867 	 */
1868 	if (dev->is_virtfn)
1869 		return PCI_CFG_SPACE_EXP_SIZE;
1870 #endif
1871 
1872 	if (dev->bus->bus_flags & PCI_BUS_FLAGS_NO_EXTCFG)
1873 		return PCI_CFG_SPACE_SIZE;
1874 
1875 	class = dev->class >> 8;
1876 	if (class == PCI_CLASS_BRIDGE_HOST)
1877 		return pci_cfg_space_size_ext(dev);
1878 
1879 	if (pci_is_pcie(dev))
1880 		return pci_cfg_space_size_ext(dev);
1881 
1882 	pos = pci_find_capability(dev, PCI_CAP_ID_PCIX);
1883 	if (!pos)
1884 		return PCI_CFG_SPACE_SIZE;
1885 
1886 	pci_read_config_dword(dev, pos + PCI_X_STATUS, &status);
1887 	if (status & (PCI_X_STATUS_266MHZ | PCI_X_STATUS_533MHZ))
1888 		return pci_cfg_space_size_ext(dev);
1889 
1890 	return PCI_CFG_SPACE_SIZE;
1891 }
1892 
1893 static u32 pci_class(struct pci_dev *dev)
1894 {
1895 	u32 class;
1896 
1897 #ifdef CONFIG_PCI_IOV
1898 	if (dev->is_virtfn)
1899 		return dev->physfn->sriov->class;
1900 #endif
1901 	pci_read_config_dword(dev, PCI_CLASS_REVISION, &class);
1902 	return class;
1903 }
1904 
/*
 * Read the subsystem vendor/device IDs into @vendor and @device.  For
 * VFs the values cached in the PF's SR-IOV data are used instead of
 * reading config space.
 */
static void pci_subsystem_ids(struct pci_dev *dev, u16 *vendor, u16 *device)
{
#ifdef CONFIG_PCI_IOV
	if (dev->is_virtfn) {
		*vendor = dev->physfn->sriov->subsystem_vendor;
		*device = dev->physfn->sriov->subsystem_device;
		return;
	}
#endif
	pci_read_config_word(dev, PCI_SUBSYSTEM_VENDOR_ID, vendor);
	pci_read_config_word(dev, PCI_SUBSYSTEM_ID, device);
}
1917 
/*
 * Read the Header Type byte.  For VFs the value cached in the PF's
 * SR-IOV data is returned instead of reading config space.
 */
static u8 pci_hdr_type(struct pci_dev *dev)
{
	u8 hdr_type;

#ifdef CONFIG_PCI_IOV
	if (dev->is_virtfn)
		return dev->physfn->sriov->hdr_type;
#endif
	pci_read_config_byte(dev, PCI_HEADER_TYPE, &hdr_type);
	return hdr_type;
}
1929 
1930 #define LEGACY_IO_RESOURCE	(IORESOURCE_IO | IORESOURCE_PCI_FIXED)
1931 
1932 /**
1933  * pci_intx_mask_broken - Test PCI_COMMAND_INTX_DISABLE writability
1934  * @dev: PCI device
1935  *
1936  * Test whether PCI_COMMAND_INTX_DISABLE is writable for @dev.  Check this
1937  * at enumeration-time to avoid modifying PCI_COMMAND at run-time.
1938  */
1939 static int pci_intx_mask_broken(struct pci_dev *dev)
1940 {
1941 	u16 orig, toggle, new;
1942 
1943 	pci_read_config_word(dev, PCI_COMMAND, &orig);
1944 	toggle = orig ^ PCI_COMMAND_INTX_DISABLE;
1945 	pci_write_config_word(dev, PCI_COMMAND, toggle);
1946 	pci_read_config_word(dev, PCI_COMMAND, &new);
1947 
1948 	pci_write_config_word(dev, PCI_COMMAND, orig);
1949 
1950 	/*
1951 	 * PCI_COMMAND_INTX_DISABLE was reserved and read-only prior to PCI
1952 	 * r2.3, so strictly speaking, a device is not *broken* if it's not
1953 	 * writable.  But we'll live with the misnomer for now.
1954 	 */
1955 	if (new != toggle)
1956 		return 1;
1957 	return 0;
1958 }
1959 
1960 static void early_dump_pci_device(struct pci_dev *pdev)
1961 {
1962 	u32 value[PCI_CFG_SPACE_SIZE / sizeof(u32)];
1963 	int i;
1964 
1965 	pci_info(pdev, "config space:\n");
1966 
1967 	for (i = 0; i < ARRAY_SIZE(value); i++)
1968 		pci_read_config_dword(pdev, i * sizeof(u32), &value[i]);
1969 
1970 	print_hex_dump(KERN_INFO, "", DUMP_PREFIX_OFFSET, 16, 1,
1971 		       value, ARRAY_SIZE(value) * sizeof(u32), false);
1972 }
1973 
1974 static const char *pci_type_str(struct pci_dev *dev)
1975 {
1976 	static const char * const str[] = {
1977 		"PCIe Endpoint",
1978 		"PCIe Legacy Endpoint",
1979 		"PCIe unknown",
1980 		"PCIe unknown",
1981 		"PCIe Root Port",
1982 		"PCIe Switch Upstream Port",
1983 		"PCIe Switch Downstream Port",
1984 		"PCIe to PCI/PCI-X bridge",
1985 		"PCI/PCI-X to PCIe bridge",
1986 		"PCIe Root Complex Integrated Endpoint",
1987 		"PCIe Root Complex Event Collector",
1988 	};
1989 	int type;
1990 
1991 	if (pci_is_pcie(dev)) {
1992 		type = pci_pcie_type(dev);
1993 		if (type < ARRAY_SIZE(str))
1994 			return str[type];
1995 
1996 		return "PCIe unknown";
1997 	}
1998 
1999 	switch (dev->hdr_type) {
2000 	case PCI_HEADER_TYPE_NORMAL:
2001 		return "conventional PCI endpoint";
2002 	case PCI_HEADER_TYPE_BRIDGE:
2003 		return "conventional PCI bridge";
2004 	case PCI_HEADER_TYPE_CARDBUS:
2005 		return "CardBus bridge";
2006 	default:
2007 		return "conventional PCI";
2008 	}
2009 }
2010 
2011 /**
2012  * pci_setup_device - Fill in class and map information of a device
2013  * @dev: the device structure to fill
2014  *
2015  * Initialize the device structure with information about the device's
2016  * vendor,class,memory and IO-space addresses, IRQ lines etc.
2017  * Called at initialisation of the PCI subsystem and by CardBus services.
2018  * Returns 0 on success and negative if unknown type of device (not normal,
2019  * bridge or CardBus).
2020  */
2021 int pci_setup_device(struct pci_dev *dev)
2022 {
2023 	u32 class;
2024 	u16 cmd;
2025 	u8 hdr_type;
2026 	int err, pos = 0;
2027 	struct pci_bus_region region;
2028 	struct resource *res;
2029 
2030 	hdr_type = pci_hdr_type(dev);
2031 
2032 	dev->sysdata = dev->bus->sysdata;
2033 	dev->dev.parent = dev->bus->bridge;
2034 	dev->dev.bus = &pci_bus_type;
2035 	dev->hdr_type = FIELD_GET(PCI_HEADER_TYPE_MASK, hdr_type);
2036 	dev->multifunction = FIELD_GET(PCI_HEADER_TYPE_MFD, hdr_type);
2037 	dev->error_state = pci_channel_io_normal;
2038 	set_pcie_port_type(dev);
2039 
2040 	err = pci_set_of_node(dev);
2041 	if (err)
2042 		return err;
2043 	pci_set_acpi_fwnode(dev);
2044 
2045 	pci_dev_assign_slot(dev);
2046 
2047 	/*
2048 	 * Assume 32-bit PCI; let 64-bit PCI cards (which are far rarer)
2049 	 * set this higher, assuming the system even supports it.
2050 	 */
2051 	dev->dma_mask = 0xffffffff;
2052 
2053 	/*
2054 	 * Assume 64-bit addresses for MSI initially. Will be changed to 32-bit
2055 	 * if MSI (rather than MSI-X) capability does not have
2056 	 * PCI_MSI_FLAGS_64BIT. Can also be overridden by driver.
2057 	 */
2058 	dev->msi_addr_mask = DMA_BIT_MASK(64);
2059 
2060 	dev_set_name(&dev->dev, "%04x:%02x:%02x.%d", pci_domain_nr(dev->bus),
2061 		     dev->bus->number, PCI_SLOT(dev->devfn),
2062 		     PCI_FUNC(dev->devfn));
2063 
2064 	class = pci_class(dev);
2065 
2066 	dev->revision = class & 0xff;
2067 	dev->class = class >> 8;		    /* upper 3 bytes */
2068 
2069 	if (pci_early_dump)
2070 		early_dump_pci_device(dev);
2071 
2072 	/* Need to have dev->class ready */
2073 	dev->cfg_size = pci_cfg_space_size(dev);
2074 
2075 	/* Need to have dev->cfg_size ready */
2076 	set_pcie_thunderbolt(dev);
2077 
2078 	set_pcie_cxl(dev);
2079 
2080 	set_pcie_untrusted(dev);
2081 
2082 	if (pci_is_pcie(dev))
2083 		dev->supported_speeds = pcie_get_supported_speeds(dev);
2084 
2085 	/* "Unknown power state" */
2086 	dev->current_state = PCI_UNKNOWN;
2087 
2088 	/* Early fixups, before probing the BARs */
2089 	pci_fixup_device(pci_fixup_early, dev);
2090 
2091 	pci_set_removable(dev);
2092 
2093 	pci_info(dev, "[%04x:%04x] type %02x class %#08x %s\n",
2094 		 dev->vendor, dev->device, dev->hdr_type, dev->class,
2095 		 pci_type_str(dev));
2096 
2097 	/* Device class may be changed after fixup */
2098 	class = dev->class >> 8;
2099 
2100 	if (dev->non_compliant_bars && !dev->mmio_always_on) {
2101 		pci_read_config_word(dev, PCI_COMMAND, &cmd);
2102 		if (cmd & (PCI_COMMAND_IO | PCI_COMMAND_MEMORY)) {
2103 			pci_info(dev, "device has non-compliant BARs; disabling IO/MEM decoding\n");
2104 			cmd &= ~PCI_COMMAND_IO;
2105 			cmd &= ~PCI_COMMAND_MEMORY;
2106 			pci_write_config_word(dev, PCI_COMMAND, cmd);
2107 		}
2108 	}
2109 
2110 	dev->broken_intx_masking = pci_intx_mask_broken(dev);
2111 
2112 	switch (dev->hdr_type) {		    /* header type */
2113 	case PCI_HEADER_TYPE_NORMAL:		    /* standard header */
2114 		if (class == PCI_CLASS_BRIDGE_PCI)
2115 			goto bad;
2116 		pci_read_irq(dev);
2117 		pci_read_bases(dev, PCI_STD_NUM_BARS, PCI_ROM_ADDRESS);
2118 
2119 		pci_subsystem_ids(dev, &dev->subsystem_vendor, &dev->subsystem_device);
2120 
2121 		/*
2122 		 * Do the ugly legacy mode stuff here rather than broken chip
2123 		 * quirk code. Legacy mode ATA controllers have fixed
2124 		 * addresses. These are not always echoed in BAR0-3, and
2125 		 * BAR0-3 in a few cases contain junk!
2126 		 */
2127 		if (class == PCI_CLASS_STORAGE_IDE) {
2128 			u8 progif;
2129 			pci_read_config_byte(dev, PCI_CLASS_PROG, &progif);
2130 			if ((progif & 1) == 0) {
2131 				region.start = 0x1F0;
2132 				region.end = 0x1F7;
2133 				res = &dev->resource[0];
2134 				res->flags = LEGACY_IO_RESOURCE;
2135 				pcibios_bus_to_resource(dev->bus, res, &region);
2136 				pci_info(dev, "BAR 0 %pR: legacy IDE quirk\n",
2137 					 res);
2138 				region.start = 0x3F6;
2139 				region.end = 0x3F6;
2140 				res = &dev->resource[1];
2141 				res->flags = LEGACY_IO_RESOURCE;
2142 				pcibios_bus_to_resource(dev->bus, res, &region);
2143 				pci_info(dev, "BAR 1 %pR: legacy IDE quirk\n",
2144 					 res);
2145 			}
2146 			if ((progif & 4) == 0) {
2147 				region.start = 0x170;
2148 				region.end = 0x177;
2149 				res = &dev->resource[2];
2150 				res->flags = LEGACY_IO_RESOURCE;
2151 				pcibios_bus_to_resource(dev->bus, res, &region);
2152 				pci_info(dev, "BAR 2 %pR: legacy IDE quirk\n",
2153 					 res);
2154 				region.start = 0x376;
2155 				region.end = 0x376;
2156 				res = &dev->resource[3];
2157 				res->flags = LEGACY_IO_RESOURCE;
2158 				pcibios_bus_to_resource(dev->bus, res, &region);
2159 				pci_info(dev, "BAR 3 %pR: legacy IDE quirk\n",
2160 					 res);
2161 			}
2162 		}
2163 		break;
2164 
2165 	case PCI_HEADER_TYPE_BRIDGE:		    /* bridge header */
2166 		/*
2167 		 * The PCI-to-PCI bridge spec requires that subtractive
2168 		 * decoding (i.e. transparent) bridge must have programming
2169 		 * interface code of 0x01.
2170 		 */
2171 		pci_read_irq(dev);
2172 		dev->transparent = ((dev->class & 0xff) == 1);
2173 		pci_read_bases(dev, 2, PCI_ROM_ADDRESS1);
2174 		pci_read_bridge_windows(dev);
2175 		set_pcie_hotplug_bridge(dev);
2176 		pos = pci_find_capability(dev, PCI_CAP_ID_SSVID);
2177 		if (pos) {
2178 			pci_read_config_word(dev, pos + PCI_SSVID_VENDOR_ID, &dev->subsystem_vendor);
2179 			pci_read_config_word(dev, pos + PCI_SSVID_DEVICE_ID, &dev->subsystem_device);
2180 		}
2181 		break;
2182 
2183 	case PCI_HEADER_TYPE_CARDBUS:		    /* CardBus bridge header */
2184 		if (class != PCI_CLASS_BRIDGE_CARDBUS)
2185 			goto bad;
2186 		pci_read_irq(dev);
2187 		pci_read_bases(dev, 1, 0);
2188 		pci_read_config_word(dev, PCI_CB_SUBSYSTEM_VENDOR_ID, &dev->subsystem_vendor);
2189 		pci_read_config_word(dev, PCI_CB_SUBSYSTEM_ID, &dev->subsystem_device);
2190 		break;
2191 
2192 	default:				    /* unknown header */
2193 		pci_err(dev, "unknown header type %02x, ignoring device\n",
2194 			dev->hdr_type);
2195 		pci_release_of_node(dev);
2196 		return -EIO;
2197 
2198 	bad:
2199 		pci_err(dev, "ignoring class %#08x (doesn't match header type %02x)\n",
2200 			dev->class, dev->hdr_type);
2201 		dev->class = PCI_CLASS_NOT_DEFINED << 8;
2202 	}
2203 
2204 	/* We found a fine healthy device, go go go... */
2205 	return 0;
2206 }
2207 
/*
 * Configure the device's Max Payload Size (MPS) to be consistent with the
 * upstream bridge, honoring the global pcie_bus_config policy.  Fancier
 * bus-wide tuning is done later by pcie_bus_configure_settings().
 */
static void pci_configure_mps(struct pci_dev *dev)
{
	struct pci_dev *bridge = pci_upstream_bridge(dev);
	int mps, mpss, p_mps, rc;

	if (!pci_is_pcie(dev))
		return;

	/* MPS and MRRS fields are of type 'RsvdP' for VFs, short-circuit out */
	if (dev->is_virtfn)
		return;

	/*
	 * For Root Complex Integrated Endpoints, program the maximum
	 * supported value unless limited by the PCIE_BUS_PEER2PEER case.
	 */
	if (pci_pcie_type(dev) == PCI_EXP_TYPE_RC_END) {
		if (pcie_bus_config == PCIE_BUS_PEER2PEER)
			mps = 128;
		else
			mps = 128 << dev->pcie_mpss;
		rc = pcie_set_mps(dev, mps);
		if (rc) {
			pci_warn(dev, "can't set Max Payload Size to %d; if necessary, use \"pci=pcie_bus_safe\" and report a bug\n",
				 mps);
		}
		return;
	}

	if (!bridge || !pci_is_pcie(bridge))
		return;

	mps = pcie_get_mps(dev);
	p_mps = pcie_get_mps(bridge);

	/* Nothing to do if device and bridge already agree */
	if (mps == p_mps)
		return;

	if (pcie_bus_config == PCIE_BUS_TUNE_OFF) {
		pci_warn(dev, "Max Payload Size %d, but upstream %s set to %d; if necessary, use \"pci=pcie_bus_safe\" and report a bug\n",
			 mps, pci_name(bridge), p_mps);
		return;
	}

	/*
	 * Fancier MPS configuration is done later by
	 * pcie_bus_configure_settings()
	 */
	if (pcie_bus_config != PCIE_BUS_DEFAULT)
		return;

	/* If the Root Port can be lowered to match the device, lower it */
	mpss = 128 << dev->pcie_mpss;
	if (mpss < p_mps && pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT) {
		pcie_set_mps(bridge, mpss);
		pci_info(dev, "Upstream bridge's Max Payload Size set to %d (was %d, max %d)\n",
			 mpss, p_mps, 128 << bridge->pcie_mpss);
		p_mps = pcie_get_mps(bridge);
	}

	rc = pcie_set_mps(dev, p_mps);
	if (rc) {
		pci_warn(dev, "can't set Max Payload Size to %d; if necessary, use \"pci=pcie_bus_safe\" and report a bug\n",
			 p_mps);
		return;
	}

	pci_info(dev, "Max Payload Size set to %d (was %d, max %d)\n",
		 p_mps, mps, mpss);
}
2277 
2278 int pci_configure_extended_tags(struct pci_dev *dev, void *ign)
2279 {
2280 	struct pci_host_bridge *host;
2281 	u32 cap;
2282 	u16 ctl;
2283 	int ret;
2284 
2285 	/* PCI_EXP_DEVCTL_EXT_TAG is RsvdP in VFs */
2286 	if (!pci_is_pcie(dev) || dev->is_virtfn)
2287 		return 0;
2288 
2289 	ret = pcie_capability_read_dword(dev, PCI_EXP_DEVCAP, &cap);
2290 	if (ret)
2291 		return 0;
2292 
2293 	if (!(cap & PCI_EXP_DEVCAP_EXT_TAG))
2294 		return 0;
2295 
2296 	ret = pcie_capability_read_word(dev, PCI_EXP_DEVCTL, &ctl);
2297 	if (ret)
2298 		return 0;
2299 
2300 	host = pci_find_host_bridge(dev->bus);
2301 	if (!host)
2302 		return 0;
2303 
2304 	/*
2305 	 * If some device in the hierarchy doesn't handle Extended Tags
2306 	 * correctly, make sure they're disabled.
2307 	 */
2308 	if (host->no_ext_tags) {
2309 		if (ctl & PCI_EXP_DEVCTL_EXT_TAG) {
2310 			pci_info(dev, "disabling Extended Tags\n");
2311 			pcie_capability_clear_word(dev, PCI_EXP_DEVCTL,
2312 						   PCI_EXP_DEVCTL_EXT_TAG);
2313 		}
2314 		return 0;
2315 	}
2316 
2317 	if (!(ctl & PCI_EXP_DEVCTL_EXT_TAG)) {
2318 		pci_info(dev, "enabling Extended Tags\n");
2319 		pcie_capability_set_word(dev, PCI_EXP_DEVCTL,
2320 					 PCI_EXP_DEVCTL_EXT_TAG);
2321 	}
2322 	return 0;
2323 }
2324 
2325 static void pci_dev3_init(struct pci_dev *pdev)
2326 {
2327 	u16 cap = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_DEV3);
2328 	u32 val = 0;
2329 
2330 	if (!cap)
2331 		return;
2332 	pci_read_config_dword(pdev, cap + PCI_DEV3_STA, &val);
2333 	pdev->fm_enabled = !!(val & PCI_DEV3_STA_SEGMENT);
2334 }
2335 
2336 /**
2337  * pcie_relaxed_ordering_enabled - Probe for PCIe relaxed ordering enable
2338  * @dev: PCI device to query
2339  *
2340  * Returns true if the device has enabled relaxed ordering attribute.
2341  */
2342 bool pcie_relaxed_ordering_enabled(struct pci_dev *dev)
2343 {
2344 	u16 v;
2345 
2346 	pcie_capability_read_word(dev, PCI_EXP_DEVCTL, &v);
2347 
2348 	return !!(v & PCI_EXP_DEVCTL_RELAX_EN);
2349 }
2350 EXPORT_SYMBOL(pcie_relaxed_ordering_enabled);
2351 
2352 static void pci_configure_relaxed_ordering(struct pci_dev *dev)
2353 {
2354 	struct pci_dev *root;
2355 
2356 	/* PCI_EXP_DEVCTL_RELAX_EN is RsvdP in VFs */
2357 	if (dev->is_virtfn)
2358 		return;
2359 
2360 	if (!pcie_relaxed_ordering_enabled(dev))
2361 		return;
2362 
2363 	/*
2364 	 * For now, we only deal with Relaxed Ordering issues with Root
2365 	 * Ports. Peer-to-Peer DMA is another can of worms.
2366 	 */
2367 	root = pcie_find_root_port(dev);
2368 	if (!root)
2369 		return;
2370 
2371 	if (root->dev_flags & PCI_DEV_FLAGS_NO_RELAXED_ORDERING) {
2372 		pcie_capability_clear_word(dev, PCI_EXP_DEVCTL,
2373 					   PCI_EXP_DEVCTL_RELAX_EN);
2374 		pci_info(dev, "Relaxed Ordering disabled because the Root Port didn't support it\n");
2375 	}
2376 }
2377 
2378 static void pci_configure_eetlp_prefix(struct pci_dev *dev)
2379 {
2380 	struct pci_dev *bridge;
2381 	unsigned int eetlp_max;
2382 	int pcie_type;
2383 	u32 cap;
2384 
2385 	if (!pci_is_pcie(dev))
2386 		return;
2387 
2388 	pcie_capability_read_dword(dev, PCI_EXP_DEVCAP2, &cap);
2389 	if (!(cap & PCI_EXP_DEVCAP2_EE_PREFIX))
2390 		return;
2391 
2392 	pcie_type = pci_pcie_type(dev);
2393 
2394 	eetlp_max = FIELD_GET(PCI_EXP_DEVCAP2_EE_PREFIX_MAX, cap);
2395 	/* 00b means 4 */
2396 	eetlp_max = eetlp_max ?: 4;
2397 
2398 	if (pcie_type == PCI_EXP_TYPE_ROOT_PORT ||
2399 	    pcie_type == PCI_EXP_TYPE_RC_END)
2400 		dev->eetlp_prefix_max = eetlp_max;
2401 	else {
2402 		bridge = pci_upstream_bridge(dev);
2403 		if (bridge && bridge->eetlp_prefix_max)
2404 			dev->eetlp_prefix_max = eetlp_max;
2405 	}
2406 }
2407 
2408 static void pci_configure_serr(struct pci_dev *dev)
2409 {
2410 	u16 control;
2411 
2412 	if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
2413 
2414 		/*
2415 		 * A bridge will not forward ERR_ messages coming from an
2416 		 * endpoint unless SERR# forwarding is enabled.
2417 		 */
2418 		pci_read_config_word(dev, PCI_BRIDGE_CONTROL, &control);
2419 		if (!(control & PCI_BRIDGE_CTL_SERR)) {
2420 			control |= PCI_BRIDGE_CTL_SERR;
2421 			pci_write_config_word(dev, PCI_BRIDGE_CONTROL, control);
2422 		}
2423 	}
2424 }
2425 
2426 static void pci_configure_rcb(struct pci_dev *dev)
2427 {
2428 	struct pci_dev *rp;
2429 	u16 rp_lnkctl;
2430 
2431 	/*
2432 	 * Per PCIe r7.0, sec 7.5.3.7, RCB is only meaningful in Root Ports
2433 	 * (where it is read-only), Endpoints, and Bridges.  It may only be
2434 	 * set for Endpoints and Bridges if it is set in the Root Port. For
2435 	 * Endpoints, it is 'RsvdP' for Virtual Functions.
2436 	 */
2437 	if (!pci_is_pcie(dev) ||
2438 	    pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT ||
2439 	    pci_pcie_type(dev) == PCI_EXP_TYPE_UPSTREAM ||
2440 	    pci_pcie_type(dev) == PCI_EXP_TYPE_DOWNSTREAM ||
2441 	    pci_pcie_type(dev) == PCI_EXP_TYPE_RC_EC ||
2442 	    dev->is_virtfn)
2443 		return;
2444 
2445 	/* Root Port often not visible to virtualized guests */
2446 	rp = pcie_find_root_port(dev);
2447 	if (!rp)
2448 		return;
2449 
2450 	pcie_capability_read_word(rp, PCI_EXP_LNKCTL, &rp_lnkctl);
2451 	pcie_capability_clear_and_set_word(dev, PCI_EXP_LNKCTL,
2452 					   PCI_EXP_LNKCTL_RCB,
2453 					   (rp_lnkctl & PCI_EXP_LNKCTL_RCB) ?
2454 					   PCI_EXP_LNKCTL_RCB : 0);
2455 }
2456 
/*
 * Apply configuration that must be in place before the device is registered:
 * payload sizes, tags, ordering, LTR/ASPM, TLP prefixes, error forwarding,
 * read completion boundary, and firmware-driven hotplug parameters.
 */
static void pci_configure_device(struct pci_dev *dev)
{
	pci_configure_mps(dev);
	pci_configure_extended_tags(dev, NULL);
	pci_configure_relaxed_ordering(dev);
	pci_configure_ltr(dev);
	pci_configure_aspm_l1ss(dev);
	pci_configure_eetlp_prefix(dev);
	pci_configure_serr(dev);
	pci_configure_rcb(dev);

	pci_acpi_program_hp_params(dev);
}
2470 
/* Tear down per-capability state set up by pci_init_capabilities(). */
static void pci_release_capabilities(struct pci_dev *dev)
{
	pci_aer_exit(dev);
	pci_rcec_exit(dev);
	pci_iov_release(dev);
	pci_free_cap_save_buffers(dev);
}
2478 
2479 /**
2480  * pci_release_dev - Free a PCI device structure when all users of it are
2481  *		     finished
2482  * @dev: device that's been disconnected
2483  *
2484  * Will be called only by the device core when all users of this PCI device are
2485  * done.
2486  */
2487 static void pci_release_dev(struct device *dev)
2488 {
2489 	struct pci_dev *pci_dev;
2490 
2491 	pci_dev = to_pci_dev(dev);
2492 	pci_release_capabilities(pci_dev);
2493 	pci_release_of_node(pci_dev);
2494 	pcibios_release_device(pci_dev);
2495 	pci_bus_put(pci_dev->bus);
2496 	bitmap_free(pci_dev->dma_alias_mask);
2497 	dev_dbg(dev, "device released\n");
2498 	kfree(pci_dev);
2499 }
2500 
/* Device type for PCI devices; supplies the default sysfs attribute groups */
static const struct device_type pci_dev_type = {
	.groups = pci_dev_attr_groups,
};
2504 
2505 struct pci_dev *pci_alloc_dev(struct pci_bus *bus)
2506 {
2507 	struct pci_dev *dev;
2508 
2509 	dev = kzalloc_obj(struct pci_dev);
2510 	if (!dev)
2511 		return NULL;
2512 
2513 	INIT_LIST_HEAD(&dev->bus_list);
2514 	dev->dev.type = &pci_dev_type;
2515 	dev->bus = pci_bus_get(bus);
2516 	dev->driver_exclusive_resource = (struct resource) {
2517 		.name = "PCI Exclusive",
2518 		.start = 0,
2519 		.end = -1,
2520 	};
2521 
2522 	spin_lock_init(&dev->pcie_cap_lock);
2523 #ifdef CONFIG_PCI_MSI
2524 	raw_spin_lock_init(&dev->msi_lock);
2525 #endif
2526 	return dev;
2527 }
2528 EXPORT_SYMBOL(pci_alloc_dev);
2529 
/*
 * Wait for a device that responded to a Vendor ID read with Configuration
 * Request Retry Status (RRS) to become ready.  *l holds the last Vendor ID
 * dword read; retry with exponential backoff until a valid Vendor ID is
 * read (left in *l) or @timeout ms elapse.  Returns true on success.
 */
static bool pci_bus_wait_rrs(struct pci_bus *bus, int devfn, u32 *l,
			     int timeout)
{
	int delay = 1;

	if (!pci_bus_rrs_vendor_id(*l))
		return true;	/* not a Configuration RRS completion */

	if (!timeout)
		return false;	/* RRS, but caller doesn't want to wait */

	/*
	 * We got the reserved Vendor ID that indicates a completion with
	 * Configuration Request Retry Status (RRS).  Retry until we get a
	 * valid Vendor ID or we time out.
	 */
	while (pci_bus_rrs_vendor_id(*l)) {
		if (delay > timeout) {
			pr_warn("pci %04x:%02x:%02x.%d: not ready after %dms; giving up\n",
				pci_domain_nr(bus), bus->number,
				PCI_SLOT(devfn), PCI_FUNC(devfn), delay - 1);

			return false;
		}
		if (delay >= 1000)
			pr_info("pci %04x:%02x:%02x.%d: not ready after %dms; waiting\n",
				pci_domain_nr(bus), bus->number,
				PCI_SLOT(devfn), PCI_FUNC(devfn), delay - 1);

		msleep(delay);
		delay *= 2;

		if (pci_bus_read_config_dword(bus, devfn, PCI_VENDOR_ID, l))
			return false;
	}

	if (delay >= 1000)
		pr_info("pci %04x:%02x:%02x.%d: ready after %dms\n",
			pci_domain_nr(bus), bus->number,
			PCI_SLOT(devfn), PCI_FUNC(devfn), delay - 1);

	return true;
}
2573 
2574 bool pci_bus_generic_read_dev_vendor_id(struct pci_bus *bus, int devfn, u32 *l,
2575 					int timeout)
2576 {
2577 	if (pci_bus_read_config_dword(bus, devfn, PCI_VENDOR_ID, l))
2578 		return false;
2579 
2580 	/* Some broken boards return 0 or ~0 (PCI_ERROR_RESPONSE) if a slot is empty: */
2581 	if (PCI_POSSIBLE_ERROR(*l) || *l == 0x00000000 ||
2582 	    *l == 0x0000ffff || *l == 0xffff0000)
2583 		return false;
2584 
2585 	if (pci_bus_rrs_vendor_id(*l))
2586 		return pci_bus_wait_rrs(bus, devfn, l, timeout);
2587 
2588 	return true;
2589 }
2590 
/* Exported wrapper around pci_bus_generic_read_dev_vendor_id(). */
bool pci_bus_read_dev_vendor_id(struct pci_bus *bus, int devfn, u32 *l,
				int timeout)
{
	return pci_bus_generic_read_dev_vendor_id(bus, devfn, l, timeout);
}
EXPORT_SYMBOL(pci_bus_read_dev_vendor_id);
2597 
2598 /*
2599  * Read the config data for a PCI device, sanity-check it,
2600  * and fill in the dev structure.
2601  */
2602 static struct pci_dev *pci_scan_device(struct pci_bus *bus, int devfn)
2603 {
2604 	struct pci_dev *dev;
2605 	u32 l;
2606 
2607 	if (!pci_bus_read_dev_vendor_id(bus, devfn, &l, 60*1000))
2608 		return NULL;
2609 
2610 	dev = pci_alloc_dev(bus);
2611 	if (!dev)
2612 		return NULL;
2613 
2614 	dev->devfn = devfn;
2615 	dev->vendor = l & 0xffff;
2616 	dev->device = (l >> 16) & 0xffff;
2617 
2618 	if (pci_setup_device(dev)) {
2619 		pci_bus_put(dev->bus);
2620 		kfree(dev);
2621 		return NULL;
2622 	}
2623 
2624 	return dev;
2625 }
2626 
2627 void pcie_report_downtraining(struct pci_dev *dev)
2628 {
2629 	if (!pci_is_pcie(dev))
2630 		return;
2631 
2632 	/* Look from the device up to avoid downstream ports with no devices */
2633 	if ((pci_pcie_type(dev) != PCI_EXP_TYPE_ENDPOINT) &&
2634 	    (pci_pcie_type(dev) != PCI_EXP_TYPE_LEG_END) &&
2635 	    (pci_pcie_type(dev) != PCI_EXP_TYPE_UPSTREAM))
2636 		return;
2637 
2638 	/* Multi-function PCIe devices share the same link/status */
2639 	if (PCI_FUNC(dev->devfn) != 0 || dev->is_virtfn)
2640 		return;
2641 
2642 	/* Print link status only if the device is constrained by the fabric */
2643 	__pcie_print_link_status(dev, false);
2644 }
2645 
2646 static void pci_imm_ready_init(struct pci_dev *dev)
2647 {
2648 	u16 status;
2649 
2650 	pci_read_config_word(dev, PCI_STATUS, &status);
2651 	if (status & PCI_STATUS_IMM_READY)
2652 		dev->imm_ready = 1;
2653 }
2654 
/*
 * Discover and initialize the device's optional PCI/PCIe capabilities.
 * Save buffers are allocated early so later init routines can use them.
 */
static void pci_init_capabilities(struct pci_dev *dev)
{
	pci_ea_init(dev);		/* Enhanced Allocation */
	pci_msi_init(dev);		/* Disable MSI */
	pci_msix_init(dev);		/* Disable MSI-X */

	/* Buffers for saving PCIe and PCI-X capabilities */
	pci_allocate_cap_save_buffers(dev);

	pci_imm_ready_init(dev);	/* Immediate Readiness */
	pci_pm_init(dev);		/* Power Management */
	pci_vpd_init(dev);		/* Vital Product Data */
	pci_configure_ari(dev);		/* Alternative Routing-ID Forwarding */
	pci_iov_init(dev);		/* Single Root I/O Virtualization */
	pci_ats_init(dev);		/* Address Translation Services */
	pci_pri_init(dev);		/* Page Request Interface */
	pci_pasid_init(dev);		/* Process Address Space ID */
	pci_acs_init(dev);		/* Access Control Services */
	pci_ptm_init(dev);		/* Precision Time Measurement */
	pci_aer_init(dev);		/* Advanced Error Reporting */
	pci_dpc_init(dev);		/* Downstream Port Containment */
	pci_rcec_init(dev);		/* Root Complex Event Collector */
	pci_doe_init(dev);		/* Data Object Exchange */
	pci_tph_init(dev);		/* TLP Processing Hints */
	pci_rebar_init(dev);		/* Resizable BAR */
	pci_dev3_init(dev);		/* Device 3 capabilities */
	pci_ide_init(dev);		/* Link Integrity and Data Encryption */

	pcie_report_downtraining(dev);
	pci_init_reset_methods(dev);
}
2686 
2687 /*
2688  * This is the equivalent of pci_host_bridge_msi_domain() that acts on
2689  * devices. Firmware interfaces that can select the MSI domain on a
2690  * per-device basis should be called from here.
2691  */
2692 static struct irq_domain *pci_dev_msi_domain(struct pci_dev *dev)
2693 {
2694 	struct irq_domain *d;
2695 
2696 	/*
2697 	 * If a domain has been set through the pcibios_device_add()
2698 	 * callback, then this is the one (platform code knows best).
2699 	 */
2700 	d = dev_get_msi_domain(&dev->dev);
2701 	if (d)
2702 		return d;
2703 
2704 	/*
2705 	 * Let's see if we have a firmware interface able to provide
2706 	 * the domain.
2707 	 */
2708 	d = pci_msi_get_device_domain(dev);
2709 	if (d)
2710 		return d;
2711 
2712 	return NULL;
2713 }
2714 
2715 static void pci_set_msi_domain(struct pci_dev *dev)
2716 {
2717 	struct irq_domain *d;
2718 
2719 	/*
2720 	 * If the platform or firmware interfaces cannot supply a
2721 	 * device-specific MSI domain, then inherit the default domain
2722 	 * from the host bridge itself.
2723 	 */
2724 	d = pci_dev_msi_domain(dev);
2725 	if (!d)
2726 		d = dev_get_msi_domain(&dev->bus->dev);
2727 
2728 	dev_set_msi_domain(&dev->dev, d);
2729 }
2730 
/**
 * pci_device_add - Add a discovered PCI device to the bus and device core
 * @dev: device to add (set up via pci_setup_device())
 * @bus: bus the device sits on
 *
 * Configures the device, initializes its capabilities, links it on
 * @bus->devices, and registers it with the driver core.
 */
void pci_device_add(struct pci_dev *dev, struct pci_bus *bus)
{
	int ret;

	pci_configure_device(dev);

	device_initialize(&dev->dev);
	dev->dev.release = pci_release_dev;

	set_dev_node(&dev->dev, pcibus_to_node(bus));
	dev->dev.dma_mask = &dev->dma_mask;
	dev->dev.dma_parms = &dev->dma_parms;
	dev->dev.coherent_dma_mask = 0xffffffffull;

	dma_set_max_seg_size(&dev->dev, 65536);
	dma_set_seg_boundary(&dev->dev, 0xffffffff);

	pcie_failed_link_retrain(dev);

	/* Fix up broken headers */
	pci_fixup_device(pci_fixup_header, dev);

	pci_reassigndev_resource_alignment(dev);

	pci_init_capabilities(dev);

	/*
	 * Add the device to our list of discovered devices
	 * and the bus list for fixup functions, etc.
	 */
	down_write(&pci_bus_sem);
	list_add_tail(&dev->bus_list, &bus->devices);
	up_write(&pci_bus_sem);

	ret = pcibios_device_add(dev);
	WARN_ON(ret < 0);

	/* Set up MSI IRQ domain */
	pci_set_msi_domain(dev);

	/* Notifier could use PCI capabilities */
	ret = device_add(&dev->dev);
	WARN_ON(ret < 0);

	/* Establish pdev->tsm for newly added (e.g. new SR-IOV VFs) */
	pci_tsm_init(dev);

	pci_npem_create(dev);

	pci_doe_sysfs_init(dev);
}
2782 
/*
 * Return the device at @devfn on @bus, scanning and adding it if it has
 * not been discovered yet.  Returns NULL if no device responds.
 */
struct pci_dev *pci_scan_single_device(struct pci_bus *bus, int devfn)
{
	struct pci_dev *pdev;

	/* Already known?  Then there is nothing to scan. */
	pdev = pci_get_slot(bus, devfn);
	if (pdev) {
		pci_dev_put(pdev);
		return pdev;
	}

	pdev = pci_scan_device(bus, devfn);
	if (pdev)
		pci_device_add(pdev, bus);

	return pdev;
}
EXPORT_SYMBOL(pci_scan_single_device);
2802 
2803 static int next_ari_fn(struct pci_bus *bus, struct pci_dev *dev, int fn)
2804 {
2805 	int pos;
2806 	u16 cap = 0;
2807 	unsigned int next_fn;
2808 
2809 	if (!dev)
2810 		return -ENODEV;
2811 
2812 	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ARI);
2813 	if (!pos)
2814 		return -ENODEV;
2815 
2816 	pci_read_config_word(dev, pos + PCI_ARI_CAP, &cap);
2817 	next_fn = PCI_ARI_CAP_NFN(cap);
2818 	if (next_fn <= fn)
2819 		return -ENODEV;	/* protect against malformed list */
2820 
2821 	return next_fn;
2822 }
2823 
2824 static int next_fn(struct pci_bus *bus, struct pci_dev *dev, int fn)
2825 {
2826 	if (pci_ari_enabled(bus))
2827 		return next_ari_fn(bus, dev, fn);
2828 
2829 	if (fn >= 7)
2830 		return -ENODEV;
2831 	/* only multifunction devices may have more functions */
2832 	if (dev && !dev->multifunction)
2833 		return -ENODEV;
2834 
2835 	return fn + 1;
2836 }
2837 
2838 static int only_one_child(struct pci_bus *bus)
2839 {
2840 	struct pci_dev *bridge = bus->self;
2841 
2842 	/*
2843 	 * Systems with unusual topologies set PCI_SCAN_ALL_PCIE_DEVS so
2844 	 * we scan for all possible devices, not just Device 0.
2845 	 */
2846 	if (pci_has_flag(PCI_SCAN_ALL_PCIE_DEVS))
2847 		return 0;
2848 
2849 	/*
2850 	 * A PCIe Downstream Port normally leads to a Link with only Device
2851 	 * 0 on it (PCIe spec r3.1, sec 7.3.1).  As an optimization, scan
2852 	 * only for Device 0 in that situation.
2853 	 */
2854 	if (bridge && pci_is_pcie(bridge) && pcie_downstream_port(bridge))
2855 		return 1;
2856 
2857 	return 0;
2858 }
2859 
2860 /**
2861  * pci_scan_slot - Scan a PCI slot on a bus for devices
2862  * @bus: PCI bus to scan
2863  * @devfn: slot number to scan (must have zero function)
2864  *
2865  * Scan a PCI slot on the specified PCI bus for devices, adding
2866  * discovered devices to the @bus->devices list.  New devices
2867  * will not have is_added set.
2868  *
2869  * Returns the number of new devices found.
2870  */
2871 int pci_scan_slot(struct pci_bus *bus, int devfn)
2872 {
2873 	struct pci_dev *dev;
2874 	int fn = 0, nr = 0;
2875 
2876 	if (only_one_child(bus) && (devfn > 0))
2877 		return 0; /* Already scanned the entire slot */
2878 
2879 	do {
2880 		dev = pci_scan_single_device(bus, devfn + fn);
2881 		if (dev) {
2882 			if (!pci_dev_is_added(dev))
2883 				nr++;
2884 			if (fn > 0)
2885 				dev->multifunction = 1;
2886 		} else if (fn == 0) {
2887 			/*
2888 			 * Function 0 is required unless we are running on
2889 			 * a hypervisor that passes through individual PCI
2890 			 * functions.
2891 			 */
2892 			if (!hypervisor_isolated_pci_functions())
2893 				break;
2894 		}
2895 		fn = next_fn(bus, dev, fn);
2896 	} while (fn >= 0);
2897 
2898 	/* Only one slot has PCIe device */
2899 	if (bus->self && nr)
2900 		pcie_aspm_init_link_state(bus->self);
2901 
2902 	return nr;
2903 }
2904 EXPORT_SYMBOL(pci_scan_slot);
2905 
2906 static int pcie_find_smpss(struct pci_dev *dev, void *data)
2907 {
2908 	u8 *smpss = data;
2909 
2910 	if (!pci_is_pcie(dev))
2911 		return 0;
2912 
2913 	/*
2914 	 * We don't have a way to change MPS settings on devices that have
2915 	 * drivers attached.  A hot-added device might support only the minimum
2916 	 * MPS setting (MPS=128).  Therefore, if the fabric contains a bridge
2917 	 * where devices may be hot-added, we limit the fabric MPS to 128 so
2918 	 * hot-added devices will work correctly.
2919 	 *
2920 	 * However, if we hot-add a device to a slot directly below a Root
2921 	 * Port, it's impossible for there to be other existing devices below
2922 	 * the port.  We don't limit the MPS in this case because we can
2923 	 * reconfigure MPS on both the Root Port and the hot-added device,
2924 	 * and there are no other devices involved.
2925 	 *
2926 	 * Note that this PCIE_BUS_SAFE path assumes no peer-to-peer DMA.
2927 	 */
2928 	if (dev->is_hotplug_bridge &&
2929 	    pci_pcie_type(dev) != PCI_EXP_TYPE_ROOT_PORT)
2930 		*smpss = 0;
2931 
2932 	if (*smpss > dev->pcie_mpss)
2933 		*smpss = dev->pcie_mpss;
2934 
2935 	return 0;
2936 }
2937 
2938 static void pcie_write_mps(struct pci_dev *dev, int mps)
2939 {
2940 	int rc;
2941 
2942 	if (pcie_bus_config == PCIE_BUS_PERFORMANCE) {
2943 		mps = 128 << dev->pcie_mpss;
2944 
2945 		if (pci_pcie_type(dev) != PCI_EXP_TYPE_ROOT_PORT &&
2946 		    dev->bus->self)
2947 
2948 			/*
2949 			 * For "Performance", the assumption is made that
2950 			 * downstream communication will never be larger than
2951 			 * the MRRS.  So, the MPS only needs to be configured
2952 			 * for the upstream communication.  This being the case,
2953 			 * walk from the top down and set the MPS of the child
2954 			 * to that of the parent bus.
2955 			 *
2956 			 * Configure the device MPS with the smaller of the
2957 			 * device MPSS or the bridge MPS (which is assumed to be
2958 			 * properly configured at this point to the largest
2959 			 * allowable MPS based on its parent bus).
2960 			 */
2961 			mps = min(mps, pcie_get_mps(dev->bus->self));
2962 	}
2963 
2964 	rc = pcie_set_mps(dev, mps);
2965 	if (rc)
2966 		pci_err(dev, "Failed attempting to set the MPS\n");
2967 }
2968 
2969 static void pcie_write_mrrs(struct pci_dev *dev)
2970 {
2971 	int rc, mrrs;
2972 
2973 	/*
2974 	 * In the "safe" case, do not configure the MRRS.  There appear to be
2975 	 * issues with setting MRRS to 0 on a number of devices.
2976 	 */
2977 	if (pcie_bus_config != PCIE_BUS_PERFORMANCE)
2978 		return;
2979 
2980 	/*
2981 	 * For max performance, the MRRS must be set to the largest supported
2982 	 * value.  However, it cannot be configured larger than the MPS the
2983 	 * device or the bus can support.  This should already be properly
2984 	 * configured by a prior call to pcie_write_mps().
2985 	 */
2986 	mrrs = pcie_get_mps(dev);
2987 
2988 	/*
2989 	 * MRRS is a R/W register.  Invalid values can be written, but a
2990 	 * subsequent read will verify if the value is acceptable or not.
2991 	 * If the MRRS value provided is not acceptable (e.g., too large),
2992 	 * shrink the value until it is acceptable to the HW.
2993 	 */
2994 	while (mrrs != pcie_get_readrq(dev) && mrrs >= 128) {
2995 		rc = pcie_set_readrq(dev, mrrs);
2996 		if (!rc)
2997 			break;
2998 
2999 		pci_warn(dev, "Failed attempting to set the MRRS\n");
3000 		mrrs /= 2;
3001 	}
3002 
3003 	if (mrrs < 128)
3004 		pci_err(dev, "MRRS was unable to be configured with a safe value.  If problems are experienced, try running with pci=pcie_bus_safe\n");
3005 }
3006 
3007 static int pcie_bus_configure_set(struct pci_dev *dev, void *data)
3008 {
3009 	int mps, orig_mps;
3010 
3011 	if (!pci_is_pcie(dev))
3012 		return 0;
3013 
3014 	if (pcie_bus_config == PCIE_BUS_TUNE_OFF ||
3015 	    pcie_bus_config == PCIE_BUS_DEFAULT)
3016 		return 0;
3017 
3018 	mps = 128 << *(u8 *)data;
3019 	orig_mps = pcie_get_mps(dev);
3020 
3021 	pcie_write_mps(dev, mps);
3022 	pcie_write_mrrs(dev);
3023 
3024 	pci_info(dev, "Max Payload Size set to %4d/%4d (was %4d), Max Read Rq %4d\n",
3025 		 pcie_get_mps(dev), 128 << dev->pcie_mpss,
3026 		 orig_mps, pcie_get_readrq(dev));
3027 
3028 	return 0;
3029 }
3030 
3031 /*
3032  * pcie_bus_configure_settings() requires that pci_walk_bus work in a top-down,
3033  * parents then children fashion.  If this changes, then this code will not
3034  * work as designed.
3035  */
3036 void pcie_bus_configure_settings(struct pci_bus *bus)
3037 {
3038 	u8 smpss = 0;
3039 
3040 	if (!bus->self)
3041 		return;
3042 
3043 	if (!pci_is_pcie(bus->self))
3044 		return;
3045 
3046 	/*
3047 	 * FIXME - Peer to peer DMA is possible, though the endpoint would need
3048 	 * to be aware of the MPS of the destination.  To work around this,
3049 	 * simply force the MPS of the entire system to the smallest possible.
3050 	 */
3051 	if (pcie_bus_config == PCIE_BUS_PEER2PEER)
3052 		smpss = 0;
3053 
3054 	if (pcie_bus_config == PCIE_BUS_SAFE) {
3055 		smpss = bus->self->pcie_mpss;
3056 
3057 		pcie_find_smpss(bus->self, &smpss);
3058 		pci_walk_bus(bus, pcie_find_smpss, &smpss);
3059 	}
3060 
3061 	pcie_bus_configure_set(bus->self, &smpss);
3062 	pci_walk_bus(bus, pcie_bus_configure_set, &smpss);
3063 }
3064 EXPORT_SYMBOL_GPL(pcie_bus_configure_settings);
3065 
3066 /*
3067  * Called after each bus is probed, but before its children are examined.  This
3068  * is marked as __weak because multiple architectures define it.
3069  */
3070 void __weak pcibios_fixup_bus(struct pci_bus *bus)
3071 {
3072        /* nothing to do, expected to be removed in the future */
3073 }
3074 
3075 /**
3076  * pci_scan_child_bus_extend() - Scan devices below a bus
3077  * @bus: Bus to scan for devices
3078  * @available_buses: Total number of buses available (%0 does not try to
3079  *		     extend beyond the minimal)
3080  *
3081  * Scans devices below @bus including subordinate buses. Returns new
3082  * subordinate number including all the found devices. Passing
3083  * @available_buses causes the remaining bus space to be distributed
3084  * equally between hotplug-capable bridges to allow future extension of the
3085  * hierarchy.
3086  */
3087 static unsigned int pci_scan_child_bus_extend(struct pci_bus *bus,
3088 					      unsigned int available_buses)
3089 {
3090 	unsigned int used_buses, normal_bridges = 0, hotplug_bridges = 0;
3091 	unsigned int start = bus->busn_res.start;
3092 	unsigned int devnr, cmax, max = start;
3093 	struct pci_dev *dev;
3094 
3095 	dev_dbg(&bus->dev, "scanning bus\n");
3096 
3097 	/* Go find them, Rover! */
3098 	for (devnr = 0; devnr < PCI_MAX_NR_DEVS; devnr++)
3099 		pci_scan_slot(bus, PCI_DEVFN(devnr, 0));
3100 
3101 	/* Reserve buses for SR-IOV capability */
3102 	used_buses = pci_iov_bus_range(bus);
3103 	max += used_buses;
3104 
3105 	/*
3106 	 * After performing arch-dependent fixup of the bus, look behind
3107 	 * all PCI-to-PCI bridges on this bus.
3108 	 */
3109 	if (!bus->is_added) {
3110 		dev_dbg(&bus->dev, "fixups for bus\n");
3111 		pcibios_fixup_bus(bus);
3112 		bus->is_added = 1;
3113 	}
3114 
3115 	/*
3116 	 * Calculate how many hotplug bridges and normal bridges there
3117 	 * are on this bus. We will distribute the additional available
3118 	 * buses between hotplug bridges.
3119 	 */
3120 	for_each_pci_bridge(dev, bus) {
3121 		if (dev->is_hotplug_bridge)
3122 			hotplug_bridges++;
3123 		else
3124 			normal_bridges++;
3125 	}
3126 
3127 	/*
3128 	 * Scan bridges that are already configured. We don't touch them
3129 	 * unless they are misconfigured (which will be done in the second
3130 	 * scan below).
3131 	 */
3132 	for_each_pci_bridge(dev, bus) {
3133 		cmax = max;
3134 		max = pci_scan_bridge_extend(bus, dev, max, 0, 0);
3135 
3136 		/*
3137 		 * Reserve one bus for each bridge now to avoid extending
3138 		 * hotplug bridges too much during the second scan below.
3139 		 */
3140 		used_buses++;
3141 		if (max - cmax > 1)
3142 			used_buses += max - cmax - 1;
3143 	}
3144 
3145 	/* Scan bridges that need to be reconfigured */
3146 	for_each_pci_bridge(dev, bus) {
3147 		unsigned int buses = 0;
3148 
3149 		if (!hotplug_bridges && normal_bridges == 1) {
3150 			/*
3151 			 * There is only one bridge on the bus (upstream
3152 			 * port) so it gets all available buses which it
3153 			 * can then distribute to the possible hotplug
3154 			 * bridges below.
3155 			 */
3156 			buses = available_buses;
3157 		} else if (dev->is_hotplug_bridge) {
3158 			/*
3159 			 * Distribute the extra buses between hotplug
3160 			 * bridges if any.
3161 			 */
3162 			buses = available_buses / hotplug_bridges;
3163 			buses = min(buses, available_buses - used_buses + 1);
3164 		}
3165 
3166 		cmax = max;
3167 		max = pci_scan_bridge_extend(bus, dev, cmax, buses, 1);
3168 		/* One bus is already accounted so don't add it again */
3169 		if (max - cmax > 1)
3170 			used_buses += max - cmax - 1;
3171 	}
3172 
3173 	/*
3174 	 * Make sure a hotplug bridge has at least the minimum requested
3175 	 * number of buses but allow it to grow up to the maximum available
3176 	 * bus number if there is room.
3177 	 */
3178 	if (bus->self && bus->self->is_hotplug_bridge) {
3179 		used_buses = max(available_buses, pci_hotplug_bus_size - 1);
3180 		if (max - start < used_buses) {
3181 			max = start + used_buses;
3182 
3183 			/* Do not allocate more buses than we have room left */
3184 			if (max > bus->busn_res.end)
3185 				max = bus->busn_res.end;
3186 
3187 			dev_dbg(&bus->dev, "%pR extended by %#02x\n",
3188 				&bus->busn_res, max - start);
3189 		}
3190 	}
3191 
3192 	/*
3193 	 * We've scanned the bus and so we know all about what's on
3194 	 * the other side of any bridges that may be on this bus plus
3195 	 * any devices.
3196 	 *
3197 	 * Return how far we've got finding sub-buses.
3198 	 */
3199 	dev_dbg(&bus->dev, "bus scan returning with max=%02x\n", max);
3200 	return max;
3201 }
3202 
3203 /**
3204  * pci_scan_child_bus() - Scan devices below a bus
3205  * @bus: Bus to scan for devices
3206  *
3207  * Scans devices below @bus including subordinate buses. Returns new
3208  * subordinate number including all the found devices.
3209  */
3210 unsigned int pci_scan_child_bus(struct pci_bus *bus)
3211 {
3212 	return pci_scan_child_bus_extend(bus, 0);
3213 }
3214 EXPORT_SYMBOL_GPL(pci_scan_child_bus);
3215 
3216 /**
3217  * pcibios_root_bridge_prepare - Platform-specific host bridge setup
3218  * @bridge: Host bridge to set up
3219  *
3220  * Default empty implementation.  Replace with an architecture-specific setup
3221  * routine, if necessary.
3222  */
3223 int __weak pcibios_root_bridge_prepare(struct pci_host_bridge *bridge)
3224 {
3225 	return 0;
3226 }
3227 
/* Arch hook; default does nothing and may be overridden by architectures. */
void __weak pcibios_add_bus(struct pci_bus *bus)
{
}
3231 
/* Arch hook; default does nothing and may be overridden by architectures. */
void __weak pcibios_remove_bus(struct pci_bus *bus)
{
}
3235 
3236 struct pci_bus *pci_create_root_bus(struct device *parent, int bus,
3237 		struct pci_ops *ops, void *sysdata, struct list_head *resources)
3238 {
3239 	int error;
3240 	struct pci_host_bridge *bridge;
3241 
3242 	bridge = pci_alloc_host_bridge(0);
3243 	if (!bridge)
3244 		return NULL;
3245 
3246 	bridge->dev.parent = parent;
3247 
3248 	list_splice_init(resources, &bridge->windows);
3249 	bridge->sysdata = sysdata;
3250 	bridge->busnr = bus;
3251 	bridge->ops = ops;
3252 
3253 	error = pci_register_host_bridge(bridge);
3254 	if (error < 0)
3255 		goto err_out;
3256 
3257 	return bridge->bus;
3258 
3259 err_out:
3260 	put_device(&bridge->dev);
3261 	return NULL;
3262 }
3263 EXPORT_SYMBOL_GPL(pci_create_root_bus);
3264 
3265 int pci_host_probe(struct pci_host_bridge *bridge)
3266 {
3267 	struct pci_bus *bus, *child;
3268 	int ret;
3269 
3270 	pci_lock_rescan_remove();
3271 	ret = pci_scan_root_bus_bridge(bridge);
3272 	pci_unlock_rescan_remove();
3273 	if (ret < 0) {
3274 		dev_err(bridge->dev.parent, "Scanning root bridge failed");
3275 		return ret;
3276 	}
3277 
3278 	bus = bridge->bus;
3279 
3280 	/* If we must preserve the resource configuration, claim now */
3281 	if (bridge->preserve_config)
3282 		pci_bus_claim_resources(bus);
3283 
3284 	/*
3285 	 * Assign whatever was left unassigned. If we didn't claim above,
3286 	 * this will reassign everything.
3287 	 */
3288 	pci_assign_unassigned_root_bus_resources(bus);
3289 
3290 	list_for_each_entry(child, &bus->children, node)
3291 		pcie_bus_configure_settings(child);
3292 
3293 	pci_lock_rescan_remove();
3294 	pci_bus_add_devices(bus);
3295 	pci_unlock_rescan_remove();
3296 
3297 	/*
3298 	 * Ensure pm_runtime_enable() is called for the controller drivers
3299 	 * before calling pci_host_probe(). The PM framework expects that
3300 	 * if the parent device supports runtime PM, it will be enabled
3301 	 * before child runtime PM is enabled.
3302 	 */
3303 	pm_runtime_set_active(&bridge->dev);
3304 	pm_runtime_no_callbacks(&bridge->dev);
3305 	devm_pm_runtime_enable(&bridge->dev);
3306 
3307 	return 0;
3308 }
3309 EXPORT_SYMBOL_GPL(pci_host_probe);
3310 
3311 int pci_bus_insert_busn_res(struct pci_bus *b, int bus, int bus_max)
3312 {
3313 	struct resource *res = &b->busn_res;
3314 	struct resource *parent_res, *conflict;
3315 
3316 	res->start = bus;
3317 	res->end = bus_max;
3318 	res->flags = IORESOURCE_BUS;
3319 
3320 	if (!pci_is_root_bus(b))
3321 		parent_res = &b->parent->busn_res;
3322 	else {
3323 		parent_res = get_pci_domain_busn_res(pci_domain_nr(b));
3324 		res->flags |= IORESOURCE_PCI_FIXED;
3325 	}
3326 
3327 	conflict = request_resource_conflict(parent_res, res);
3328 
3329 	if (conflict)
3330 		dev_info(&b->dev,
3331 			   "busn_res: can not insert %pR under %s%pR (conflicts with %s %pR)\n",
3332 			    res, pci_is_root_bus(b) ? "domain " : "",
3333 			    parent_res, conflict->name, conflict);
3334 
3335 	return conflict == NULL;
3336 }
3337 
3338 int pci_bus_update_busn_res_end(struct pci_bus *b, int bus_max)
3339 {
3340 	struct resource *res = &b->busn_res;
3341 	struct resource old_res = *res;
3342 	resource_size_t size;
3343 	int ret;
3344 
3345 	if (res->start > bus_max)
3346 		return -EINVAL;
3347 
3348 	size = bus_max - res->start + 1;
3349 	ret = adjust_resource(res, res->start, size);
3350 	dev_info(&b->dev, "busn_res: %pR end %s updated to %02x\n",
3351 			&old_res, ret ? "can not be" : "is", bus_max);
3352 
3353 	if (!ret && !res->parent)
3354 		pci_bus_insert_busn_res(b, res->start, res->end);
3355 
3356 	return ret;
3357 }
3358 
3359 void pci_bus_release_busn_res(struct pci_bus *b)
3360 {
3361 	struct resource *res = &b->busn_res;
3362 	int ret;
3363 
3364 	if (!res->flags || !res->parent)
3365 		return;
3366 
3367 	ret = release_resource(res);
3368 	dev_info(&b->dev, "busn_res: %pR %s released\n",
3369 			res, ret ? "can not be" : "is");
3370 }
3371 
3372 int pci_scan_root_bus_bridge(struct pci_host_bridge *bridge)
3373 {
3374 	struct resource_entry *window;
3375 	bool found = false;
3376 	struct pci_bus *b;
3377 	int max, bus, ret;
3378 
3379 	if (!bridge)
3380 		return -EINVAL;
3381 
3382 	resource_list_for_each_entry(window, &bridge->windows)
3383 		if (window->res->flags & IORESOURCE_BUS) {
3384 			bridge->busnr = window->res->start;
3385 			found = true;
3386 			break;
3387 		}
3388 
3389 	ret = pci_register_host_bridge(bridge);
3390 	if (ret < 0)
3391 		return ret;
3392 
3393 	b = bridge->bus;
3394 	bus = bridge->busnr;
3395 
3396 	if (!found) {
3397 		dev_info(&b->dev,
3398 		 "No busn resource found for root bus, will use [bus %02x-ff]\n",
3399 			bus);
3400 		pci_bus_insert_busn_res(b, bus, 255);
3401 	}
3402 
3403 	max = pci_scan_child_bus(b);
3404 
3405 	if (!found)
3406 		pci_bus_update_busn_res_end(b, max);
3407 
3408 	return 0;
3409 }
3410 EXPORT_SYMBOL(pci_scan_root_bus_bridge);
3411 
3412 struct pci_bus *pci_scan_root_bus(struct device *parent, int bus,
3413 		struct pci_ops *ops, void *sysdata, struct list_head *resources)
3414 {
3415 	struct resource_entry *window;
3416 	bool found = false;
3417 	struct pci_bus *b;
3418 	int max;
3419 
3420 	resource_list_for_each_entry(window, resources)
3421 		if (window->res->flags & IORESOURCE_BUS) {
3422 			found = true;
3423 			break;
3424 		}
3425 
3426 	b = pci_create_root_bus(parent, bus, ops, sysdata, resources);
3427 	if (!b)
3428 		return NULL;
3429 
3430 	if (!found) {
3431 		dev_info(&b->dev,
3432 		 "No busn resource found for root bus, will use [bus %02x-ff]\n",
3433 			bus);
3434 		pci_bus_insert_busn_res(b, bus, 255);
3435 	}
3436 
3437 	max = pci_scan_child_bus(b);
3438 
3439 	if (!found)
3440 		pci_bus_update_busn_res_end(b, max);
3441 
3442 	return b;
3443 }
3444 EXPORT_SYMBOL(pci_scan_root_bus);
3445 
3446 struct pci_bus *pci_scan_bus(int bus, struct pci_ops *ops,
3447 					void *sysdata)
3448 {
3449 	LIST_HEAD(resources);
3450 	struct pci_bus *b;
3451 
3452 	pci_add_resource(&resources, &ioport_resource);
3453 	pci_add_resource(&resources, &iomem_resource);
3454 	pci_add_resource(&resources, &busn_resource);
3455 	b = pci_create_root_bus(NULL, bus, ops, sysdata, &resources);
3456 	if (b) {
3457 		pci_scan_child_bus(b);
3458 	} else {
3459 		pci_free_resource_list(&resources);
3460 	}
3461 	return b;
3462 }
3463 EXPORT_SYMBOL(pci_scan_bus);
3464 
3465 /**
3466  * pci_rescan_bus_bridge_resize - Scan a PCI bus for devices
3467  * @bridge: PCI bridge for the bus to scan
3468  *
3469  * Scan a PCI bus and child buses for new devices, add them,
3470  * and enable them, resizing bridge mmio/io resource if necessary
3471  * and possible.  The caller must ensure the child devices are already
3472  * removed for resizing to occur.
3473  *
3474  * Returns the max number of subordinate bus discovered.
3475  */
3476 unsigned int pci_rescan_bus_bridge_resize(struct pci_dev *bridge)
3477 {
3478 	unsigned int max;
3479 	struct pci_bus *bus = bridge->subordinate;
3480 
3481 	max = pci_scan_child_bus(bus);
3482 
3483 	pci_assign_unassigned_bridge_resources(bridge);
3484 
3485 	pci_bus_add_devices(bus);
3486 
3487 	return max;
3488 }
3489 
3490 /**
3491  * pci_rescan_bus - Scan a PCI bus for devices
3492  * @bus: PCI bus to scan
3493  *
3494  * Scan a PCI bus and child buses for new devices, add them,
3495  * and enable them.
3496  *
3497  * Returns the max number of subordinate bus discovered.
3498  */
3499 unsigned int pci_rescan_bus(struct pci_bus *bus)
3500 {
3501 	unsigned int max;
3502 
3503 	max = pci_scan_child_bus(bus);
3504 	pci_assign_unassigned_bus_resources(bus);
3505 	pci_bus_add_devices(bus);
3506 
3507 	return max;
3508 }
3509 EXPORT_SYMBOL_GPL(pci_rescan_bus);
3510 
3511 /*
3512  * pci_rescan_bus(), pci_rescan_bus_bridge_resize() and PCI device removal
3513  * routines should always be executed under this mutex.
3514  */
3515 DEFINE_MUTEX(pci_rescan_remove_lock);
3516 
/* Acquire the global mutex serializing PCI rescan and removal. */
void pci_lock_rescan_remove(void)
{
	mutex_lock(&pci_rescan_remove_lock);
}
EXPORT_SYMBOL_GPL(pci_lock_rescan_remove);
3522 
/* Release the global mutex serializing PCI rescan and removal. */
void pci_unlock_rescan_remove(void)
{
	mutex_unlock(&pci_rescan_remove_lock);
}
EXPORT_SYMBOL_GPL(pci_unlock_rescan_remove);
3528 
3529 static int __init pci_sort_bf_cmp(const struct device *d_a,
3530 				  const struct device *d_b)
3531 {
3532 	const struct pci_dev *a = to_pci_dev(d_a);
3533 	const struct pci_dev *b = to_pci_dev(d_b);
3534 
3535 	if      (pci_domain_nr(a->bus) < pci_domain_nr(b->bus)) return -1;
3536 	else if (pci_domain_nr(a->bus) > pci_domain_nr(b->bus)) return  1;
3537 
3538 	if      (a->bus->number < b->bus->number) return -1;
3539 	else if (a->bus->number > b->bus->number) return  1;
3540 
3541 	if      (a->devfn < b->devfn) return -1;
3542 	else if (a->devfn > b->devfn) return  1;
3543 
3544 	return 0;
3545 }
3546 
/* Re-sort PCI devices breadth-first by (domain, bus, devfn); see pci_sort_bf_cmp(). */
void __init pci_sort_breadthfirst(void)
{
	bus_sort_breadthfirst(&pci_bus_type, &pci_sort_bf_cmp);
}
3551 
/**
 * pci_hp_add_bridge - Configure and scan a hot-added PCI bridge
 * @dev: Hot-added bridge to set up
 *
 * Find a free bus number in the parent's bus range, scan the bridge, and
 * distribute the remaining bus numbers to hotplug-capable bridges below.
 * Returns 0 on success, -1 if no bus number is free or no subordinate bus
 * could be created.
 */
int pci_hp_add_bridge(struct pci_dev *dev)
{
	struct pci_bus *parent = dev->bus;
	int busnr, start = parent->busn_res.start;
	unsigned int available_buses = 0;
	int end = parent->busn_res.end;

	/* Find the first bus number in the parent's range not yet in use */
	for (busnr = start; busnr <= end; busnr++) {
		if (!pci_find_bus(pci_domain_nr(parent), busnr))
			break;
	}
	/*
	 * busnr is now the first free number, or end + 1 if the range is
	 * exhausted.  The post-decrement both detects exhaustion and steps
	 * busnr back to the highest number already in use, which is what
	 * the scan below takes as its current maximum.
	 */
	if (busnr-- > end) {
		pci_err(dev, "No bus number available for hot-added bridge\n");
		return -1;
	}

	/* Scan bridges that are already configured */
	busnr = pci_scan_bridge(parent, dev, busnr, 0);

	/*
	 * Distribute the available bus numbers between hotplug-capable
	 * bridges to make extending the chain later possible.
	 */
	available_buses = end - busnr;

	/* Scan bridges that need to be reconfigured */
	pci_scan_bridge_extend(parent, dev, busnr, available_buses, 1);

	if (!dev->subordinate)
		return -1;

	return 0;
}
EXPORT_SYMBOL_GPL(pci_hp_add_bridge);
3586