xref: /linux/arch/s390/pci/pci.c (revision 9cc8d0ecdd2aad42e377e971e3bb114339df609e)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright IBM Corp. 2012
4  *
5  * Author(s):
6  *   Jan Glauber <jang@linux.vnet.ibm.com>
7  *
8  * The System z PCI code is a rewrite from a prototype by
9  * the following people (Kudoz!):
10  *   Alexander Schmidt
11  *   Christoph Raisch
12  *   Hannes Hering
13  *   Hoang-Nam Nguyen
14  *   Jan-Bernd Themann
15  *   Stefan Roscher
16  *   Thomas Klein
17  */
18 
19 #define KMSG_COMPONENT "zpci"
20 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
21 
22 #include <linux/kernel.h>
23 #include <linux/slab.h>
24 #include <linux/err.h>
25 #include <linux/export.h>
26 #include <linux/delay.h>
27 #include <linux/seq_file.h>
28 #include <linux/jump_label.h>
29 #include <linux/pci.h>
30 #include <linux/printk.h>
31 #include <linux/lockdep.h>
32 #include <linux/list_sort.h>
33 
34 #include <asm/isc.h>
35 #include <asm/airq.h>
36 #include <asm/facility.h>
37 #include <asm/pci_insn.h>
38 #include <asm/pci_clp.h>
39 #include <asm/pci_dma.h>
40 
41 #include "pci_bus.h"
42 #include "pci_iov.h"
43 
/* list of all detected zpci devices */
static LIST_HEAD(zpci_list);
/* taken around every traversal and modification of zpci_list in this file */
static DEFINE_SPINLOCK(zpci_list_lock);

/* bitmap of PCI domain numbers currently in use; guarded by zpci_domain_lock */
static DECLARE_BITMAP(zpci_domain, ZPCI_DOMAIN_BITMAP_SIZE);
static DEFINE_SPINLOCK(zpci_domain_lock);

/*
 * Number of entries in the iomap table and its allocation bitmap (see
 * zpci_mem_init()), capped at ZPCI_IOMAP_MAX_ENTRIES.
 */
#define ZPCI_IOMAP_ENTRIES						\
	min(((unsigned long) ZPCI_NR_DEVICES * PCI_STD_NUM_BARS / 2),	\
	    ZPCI_IOMAP_MAX_ENTRIES)

/* set to 1 by the "norid" option handled in pcibios_setup() */
unsigned int s390_pci_no_rid;

/* zpci_iomap_lock is held around accesses to the iomap bitmap and table */
static DEFINE_SPINLOCK(zpci_iomap_lock);
static unsigned long *zpci_iomap_bitmap;
struct zpci_iomap_entry *zpci_iomap_start;
EXPORT_SYMBOL_GPL(zpci_iomap_start);

/* enabled in pci_base_init() when MACHINE_HAS_PCI_MIO is true */
DEFINE_STATIC_KEY_FALSE(have_mio);

/* slab cache the function measurement blocks (zdev->fmb) are allocated from */
static struct kmem_cache *zdev_fmb_cache;

/* AEN structures that must be preserved over KVM module re-insertion */
union zpci_sic_iib *zpci_aipb;
EXPORT_SYMBOL_GPL(zpci_aipb);
struct airq_iv *zpci_aif_sbv;
EXPORT_SYMBOL_GPL(zpci_aif_sbv);
71 
72 struct zpci_dev *get_zdev_by_fid(u32 fid)
73 {
74 	struct zpci_dev *tmp, *zdev = NULL;
75 
76 	spin_lock(&zpci_list_lock);
77 	list_for_each_entry(tmp, &zpci_list, entry) {
78 		if (tmp->fid == fid) {
79 			zdev = tmp;
80 			zpci_zdev_get(zdev);
81 			break;
82 		}
83 	}
84 	spin_unlock(&zpci_list_lock);
85 	return zdev;
86 }
87 
88 void zpci_remove_reserved_devices(void)
89 {
90 	struct zpci_dev *tmp, *zdev;
91 	enum zpci_state state;
92 	LIST_HEAD(remove);
93 
94 	spin_lock(&zpci_list_lock);
95 	list_for_each_entry_safe(zdev, tmp, &zpci_list, entry) {
96 		if (zdev->state == ZPCI_FN_STATE_STANDBY &&
97 		    !clp_get_state(zdev->fid, &state) &&
98 		    state == ZPCI_FN_STATE_RESERVED)
99 			list_move_tail(&zdev->entry, &remove);
100 	}
101 	spin_unlock(&zpci_list_lock);
102 
103 	list_for_each_entry_safe(zdev, tmp, &remove, entry)
104 		zpci_device_reserved(zdev);
105 }
106 
107 int pci_domain_nr(struct pci_bus *bus)
108 {
109 	return ((struct zpci_bus *) bus->sysdata)->domain_nr;
110 }
111 EXPORT_SYMBOL_GPL(pci_domain_nr);
112 
/* Same value as pci_domain_nr(): the domain number of @bus. */
int pci_proc_domain(struct pci_bus *bus)
{
	int domain = pci_domain_nr(bus);

	return domain;
}
EXPORT_SYMBOL_GPL(pci_proc_domain);
118 
119 /* Modify PCI: Register I/O address translation parameters */
120 int zpci_register_ioat(struct zpci_dev *zdev, u8 dmaas,
121 		       u64 base, u64 limit, u64 iota, u8 *status)
122 {
123 	u64 req = ZPCI_CREATE_REQ(zdev->fh, dmaas, ZPCI_MOD_FC_REG_IOAT);
124 	struct zpci_fib fib = {0};
125 	u8 cc;
126 
127 	WARN_ON_ONCE(iota & 0x3fff);
128 	fib.pba = base;
129 	/* Work around off by one in ISM virt device */
130 	if (zdev->pft == PCI_FUNC_TYPE_ISM && limit > base)
131 		fib.pal = limit + (1 << 12);
132 	else
133 		fib.pal = limit;
134 	fib.iota = iota | ZPCI_IOTA_RTTO_FLAG;
135 	fib.gd = zdev->gisa;
136 	cc = zpci_mod_fc(req, &fib, status);
137 	if (cc)
138 		zpci_dbg(3, "reg ioat fid:%x, cc:%d, status:%d\n", zdev->fid, cc, *status);
139 	return cc;
140 }
141 EXPORT_SYMBOL_GPL(zpci_register_ioat);
142 
143 /* Modify PCI: Unregister I/O address translation parameters */
144 int zpci_unregister_ioat(struct zpci_dev *zdev, u8 dmaas)
145 {
146 	u64 req = ZPCI_CREATE_REQ(zdev->fh, dmaas, ZPCI_MOD_FC_DEREG_IOAT);
147 	struct zpci_fib fib = {0};
148 	u8 cc, status;
149 
150 	fib.gd = zdev->gisa;
151 
152 	cc = zpci_mod_fc(req, &fib, &status);
153 	if (cc)
154 		zpci_dbg(3, "unreg ioat fid:%x, cc:%d, status:%d\n", zdev->fid, cc, status);
155 	return cc;
156 }
157 
158 /* Modify PCI: Set PCI function measurement parameters */
159 int zpci_fmb_enable_device(struct zpci_dev *zdev)
160 {
161 	u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_SET_MEASURE);
162 	struct zpci_iommu_ctrs *ctrs;
163 	struct zpci_fib fib = {0};
164 	u8 cc, status;
165 
166 	if (zdev->fmb || sizeof(*zdev->fmb) < zdev->fmb_length)
167 		return -EINVAL;
168 
169 	zdev->fmb = kmem_cache_zalloc(zdev_fmb_cache, GFP_KERNEL);
170 	if (!zdev->fmb)
171 		return -ENOMEM;
172 	WARN_ON((u64) zdev->fmb & 0xf);
173 
174 	/* reset software counters */
175 	ctrs = zpci_get_iommu_ctrs(zdev);
176 	if (ctrs) {
177 		atomic64_set(&ctrs->mapped_pages, 0);
178 		atomic64_set(&ctrs->unmapped_pages, 0);
179 		atomic64_set(&ctrs->global_rpcits, 0);
180 		atomic64_set(&ctrs->sync_map_rpcits, 0);
181 		atomic64_set(&ctrs->sync_rpcits, 0);
182 	}
183 
184 
185 	fib.fmb_addr = virt_to_phys(zdev->fmb);
186 	fib.gd = zdev->gisa;
187 	cc = zpci_mod_fc(req, &fib, &status);
188 	if (cc) {
189 		kmem_cache_free(zdev_fmb_cache, zdev->fmb);
190 		zdev->fmb = NULL;
191 	}
192 	return cc ? -EIO : 0;
193 }
194 
195 /* Modify PCI: Disable PCI function measurement */
196 int zpci_fmb_disable_device(struct zpci_dev *zdev)
197 {
198 	u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_SET_MEASURE);
199 	struct zpci_fib fib = {0};
200 	u8 cc, status;
201 
202 	if (!zdev->fmb)
203 		return -EINVAL;
204 
205 	fib.gd = zdev->gisa;
206 
207 	/* Function measurement is disabled if fmb address is zero */
208 	cc = zpci_mod_fc(req, &fib, &status);
209 	if (cc == 3) /* Function already gone. */
210 		cc = 0;
211 
212 	if (!cc) {
213 		kmem_cache_free(zdev_fmb_cache, zdev->fmb);
214 		zdev->fmb = NULL;
215 	}
216 	return cc ? -EIO : 0;
217 }
218 
219 static int zpci_cfg_load(struct zpci_dev *zdev, int offset, u32 *val, u8 len)
220 {
221 	u64 req = ZPCI_CREATE_REQ(zdev->fh, ZPCI_PCIAS_CFGSPC, len);
222 	u64 data;
223 	int rc;
224 
225 	rc = __zpci_load(&data, req, offset);
226 	if (!rc) {
227 		data = le64_to_cpu((__force __le64) data);
228 		data >>= (8 - len) * 8;
229 		*val = (u32) data;
230 	} else
231 		*val = 0xffffffff;
232 	return rc;
233 }
234 
235 static int zpci_cfg_store(struct zpci_dev *zdev, int offset, u32 val, u8 len)
236 {
237 	u64 req = ZPCI_CREATE_REQ(zdev->fh, ZPCI_PCIAS_CFGSPC, len);
238 	u64 data = val;
239 	int rc;
240 
241 	data <<= (8 - len) * 8;
242 	data = (__force u64) cpu_to_le64(data);
243 	rc = __zpci_store(data, req, offset);
244 	return rc;
245 }
246 
247 resource_size_t pcibios_align_resource(void *data, const struct resource *res,
248 				       resource_size_t size,
249 				       resource_size_t align)
250 {
251 	return 0;
252 }
253 
254 void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size,
255 			   unsigned long prot)
256 {
257 	/*
258 	 * When PCI MIO instructions are unavailable the "physical" address
259 	 * encodes a hint for accessing the PCI memory space it represents.
260 	 * Just pass it unchanged such that ioread/iowrite can decode it.
261 	 */
262 	if (!static_branch_unlikely(&have_mio))
263 		return (void __iomem *)phys_addr;
264 
265 	return generic_ioremap_prot(phys_addr, size, __pgprot(prot));
266 }
267 EXPORT_SYMBOL(ioremap_prot);
268 
269 void iounmap(volatile void __iomem *addr)
270 {
271 	if (static_branch_likely(&have_mio))
272 		generic_iounmap(addr);
273 }
274 EXPORT_SYMBOL(iounmap);
275 
276 /* Create a virtual mapping cookie for a PCI BAR */
277 static void __iomem *pci_iomap_range_fh(struct pci_dev *pdev, int bar,
278 					unsigned long offset, unsigned long max)
279 {
280 	struct zpci_dev *zdev =	to_zpci(pdev);
281 	int idx;
282 
283 	idx = zdev->bars[bar].map_idx;
284 	spin_lock(&zpci_iomap_lock);
285 	/* Detect overrun */
286 	WARN_ON(!++zpci_iomap_start[idx].count);
287 	zpci_iomap_start[idx].fh = zdev->fh;
288 	zpci_iomap_start[idx].bar = bar;
289 	spin_unlock(&zpci_iomap_lock);
290 
291 	return (void __iomem *) ZPCI_ADDR(idx) + offset;
292 }
293 
294 static void __iomem *pci_iomap_range_mio(struct pci_dev *pdev, int bar,
295 					 unsigned long offset,
296 					 unsigned long max)
297 {
298 	unsigned long barsize = pci_resource_len(pdev, bar);
299 	struct zpci_dev *zdev = to_zpci(pdev);
300 	void __iomem *iova;
301 
302 	iova = ioremap((unsigned long) zdev->bars[bar].mio_wt, barsize);
303 	return iova ? iova + offset : iova;
304 }
305 
306 void __iomem *pci_iomap_range(struct pci_dev *pdev, int bar,
307 			      unsigned long offset, unsigned long max)
308 {
309 	if (bar >= PCI_STD_NUM_BARS || !pci_resource_len(pdev, bar))
310 		return NULL;
311 
312 	if (static_branch_likely(&have_mio))
313 		return pci_iomap_range_mio(pdev, bar, offset, max);
314 	else
315 		return pci_iomap_range_fh(pdev, bar, offset, max);
316 }
317 EXPORT_SYMBOL(pci_iomap_range);
318 
319 void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen)
320 {
321 	return pci_iomap_range(dev, bar, 0, maxlen);
322 }
323 EXPORT_SYMBOL(pci_iomap);
324 
325 static void __iomem *pci_iomap_wc_range_mio(struct pci_dev *pdev, int bar,
326 					    unsigned long offset, unsigned long max)
327 {
328 	unsigned long barsize = pci_resource_len(pdev, bar);
329 	struct zpci_dev *zdev = to_zpci(pdev);
330 	void __iomem *iova;
331 
332 	iova = ioremap((unsigned long) zdev->bars[bar].mio_wb, barsize);
333 	return iova ? iova + offset : iova;
334 }
335 
336 void __iomem *pci_iomap_wc_range(struct pci_dev *pdev, int bar,
337 				 unsigned long offset, unsigned long max)
338 {
339 	if (bar >= PCI_STD_NUM_BARS || !pci_resource_len(pdev, bar))
340 		return NULL;
341 
342 	if (static_branch_likely(&have_mio))
343 		return pci_iomap_wc_range_mio(pdev, bar, offset, max);
344 	else
345 		return pci_iomap_range_fh(pdev, bar, offset, max);
346 }
347 EXPORT_SYMBOL(pci_iomap_wc_range);
348 
349 void __iomem *pci_iomap_wc(struct pci_dev *dev, int bar, unsigned long maxlen)
350 {
351 	return pci_iomap_wc_range(dev, bar, 0, maxlen);
352 }
353 EXPORT_SYMBOL(pci_iomap_wc);
354 
355 static void pci_iounmap_fh(struct pci_dev *pdev, void __iomem *addr)
356 {
357 	unsigned int idx = ZPCI_IDX(addr);
358 
359 	spin_lock(&zpci_iomap_lock);
360 	/* Detect underrun */
361 	WARN_ON(!zpci_iomap_start[idx].count);
362 	if (!--zpci_iomap_start[idx].count) {
363 		zpci_iomap_start[idx].fh = 0;
364 		zpci_iomap_start[idx].bar = 0;
365 	}
366 	spin_unlock(&zpci_iomap_lock);
367 }
368 
369 static void pci_iounmap_mio(struct pci_dev *pdev, void __iomem *addr)
370 {
371 	iounmap(addr);
372 }
373 
374 void pci_iounmap(struct pci_dev *pdev, void __iomem *addr)
375 {
376 	if (static_branch_likely(&have_mio))
377 		pci_iounmap_mio(pdev, addr);
378 	else
379 		pci_iounmap_fh(pdev, addr);
380 }
381 EXPORT_SYMBOL(pci_iounmap);
382 
383 static int pci_read(struct pci_bus *bus, unsigned int devfn, int where,
384 		    int size, u32 *val)
385 {
386 	struct zpci_dev *zdev = zdev_from_bus(bus, devfn);
387 
388 	return (zdev) ? zpci_cfg_load(zdev, where, val, size) : -ENODEV;
389 }
390 
391 static int pci_write(struct pci_bus *bus, unsigned int devfn, int where,
392 		     int size, u32 val)
393 {
394 	struct zpci_dev *zdev = zdev_from_bus(bus, devfn);
395 
396 	return (zdev) ? zpci_cfg_store(zdev, where, val, size) : -ENODEV;
397 }
398 
/* Config space accessors; passed to zpci_bus_device_register() in zpci_add_device() */
static struct pci_ops pci_root_ops = {
	.read = pci_read,
	.write = pci_write,
};
403 
404 static void zpci_map_resources(struct pci_dev *pdev)
405 {
406 	struct zpci_dev *zdev = to_zpci(pdev);
407 	resource_size_t len;
408 	int i;
409 
410 	for (i = 0; i < PCI_STD_NUM_BARS; i++) {
411 		len = pci_resource_len(pdev, i);
412 		if (!len)
413 			continue;
414 
415 		if (zpci_use_mio(zdev))
416 			pdev->resource[i].start =
417 				(resource_size_t __force) zdev->bars[i].mio_wt;
418 		else
419 			pdev->resource[i].start = (resource_size_t __force)
420 				pci_iomap_range_fh(pdev, i, 0, 0);
421 		pdev->resource[i].end = pdev->resource[i].start + len - 1;
422 	}
423 
424 	zpci_iov_map_resources(pdev);
425 }
426 
427 static void zpci_unmap_resources(struct pci_dev *pdev)
428 {
429 	struct zpci_dev *zdev = to_zpci(pdev);
430 	resource_size_t len;
431 	int i;
432 
433 	if (zpci_use_mio(zdev))
434 		return;
435 
436 	for (i = 0; i < PCI_STD_NUM_BARS; i++) {
437 		len = pci_resource_len(pdev, i);
438 		if (!len)
439 			continue;
440 		pci_iounmap_fh(pdev, (void __iomem __force *)
441 			       pdev->resource[i].start);
442 	}
443 }
444 
445 static int zpci_alloc_iomap(struct zpci_dev *zdev)
446 {
447 	unsigned long entry;
448 
449 	spin_lock(&zpci_iomap_lock);
450 	entry = find_first_zero_bit(zpci_iomap_bitmap, ZPCI_IOMAP_ENTRIES);
451 	if (entry == ZPCI_IOMAP_ENTRIES) {
452 		spin_unlock(&zpci_iomap_lock);
453 		return -ENOSPC;
454 	}
455 	set_bit(entry, zpci_iomap_bitmap);
456 	spin_unlock(&zpci_iomap_lock);
457 	return entry;
458 }
459 
460 static void zpci_free_iomap(struct zpci_dev *zdev, int entry)
461 {
462 	spin_lock(&zpci_iomap_lock);
463 	memset(&zpci_iomap_start[entry], 0, sizeof(struct zpci_iomap_entry));
464 	clear_bit(entry, zpci_iomap_bitmap);
465 	spin_unlock(&zpci_iomap_lock);
466 }
467 
468 static void zpci_do_update_iomap_fh(struct zpci_dev *zdev, u32 fh)
469 {
470 	int bar, idx;
471 
472 	spin_lock(&zpci_iomap_lock);
473 	for (bar = 0; bar < PCI_STD_NUM_BARS; bar++) {
474 		if (!zdev->bars[bar].size)
475 			continue;
476 		idx = zdev->bars[bar].map_idx;
477 		if (!zpci_iomap_start[idx].count)
478 			continue;
479 		WRITE_ONCE(zpci_iomap_start[idx].fh, zdev->fh);
480 	}
481 	spin_unlock(&zpci_iomap_lock);
482 }
483 
484 void zpci_update_fh(struct zpci_dev *zdev, u32 fh)
485 {
486 	if (!fh || zdev->fh == fh)
487 		return;
488 
489 	zdev->fh = fh;
490 	if (zpci_use_mio(zdev))
491 		return;
492 	if (zdev->has_resources && zdev_enabled(zdev))
493 		zpci_do_update_iomap_fh(zdev, fh);
494 }
495 
496 static struct resource *__alloc_res(struct zpci_dev *zdev, unsigned long start,
497 				    unsigned long size, unsigned long flags)
498 {
499 	struct resource *r;
500 
501 	r = kzalloc(sizeof(*r), GFP_KERNEL);
502 	if (!r)
503 		return NULL;
504 
505 	r->start = start;
506 	r->end = r->start + size - 1;
507 	r->flags = flags;
508 	r->name = zdev->res_name;
509 
510 	if (request_resource(&iomem_resource, r)) {
511 		kfree(r);
512 		return NULL;
513 	}
514 	return r;
515 }
516 
517 int zpci_setup_bus_resources(struct zpci_dev *zdev)
518 {
519 	unsigned long addr, size, flags;
520 	struct resource *res;
521 	int i, entry;
522 
523 	snprintf(zdev->res_name, sizeof(zdev->res_name),
524 		 "PCI Bus %04x:%02x", zdev->uid, ZPCI_BUS_NR);
525 
526 	for (i = 0; i < PCI_STD_NUM_BARS; i++) {
527 		if (!zdev->bars[i].size)
528 			continue;
529 		entry = zpci_alloc_iomap(zdev);
530 		if (entry < 0)
531 			return entry;
532 		zdev->bars[i].map_idx = entry;
533 
534 		/* only MMIO is supported */
535 		flags = IORESOURCE_MEM;
536 		if (zdev->bars[i].val & 8)
537 			flags |= IORESOURCE_PREFETCH;
538 		if (zdev->bars[i].val & 4)
539 			flags |= IORESOURCE_MEM_64;
540 
541 		if (zpci_use_mio(zdev))
542 			addr = (unsigned long) zdev->bars[i].mio_wt;
543 		else
544 			addr = ZPCI_ADDR(entry);
545 		size = 1UL << zdev->bars[i].size;
546 
547 		res = __alloc_res(zdev, addr, size, flags);
548 		if (!res) {
549 			zpci_free_iomap(zdev, entry);
550 			return -ENOMEM;
551 		}
552 		zdev->bars[i].res = res;
553 	}
554 	zdev->has_resources = 1;
555 
556 	return 0;
557 }
558 
559 static void zpci_cleanup_bus_resources(struct zpci_dev *zdev)
560 {
561 	struct resource *res;
562 	int i;
563 
564 	pci_lock_rescan_remove();
565 	for (i = 0; i < PCI_STD_NUM_BARS; i++) {
566 		res = zdev->bars[i].res;
567 		if (!res)
568 			continue;
569 
570 		release_resource(res);
571 		pci_bus_remove_resource(zdev->zbus->bus, res);
572 		zpci_free_iomap(zdev, zdev->bars[i].map_idx);
573 		zdev->bars[i].res = NULL;
574 		kfree(res);
575 	}
576 	zdev->has_resources = 0;
577 	pci_unlock_rescan_remove();
578 }
579 
580 int pcibios_device_add(struct pci_dev *pdev)
581 {
582 	struct zpci_dev *zdev = to_zpci(pdev);
583 	struct resource *res;
584 	int i;
585 
586 	/* The pdev has a reference to the zdev via its bus */
587 	zpci_zdev_get(zdev);
588 	if (pdev->is_physfn)
589 		pdev->no_vf_scan = 1;
590 
591 	zpci_map_resources(pdev);
592 
593 	for (i = 0; i < PCI_STD_NUM_BARS; i++) {
594 		res = &pdev->resource[i];
595 		if (res->parent || !res->flags)
596 			continue;
597 		pci_claim_resource(pdev, i);
598 	}
599 
600 	return 0;
601 }
602 
/*
 * Arch hook for a pci_dev going away: release the BAR cookies and call
 * zpci_zdev_put(), which presumably drops the reference taken in
 * pcibios_device_add().
 */
void pcibios_release_device(struct pci_dev *pdev)
{
	struct zpci_dev *zdev;

	zdev = to_zpci(pdev);
	zpci_unmap_resources(pdev);
	zpci_zdev_put(zdev);
}
610 
611 int pcibios_enable_device(struct pci_dev *pdev, int mask)
612 {
613 	struct zpci_dev *zdev = to_zpci(pdev);
614 
615 	zpci_debug_init_device(zdev, dev_name(&pdev->dev));
616 	zpci_fmb_enable_device(zdev);
617 
618 	return pci_enable_resources(pdev, mask);
619 }
620 
/*
 * Arch hook for disabling a pci_dev: turn function measurement off again
 * and remove the per-device debug data.
 */
void pcibios_disable_device(struct pci_dev *pdev)
{
	struct zpci_dev *zdev;

	zdev = to_zpci(pdev);
	zpci_fmb_disable_device(zdev);
	zpci_debug_exit_device(zdev);
}
628 
629 static int __zpci_register_domain(int domain)
630 {
631 	spin_lock(&zpci_domain_lock);
632 	if (test_bit(domain, zpci_domain)) {
633 		spin_unlock(&zpci_domain_lock);
634 		pr_err("Domain %04x is already assigned\n", domain);
635 		return -EEXIST;
636 	}
637 	set_bit(domain, zpci_domain);
638 	spin_unlock(&zpci_domain_lock);
639 	return domain;
640 }
641 
642 static int __zpci_alloc_domain(void)
643 {
644 	int domain;
645 
646 	spin_lock(&zpci_domain_lock);
647 	/*
648 	 * We can always auto allocate domains below ZPCI_NR_DEVICES.
649 	 * There is either a free domain or we have reached the maximum in
650 	 * which case we would have bailed earlier.
651 	 */
652 	domain = find_first_zero_bit(zpci_domain, ZPCI_NR_DEVICES);
653 	set_bit(domain, zpci_domain);
654 	spin_unlock(&zpci_domain_lock);
655 	return domain;
656 }
657 
658 int zpci_alloc_domain(int domain)
659 {
660 	if (zpci_unique_uid) {
661 		if (domain)
662 			return __zpci_register_domain(domain);
663 		pr_warn("UID checking was active but no UID is provided: switching to automatic domain allocation\n");
664 		update_uid_checking(false);
665 	}
666 	return __zpci_alloc_domain();
667 }
668 
669 void zpci_free_domain(int domain)
670 {
671 	spin_lock(&zpci_domain_lock);
672 	clear_bit(domain, zpci_domain);
673 	spin_unlock(&zpci_domain_lock);
674 }
675 
676 
677 int zpci_enable_device(struct zpci_dev *zdev)
678 {
679 	u32 fh = zdev->fh;
680 	int rc = 0;
681 
682 	if (clp_enable_fh(zdev, &fh, ZPCI_NR_DMA_SPACES))
683 		rc = -EIO;
684 	else
685 		zpci_update_fh(zdev, fh);
686 	return rc;
687 }
688 EXPORT_SYMBOL_GPL(zpci_enable_device);
689 
690 int zpci_disable_device(struct zpci_dev *zdev)
691 {
692 	u32 fh = zdev->fh;
693 	int cc, rc = 0;
694 
695 	cc = clp_disable_fh(zdev, &fh);
696 	if (!cc) {
697 		zpci_update_fh(zdev, fh);
698 	} else if (cc == CLP_RC_SETPCIFN_ALRDY) {
699 		pr_info("Disabling PCI function %08x had no effect as it was already disabled\n",
700 			zdev->fid);
701 		/* Function is already disabled - update handle */
702 		rc = clp_refresh_fh(zdev->fid, &fh);
703 		if (!rc) {
704 			zpci_update_fh(zdev, fh);
705 			rc = -EINVAL;
706 		}
707 	} else {
708 		rc = -EIO;
709 	}
710 	return rc;
711 }
712 EXPORT_SYMBOL_GPL(zpci_disable_device);
713 
714 /**
715  * zpci_hot_reset_device - perform a reset of the given zPCI function
716  * @zdev: the slot which should be reset
717  *
718  * Performs a low level reset of the zPCI function. The reset is low level in
719  * the sense that the zPCI function can be reset without detaching it from the
720  * common PCI subsystem. The reset may be performed while under control of
721  * either DMA or IOMMU APIs in which case the existing DMA/IOMMU translation
722  * table is reinstated at the end of the reset.
723  *
724  * After the reset the functions internal state is reset to an initial state
725  * equivalent to its state during boot when first probing a driver.
726  * Consequently after reset the PCI function requires re-initialization via the
727  * common PCI code including re-enabling IRQs via pci_alloc_irq_vectors()
728  * and enabling the function via e.g. pci_enable_device_flags(). The caller
729  * must guard against concurrent reset attempts.
730  *
731  * In most cases this function should not be called directly but through
732  * pci_reset_function() or pci_reset_bus() which handle the save/restore and
733  * locking - asserted by lockdep.
734  *
735  * Return: 0 on success and an error value otherwise
736  */
737 int zpci_hot_reset_device(struct zpci_dev *zdev)
738 {
739 	u8 status;
740 	int rc;
741 
742 	lockdep_assert_held(&zdev->state_lock);
743 	zpci_dbg(3, "rst fid:%x, fh:%x\n", zdev->fid, zdev->fh);
744 	if (zdev_enabled(zdev)) {
745 		/* Disables device access, DMAs and IRQs (reset state) */
746 		rc = zpci_disable_device(zdev);
747 		/*
748 		 * Due to a z/VM vs LPAR inconsistency in the error state the
749 		 * FH may indicate an enabled device but disable says the
750 		 * device is already disabled don't treat it as an error here.
751 		 */
752 		if (rc == -EINVAL)
753 			rc = 0;
754 		if (rc)
755 			return rc;
756 	}
757 
758 	rc = zpci_enable_device(zdev);
759 	if (rc)
760 		return rc;
761 
762 	if (zdev->dma_table)
763 		rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
764 					virt_to_phys(zdev->dma_table), &status);
765 	if (rc) {
766 		zpci_disable_device(zdev);
767 		return rc;
768 	}
769 
770 	return 0;
771 }
772 
773 /**
774  * zpci_create_device() - Create a new zpci_dev and add it to the zbus
775  * @fid: Function ID of the device to be created
776  * @fh: Current Function Handle of the device to be created
777  * @state: Initial state after creation either Standby or Configured
778  *
779  * Creates a new zpci device and adds it to its, possibly newly created, zbus
780  * as well as zpci_list.
781  *
782  * Returns: the zdev on success or an error pointer otherwise
783  */
784 struct zpci_dev *zpci_create_device(u32 fid, u32 fh, enum zpci_state state)
785 {
786 	struct zpci_dev *zdev;
787 	int rc;
788 
789 	zdev = kzalloc(sizeof(*zdev), GFP_KERNEL);
790 	if (!zdev)
791 		return ERR_PTR(-ENOMEM);
792 
793 	/* FID and Function Handle are the static/dynamic identifiers */
794 	zdev->fid = fid;
795 	zdev->fh = fh;
796 
797 	/* Query function properties and update zdev */
798 	rc = clp_query_pci_fn(zdev);
799 	if (rc)
800 		goto error;
801 	zdev->state =  state;
802 
803 	kref_init(&zdev->kref);
804 	mutex_init(&zdev->state_lock);
805 	mutex_init(&zdev->fmb_lock);
806 	mutex_init(&zdev->kzdev_lock);
807 
808 	return zdev;
809 
810 error:
811 	zpci_dbg(0, "crt fid:%x, rc:%d\n", fid, rc);
812 	kfree(zdev);
813 	return ERR_PTR(rc);
814 }
815 
816 int zpci_add_device(struct zpci_dev *zdev)
817 {
818 	int rc;
819 
820 	zpci_dbg(1, "add fid:%x, fh:%x, c:%d\n", zdev->fid, zdev->fh, zdev->state);
821 	rc = zpci_init_iommu(zdev);
822 	if (rc)
823 		goto error;
824 
825 	rc = zpci_bus_device_register(zdev, &pci_root_ops);
826 	if (rc)
827 		goto error_destroy_iommu;
828 
829 	spin_lock(&zpci_list_lock);
830 	list_add_tail(&zdev->entry, &zpci_list);
831 	spin_unlock(&zpci_list_lock);
832 	return 0;
833 
834 error_destroy_iommu:
835 	zpci_destroy_iommu(zdev);
836 error:
837 	zpci_dbg(0, "add fid:%x, rc:%d\n", zdev->fid, rc);
838 	return rc;
839 }
840 
841 bool zpci_is_device_configured(struct zpci_dev *zdev)
842 {
843 	enum zpci_state state = zdev->state;
844 
845 	return state != ZPCI_FN_STATE_RESERVED &&
846 		state != ZPCI_FN_STATE_STANDBY;
847 }
848 
849 /**
850  * zpci_scan_configured_device() - Scan a freshly configured zpci_dev
851  * @zdev: The zpci_dev to be configured
852  * @fh: The general function handle supplied by the platform
853  *
854  * Given a device in the configuration state Configured, enables, scans and
855  * adds it to the common code PCI subsystem if possible. If any failure occurs,
856  * the zpci_dev is left disabled.
857  *
858  * Return: 0 on success, or an error code otherwise
859  */
860 int zpci_scan_configured_device(struct zpci_dev *zdev, u32 fh)
861 {
862 	zpci_update_fh(zdev, fh);
863 	return zpci_bus_scan_device(zdev);
864 }
865 
866 /**
867  * zpci_deconfigure_device() - Deconfigure a zpci_dev
868  * @zdev: The zpci_dev to configure
869  *
870  * Deconfigure a zPCI function that is currently configured and possibly known
871  * to the common code PCI subsystem.
872  * If any failure occurs the device is left as is.
873  *
874  * Return: 0 on success, or an error code otherwise
875  */
876 int zpci_deconfigure_device(struct zpci_dev *zdev)
877 {
878 	int rc;
879 
880 	lockdep_assert_held(&zdev->state_lock);
881 	if (zdev->state != ZPCI_FN_STATE_CONFIGURED)
882 		return 0;
883 
884 	if (zdev->zbus->bus)
885 		zpci_bus_remove_device(zdev, false);
886 
887 	if (zdev_enabled(zdev)) {
888 		rc = zpci_disable_device(zdev);
889 		if (rc)
890 			return rc;
891 	}
892 
893 	rc = sclp_pci_deconfigure(zdev->fid);
894 	zpci_dbg(3, "deconf fid:%x, rc:%d\n", zdev->fid, rc);
895 	if (rc)
896 		return rc;
897 	zdev->state = ZPCI_FN_STATE_STANDBY;
898 
899 	return 0;
900 }
901 
902 /**
903  * zpci_device_reserved() - Mark device as reserved
904  * @zdev: the zpci_dev that was reserved
905  *
906  * Handle the case that a given zPCI function was reserved by another system.
907  * After a call to this function the zpci_dev can not be found via
908  * get_zdev_by_fid() anymore but may still be accessible via existing
909  * references though it will not be functional anymore.
910  */
911 void zpci_device_reserved(struct zpci_dev *zdev)
912 {
913 	/*
914 	 * Remove device from zpci_list as it is going away. This also
915 	 * makes sure we ignore subsequent zPCI events for this device.
916 	 */
917 	spin_lock(&zpci_list_lock);
918 	list_del(&zdev->entry);
919 	spin_unlock(&zpci_list_lock);
920 	zdev->state = ZPCI_FN_STATE_RESERVED;
921 	zpci_dbg(3, "rsv fid:%x\n", zdev->fid);
922 	zpci_zdev_put(zdev);
923 }
924 
925 void zpci_release_device(struct kref *kref)
926 {
927 	struct zpci_dev *zdev = container_of(kref, struct zpci_dev, kref);
928 	int ret;
929 
930 	if (zdev->has_hp_slot)
931 		zpci_exit_slot(zdev);
932 
933 	if (zdev->zbus->bus)
934 		zpci_bus_remove_device(zdev, false);
935 
936 	if (zdev_enabled(zdev))
937 		zpci_disable_device(zdev);
938 
939 	switch (zdev->state) {
940 	case ZPCI_FN_STATE_CONFIGURED:
941 		ret = sclp_pci_deconfigure(zdev->fid);
942 		zpci_dbg(3, "deconf fid:%x, rc:%d\n", zdev->fid, ret);
943 		fallthrough;
944 	case ZPCI_FN_STATE_STANDBY:
945 		if (zdev->has_hp_slot)
946 			zpci_exit_slot(zdev);
947 		spin_lock(&zpci_list_lock);
948 		list_del(&zdev->entry);
949 		spin_unlock(&zpci_list_lock);
950 		zpci_dbg(3, "rsv fid:%x\n", zdev->fid);
951 		fallthrough;
952 	case ZPCI_FN_STATE_RESERVED:
953 		if (zdev->has_resources)
954 			zpci_cleanup_bus_resources(zdev);
955 		zpci_bus_device_unregister(zdev);
956 		zpci_destroy_iommu(zdev);
957 		fallthrough;
958 	default:
959 		break;
960 	}
961 	zpci_dbg(3, "rem fid:%x\n", zdev->fid);
962 	kfree_rcu(zdev, rcu);
963 }
964 
965 int zpci_report_error(struct pci_dev *pdev,
966 		      struct zpci_report_error_header *report)
967 {
968 	struct zpci_dev *zdev = to_zpci(pdev);
969 
970 	return sclp_pci_report(report, zdev->fh, zdev->fid);
971 }
972 EXPORT_SYMBOL(zpci_report_error);
973 
974 /**
975  * zpci_clear_error_state() - Clears the zPCI error state of the device
976  * @zdev: The zdev for which the zPCI error state should be reset
977  *
978  * Clear the zPCI error state of the device. If clearing the zPCI error state
979  * fails the device is left in the error state. In this case it may make sense
980  * to call zpci_io_perm_failure() on the associated pdev if it exists.
981  *
982  * Returns: 0 on success, -EIO otherwise
983  */
984 int zpci_clear_error_state(struct zpci_dev *zdev)
985 {
986 	u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_RESET_ERROR);
987 	struct zpci_fib fib = {0};
988 	u8 status;
989 	int cc;
990 
991 	cc = zpci_mod_fc(req, &fib, &status);
992 	if (cc) {
993 		zpci_dbg(3, "ces fid:%x, cc:%d, status:%x\n", zdev->fid, cc, status);
994 		return -EIO;
995 	}
996 
997 	return 0;
998 }
999 
1000 /**
1001  * zpci_reset_load_store_blocked() - Re-enables L/S from error state
1002  * @zdev: The zdev for which to unblock load/store access
1003  *
1004  * Re-enables load/store access for a PCI function in the error state while
1005  * keeping DMA blocked. In this state drivers can poke MMIO space to determine
1006  * if error recovery is possible while catching any rogue DMA access from the
1007  * device.
1008  *
1009  * Returns: 0 on success, -EIO otherwise
1010  */
1011 int zpci_reset_load_store_blocked(struct zpci_dev *zdev)
1012 {
1013 	u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_RESET_BLOCK);
1014 	struct zpci_fib fib = {0};
1015 	u8 status;
1016 	int cc;
1017 
1018 	cc = zpci_mod_fc(req, &fib, &status);
1019 	if (cc) {
1020 		zpci_dbg(3, "rls fid:%x, cc:%d, status:%x\n", zdev->fid, cc, status);
1021 		return -EIO;
1022 	}
1023 
1024 	return 0;
1025 }
1026 
1027 static int zpci_mem_init(void)
1028 {
1029 	BUILD_BUG_ON(!is_power_of_2(__alignof__(struct zpci_fmb)) ||
1030 		     __alignof__(struct zpci_fmb) < sizeof(struct zpci_fmb));
1031 
1032 	zdev_fmb_cache = kmem_cache_create("PCI_FMB_cache", sizeof(struct zpci_fmb),
1033 					   __alignof__(struct zpci_fmb), 0, NULL);
1034 	if (!zdev_fmb_cache)
1035 		goto error_fmb;
1036 
1037 	zpci_iomap_start = kcalloc(ZPCI_IOMAP_ENTRIES,
1038 				   sizeof(*zpci_iomap_start), GFP_KERNEL);
1039 	if (!zpci_iomap_start)
1040 		goto error_iomap;
1041 
1042 	zpci_iomap_bitmap = kcalloc(BITS_TO_LONGS(ZPCI_IOMAP_ENTRIES),
1043 				    sizeof(*zpci_iomap_bitmap), GFP_KERNEL);
1044 	if (!zpci_iomap_bitmap)
1045 		goto error_iomap_bitmap;
1046 
1047 	if (static_branch_likely(&have_mio))
1048 		clp_setup_writeback_mio();
1049 
1050 	return 0;
1051 error_iomap_bitmap:
1052 	kfree(zpci_iomap_start);
1053 error_iomap:
1054 	kmem_cache_destroy(zdev_fmb_cache);
1055 error_fmb:
1056 	return -ENOMEM;
1057 }
1058 
1059 static void zpci_mem_exit(void)
1060 {
1061 	kfree(zpci_iomap_bitmap);
1062 	kfree(zpci_iomap_start);
1063 	kmem_cache_destroy(zdev_fmb_cache);
1064 }
1065 
/* cleared by the "off" option in pcibios_setup(); checked in pci_base_init() */
static unsigned int s390_pci_probe __initdata = 1;
/* set by the "force_floating" option in pcibios_setup() */
unsigned int s390_pci_force_floating __initdata;
/* set to 1 at the end of a successful pci_base_init(); see zpci_is_enabled() */
static unsigned int s390_pci_initialized;
1069 
1070 char * __init pcibios_setup(char *str)
1071 {
1072 	if (!strcmp(str, "off")) {
1073 		s390_pci_probe = 0;
1074 		return NULL;
1075 	}
1076 	if (!strcmp(str, "nomio")) {
1077 		get_lowcore()->machine_flags &= ~MACHINE_FLAG_PCI_MIO;
1078 		return NULL;
1079 	}
1080 	if (!strcmp(str, "force_floating")) {
1081 		s390_pci_force_floating = 1;
1082 		return NULL;
1083 	}
1084 	if (!strcmp(str, "norid")) {
1085 		s390_pci_no_rid = 1;
1086 		return NULL;
1087 	}
1088 	return str;
1089 }
1090 
1091 bool zpci_is_enabled(void)
1092 {
1093 	return s390_pci_initialized;
1094 }
1095 
1096 static int zpci_cmp_rid(void *priv, const struct list_head *a,
1097 			const struct list_head *b)
1098 {
1099 	struct zpci_dev *za = container_of(a, struct zpci_dev, entry);
1100 	struct zpci_dev *zb = container_of(b, struct zpci_dev, entry);
1101 
1102 	/*
1103 	 * PCI functions without RID available maintain original order
1104 	 * between themselves but sort before those with RID.
1105 	 */
1106 	if (za->rid == zb->rid)
1107 		return za->rid_available > zb->rid_available;
1108 	/*
1109 	 * PCI functions with RID sort by RID ascending.
1110 	 */
1111 	return za->rid > zb->rid;
1112 }
1113 
1114 static void zpci_add_devices(struct list_head *scan_list)
1115 {
1116 	struct zpci_dev *zdev, *tmp;
1117 
1118 	list_sort(NULL, scan_list, &zpci_cmp_rid);
1119 	list_for_each_entry_safe(zdev, tmp, scan_list, entry) {
1120 		list_del_init(&zdev->entry);
1121 		zpci_add_device(zdev);
1122 	}
1123 }
1124 
1125 int zpci_scan_devices(void)
1126 {
1127 	LIST_HEAD(scan_list);
1128 	int rc;
1129 
1130 	rc = clp_scan_pci_devices(&scan_list);
1131 	if (rc)
1132 		return rc;
1133 
1134 	zpci_add_devices(&scan_list);
1135 	zpci_bus_scan_busses();
1136 	return 0;
1137 }
1138 
1139 static int __init pci_base_init(void)
1140 {
1141 	int rc;
1142 
1143 	if (!s390_pci_probe)
1144 		return 0;
1145 
1146 	if (!test_facility(69) || !test_facility(71)) {
1147 		pr_info("PCI is not supported because CPU facilities 69 or 71 are not available\n");
1148 		return 0;
1149 	}
1150 
1151 	if (MACHINE_HAS_PCI_MIO) {
1152 		static_branch_enable(&have_mio);
1153 		system_ctl_set_bit(2, CR2_MIO_ADDRESSING_BIT);
1154 	}
1155 
1156 	rc = zpci_debug_init();
1157 	if (rc)
1158 		goto out;
1159 
1160 	rc = zpci_mem_init();
1161 	if (rc)
1162 		goto out_mem;
1163 
1164 	rc = zpci_irq_init();
1165 	if (rc)
1166 		goto out_irq;
1167 
1168 	rc = zpci_scan_devices();
1169 	if (rc)
1170 		goto out_find;
1171 
1172 	s390_pci_initialized = 1;
1173 	return 0;
1174 
1175 out_find:
1176 	zpci_irq_exit();
1177 out_irq:
1178 	zpci_mem_exit();
1179 out_mem:
1180 	zpci_debug_exit();
1181 out:
1182 	return rc;
1183 }
1184 subsys_initcall_sync(pci_base_init);
1185