// SPDX-License-Identifier: GPL-2.0
/*
 * IOMMU API for s390 PCI devices
 *
 * Copyright IBM Corp. 2015
 * Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com>
 */

#include <linux/pci.h>
#include <linux/iommu.h>
#include <linux/iommu-helper.h>
#include <linux/sizes.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <asm/pci_dma.h>

#include "dma-iommu.h"

static const struct iommu_ops s390_iommu_ops, s390_iommu_rtr_ops;

static struct kmem_cache *dma_region_table_cache;
static struct kmem_cache *dma_page_table_cache;

static u64 s390_iommu_aperture;
static u32 s390_iommu_aperture_factor = 1;

struct s390_domain {
	struct iommu_domain	domain;
	struct list_head	devices;
	struct zpci_iommu_ctrs	ctrs;
	unsigned long		*dma_table;
	spinlock_t		list_lock;
	struct rcu_head		rcu;
};

static struct iommu_domain blocking_domain;

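/*
 * CPU-side view of the zPCI I/O translation tables: a region table whose
 * entries (indexed by RTX) point to segment tables (indexed by SX), which
 * in turn point to 4K page tables (indexed by PX). The helpers below
 * extract each index from a DMA address and manipulate the entry formats.
 */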
static inline unsigned int calc_rtx(dma_addr_t ptr)
{
	return ((unsigned long)ptr >> ZPCI_RT_SHIFT) & ZPCI_INDEX_MASK;
}

static inline unsigned int calc_sx(dma_addr_t ptr)
{
	return ((unsigned long)ptr >> ZPCI_ST_SHIFT) & ZPCI_INDEX_MASK;
}

static inline unsigned int calc_px(dma_addr_t ptr)
{
	return ((unsigned long)ptr >> PAGE_SHIFT) & ZPCI_PT_MASK;
}

static inline void set_pt_pfaa(unsigned long *entry, phys_addr_t pfaa)
{
	*entry &= ZPCI_PTE_FLAG_MASK;
	*entry |= (pfaa & ZPCI_PTE_ADDR_MASK);
}

static inline void set_rt_sto(unsigned long *entry, phys_addr_t sto)
{
	*entry &= ZPCI_RTE_FLAG_MASK;
	*entry |= (sto & ZPCI_RTE_ADDR_MASK);
	*entry |= ZPCI_TABLE_TYPE_RTX;
}

static inline void set_st_pto(unsigned long *entry, phys_addr_t pto)
{
	*entry &= ZPCI_STE_FLAG_MASK;
	*entry |= (pto & ZPCI_STE_ADDR_MASK);
	*entry |= ZPCI_TABLE_TYPE_SX;
}

static inline void validate_rt_entry(unsigned long *entry)
{
	*entry &= ~ZPCI_TABLE_VALID_MASK;
	*entry &= ~ZPCI_TABLE_OFFSET_MASK;
	*entry |= ZPCI_TABLE_VALID;
	*entry |= ZPCI_TABLE_LEN_RTX;
}

static inline void validate_st_entry(unsigned long *entry)
{
	*entry &= ~ZPCI_TABLE_VALID_MASK;
	*entry |= ZPCI_TABLE_VALID;
}

static inline void invalidate_pt_entry(unsigned long *entry)
{
	WARN_ON_ONCE((*entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_INVALID);
	*entry &= ~ZPCI_PTE_VALID_MASK;
	*entry |= ZPCI_PTE_INVALID;
}

static inline void validate_pt_entry(unsigned long *entry)
{
	WARN_ON_ONCE((*entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID);
	*entry &= ~ZPCI_PTE_VALID_MASK;
	*entry |= ZPCI_PTE_VALID;
}

static inline void entry_set_protected(unsigned long *entry)
{
	*entry &= ~ZPCI_TABLE_PROT_MASK;
	*entry |= ZPCI_TABLE_PROTECTED;
}

static inline void entry_clr_protected(unsigned long *entry)
{
	*entry &= ~ZPCI_TABLE_PROT_MASK;
	*entry |= ZPCI_TABLE_UNPROTECTED;
}

static inline int reg_entry_isvalid(unsigned long entry)
{
	return (entry & ZPCI_TABLE_VALID_MASK) == ZPCI_TABLE_VALID;
}

static inline int pt_entry_isvalid(unsigned long entry)
{
	return (entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID;
}

static inline unsigned long *get_rt_sto(unsigned long entry)
{
	if ((entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_RTX)
		return phys_to_virt(entry & ZPCI_RTE_ADDR_MASK);
	else
		return NULL;
}

static inline unsigned long *get_st_pto(unsigned long entry)
{
	if ((entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_SX)
		return phys_to_virt(entry & ZPCI_STE_ADDR_MASK);
	else
		return NULL;
}

static int __init dma_alloc_cpu_table_caches(void)
{
	dma_region_table_cache = kmem_cache_create("PCI_DMA_region_tables",
						   ZPCI_TABLE_SIZE,
						   ZPCI_TABLE_ALIGN,
						   0, NULL);
	if (!dma_region_table_cache)
		return -ENOMEM;

	dma_page_table_cache = kmem_cache_create("PCI_DMA_page_tables",
						 ZPCI_PT_SIZE,
						 ZPCI_PT_ALIGN,
						 0, NULL);
	if (!dma_page_table_cache) {
		kmem_cache_destroy(dma_region_table_cache);
		return -ENOMEM;
	}
	return 0;
}

static unsigned long *dma_alloc_cpu_table(gfp_t gfp)
{
	unsigned long *table, *entry;

	table = kmem_cache_alloc(dma_region_table_cache, gfp);
	if (!table)
		return NULL;

	for (entry = table; entry < table + ZPCI_TABLE_ENTRIES; entry++)
		*entry = ZPCI_TABLE_INVALID;
	return table;
}

static void dma_free_cpu_table(void *table)
{
	kmem_cache_free(dma_region_table_cache, table);
}

static void dma_free_page_table(void *table)
{
	kmem_cache_free(dma_page_table_cache, table);
}

static void dma_free_seg_table(unsigned long entry)
{
	unsigned long *sto = get_rt_sto(entry);
	int sx;

	for (sx = 0; sx < ZPCI_TABLE_ENTRIES; sx++)
		if (reg_entry_isvalid(sto[sx]))
			dma_free_page_table(get_st_pto(sto[sx]));

	dma_free_cpu_table(sto);
}

static void dma_cleanup_tables(unsigned long *table)
{
	int rtx;

	if (!table)
		return;

	for (rtx = 0; rtx < ZPCI_TABLE_ENTRIES; rtx++)
		if (reg_entry_isvalid(table[rtx]))
			dma_free_seg_table(table[rtx]);

	dma_free_cpu_table(table);
}

static unsigned long *dma_alloc_page_table(gfp_t gfp)
{
	unsigned long *table, *entry;

	table = kmem_cache_alloc(dma_page_table_cache, gfp);
	if (!table)
		return NULL;

	for (entry = table; entry < table + ZPCI_PT_ENTRIES; entry++)
		*entry = ZPCI_PTE_INVALID;
	return table;
}

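/*
 * Missing translation-table levels are populated lazily and locklessly:
 * a new table is allocated, a candidate entry is built in a local copy,
 * and cmpxchg() installs it only if the slot is still invalid. If another
 * CPU won the race, the freshly allocated table is freed again and the
 * winner's table is used instead.
 */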
static unsigned long *dma_get_seg_table_origin(unsigned long *rtep, gfp_t gfp)
{
	unsigned long old_rte, rte;
	unsigned long *sto;

	rte = READ_ONCE(*rtep);
	if (reg_entry_isvalid(rte)) {
		sto = get_rt_sto(rte);
	} else {
		sto = dma_alloc_cpu_table(gfp);
		if (!sto)
			return NULL;

		set_rt_sto(&rte, virt_to_phys(sto));
		validate_rt_entry(&rte);
		entry_clr_protected(&rte);

		old_rte = cmpxchg(rtep, ZPCI_TABLE_INVALID, rte);
		if (old_rte != ZPCI_TABLE_INVALID) {
			/* Someone else was faster, use theirs */
			dma_free_cpu_table(sto);
			sto = get_rt_sto(old_rte);
		}
	}
	return sto;
}

static unsigned long *dma_get_page_table_origin(unsigned long *step, gfp_t gfp)
{
	unsigned long old_ste, ste;
	unsigned long *pto;

	ste = READ_ONCE(*step);
	if (reg_entry_isvalid(ste)) {
		pto = get_st_pto(ste);
	} else {
		pto = dma_alloc_page_table(gfp);
		if (!pto)
			return NULL;
		set_st_pto(&ste, virt_to_phys(pto));
		validate_st_entry(&ste);
		entry_clr_protected(&ste);

		old_ste = cmpxchg(step, ZPCI_TABLE_INVALID, ste);
		if (old_ste != ZPCI_TABLE_INVALID) {
			/* Someone else was faster, use theirs */
			dma_free_page_table(pto);
			pto = get_st_pto(old_ste);
		}
	}
	return pto;
}

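/*
 * Walk the CPU translation table for dma_addr, allocating any missing
 * segment or page table on the way, and return a pointer to the page
 * table entry. Returns NULL if a required table could not be allocated.
 */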
static unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr, gfp_t gfp)
{
	unsigned long *sto, *pto;
	unsigned int rtx, sx, px;

	rtx = calc_rtx(dma_addr);
	sto = dma_get_seg_table_origin(&rto[rtx], gfp);
	if (!sto)
		return NULL;

	sx = calc_sx(dma_addr);
	pto = dma_get_page_table_origin(&sto[sx], gfp);
	if (!pto)
		return NULL;

	px = calc_px(dma_addr);
	return &pto[px];
}

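/*
 * Build the new page table entry in a local copy and publish it with
 * xchg(): ZPCI_PTE_INVALID unmaps the page, otherwise the page frame
 * address is set and the entry validated; ZPCI_TABLE_PROTECTED makes
 * the mapping read-only for the device.
 */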
static void dma_update_cpu_trans(unsigned long *ptep, phys_addr_t page_addr, int flags)
{
	unsigned long pte;

	pte = READ_ONCE(*ptep);
	if (flags & ZPCI_PTE_INVALID) {
		invalidate_pt_entry(&pte);
	} else {
		set_pt_pfaa(&pte, page_addr);
		validate_pt_entry(&pte);
	}

	if (flags & ZPCI_TABLE_PROTECTED)
		entry_set_protected(&pte);
	else
		entry_clr_protected(&pte);

	xchg(ptep, pte);
}

static struct s390_domain *to_s390_domain(struct iommu_domain *dom)
{
	return container_of(dom, struct s390_domain, domain);
}

static bool s390_iommu_capable(struct device *dev, enum iommu_cap cap)
{
	struct zpci_dev *zdev = to_zpci_dev(dev);

	switch (cap) {
	case IOMMU_CAP_CACHE_COHERENCY:
		return true;
	case IOMMU_CAP_DEFERRED_FLUSH:
		return zdev->pft != PCI_FUNC_TYPE_ISM;
	default:
		return false;
	}
}

static struct iommu_domain *s390_domain_alloc_paging(struct device *dev)
{
	struct s390_domain *s390_domain;

	s390_domain = kzalloc(sizeof(*s390_domain), GFP_KERNEL);
	if (!s390_domain)
		return NULL;

	s390_domain->dma_table = dma_alloc_cpu_table(GFP_KERNEL);
	if (!s390_domain->dma_table) {
		kfree(s390_domain);
		return NULL;
	}
	s390_domain->domain.geometry.force_aperture = true;
	s390_domain->domain.geometry.aperture_start = 0;
	s390_domain->domain.geometry.aperture_end = ZPCI_TABLE_SIZE_RT - 1;

	spin_lock_init(&s390_domain->list_lock);
	INIT_LIST_HEAD_RCU(&s390_domain->devices);

	return &s390_domain->domain;
}

static void s390_iommu_rcu_free_domain(struct rcu_head *head)
{
	struct s390_domain *s390_domain = container_of(head, struct s390_domain, rcu);

	dma_cleanup_tables(s390_domain->dma_table);
	kfree(s390_domain);
}

static void s390_domain_free(struct iommu_domain *domain)
{
	struct s390_domain *s390_domain = to_s390_domain(domain);

	rcu_read_lock();
	WARN_ON(!list_empty(&s390_domain->devices));
	rcu_read_unlock();

	call_rcu(&s390_domain->rcu, s390_iommu_rcu_free_domain);
}

static void zdev_s390_domain_update(struct zpci_dev *zdev,
				    struct iommu_domain *domain)
{
	unsigned long flags;

	spin_lock_irqsave(&zdev->dom_lock, flags);
	zdev->s390_domain = domain;
	spin_unlock_irqrestore(&zdev->dom_lock, flags);
}

static int s390_iommu_domain_reg_ioat(struct zpci_dev *zdev,
				      struct iommu_domain *domain, u8 *status)
{
	struct s390_domain *s390_domain;
	int rc = 0;
	u64 iota;

	switch (domain->type) {
	case IOMMU_DOMAIN_IDENTITY:
		rc = zpci_register_ioat(zdev, 0, zdev->start_dma,
					zdev->end_dma, 0, status);
		break;
	case IOMMU_DOMAIN_BLOCKED:
		/* Nothing to do in this case */
		break;
	default:
		s390_domain = to_s390_domain(domain);
		iota = virt_to_phys(s390_domain->dma_table) |
		       ZPCI_IOTA_RTTO_FLAG;
		rc = zpci_register_ioat(zdev, 0, zdev->start_dma,
					zdev->end_dma, iota, status);
	}

	return rc;
}

int zpci_iommu_register_ioat(struct zpci_dev *zdev, u8 *status)
{
	unsigned long flags;
	int rc;

	spin_lock_irqsave(&zdev->dom_lock, flags);

	rc = s390_iommu_domain_reg_ioat(zdev, zdev->s390_domain, status);

	spin_unlock_irqrestore(&zdev->dom_lock, flags);

	return rc;
}

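/*
 * Switching to the blocking domain detaches the device from its current
 * paging domain (if any) and unregisters its I/O address translation,
 * so all DMA from the device is blocked until a new domain is attached.
 */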
static int blocking_domain_attach_device(struct iommu_domain *domain,
					 struct device *dev)
{
	struct zpci_dev *zdev = to_zpci_dev(dev);
	struct s390_domain *s390_domain;
	unsigned long flags;

	if (zdev->s390_domain->type == IOMMU_DOMAIN_BLOCKED)
		return 0;

	s390_domain = to_s390_domain(zdev->s390_domain);
	if (zdev->dma_table) {
		spin_lock_irqsave(&s390_domain->list_lock, flags);
		list_del_rcu(&zdev->iommu_list);
		spin_unlock_irqrestore(&s390_domain->list_lock, flags);
	}

	zpci_unregister_ioat(zdev, 0);
	zdev->dma_table = NULL;
	zdev_s390_domain_update(zdev, domain);

	return 0;
}

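/*
 * Attaching a paging domain first moves the device to the blocking
 * domain and then registers the domain's translation table via
 * zpci_register_ioat(); if registration fails, DMA stays blocked.
 */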
static int s390_iommu_attach_device(struct iommu_domain *domain,
				    struct device *dev)
{
	struct s390_domain *s390_domain = to_s390_domain(domain);
	struct zpci_dev *zdev = to_zpci_dev(dev);
	unsigned long flags;
	u8 status;
	int cc;

	if (!zdev)
		return -ENODEV;

	if (WARN_ON(domain->geometry.aperture_start > zdev->end_dma ||
		domain->geometry.aperture_end < zdev->start_dma))
		return -EINVAL;

	blocking_domain_attach_device(&blocking_domain, dev);

	/* If we fail now DMA remains blocked via blocking domain */
	cc = s390_iommu_domain_reg_ioat(zdev, domain, &status);
	if (cc && status != ZPCI_PCI_ST_FUNC_NOT_AVAIL)
		return -EIO;
	zdev->dma_table = s390_domain->dma_table;
	zdev_s390_domain_update(zdev, domain);

	spin_lock_irqsave(&s390_domain->list_lock, flags);
	list_add_rcu(&zdev->iommu_list, &s390_domain->devices);
	spin_unlock_irqrestore(&s390_domain->list_lock, flags);

	return 0;
}

static void s390_iommu_get_resv_regions(struct device *dev,
					struct list_head *list)
{
	struct zpci_dev *zdev = to_zpci_dev(dev);
	struct iommu_resv_region *region;

	if (zdev->start_dma) {
		region = iommu_alloc_resv_region(0, zdev->start_dma, 0,
						 IOMMU_RESV_RESERVED, GFP_KERNEL);
		if (!region)
			return;
		list_add_tail(&region->list, list);
	}

	if (zdev->end_dma < ZPCI_TABLE_SIZE_RT - 1) {
		region = iommu_alloc_resv_region(zdev->end_dma + 1,
						 ZPCI_TABLE_SIZE_RT - zdev->end_dma - 1,
						 0, IOMMU_RESV_RESERVED, GFP_KERNEL);
		if (!region)
			return;
		list_add_tail(&region->list, list);
	}
}

static struct iommu_device *s390_iommu_probe_device(struct device *dev)
{
	struct zpci_dev *zdev;

	if (!dev_is_pci(dev))
		return ERR_PTR(-ENODEV);

	zdev = to_zpci_dev(dev);

	if (zdev->start_dma > zdev->end_dma ||
	    zdev->start_dma > ZPCI_TABLE_SIZE_RT - 1)
		return ERR_PTR(-EINVAL);

	if (zdev->end_dma > ZPCI_TABLE_SIZE_RT - 1)
		zdev->end_dma = ZPCI_TABLE_SIZE_RT - 1;

	if (zdev->tlb_refresh)
		dev->iommu->shadow_on_flush = 1;

	/* Start with DMA blocked */
	spin_lock_init(&zdev->dom_lock);
	zdev_s390_domain_update(zdev, &blocking_domain);

	return &zdev->iommu_dev;
}

static int zpci_refresh_all(struct zpci_dev *zdev)
{
	return zpci_refresh_trans((u64)zdev->fh << 32, zdev->start_dma,
				  zdev->end_dma - zdev->start_dma + 1);
}

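/*
 * IOTLB maintenance: flush_iotlb_all refreshes the whole DMA aperture of
 * every device attached to the domain, while iotlb_sync only refreshes
 * the range collected in the gather structure. Both go through
 * zpci_refresh_trans(), keyed by the device's function handle.
 */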
static void s390_iommu_flush_iotlb_all(struct iommu_domain *domain)
{
	struct s390_domain *s390_domain = to_s390_domain(domain);
	struct zpci_dev *zdev;

	rcu_read_lock();
	list_for_each_entry_rcu(zdev, &s390_domain->devices, iommu_list) {
		atomic64_inc(&s390_domain->ctrs.global_rpcits);
		zpci_refresh_all(zdev);
	}
	rcu_read_unlock();
}

static void s390_iommu_iotlb_sync(struct iommu_domain *domain,
				  struct iommu_iotlb_gather *gather)
{
	struct s390_domain *s390_domain = to_s390_domain(domain);
	size_t size = gather->end - gather->start + 1;
	struct zpci_dev *zdev;

	/* If nothing was ever added to the gather there is nothing to flush */
	if (!gather->end)
		return;

	rcu_read_lock();
	list_for_each_entry_rcu(zdev, &s390_domain->devices, iommu_list) {
		atomic64_inc(&s390_domain->ctrs.sync_rpcits);
		zpci_refresh_trans((u64)zdev->fh << 32, gather->start,
				   size);
	}
	rcu_read_unlock();
}

static int s390_iommu_iotlb_sync_map(struct iommu_domain *domain,
				     unsigned long iova, size_t size)
{
	struct s390_domain *s390_domain = to_s390_domain(domain);
	struct zpci_dev *zdev;
	int ret = 0;

	rcu_read_lock();
	list_for_each_entry_rcu(zdev, &s390_domain->devices, iommu_list) {
		if (!zdev->tlb_refresh)
			continue;
		atomic64_inc(&s390_domain->ctrs.sync_map_rpcits);
		ret = zpci_refresh_trans((u64)zdev->fh << 32,
					 iova, size);
		/*
		 * Let the hypervisor discover invalidated entries,
		 * allowing it to free IOVAs and unpin pages.
		 */
		if (ret == -ENOMEM) {
			ret = zpci_refresh_all(zdev);
			if (ret)
				break;
		}
	}
	rcu_read_unlock();

	return ret;
}

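/*
 * s390_iommu_validate_trans() maps nr_pages contiguous pages starting at
 * pa to dma_addr; on allocation failure it rolls back the entries mapped
 * so far. s390_iommu_invalidate_trans() is the unmap counterpart.
 */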
static int s390_iommu_validate_trans(struct s390_domain *s390_domain,
				     phys_addr_t pa, dma_addr_t dma_addr,
				     unsigned long nr_pages, int flags,
				     gfp_t gfp)
{
	phys_addr_t page_addr = pa & PAGE_MASK;
	unsigned long *entry;
	unsigned long i;
	int rc;

	for (i = 0; i < nr_pages; i++) {
		entry = dma_walk_cpu_trans(s390_domain->dma_table, dma_addr,
					   gfp);
		if (unlikely(!entry)) {
			rc = -ENOMEM;
			goto undo_cpu_trans;
		}
		dma_update_cpu_trans(entry, page_addr, flags);
		page_addr += PAGE_SIZE;
		dma_addr += PAGE_SIZE;
	}

	return 0;

undo_cpu_trans:
	while (i-- > 0) {
		dma_addr -= PAGE_SIZE;
		entry = dma_walk_cpu_trans(s390_domain->dma_table,
					   dma_addr, gfp);
		if (!entry)
			break;
		dma_update_cpu_trans(entry, 0, ZPCI_PTE_INVALID);
	}

	return rc;
}

static int s390_iommu_invalidate_trans(struct s390_domain *s390_domain,
				       dma_addr_t dma_addr, unsigned long nr_pages)
{
	unsigned long *entry;
	unsigned long i;
	int rc = 0;

	for (i = 0; i < nr_pages; i++) {
		entry = dma_walk_cpu_trans(s390_domain->dma_table, dma_addr,
					   GFP_ATOMIC);
		if (unlikely(!entry)) {
			rc = -EINVAL;
			break;
		}
		dma_update_cpu_trans(entry, 0, ZPCI_PTE_INVALID);
		dma_addr += PAGE_SIZE;
	}

	return rc;
}

static int s390_iommu_map_pages(struct iommu_domain *domain,
				unsigned long iova, phys_addr_t paddr,
				size_t pgsize, size_t pgcount,
				int prot, gfp_t gfp, size_t *mapped)
{
	struct s390_domain *s390_domain = to_s390_domain(domain);
	size_t size = pgcount << __ffs(pgsize);
	int flags = ZPCI_PTE_VALID, rc = 0;

	if (pgsize != SZ_4K)
		return -EINVAL;

	if (iova < s390_domain->domain.geometry.aperture_start ||
	    (iova + size - 1) > s390_domain->domain.geometry.aperture_end)
		return -EINVAL;

	if (!IS_ALIGNED(iova | paddr, pgsize))
		return -EINVAL;

	if (!(prot & IOMMU_WRITE))
		flags |= ZPCI_TABLE_PROTECTED;

	rc = s390_iommu_validate_trans(s390_domain, paddr, iova,
				     pgcount, flags, gfp);
	if (!rc) {
		*mapped = size;
		atomic64_add(pgcount, &s390_domain->ctrs.mapped_pages);
	}

	return rc;
}

static phys_addr_t s390_iommu_iova_to_phys(struct iommu_domain *domain,
					   dma_addr_t iova)
{
	struct s390_domain *s390_domain = to_s390_domain(domain);
	unsigned long *rto, *sto, *pto;
	unsigned long ste, pte, rte;
	unsigned int rtx, sx, px;
	phys_addr_t phys = 0;

	if (iova < domain->geometry.aperture_start ||
	    iova > domain->geometry.aperture_end)
		return 0;

	rtx = calc_rtx(iova);
	sx = calc_sx(iova);
	px = calc_px(iova);
	rto = s390_domain->dma_table;

	rte = READ_ONCE(rto[rtx]);
	if (reg_entry_isvalid(rte)) {
		sto = get_rt_sto(rte);
		ste = READ_ONCE(sto[sx]);
		if (reg_entry_isvalid(ste)) {
			pto = get_st_pto(ste);
			pte = READ_ONCE(pto[px]);
			if (pt_entry_isvalid(pte))
				phys = pte & ZPCI_PTE_ADDR_MASK;
		}
	}

	return phys;
}

static size_t s390_iommu_unmap_pages(struct iommu_domain *domain,
				     unsigned long iova,
				     size_t pgsize, size_t pgcount,
				     struct iommu_iotlb_gather *gather)
{
	struct s390_domain *s390_domain = to_s390_domain(domain);
	size_t size = pgcount << __ffs(pgsize);
	int rc;

	if (WARN_ON(iova < s390_domain->domain.geometry.aperture_start ||
	    (iova + size - 1) > s390_domain->domain.geometry.aperture_end))
		return 0;

	rc = s390_iommu_invalidate_trans(s390_domain, iova, pgcount);
	if (rc)
		return 0;

	iommu_iotlb_gather_add_range(gather, iova, size);
	atomic64_add(pgcount, &s390_domain->ctrs.unmapped_pages);

	return size;
}

struct zpci_iommu_ctrs *zpci_get_iommu_ctrs(struct zpci_dev *zdev)
{
	struct s390_domain *s390_domain;

	lockdep_assert_held(&zdev->dom_lock);

	if (zdev->s390_domain->type == IOMMU_DOMAIN_BLOCKED)
		return NULL;

	s390_domain = to_s390_domain(zdev->s390_domain);
	return &s390_domain->ctrs;
}

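/*
 * Register the per-device IOMMU instance. If zdev->rtr_avail is set, the
 * ops variant that additionally offers an identity domain is used. The
 * usable DMA aperture is clamped to the smallest of the global aperture
 * limit, the range addressable by the translation table above start_dma,
 * and the device-reported range.
 */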
int zpci_init_iommu(struct zpci_dev *zdev)
{
	u64 aperture_size;
	int rc = 0;

	rc = iommu_device_sysfs_add(&zdev->iommu_dev, NULL, NULL,
				    "s390-iommu.%08x", zdev->fid);
	if (rc)
		goto out_err;

	if (zdev->rtr_avail) {
		rc = iommu_device_register(&zdev->iommu_dev,
					   &s390_iommu_rtr_ops, NULL);
	} else {
		rc = iommu_device_register(&zdev->iommu_dev, &s390_iommu_ops,
					   NULL);
	}
	if (rc)
		goto out_sysfs;

	zdev->start_dma = PAGE_ALIGN(zdev->start_dma);
	aperture_size = min3(s390_iommu_aperture,
			     ZPCI_TABLE_SIZE_RT - zdev->start_dma,
			     zdev->end_dma - zdev->start_dma + 1);
	zdev->end_dma = zdev->start_dma + aperture_size - 1;

	return 0;

out_sysfs:
	iommu_device_sysfs_remove(&zdev->iommu_dev);

out_err:
	return rc;
}

void zpci_destroy_iommu(struct zpci_dev *zdev)
{
	iommu_device_unregister(&zdev->iommu_dev);
	iommu_device_sysfs_remove(&zdev->iommu_dev);
}

static int __init s390_iommu_setup(char *str)
{
	if (!strcmp(str, "strict")) {
		pr_warn("s390_iommu=strict deprecated; use iommu.strict=1 instead\n");
		iommu_set_dma_strict();
	}
	return 1;
}

__setup("s390_iommu=", s390_iommu_setup);

static int __init s390_iommu_aperture_setup(char *str)
{
	if (kstrtou32(str, 10, &s390_iommu_aperture_factor))
		s390_iommu_aperture_factor = 1;
	return 1;
}

__setup("s390_iommu_aperture=", s390_iommu_aperture_setup);

static int __init s390_iommu_init(void)
{
	int rc;

	iommu_dma_forcedac = true;
	s390_iommu_aperture = (u64)virt_to_phys(high_memory);
	if (!s390_iommu_aperture_factor)
		s390_iommu_aperture = ULONG_MAX;
	else
		s390_iommu_aperture *= s390_iommu_aperture_factor;

	rc = dma_alloc_cpu_table_caches();
	if (rc)
		return rc;

	return rc;
}
subsys_initcall(s390_iommu_init);

static int s390_attach_dev_identity(struct iommu_domain *domain,
				    struct device *dev)
{
	struct zpci_dev *zdev = to_zpci_dev(dev);
	u8 status;
	int cc;

	blocking_domain_attach_device(&blocking_domain, dev);

	/* If we fail now DMA remains blocked via blocking domain */
	cc = s390_iommu_domain_reg_ioat(zdev, domain, &status);

	/*
	 * If the device is undergoing error recovery the reset code
	 * will re-establish the new domain.
	 */
	if (cc && status != ZPCI_PCI_ST_FUNC_NOT_AVAIL)
		return -EIO;

	zdev_s390_domain_update(zdev, domain);

	return 0;
}

static const struct iommu_domain_ops s390_identity_ops = {
	.attach_dev = s390_attach_dev_identity,
};

static struct iommu_domain s390_identity_domain = {
	.type = IOMMU_DOMAIN_IDENTITY,
	.ops = &s390_identity_ops,
};

static struct iommu_domain blocking_domain = {
	.type = IOMMU_DOMAIN_BLOCKED,
	.ops = &(const struct iommu_domain_ops) {
		.attach_dev	= blocking_domain_attach_device,
	}
};

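/*
 * Both ops structures share the same paging callbacks; the rtr variant
 * additionally exposes the identity domain for passthrough operation.
 */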
#define S390_IOMMU_COMMON_OPS() \
	.blocked_domain		= &blocking_domain, \
	.release_domain		= &blocking_domain, \
	.capable = s390_iommu_capable, \
	.domain_alloc_paging = s390_domain_alloc_paging, \
	.probe_device = s390_iommu_probe_device, \
	.device_group = generic_device_group, \
	.pgsize_bitmap = SZ_4K, \
	.get_resv_regions = s390_iommu_get_resv_regions, \
	.default_domain_ops = &(const struct iommu_domain_ops) { \
		.attach_dev	= s390_iommu_attach_device, \
		.map_pages	= s390_iommu_map_pages, \
		.unmap_pages	= s390_iommu_unmap_pages, \
		.flush_iotlb_all = s390_iommu_flush_iotlb_all, \
		.iotlb_sync      = s390_iommu_iotlb_sync, \
		.iotlb_sync_map  = s390_iommu_iotlb_sync_map, \
		.iova_to_phys	= s390_iommu_iova_to_phys, \
		.free		= s390_domain_free, \
	}

static const struct iommu_ops s390_iommu_ops = {
	S390_IOMMU_COMMON_OPS()
};

static const struct iommu_ops s390_iommu_rtr_ops = {
	.identity_domain	= &s390_identity_domain,
	S390_IOMMU_COMMON_OPS()
};