xref: /linux/arch/powerpc/platforms/powernv/pci-ioda-tce.c (revision c8bfe3fad4f86a029da7157bae9699c816f0c309)
1 // SPDX-License-Identifier: GPL-2.0+
2 /*
3  * TCE helpers for IODA PCI/PCIe on PowerNV platforms
4  *
5  * Copyright 2018 IBM Corp.
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License
9  * as published by the Free Software Foundation; either version
10  * 2 of the License, or (at your option) any later version.
11  */
12 
13 #include <linux/kernel.h>
14 #include <linux/iommu.h>
15 
16 #include <asm/iommu.h>
17 #include <asm/tce.h>
18 #include "pci.h"
19 
20 unsigned long pnv_ioda_parse_tce_sizes(struct pnv_phb *phb)
21 {
22 	struct pci_controller *hose = phb->hose;
23 	struct device_node *dn = hose->dn;
24 	unsigned long mask = 0;
25 	int i, rc, count;
26 	u32 val;
27 
28 	count = of_property_count_u32_elems(dn, "ibm,supported-tce-sizes");
29 	if (count <= 0) {
30 		mask = SZ_4K | SZ_64K;
31 		/* Add 16M for POWER8 by default */
32 		if (cpu_has_feature(CPU_FTR_ARCH_207S) &&
33 				!cpu_has_feature(CPU_FTR_ARCH_300))
34 			mask |= SZ_16M | SZ_256M;
35 		return mask;
36 	}
37 
38 	for (i = 0; i < count; i++) {
39 		rc = of_property_read_u32_index(dn, "ibm,supported-tce-sizes",
40 						i, &val);
41 		if (rc == 0)
42 			mask |= 1ULL << val;
43 	}
44 
45 	return mask;
46 }
47 
48 void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
49 		void *tce_mem, u64 tce_size,
50 		u64 dma_offset, unsigned int page_shift)
51 {
52 	tbl->it_blocksize = 16;
53 	tbl->it_base = (unsigned long)tce_mem;
54 	tbl->it_page_shift = page_shift;
55 	tbl->it_offset = dma_offset >> tbl->it_page_shift;
56 	tbl->it_index = 0;
57 	tbl->it_size = tce_size >> 3;
58 	tbl->it_busno = 0;
59 	tbl->it_type = TCE_PCI;
60 }
61 
62 static __be64 *pnv_alloc_tce_level(int nid, unsigned int shift)
63 {
64 	struct page *tce_mem = NULL;
65 	__be64 *addr;
66 
67 	tce_mem = alloc_pages_node(nid, GFP_ATOMIC | __GFP_NOWARN,
68 			shift - PAGE_SHIFT);
69 	if (!tce_mem) {
70 		pr_err("Failed to allocate a TCE memory, level shift=%d\n",
71 				shift);
72 		return NULL;
73 	}
74 	addr = page_address(tce_mem);
75 	memset(addr, 0, 1UL << shift);
76 
77 	return addr;
78 }
79 
80 static void pnv_pci_ioda2_table_do_free_pages(__be64 *addr,
81 		unsigned long size, unsigned int levels);
82 
83 static __be64 *pnv_tce(struct iommu_table *tbl, bool user, long idx, bool alloc)
84 {
85 	__be64 *tmp = user ? tbl->it_userspace : (__be64 *) tbl->it_base;
86 	int  level = tbl->it_indirect_levels;
87 	const long shift = ilog2(tbl->it_level_size);
88 	unsigned long mask = (tbl->it_level_size - 1) << (level * shift);
89 
90 	while (level) {
91 		int n = (idx & mask) >> (level * shift);
92 		unsigned long oldtce, tce = be64_to_cpu(READ_ONCE(tmp[n]));
93 
94 		if (!tce) {
95 			__be64 *tmp2;
96 
97 			if (!alloc)
98 				return NULL;
99 
100 			tmp2 = pnv_alloc_tce_level(tbl->it_nid,
101 					ilog2(tbl->it_level_size) + 3);
102 			if (!tmp2)
103 				return NULL;
104 
105 			tce = __pa(tmp2) | TCE_PCI_READ | TCE_PCI_WRITE;
106 			oldtce = be64_to_cpu(cmpxchg(&tmp[n], 0,
107 					cpu_to_be64(tce)));
108 			if (oldtce) {
109 				pnv_pci_ioda2_table_do_free_pages(tmp2,
110 					ilog2(tbl->it_level_size) + 3, 1);
111 				tce = oldtce;
112 			}
113 		}
114 
115 		tmp = __va(tce & ~(TCE_PCI_READ | TCE_PCI_WRITE));
116 		idx &= ~mask;
117 		mask >>= shift;
118 		--level;
119 	}
120 
121 	return tmp + idx;
122 }
123 
124 int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
125 		unsigned long uaddr, enum dma_data_direction direction,
126 		unsigned long attrs)
127 {
128 	u64 proto_tce = iommu_direction_to_tce_perm(direction);
129 	u64 rpn = __pa(uaddr) >> tbl->it_page_shift;
130 	long i;
131 
132 	if (proto_tce & TCE_PCI_WRITE)
133 		proto_tce |= TCE_PCI_READ;
134 
135 	for (i = 0; i < npages; i++) {
136 		unsigned long newtce = proto_tce |
137 			((rpn + i) << tbl->it_page_shift);
138 		unsigned long idx = index - tbl->it_offset + i;
139 
140 		*(pnv_tce(tbl, false, idx, true)) = cpu_to_be64(newtce);
141 	}
142 
143 	return 0;
144 }
145 
146 #ifdef CONFIG_IOMMU_API
147 int pnv_tce_xchg(struct iommu_table *tbl, long index,
148 		unsigned long *hpa, enum dma_data_direction *direction)
149 {
150 	u64 proto_tce = iommu_direction_to_tce_perm(*direction);
151 	unsigned long newtce = *hpa | proto_tce, oldtce;
152 	unsigned long idx = index - tbl->it_offset;
153 	__be64 *ptce = NULL;
154 
155 	BUG_ON(*hpa & ~IOMMU_PAGE_MASK(tbl));
156 
157 	if (*direction == DMA_NONE) {
158 		ptce = pnv_tce(tbl, false, idx, false);
159 		if (!ptce) {
160 			*hpa = 0;
161 			return 0;
162 		}
163 	}
164 
165 	if (!ptce) {
166 		ptce = pnv_tce(tbl, false, idx, true);
167 		if (!ptce)
168 			return -ENOMEM;
169 	}
170 
171 	if (newtce & TCE_PCI_WRITE)
172 		newtce |= TCE_PCI_READ;
173 
174 	oldtce = be64_to_cpu(xchg(ptce, cpu_to_be64(newtce)));
175 	*hpa = oldtce & ~(TCE_PCI_READ | TCE_PCI_WRITE);
176 	*direction = iommu_tce_direction(oldtce);
177 
178 	return 0;
179 }
180 
181 __be64 *pnv_tce_useraddrptr(struct iommu_table *tbl, long index, bool alloc)
182 {
183 	if (WARN_ON_ONCE(!tbl->it_userspace))
184 		return NULL;
185 
186 	return pnv_tce(tbl, true, index - tbl->it_offset, alloc);
187 }
188 #endif
189 
190 void pnv_tce_free(struct iommu_table *tbl, long index, long npages)
191 {
192 	long i;
193 
194 	for (i = 0; i < npages; i++) {
195 		unsigned long idx = index - tbl->it_offset + i;
196 		__be64 *ptce = pnv_tce(tbl, false, idx,	false);
197 
198 		if (ptce)
199 			*ptce = cpu_to_be64(0);
200 		else
201 			/* Skip the rest of the level */
202 			i |= tbl->it_level_size - 1;
203 	}
204 }
205 
206 unsigned long pnv_tce_get(struct iommu_table *tbl, long index)
207 {
208 	__be64 *ptce = pnv_tce(tbl, false, index - tbl->it_offset, false);
209 
210 	if (!ptce)
211 		return 0;
212 
213 	return be64_to_cpu(*ptce);
214 }
215 
216 static void pnv_pci_ioda2_table_do_free_pages(__be64 *addr,
217 		unsigned long size, unsigned int levels)
218 {
219 	const unsigned long addr_ul = (unsigned long) addr &
220 			~(TCE_PCI_READ | TCE_PCI_WRITE);
221 
222 	if (levels) {
223 		long i;
224 		u64 *tmp = (u64 *) addr_ul;
225 
226 		for (i = 0; i < size; ++i) {
227 			unsigned long hpa = be64_to_cpu(tmp[i]);
228 
229 			if (!(hpa & (TCE_PCI_READ | TCE_PCI_WRITE)))
230 				continue;
231 
232 			pnv_pci_ioda2_table_do_free_pages(__va(hpa), size,
233 					levels - 1);
234 		}
235 	}
236 
237 	free_pages(addr_ul, get_order(size << 3));
238 }
239 
240 void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl)
241 {
242 	const unsigned long size = tbl->it_indirect_levels ?
243 			tbl->it_level_size : tbl->it_size;
244 
245 	if (!tbl->it_size)
246 		return;
247 
248 	pnv_pci_ioda2_table_do_free_pages((__be64 *)tbl->it_base, size,
249 			tbl->it_indirect_levels);
250 	if (tbl->it_userspace) {
251 		pnv_pci_ioda2_table_do_free_pages(tbl->it_userspace, size,
252 				tbl->it_indirect_levels);
253 	}
254 }
255 
256 static __be64 *pnv_pci_ioda2_table_do_alloc_pages(int nid, unsigned int shift,
257 		unsigned int levels, unsigned long limit,
258 		unsigned long *current_offset, unsigned long *total_allocated)
259 {
260 	__be64 *addr, *tmp;
261 	unsigned long allocated = 1UL << shift;
262 	unsigned int entries = 1UL << (shift - 3);
263 	long i;
264 
265 	addr = pnv_alloc_tce_level(nid, shift);
266 	*total_allocated += allocated;
267 
268 	--levels;
269 	if (!levels) {
270 		*current_offset += allocated;
271 		return addr;
272 	}
273 
274 	for (i = 0; i < entries; ++i) {
275 		tmp = pnv_pci_ioda2_table_do_alloc_pages(nid, shift,
276 				levels, limit, current_offset, total_allocated);
277 		if (!tmp)
278 			break;
279 
280 		addr[i] = cpu_to_be64(__pa(tmp) |
281 				TCE_PCI_READ | TCE_PCI_WRITE);
282 
283 		if (*current_offset >= limit)
284 			break;
285 	}
286 
287 	return addr;
288 }
289 
290 long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
291 		__u32 page_shift, __u64 window_size, __u32 levels,
292 		bool alloc_userspace_copy, struct iommu_table *tbl)
293 {
294 	void *addr, *uas = NULL;
295 	unsigned long offset = 0, level_shift, total_allocated = 0;
296 	unsigned long total_allocated_uas = 0;
297 	const unsigned int window_shift = ilog2(window_size);
298 	unsigned int entries_shift = window_shift - page_shift;
299 	unsigned int table_shift = max_t(unsigned int, entries_shift + 3,
300 			PAGE_SHIFT);
301 	const unsigned long tce_table_size = 1UL << table_shift;
302 
303 	if (!levels || (levels > POWERNV_IOMMU_MAX_LEVELS))
304 		return -EINVAL;
305 
306 	if (!is_power_of_2(window_size))
307 		return -EINVAL;
308 
309 	/* Adjust direct table size from window_size and levels */
310 	entries_shift = (entries_shift + levels - 1) / levels;
311 	level_shift = entries_shift + 3;
312 	level_shift = max_t(unsigned int, level_shift, PAGE_SHIFT);
313 
314 	if ((level_shift - 3) * levels + page_shift >= 55)
315 		return -EINVAL;
316 
317 	/* Allocate TCE table */
318 	addr = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift,
319 			1, tce_table_size, &offset, &total_allocated);
320 
321 	/* addr==NULL means that the first level allocation failed */
322 	if (!addr)
323 		return -ENOMEM;
324 
325 	/*
326 	 * First level was allocated but some lower level failed as
327 	 * we did not allocate as much as we wanted,
328 	 * release partially allocated table.
329 	 */
330 	if (levels == 1 && offset < tce_table_size)
331 		goto free_tces_exit;
332 
333 	/* Allocate userspace view of the TCE table */
334 	if (alloc_userspace_copy) {
335 		offset = 0;
336 		uas = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift,
337 				1, tce_table_size, &offset,
338 				&total_allocated_uas);
339 		if (!uas)
340 			goto free_tces_exit;
341 		if (levels == 1 && (offset < tce_table_size ||
342 				total_allocated_uas != total_allocated))
343 			goto free_uas_exit;
344 	}
345 
346 	/* Setup linux iommu table */
347 	pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, bus_offset,
348 			page_shift);
349 	tbl->it_level_size = 1ULL << (level_shift - 3);
350 	tbl->it_indirect_levels = levels - 1;
351 	tbl->it_userspace = uas;
352 	tbl->it_nid = nid;
353 
354 	pr_debug("Created TCE table: ws=%08llx ts=%lx @%08llx base=%lx uas=%p levels=%d/%d\n",
355 			window_size, tce_table_size, bus_offset, tbl->it_base,
356 			tbl->it_userspace, 1, levels);
357 
358 	return 0;
359 
360 free_uas_exit:
361 	pnv_pci_ioda2_table_do_free_pages(uas,
362 			1ULL << (level_shift - 3), levels - 1);
363 free_tces_exit:
364 	pnv_pci_ioda2_table_do_free_pages(addr,
365 			1ULL << (level_shift - 3), levels - 1);
366 
367 	return -ENOMEM;
368 }
369 
370 void pnv_pci_unlink_table_and_group(struct iommu_table *tbl,
371 		struct iommu_table_group *table_group)
372 {
373 	long i;
374 	bool found;
375 	struct iommu_table_group_link *tgl;
376 
377 	if (!tbl || !table_group)
378 		return;
379 
380 	/* Remove link to a group from table's list of attached groups */
381 	found = false;
382 
383 	rcu_read_lock();
384 	list_for_each_entry_rcu(tgl, &tbl->it_group_list, next) {
385 		if (tgl->table_group == table_group) {
386 			list_del_rcu(&tgl->next);
387 			kfree_rcu(tgl, rcu);
388 			found = true;
389 			break;
390 		}
391 	}
392 	rcu_read_unlock();
393 
394 	if (WARN_ON(!found))
395 		return;
396 
397 	/* Clean a pointer to iommu_table in iommu_table_group::tables[] */
398 	found = false;
399 	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
400 		if (table_group->tables[i] == tbl) {
401 			iommu_tce_table_put(tbl);
402 			table_group->tables[i] = NULL;
403 			found = true;
404 			break;
405 		}
406 	}
407 	WARN_ON(!found);
408 }
409 
410 long pnv_pci_link_table_and_group(int node, int num,
411 		struct iommu_table *tbl,
412 		struct iommu_table_group *table_group)
413 {
414 	struct iommu_table_group_link *tgl = NULL;
415 
416 	if (WARN_ON(!tbl || !table_group))
417 		return -EINVAL;
418 
419 	tgl = kzalloc_node(sizeof(struct iommu_table_group_link), GFP_KERNEL,
420 			node);
421 	if (!tgl)
422 		return -ENOMEM;
423 
424 	tgl->table_group = table_group;
425 	list_add_rcu(&tgl->next, &tbl->it_group_list);
426 
427 	table_group->tables[num] = iommu_tce_table_get(tbl);
428 
429 	return 0;
430 }
431