xref: /linux/arch/powerpc/platforms/powernv/pci-ioda-tce.c (revision 847e6563aa8c6e335397b821b215ac4313580638)
1 // SPDX-License-Identifier: GPL-2.0+
2 /*
3  * TCE helpers for IODA PCI/PCIe on PowerNV platforms
4  *
5  * Copyright 2018 IBM Corp.
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License
9  * as published by the Free Software Foundation; either version
10  * 2 of the License, or (at your option) any later version.
11  */
12 
13 #include <linux/kernel.h>
14 #include <linux/iommu.h>
15 
16 #include <asm/iommu.h>
17 #include <asm/tce.h>
18 #include "pci.h"
19 
20 void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
21 		void *tce_mem, u64 tce_size,
22 		u64 dma_offset, unsigned int page_shift)
23 {
24 	tbl->it_blocksize = 16;
25 	tbl->it_base = (unsigned long)tce_mem;
26 	tbl->it_page_shift = page_shift;
27 	tbl->it_offset = dma_offset >> tbl->it_page_shift;
28 	tbl->it_index = 0;
29 	tbl->it_size = tce_size >> 3;
30 	tbl->it_busno = 0;
31 	tbl->it_type = TCE_PCI;
32 }
33 
34 static __be64 *pnv_alloc_tce_level(int nid, unsigned int shift)
35 {
36 	struct page *tce_mem = NULL;
37 	__be64 *addr;
38 
39 	tce_mem = alloc_pages_node(nid, GFP_KERNEL, shift - PAGE_SHIFT);
40 	if (!tce_mem) {
41 		pr_err("Failed to allocate a TCE memory, level shift=%d\n",
42 				shift);
43 		return NULL;
44 	}
45 	addr = page_address(tce_mem);
46 	memset(addr, 0, 1UL << shift);
47 
48 	return addr;
49 }
50 
51 static __be64 *pnv_tce(struct iommu_table *tbl, bool user, long idx, bool alloc)
52 {
53 	__be64 *tmp = user ? tbl->it_userspace : (__be64 *) tbl->it_base;
54 	int  level = tbl->it_indirect_levels;
55 	const long shift = ilog2(tbl->it_level_size);
56 	unsigned long mask = (tbl->it_level_size - 1) << (level * shift);
57 
58 	while (level) {
59 		int n = (idx & mask) >> (level * shift);
60 		unsigned long tce;
61 
62 		if (tmp[n] == 0) {
63 			__be64 *tmp2;
64 
65 			if (!alloc)
66 				return NULL;
67 
68 			tmp2 = pnv_alloc_tce_level(tbl->it_nid,
69 					ilog2(tbl->it_level_size) + 3);
70 			if (!tmp2)
71 				return NULL;
72 
73 			tmp[n] = cpu_to_be64(__pa(tmp2) |
74 					TCE_PCI_READ | TCE_PCI_WRITE);
75 		}
76 		tce = be64_to_cpu(tmp[n]);
77 
78 		tmp = __va(tce & ~(TCE_PCI_READ | TCE_PCI_WRITE));
79 		idx &= ~mask;
80 		mask >>= shift;
81 		--level;
82 	}
83 
84 	return tmp + idx;
85 }
86 
87 int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
88 		unsigned long uaddr, enum dma_data_direction direction,
89 		unsigned long attrs)
90 {
91 	u64 proto_tce = iommu_direction_to_tce_perm(direction);
92 	u64 rpn = __pa(uaddr) >> tbl->it_page_shift;
93 	long i;
94 
95 	if (proto_tce & TCE_PCI_WRITE)
96 		proto_tce |= TCE_PCI_READ;
97 
98 	for (i = 0; i < npages; i++) {
99 		unsigned long newtce = proto_tce |
100 			((rpn + i) << tbl->it_page_shift);
101 		unsigned long idx = index - tbl->it_offset + i;
102 
103 		*(pnv_tce(tbl, false, idx, true)) = cpu_to_be64(newtce);
104 	}
105 
106 	return 0;
107 }
108 
109 #ifdef CONFIG_IOMMU_API
110 int pnv_tce_xchg(struct iommu_table *tbl, long index,
111 		unsigned long *hpa, enum dma_data_direction *direction,
112 		bool alloc)
113 {
114 	u64 proto_tce = iommu_direction_to_tce_perm(*direction);
115 	unsigned long newtce = *hpa | proto_tce, oldtce;
116 	unsigned long idx = index - tbl->it_offset;
117 	__be64 *ptce = NULL;
118 
119 	BUG_ON(*hpa & ~IOMMU_PAGE_MASK(tbl));
120 
121 	if (*direction == DMA_NONE) {
122 		ptce = pnv_tce(tbl, false, idx, false);
123 		if (!ptce) {
124 			*hpa = 0;
125 			return 0;
126 		}
127 	}
128 
129 	if (!ptce) {
130 		ptce = pnv_tce(tbl, false, idx, alloc);
131 		if (!ptce)
132 			return alloc ? H_HARDWARE : H_TOO_HARD;
133 	}
134 
135 	if (newtce & TCE_PCI_WRITE)
136 		newtce |= TCE_PCI_READ;
137 
138 	oldtce = be64_to_cpu(xchg(ptce, cpu_to_be64(newtce)));
139 	*hpa = oldtce & ~(TCE_PCI_READ | TCE_PCI_WRITE);
140 	*direction = iommu_tce_direction(oldtce);
141 
142 	return 0;
143 }
144 
145 __be64 *pnv_tce_useraddrptr(struct iommu_table *tbl, long index, bool alloc)
146 {
147 	if (WARN_ON_ONCE(!tbl->it_userspace))
148 		return NULL;
149 
150 	return pnv_tce(tbl, true, index - tbl->it_offset, alloc);
151 }
152 #endif
153 
154 void pnv_tce_free(struct iommu_table *tbl, long index, long npages)
155 {
156 	long i;
157 
158 	for (i = 0; i < npages; i++) {
159 		unsigned long idx = index - tbl->it_offset + i;
160 		__be64 *ptce = pnv_tce(tbl, false, idx,	false);
161 
162 		if (ptce)
163 			*ptce = cpu_to_be64(0);
164 	}
165 }
166 
167 unsigned long pnv_tce_get(struct iommu_table *tbl, long index)
168 {
169 	__be64 *ptce = pnv_tce(tbl, false, index - tbl->it_offset, false);
170 
171 	if (!ptce)
172 		return 0;
173 
174 	return be64_to_cpu(*ptce);
175 }
176 
177 static void pnv_pci_ioda2_table_do_free_pages(__be64 *addr,
178 		unsigned long size, unsigned int levels)
179 {
180 	const unsigned long addr_ul = (unsigned long) addr &
181 			~(TCE_PCI_READ | TCE_PCI_WRITE);
182 
183 	if (levels) {
184 		long i;
185 		u64 *tmp = (u64 *) addr_ul;
186 
187 		for (i = 0; i < size; ++i) {
188 			unsigned long hpa = be64_to_cpu(tmp[i]);
189 
190 			if (!(hpa & (TCE_PCI_READ | TCE_PCI_WRITE)))
191 				continue;
192 
193 			pnv_pci_ioda2_table_do_free_pages(__va(hpa), size,
194 					levels - 1);
195 		}
196 	}
197 
198 	free_pages(addr_ul, get_order(size << 3));
199 }
200 
201 void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl)
202 {
203 	const unsigned long size = tbl->it_indirect_levels ?
204 			tbl->it_level_size : tbl->it_size;
205 
206 	if (!tbl->it_size)
207 		return;
208 
209 	pnv_pci_ioda2_table_do_free_pages((__be64 *)tbl->it_base, size,
210 			tbl->it_indirect_levels);
211 	if (tbl->it_userspace) {
212 		pnv_pci_ioda2_table_do_free_pages(tbl->it_userspace, size,
213 				tbl->it_indirect_levels);
214 	}
215 }
216 
217 static __be64 *pnv_pci_ioda2_table_do_alloc_pages(int nid, unsigned int shift,
218 		unsigned int levels, unsigned long limit,
219 		unsigned long *current_offset, unsigned long *total_allocated)
220 {
221 	__be64 *addr, *tmp;
222 	unsigned long allocated = 1UL << shift;
223 	unsigned int entries = 1UL << (shift - 3);
224 	long i;
225 
226 	addr = pnv_alloc_tce_level(nid, shift);
227 	*total_allocated += allocated;
228 
229 	--levels;
230 	if (!levels) {
231 		*current_offset += allocated;
232 		return addr;
233 	}
234 
235 	for (i = 0; i < entries; ++i) {
236 		tmp = pnv_pci_ioda2_table_do_alloc_pages(nid, shift,
237 				levels, limit, current_offset, total_allocated);
238 		if (!tmp)
239 			break;
240 
241 		addr[i] = cpu_to_be64(__pa(tmp) |
242 				TCE_PCI_READ | TCE_PCI_WRITE);
243 
244 		if (*current_offset >= limit)
245 			break;
246 	}
247 
248 	return addr;
249 }
250 
251 long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
252 		__u32 page_shift, __u64 window_size, __u32 levels,
253 		bool alloc_userspace_copy, struct iommu_table *tbl)
254 {
255 	void *addr, *uas = NULL;
256 	unsigned long offset = 0, level_shift, total_allocated = 0;
257 	unsigned long total_allocated_uas = 0;
258 	const unsigned int window_shift = ilog2(window_size);
259 	unsigned int entries_shift = window_shift - page_shift;
260 	unsigned int table_shift = max_t(unsigned int, entries_shift + 3,
261 			PAGE_SHIFT);
262 	const unsigned long tce_table_size = 1UL << table_shift;
263 	unsigned int tmplevels = levels;
264 
265 	if (!levels || (levels > POWERNV_IOMMU_MAX_LEVELS))
266 		return -EINVAL;
267 
268 	if (!is_power_of_2(window_size))
269 		return -EINVAL;
270 
271 	if (alloc_userspace_copy && (window_size > (1ULL << 32)))
272 		tmplevels = 1;
273 
274 	/* Adjust direct table size from window_size and levels */
275 	entries_shift = (entries_shift + levels - 1) / levels;
276 	level_shift = entries_shift + 3;
277 	level_shift = max_t(unsigned int, level_shift, PAGE_SHIFT);
278 
279 	if ((level_shift - 3) * levels + page_shift >= 55)
280 		return -EINVAL;
281 
282 	/* Allocate TCE table */
283 	addr = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift,
284 			tmplevels, tce_table_size, &offset, &total_allocated);
285 
286 	/* addr==NULL means that the first level allocation failed */
287 	if (!addr)
288 		return -ENOMEM;
289 
290 	/*
291 	 * First level was allocated but some lower level failed as
292 	 * we did not allocate as much as we wanted,
293 	 * release partially allocated table.
294 	 */
295 	if (tmplevels == levels && offset < tce_table_size)
296 		goto free_tces_exit;
297 
298 	/* Allocate userspace view of the TCE table */
299 	if (alloc_userspace_copy) {
300 		offset = 0;
301 		uas = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift,
302 				tmplevels, tce_table_size, &offset,
303 				&total_allocated_uas);
304 		if (!uas)
305 			goto free_tces_exit;
306 		if (tmplevels == levels && (offset < tce_table_size ||
307 				total_allocated_uas != total_allocated))
308 			goto free_uas_exit;
309 	}
310 
311 	/* Setup linux iommu table */
312 	pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, bus_offset,
313 			page_shift);
314 	tbl->it_level_size = 1ULL << (level_shift - 3);
315 	tbl->it_indirect_levels = levels - 1;
316 	tbl->it_allocated_size = total_allocated;
317 	tbl->it_userspace = uas;
318 	tbl->it_nid = nid;
319 
320 	pr_debug("Created TCE table: ws=%08llx ts=%lx @%08llx base=%lx uas=%p levels=%d/%d\n",
321 			window_size, tce_table_size, bus_offset, tbl->it_base,
322 			tbl->it_userspace, tmplevels, levels);
323 
324 	return 0;
325 
326 free_uas_exit:
327 	pnv_pci_ioda2_table_do_free_pages(uas,
328 			1ULL << (level_shift - 3), levels - 1);
329 free_tces_exit:
330 	pnv_pci_ioda2_table_do_free_pages(addr,
331 			1ULL << (level_shift - 3), levels - 1);
332 
333 	return -ENOMEM;
334 }
335 
336 static void pnv_iommu_table_group_link_free(struct rcu_head *head)
337 {
338 	struct iommu_table_group_link *tgl = container_of(head,
339 			struct iommu_table_group_link, rcu);
340 
341 	kfree(tgl);
342 }
343 
344 void pnv_pci_unlink_table_and_group(struct iommu_table *tbl,
345 		struct iommu_table_group *table_group)
346 {
347 	long i;
348 	bool found;
349 	struct iommu_table_group_link *tgl;
350 
351 	if (!tbl || !table_group)
352 		return;
353 
354 	/* Remove link to a group from table's list of attached groups */
355 	found = false;
356 	list_for_each_entry_rcu(tgl, &tbl->it_group_list, next) {
357 		if (tgl->table_group == table_group) {
358 			list_del_rcu(&tgl->next);
359 			call_rcu(&tgl->rcu, pnv_iommu_table_group_link_free);
360 			found = true;
361 			break;
362 		}
363 	}
364 	if (WARN_ON(!found))
365 		return;
366 
367 	/* Clean a pointer to iommu_table in iommu_table_group::tables[] */
368 	found = false;
369 	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
370 		if (table_group->tables[i] == tbl) {
371 			iommu_tce_table_put(tbl);
372 			table_group->tables[i] = NULL;
373 			found = true;
374 			break;
375 		}
376 	}
377 	WARN_ON(!found);
378 }
379 
380 long pnv_pci_link_table_and_group(int node, int num,
381 		struct iommu_table *tbl,
382 		struct iommu_table_group *table_group)
383 {
384 	struct iommu_table_group_link *tgl = NULL;
385 
386 	if (WARN_ON(!tbl || !table_group))
387 		return -EINVAL;
388 
389 	tgl = kzalloc_node(sizeof(struct iommu_table_group_link), GFP_KERNEL,
390 			node);
391 	if (!tgl)
392 		return -ENOMEM;
393 
394 	tgl->table_group = table_group;
395 	list_add_rcu(&tgl->next, &tbl->it_group_list);
396 
397 	table_group->tables[num] = iommu_tce_table_get(tbl);
398 
399 	return 0;
400 }
401