xref: /linux/drivers/accel/habanalabs/common/mmu/mmu_v2.c (revision 4b660dbd9ee2059850fd30e0df420ca7a38a1856)
1 // SPDX-License-Identifier: GPL-2.0
2 
3 /*
4  * Copyright 2016-2020 HabanaLabs, Ltd.
5  * All Rights Reserved.
6  */
7 
8 #include "../habanalabs.h"
9 #include "../../include/hw_ip/mmu/mmu_general.h"
10 #include "../../include/hw_ip/mmu/mmu_v2_0.h"
11 
12 #include <linux/slab.h>
13 
14 /**
15  * hl_mmu_v2_ctx_init() - initialize a context for using the MMU module.
16  * @ctx: pointer to the context structure to initialize.
17  *
18  * Initialize a mutex to protect the concurrent mapping flow, a hash to hold all
19  * page tables hops related to this context.
20  * Return: 0 on success, non-zero otherwise.
21  */
22 static int hl_mmu_v2_ctx_init(struct hl_ctx *ctx)
23 {
24 	hash_init(ctx->mmu_shadow_hash);
25 
26 	return 0;
27 }
28 
29 /*
30  * hl_mmu_v2_ctx_fini - disable a ctx from using the mmu module
31  *
32  * @ctx: pointer to the context structure
33  *
34  * This function does the following:
35  * - Free any pgts which were not freed yet
36  * - Free the mutex
37  * - Free DRAM default page mapping hops
38  */
39 static void hl_mmu_v2_ctx_fini(struct hl_ctx *ctx)
40 {
41 	struct hl_device *hdev = ctx->hdev;
42 	struct pgt_info *pgt_info;
43 	struct hlist_node *tmp;
44 	int i;
45 
46 	if (!hash_empty(ctx->mmu_shadow_hash))
47 		dev_err(hdev->dev, "ctx %d is freed while it has pgts in use\n",
48 			ctx->asid);
49 
50 	hash_for_each_safe(ctx->mmu_shadow_hash, i, tmp, pgt_info, node) {
51 		dev_err_ratelimited(hdev->dev,
52 			"pgt_info of addr 0x%llx of asid %d was not destroyed, num_ptes: %d\n",
53 			pgt_info->phys_addr, ctx->asid, pgt_info->num_of_ptes);
54 		hl_mmu_dr_free_pgt_node(ctx, pgt_info);
55 	}
56 }
57 
58 static int hl_mmu_v2_unmap(struct hl_ctx *ctx,	u64 virt_addr, bool is_dram_addr)
59 {
60 	u64 hop_addr[MMU_ARCH_6_HOPS] = { 0 }, hop_pte_addr[MMU_ARCH_6_HOPS] = { 0 }, curr_pte,
61 							scrambled_virt_addr;
62 	struct asic_fixed_properties *prop = &ctx->hdev->asic_prop;
63 	struct hl_device *hdev = ctx->hdev;
64 	struct hl_mmu_properties *mmu_prop;
65 	bool is_huge = false;
66 	int i, hop_last;
67 
68 	/* device resident in V2 are allowed only for HMMU */
69 	if (!is_dram_addr)
70 		return -EINVAL;
71 
72 	mmu_prop = &prop->dmmu;
73 
74 	hop_last = mmu_prop->num_hops - 1;
75 
76 	scrambled_virt_addr = hdev->asic_funcs->scramble_addr(hdev, virt_addr);
77 
78 	hop_addr[0] = hl_mmu_dr_get_hop0_addr(ctx);
79 	hop_pte_addr[0] = hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, 0,
80 					hop_addr[0], scrambled_virt_addr);
81 	if (hop_pte_addr[0] == U64_MAX)
82 		return -EFAULT;
83 
84 	curr_pte = *(u64 *) (uintptr_t) hop_pte_addr[0];
85 
86 	for (i = 1 ; i < mmu_prop->num_hops ; i++) {
87 		hop_addr[i] = hl_mmu_get_next_hop_addr(ctx, curr_pte);
88 		if (hop_addr[i] == ULLONG_MAX)
89 			goto not_mapped;
90 
91 		hop_pte_addr[i] = hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, i,
92 					hop_addr[i], scrambled_virt_addr);
93 		if (hop_pte_addr[i] == U64_MAX)
94 			return -EFAULT;
95 
96 		curr_pte = *(u64 *) (uintptr_t) hop_pte_addr[i];
97 
98 		if ((i <= hop_last) && (curr_pte & mmu_prop->last_mask)) {
99 			hop_last = i;
100 			is_huge = true;
101 			break;
102 		}
103 	}
104 
105 	if (is_dram_addr && !is_huge) {
106 		dev_err(hdev->dev, "DRAM unmapping should use huge pages only\n");
107 		return -EFAULT;
108 	}
109 
110 	if (!(curr_pte & PAGE_PRESENT_MASK))
111 		goto not_mapped;
112 
113 	for (i = hop_last ; i > 0 ; i--) {
114 		hl_mmu_dr_clear_pte(ctx, hop_pte_addr[i]);
115 		if (hl_mmu_dr_put_pte(ctx, hop_addr[i]))
116 			goto mapped;
117 	}
118 	hl_mmu_dr_clear_pte(ctx, hop_pte_addr[0]);
119 
120 mapped:
121 	return 0;
122 
123 not_mapped:
124 	dev_err(hdev->dev, "virt addr 0x%llx is not mapped to phys addr\n",
125 		virt_addr);
126 
127 	return -EINVAL;
128 }
129 
130 static int hl_mmu_v2_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
131 							u32 page_size, bool is_dram_addr)
132 {
133 	u64 hop_addr[MMU_ARCH_6_HOPS] = { 0 }, hop_pte_addr[MMU_ARCH_6_HOPS] = { 0 },
134 			curr_pte = 0, scrambled_virt_addr, scrambled_phys_addr;
135 	struct asic_fixed_properties *prop = &ctx->hdev->asic_prop;
136 	bool hop_new[MMU_ARCH_6_HOPS] = { false };
137 	struct hl_device *hdev = ctx->hdev;
138 	struct hl_mmu_properties *mmu_prop;
139 	int rc, i, hop_last;
140 
141 	/* device resident in V2 are allowed only for HMMU */
142 	if (!is_dram_addr)
143 		return -EINVAL;
144 
145 	mmu_prop = &prop->dmmu;
146 
147 	hop_last = mmu_prop->num_hops - 1;
148 
149 	scrambled_virt_addr = hdev->asic_funcs->scramble_addr(hdev, virt_addr);
150 	scrambled_phys_addr = hdev->asic_funcs->scramble_addr(hdev, phys_addr);
151 
152 	/* First hop is preallocated therefore it is treated differently  */
153 	hop_addr[0] = hl_mmu_dr_get_hop0_addr(ctx);
154 	hop_pte_addr[0] = hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, 0,
155 						hop_addr[0], scrambled_virt_addr);
156 	curr_pte = *(u64 *) (uintptr_t) hop_pte_addr[0];
157 
158 	/* Handle hop1 to hop_last */
159 	for (i = 1 ; i <= hop_last ; i++) {
160 		hop_addr[i] = hl_mmu_dr_get_alloc_next_hop_addr(ctx, curr_pte, &hop_new[i]);
161 		if (hop_addr[i] == ULLONG_MAX) {
162 			rc = -ENOMEM;
163 			goto err;
164 		}
165 
166 		hop_pte_addr[i] = hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, i,
167 					hop_addr[i], scrambled_virt_addr);
168 		if (hop_pte_addr[i] == U64_MAX) {
169 			rc = -EINVAL;
170 			goto err;
171 		}
172 
173 		if (!hop_pte_addr[i]) {
174 			rc = -EINVAL;
175 			goto err;
176 		}
177 
178 		curr_pte = *(u64 *) (uintptr_t) hop_pte_addr[i];
179 	}
180 
181 	if (curr_pte & PAGE_PRESENT_MASK) {
182 		dev_err(hdev->dev,
183 			"mapping already exists for virt_addr 0x%llx\n",
184 				virt_addr);
185 
186 		for (i = 0 ; i <= hop_last ; i++)
187 			dev_dbg(hdev->dev, "hop%d pte: 0x%llx (0x%llx)\n",
188 				i, *(u64 *) (uintptr_t) hop_pte_addr[i],
189 				hop_pte_addr[i]);
190 
191 		rc = -EINVAL;
192 		goto err;
193 	}
194 
195 	curr_pte = (scrambled_phys_addr & HOP_PHYS_ADDR_MASK)
196 					| mmu_prop->last_mask | PAGE_PRESENT_MASK;
197 
198 	/* Write the PTEs */
199 	hl_mmu_dr_write_final_pte(ctx, hop_pte_addr[hop_last], curr_pte);
200 
201 	/* for each new hop, add its address to the table of previous-hop */
202 	for (i = 1 ; i <= hop_last ; i++) {
203 		if (hop_new[i]) {
204 			curr_pte = (hop_addr[i] & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
205 			hl_mmu_dr_write_pte(ctx, hop_pte_addr[i - 1], curr_pte);
206 
207 			if (i - 1)
208 				hl_mmu_dr_get_pte(ctx, hop_addr[i - 1]);
209 		}
210 	}
211 	hl_mmu_dr_get_pte(ctx, hop_addr[hop_last]);
212 
213 	return 0;
214 
215 err:
216 	for (i = 1 ; i <= hop_last ; i++)
217 		if (hop_new[i] && (hop_addr[i] != U64_MAX))
218 			hl_mmu_dr_free_hop(ctx, hop_addr[i]);
219 
220 	return rc;
221 }
222 
/**
 * hl_mmu_v2_swap_out() - swap-out callback of the MMU v2 implementation.
 * @ctx: pointer to the context structure.
 *
 * The body is empty: this implementation performs no work on swap-out.
 */
static void hl_mmu_v2_swap_out(struct hl_ctx *ctx)
{
	/* nothing to do */
}
233 
/**
 * hl_mmu_v2_swap_in() - swap-in callback of the MMU v2 implementation.
 * @ctx: pointer to the context structure.
 *
 * The body is empty: this implementation performs no work on swap-in.
 */
static void hl_mmu_v2_swap_in(struct hl_ctx *ctx)
{
	/* nothing to do */
}
244 
245 static int hl_mmu_v2_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr, struct hl_mmu_hop_info *hops)
246 {
247 	struct asic_fixed_properties *prop = &ctx->hdev->asic_prop;
248 	struct hl_device *hdev = ctx->hdev;
249 	struct hl_mmu_properties *mmu_prop;
250 	bool is_dram_addr;
251 	int i;
252 
253 	is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
254 						prop->dmmu.start_addr,
255 						prop->dmmu.end_addr);
256 
257 	/* device resident in V2 are allowed only for HMMU */
258 	if (!is_dram_addr)
259 		return -EINVAL;
260 
261 	mmu_prop = &prop->dmmu;
262 	hops->range_type = HL_VA_RANGE_TYPE_DRAM;
263 
264 	hops->scrambled_vaddr = hdev->asic_funcs->scramble_addr(hdev, virt_addr);
265 
266 	hops->hop_info[0].hop_addr = hl_mmu_dr_get_phys_hop0_addr(ctx);
267 	hops->hop_info[0].hop_pte_addr = hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, 0,
268 						hops->hop_info[0].hop_addr,
269 							hops->scrambled_vaddr);
270 	if (hops->hop_info[0].hop_pte_addr == U64_MAX)
271 		return -EFAULT;
272 
273 	hops->hop_info[0].hop_pte_val = hdev->asic_funcs->read_pte(hdev,
274 						hops->hop_info[0].hop_pte_addr);
275 	if (hops->hop_info[0].hop_pte_val == U64_MAX)
276 		return -EFAULT;
277 
278 	for (i = 1 ; i < mmu_prop->num_hops ; i++) {
279 		hops->hop_info[i].hop_addr =
280 			hl_mmu_get_next_hop_addr(ctx, hops->hop_info[i - 1].hop_pte_val);
281 		if (hops->hop_info[i].hop_addr == ULLONG_MAX)
282 			return -EFAULT;
283 
284 		hops->hop_info[i].hop_pte_addr =
285 				hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, i,
286 						hops->hop_info[i].hop_addr,
287 						hops->scrambled_vaddr);
288 		if (hops->hop_info[i].hop_pte_addr == U64_MAX)
289 			return -EFAULT;
290 
291 		hops->hop_info[i].hop_pte_val =
292 				hdev->asic_funcs->read_pte(hdev,
293 					hops->hop_info[i].hop_pte_addr);
294 
295 		if (!(hops->hop_info[i].hop_pte_val & PAGE_PRESENT_MASK))
296 			return -EFAULT;
297 
298 		if (hops->hop_info[i].hop_pte_val & mmu_prop->last_mask)
299 			break;
300 	}
301 
302 	/* if passed over all hops then no last hop was found */
303 	if (i == mmu_prop->num_hops)
304 		return -EFAULT;
305 
306 	if (!(hops->hop_info[i].hop_pte_val & PAGE_PRESENT_MASK))
307 		return -EFAULT;
308 
309 	if (hops->scrambled_vaddr != virt_addr)
310 		hops->unscrambled_paddr = hdev->asic_funcs->descramble_addr
311 				(hdev, hops->hop_info[i].hop_pte_val);
312 	else
313 		hops->unscrambled_paddr = hops->hop_info[i].hop_pte_val;
314 
315 	hops->used_hops = i + 1;
316 
317 	return 0;
318 }
319 
320 /*
321  * hl_mmu_v2_prepare - prepare mmu_if for working with mmu v2
322  *
323  * @hdev: pointer to the device structure
324  * @mmu_if: pointer to the mmu interface structure
325  */
326 void hl_mmu_v2_set_funcs(struct hl_device *hdev, struct hl_mmu_funcs *mmu)
327 {
328 	mmu->init = hl_mmu_dr_init;
329 	mmu->fini = hl_mmu_dr_fini;
330 	mmu->ctx_init = hl_mmu_v2_ctx_init;
331 	mmu->ctx_fini = hl_mmu_v2_ctx_fini;
332 	mmu->map = hl_mmu_v2_map;
333 	mmu->unmap = hl_mmu_v2_unmap;
334 	mmu->flush = hl_mmu_dr_flush;
335 	mmu->swap_out = hl_mmu_v2_swap_out;
336 	mmu->swap_in = hl_mmu_v2_swap_in;
337 	mmu->get_tlb_info = hl_mmu_v2_get_tlb_info;
338 }
339