xref: /linux/drivers/vdpa/mlx5/core/mr.c (revision 9fffa4e9b3b158f63334e603e610da7d529a0f9a)
1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2020 Mellanox Technologies Ltd. */
3 
4 #include <linux/vhost_types.h>
5 #include <linux/vdpa.h>
6 #include <linux/gcd.h>
7 #include <linux/string.h>
8 #include <linux/mlx5/qp.h>
9 #include "mlx5_vdpa.h"
10 
/*
 * DIV_ROUND_UP where the divider is a power of 2 given by its log base 2
 * value: evaluates to ceil(_n / 2^_s) as a u64.
 *
 * The rounding term is built from a 64-bit one so the shift is carried out in
 * 64 bits; shifting a plain int is undefined once _s reaches the width of int.
 */
#define MLX5_DIV_ROUND_UP_POW2(_n, _s) \
({ \
	u64 __s = _s; \
	u64 _res; \
	_res = (((_n) + (1ULL << (__s)) - 1) >> (__s)); \
	_res; \
})
19 
20 static int get_octo_len(u64 len, int page_shift)
21 {
22 	u64 page_size = 1ULL << page_shift;
23 	int npages;
24 
25 	npages = ALIGN(len, page_size) >> page_shift;
26 	return (npages + 1) / 2;
27 }
28 
29 static void mlx5_set_access_mode(void *mkc, int mode)
30 {
31 	MLX5_SET(mkc, mkc, access_mode_1_0, mode & 0x3);
32 	MLX5_SET(mkc, mkc, access_mode_4_2, mode >> 2);
33 }
34 
35 static void populate_mtts(struct mlx5_vdpa_direct_mr *mr, __be64 *mtt)
36 {
37 	struct scatterlist *sg;
38 	int nsg = mr->nsg;
39 	u64 dma_addr;
40 	u64 dma_len;
41 	int j = 0;
42 	int i;
43 
44 	for_each_sg(mr->sg_head.sgl, sg, mr->nent, i) {
45 		for (dma_addr = sg_dma_address(sg), dma_len = sg_dma_len(sg);
46 		     nsg && dma_len;
47 		     nsg--, dma_addr += BIT(mr->log_size), dma_len -= BIT(mr->log_size))
48 			mtt[j++] = cpu_to_be64(dma_addr);
49 	}
50 }
51 
/* Memory for one CREATE_MKEY command: the command output, the fixed-size
 * command input, and a variable-length array of translation entries.
 * create_direct_keys() allocates one of these per direct MR and sizes mtt[]
 * from the MR's entity count.
 */
struct mlx5_create_mkey_mem {
	u8 out[MLX5_ST_SZ_BYTES(create_mkey_out)];
	u8 in[MLX5_ST_SZ_BYTES(create_mkey_in)];
	__be64 mtt[];
};
57 
/* Memory for one DESTROY_MKEY command: command output followed by command
 * input. destroy_direct_keys() allocates an array of these, one per direct MR.
 */
struct mlx5_destroy_mkey_mem {
	u8 out[MLX5_ST_SZ_BYTES(destroy_mkey_out)];
	u8 in[MLX5_ST_SZ_BYTES(destroy_mkey_in)];
};
62 
63 static void fill_create_direct_mr(struct mlx5_vdpa_dev *mvdev,
64 				  struct mlx5_vdpa_direct_mr *mr,
65 				  struct mlx5_create_mkey_mem *mem)
66 {
67 	void *in = &mem->in;
68 	void *mkc;
69 
70 	MLX5_SET(create_mkey_in, in, uid, mvdev->res.uid);
71 	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
72 	MLX5_SET(mkc, mkc, lw, !!(mr->perm & VHOST_MAP_WO));
73 	MLX5_SET(mkc, mkc, lr, !!(mr->perm & VHOST_MAP_RO));
74 	mlx5_set_access_mode(mkc, MLX5_MKC_ACCESS_MODE_MTT);
75 	MLX5_SET(mkc, mkc, qpn, 0xffffff);
76 	MLX5_SET(mkc, mkc, pd, mvdev->res.pdn);
77 	MLX5_SET64(mkc, mkc, start_addr, mr->offset);
78 	MLX5_SET64(mkc, mkc, len, mr->end - mr->start);
79 	MLX5_SET(mkc, mkc, log_page_size, mr->log_size);
80 	MLX5_SET(mkc, mkc, translations_octword_size,
81 		 get_octo_len(mr->end - mr->start, mr->log_size));
82 	MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
83 		 get_octo_len(mr->end - mr->start, mr->log_size));
84 	populate_mtts(mr, MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt));
85 
86 	MLX5_SET(create_mkey_in, in, opcode, MLX5_CMD_OP_CREATE_MKEY);
87 	MLX5_SET(create_mkey_in, in, uid, mvdev->res.uid);
88 }
89 
90 static void create_direct_mr_end(struct mlx5_vdpa_dev *mvdev,
91 				 struct mlx5_vdpa_direct_mr *mr,
92 				 struct mlx5_create_mkey_mem *mem)
93 {
94 	u32 mkey_index = MLX5_GET(create_mkey_out, mem->out, mkey_index);
95 
96 	mr->mr = mlx5_idx_to_mkey(mkey_index);
97 }
98 
99 static void fill_destroy_direct_mr(struct mlx5_vdpa_dev *mvdev,
100 				   struct mlx5_vdpa_direct_mr *mr,
101 				   struct mlx5_destroy_mkey_mem *mem)
102 {
103 	void *in = &mem->in;
104 
105 	MLX5_SET(destroy_mkey_in, in, uid, mvdev->res.uid);
106 	MLX5_SET(destroy_mkey_in, in, opcode, MLX5_CMD_OP_DESTROY_MKEY);
107 	MLX5_SET(destroy_mkey_in, in, mkey_index, mlx5_mkey_to_idx(mr->mr));
108 }
109 
110 static void destroy_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr)
111 {
112 	if (!mr->mr)
113 		return;
114 
115 	mlx5_vdpa_destroy_mkey(mvdev, mr->mr);
116 }
117 
118 static u64 map_start(struct vhost_iotlb_map *map, struct mlx5_vdpa_direct_mr *mr)
119 {
120 	return max_t(u64, map->start, mr->start);
121 }
122 
123 static u64 map_end(struct vhost_iotlb_map *map, struct mlx5_vdpa_direct_mr *mr)
124 {
125 	return min_t(u64, map->last + 1, mr->end);
126 }
127 
128 static u64 maplen(struct vhost_iotlb_map *map, struct mlx5_vdpa_direct_mr *mr)
129 {
130 	return map_end(map, mr) - map_start(map, mr);
131 }
132 
/* Sentinels returned by indir_start_addr() and indir_len() when the MR has
 * no direct MRs on its list.
 */
#define MLX5_VDPA_INVALID_START_ADDR U64_MAX
#define MLX5_VDPA_INVALID_LEN U64_MAX
135 
136 static u64 indir_start_addr(struct mlx5_vdpa_mr *mkey)
137 {
138 	struct mlx5_vdpa_direct_mr *s;
139 
140 	s = list_first_entry_or_null(&mkey->head, struct mlx5_vdpa_direct_mr, list);
141 	if (!s)
142 		return MLX5_VDPA_INVALID_START_ADDR;
143 
144 	return s->start;
145 }
146 
147 static u64 indir_len(struct mlx5_vdpa_mr *mkey)
148 {
149 	struct mlx5_vdpa_direct_mr *s;
150 	struct mlx5_vdpa_direct_mr *e;
151 
152 	s = list_first_entry_or_null(&mkey->head, struct mlx5_vdpa_direct_mr, list);
153 	if (!s)
154 		return MLX5_VDPA_INVALID_LEN;
155 
156 	e = list_last_entry(&mkey->head, struct mlx5_vdpa_direct_mr, list);
157 
158 	return e->end - s->start;
159 }
160 
/* Largest byte range handled per KLM entry (2^30 bytes). add_direct_chain()
 * splits ranges into direct MRs of at most this size, and create_user_mr()
 * uses it to count the null entries needed for a hole.
 */
#define LOG_MAX_KLM_SIZE 30
#define MAX_KLM_SIZE BIT(LOG_MAX_KLM_SIZE)
163 
164 static u32 klm_bcount(u64 size)
165 {
166 	return (u32)size;
167 }
168 
169 static void fill_indir(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mkey, void *in)
170 {
171 	struct mlx5_vdpa_direct_mr *dmr;
172 	struct mlx5_klm *klmarr;
173 	struct mlx5_klm *klm;
174 	bool first = true;
175 	u64 preve;
176 	int i;
177 
178 	klmarr = MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
179 	i = 0;
180 	list_for_each_entry(dmr, &mkey->head, list) {
181 again:
182 		klm = &klmarr[i++];
183 		if (first) {
184 			preve = dmr->start;
185 			first = false;
186 		}
187 
188 		if (preve == dmr->start) {
189 			klm->key = cpu_to_be32(dmr->mr);
190 			klm->bcount = cpu_to_be32(klm_bcount(dmr->end - dmr->start));
191 			preve = dmr->end;
192 		} else {
193 			klm->key = cpu_to_be32(mvdev->res.null_mkey);
194 			klm->bcount = cpu_to_be32(klm_bcount(dmr->start - preve));
195 			preve = dmr->start;
196 			goto again;
197 		}
198 	}
199 }
200 
/*
 * Bytes taken by @nklms KLM entries in a command: the entry count is rounded
 * up to a multiple of 4 and each entry takes 16 bytes.
 */
static int klm_byte_size(int nklms)
{
	int padded_klms = ALIGN(nklms, 4);

	return padded_klms * 16;
}
205 
/* create_direct_keys() rounds the byte length of each MTT array up to a
 * multiple of this value.
 */
#define MLX5_VDPA_MTT_ALIGN 16
207 
/*
 * Create the memory keys of all direct MRs on mr->head with one batch of
 * asynchronous CREATE_MKEY commands.
 *
 * One command buffer is allocated per direct MR, all commands are executed
 * together, and on success each resulting mkey is stored in its direct MR.
 *
 * Returns 0 on success, -ENOMEM on allocation failure, the error returned by
 * mlx5_vdpa_exec_async_cmds(), or the error of the first command that failed.
 * Keys that were created before a failure are left in place; they stay
 * recorded in their direct MRs.
 */
static int create_direct_keys(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
{
	struct mlx5_vdpa_async_cmd *cmds;
	struct mlx5_vdpa_direct_mr *dmr;
	int err = 0;
	int i = 0;

	cmds = kvcalloc(mr->num_directs, sizeof(*cmds), GFP_KERNEL);
	if (!cmds)
		return -ENOMEM;

	/* Prepare one command per direct MR. i counts the buffers allocated so
	 * far and is what the cleanup at "done" relies on.
	 */
	list_for_each_entry(dmr, &mr->head, list) {
		struct mlx5_create_mkey_mem *cmd_mem;
		int mttlen, mttcount;

		mttlen = roundup(MLX5_ST_SZ_BYTES(mtt) * dmr->nsg, MLX5_VDPA_MTT_ALIGN);
		mttcount = mttlen / sizeof(cmd_mem->mtt[0]);
		cmd_mem = kvcalloc(1, struct_size(cmd_mem, mtt, mttcount), GFP_KERNEL);
		if (!cmd_mem) {
			err = -ENOMEM;
			goto done;
		}

		cmds[i].out = cmd_mem->out;
		cmds[i].outlen = sizeof(cmd_mem->out);
		cmds[i].in = cmd_mem->in;
		/* NOTE(review): struct_size() here also counts the out[] member,
		 * so inlen is larger than in[] plus the MTT array - confirm this
		 * is intended.
		 */
		cmds[i].inlen = struct_size(cmd_mem, mtt, mttcount);

		fill_create_direct_mr(mvdev, dmr, cmd_mem);

		i++;
	}

	err = mlx5_vdpa_exec_async_cmds(mvdev, cmds, mr->num_directs);
	if (err) {

		mlx5_vdpa_err(mvdev, "error issuing MTT mkey creation for direct mrs: %d\n", err);
		goto done;
	}

	/* Collect the results; commands and direct MRs are in the same order. */
	i = 0;
	list_for_each_entry(dmr, &mr->head, list) {
		struct mlx5_vdpa_async_cmd *cmd = &cmds[i++];
		struct mlx5_create_mkey_mem *cmd_mem;

		cmd_mem = container_of(cmd->out, struct mlx5_create_mkey_mem, out);

		if (!cmd->err) {
			create_direct_mr_end(mvdev, dmr, cmd_mem);
		} else {
			/* Report the first failure, but log every one. */
			err = err ? err : cmd->err;
			mlx5_vdpa_err(mvdev, "error creating MTT mkey [0x%llx, 0x%llx]: %d\n",
				dmr->start, dmr->end, cmd->err);
		}
	}

done:
	/* Free the i command buffers that were allocated. */
	for (i = i-1; i >= 0; i--) {
		struct mlx5_create_mkey_mem *cmd_mem;

		cmd_mem = container_of(cmds[i].out, struct mlx5_create_mkey_mem, out);
		kvfree(cmd_mem);
	}

	kvfree(cmds);
	return err;
}
275 
/* Scope-based cleanup helpers for destroy_direct_keys(): variables declared
 * with __free(free_cmds) / __free(free_cmd_mem) are released with kvfree().
 */
DEFINE_FREE(free_cmds, struct mlx5_vdpa_async_cmd *, kvfree(_T))
DEFINE_FREE(free_cmd_mem, struct mlx5_destroy_mkey_mem *, kvfree(_T))
278 
279 static int destroy_direct_keys(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
280 {
281 	struct mlx5_destroy_mkey_mem *cmd_mem __free(free_cmd_mem) = NULL;
282 	struct mlx5_vdpa_async_cmd *cmds __free(free_cmds) = NULL;
283 	struct mlx5_vdpa_direct_mr *dmr;
284 	int err = 0;
285 	int i = 0;
286 
287 	cmds = kvcalloc(mr->num_directs, sizeof(*cmds), GFP_KERNEL);
288 	cmd_mem = kvcalloc(mr->num_directs, sizeof(*cmd_mem), GFP_KERNEL);
289 	if (!cmds || !cmd_mem)
290 		return -ENOMEM;
291 
292 	list_for_each_entry(dmr, &mr->head, list) {
293 		cmds[i].out = cmd_mem[i].out;
294 		cmds[i].outlen = sizeof(cmd_mem[i].out);
295 		cmds[i].in = cmd_mem[i].in;
296 		cmds[i].inlen = sizeof(cmd_mem[i].in);
297 		fill_destroy_direct_mr(mvdev, dmr, &cmd_mem[i]);
298 		i++;
299 	}
300 
301 	err = mlx5_vdpa_exec_async_cmds(mvdev, cmds, mr->num_directs);
302 	if (err) {
303 
304 		mlx5_vdpa_err(mvdev, "error issuing MTT mkey deletion for direct mrs: %d\n", err);
305 		return err;
306 	}
307 
308 	i = 0;
309 	list_for_each_entry(dmr, &mr->head, list) {
310 		struct mlx5_vdpa_async_cmd *cmd = &cmds[i++];
311 
312 		dmr->mr = 0;
313 		if (cmd->err) {
314 			err = err ? err : cmd->err;
315 			mlx5_vdpa_err(mvdev, "error deleting MTT mkey [0x%llx, 0x%llx]: %d\n",
316 				dmr->start, dmr->end, cmd->err);
317 		}
318 	}
319 
320 	return err;
321 }
322 
323 static int create_indirect_key(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
324 {
325 	int inlen;
326 	void *mkc;
327 	void *in;
328 	int err;
329 	u64 start;
330 	u64 len;
331 
332 	start = indir_start_addr(mr);
333 	len = indir_len(mr);
334 	if (start == MLX5_VDPA_INVALID_START_ADDR || len == MLX5_VDPA_INVALID_LEN)
335 		return -EINVAL;
336 
337 	inlen = MLX5_ST_SZ_BYTES(create_mkey_in) + klm_byte_size(mr->num_klms);
338 	in = kzalloc(inlen, GFP_KERNEL);
339 	if (!in)
340 		return -ENOMEM;
341 
342 	MLX5_SET(create_mkey_in, in, uid, mvdev->res.uid);
343 	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
344 	MLX5_SET(mkc, mkc, lw, 1);
345 	MLX5_SET(mkc, mkc, lr, 1);
346 	mlx5_set_access_mode(mkc, MLX5_MKC_ACCESS_MODE_KLMS);
347 	MLX5_SET(mkc, mkc, qpn, 0xffffff);
348 	MLX5_SET(mkc, mkc, pd, mvdev->res.pdn);
349 	MLX5_SET64(mkc, mkc, start_addr, start);
350 	MLX5_SET64(mkc, mkc, len, len);
351 	MLX5_SET(mkc, mkc, translations_octword_size, klm_byte_size(mr->num_klms) / 16);
352 	MLX5_SET(create_mkey_in, in, translations_octword_actual_size, mr->num_klms);
353 	fill_indir(mvdev, mr, in);
354 	err = mlx5_vdpa_create_mkey(mvdev, &mr->mkey, in, inlen);
355 	kfree(in);
356 	return err;
357 }
358 
/* Destroy the indirect memory key stored in mkey->mkey (the key that
 * create_indirect_key() creates).
 */
static void destroy_indirect_key(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mkey)
{
	mlx5_vdpa_destroy_mkey(mvdev, mkey->mkey);
}
363 
364 static int map_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr,
365 			 struct vhost_iotlb *iotlb)
366 {
367 	struct vhost_iotlb_map *map;
368 	unsigned long lgcd = 0;
369 	int log_entity_size;
370 	unsigned long size;
371 	u64 start = 0;
372 	int err;
373 	struct page *pg;
374 	unsigned int nsg;
375 	int sglen;
376 	u64 pa;
377 	u64 paend;
378 	struct scatterlist *sg;
379 	struct device *dma = mvdev->vdev.dma_dev;
380 
381 	for (map = vhost_iotlb_itree_first(iotlb, mr->start, mr->end - 1);
382 	     map; map = vhost_iotlb_itree_next(map, start, mr->end - 1)) {
383 		size = maplen(map, mr);
384 		lgcd = gcd(lgcd, size);
385 		start += size;
386 	}
387 	log_entity_size = ilog2(lgcd);
388 
389 	sglen = 1 << log_entity_size;
390 	nsg = MLX5_DIV_ROUND_UP_POW2(mr->end - mr->start, log_entity_size);
391 
392 	err = sg_alloc_table(&mr->sg_head, nsg, GFP_KERNEL);
393 	if (err)
394 		return err;
395 
396 	sg = mr->sg_head.sgl;
397 	for (map = vhost_iotlb_itree_first(iotlb, mr->start, mr->end - 1);
398 	     map; map = vhost_iotlb_itree_next(map, mr->start, mr->end - 1)) {
399 		paend = map->addr + maplen(map, mr);
400 		for (pa = map->addr; pa < paend; pa += sglen) {
401 			pg = pfn_to_page(__phys_to_pfn(pa));
402 			if (!sg) {
403 				mlx5_vdpa_warn(mvdev, "sg null. start 0x%llx, end 0x%llx\n",
404 					       map->start, map->last + 1);
405 				err = -ENOMEM;
406 				goto err_map;
407 			}
408 			sg_set_page(sg, pg, sglen, 0);
409 			sg = sg_next(sg);
410 			if (!sg)
411 				goto done;
412 		}
413 	}
414 done:
415 	mr->log_size = log_entity_size;
416 	mr->nsg = nsg;
417 	mr->nent = dma_map_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0);
418 	if (!mr->nent) {
419 		err = -ENOMEM;
420 		goto err_map;
421 	}
422 
423 	return 0;
424 
425 err_map:
426 	sg_free_table(&mr->sg_head);
427 	return err;
428 }
429 
430 static void unmap_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr)
431 {
432 	struct device *dma = mvdev->vdev.dma_dev;
433 
434 	destroy_direct_mr(mvdev, mr);
435 	dma_unmap_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0);
436 	sg_free_table(&mr->sg_head);
437 }
438 
439 static int add_direct_chain(struct mlx5_vdpa_dev *mvdev,
440 			    struct mlx5_vdpa_mr *mr,
441 			    u64 start,
442 			    u64 size,
443 			    u8 perm,
444 			    struct vhost_iotlb *iotlb)
445 {
446 	struct mlx5_vdpa_direct_mr *dmr;
447 	struct mlx5_vdpa_direct_mr *n;
448 	LIST_HEAD(tmp);
449 	u64 st;
450 	u64 sz;
451 	int err;
452 
453 	st = start;
454 	while (size) {
455 		sz = (u32)min_t(u64, MAX_KLM_SIZE, size);
456 		dmr = kzalloc(sizeof(*dmr), GFP_KERNEL);
457 		if (!dmr) {
458 			err = -ENOMEM;
459 			goto err_alloc;
460 		}
461 
462 		dmr->start = st;
463 		dmr->end = st + sz;
464 		dmr->perm = perm;
465 		err = map_direct_mr(mvdev, dmr, iotlb);
466 		if (err) {
467 			kfree(dmr);
468 			goto err_alloc;
469 		}
470 
471 		list_add_tail(&dmr->list, &tmp);
472 		size -= sz;
473 		mr->num_directs++;
474 		mr->num_klms++;
475 		st += sz;
476 	}
477 	list_splice_tail(&tmp, &mr->head);
478 	return 0;
479 
480 err_alloc:
481 	list_for_each_entry_safe(dmr, n, &mr->head, list) {
482 		list_del_init(&dmr->list);
483 		unmap_direct_mr(mvdev, dmr);
484 		kfree(dmr);
485 	}
486 	return err;
487 }
488 
/* The iotlb pointer contains a list of maps. Go over the maps, possibly
 * merging mergeable maps, and create direct memory keys that provide the
 * device access to memory. The direct mkeys are then referred to by the
 * indirect memory key that provides access to the entire address space given
 * by iotlb.
 *
 * Maps are merged when one starts exactly where the previous run ends and
 * has the same permission. Returns 0 on success and sets mr->user_mr; on
 * failure all direct MRs on mr->head are unmapped and freed and the error is
 * returned.
 */
static int create_user_mr(struct mlx5_vdpa_dev *mvdev,
			  struct mlx5_vdpa_mr *mr,
			  struct vhost_iotlb *iotlb)
{
	struct mlx5_vdpa_direct_mr *dmr;
	struct mlx5_vdpa_direct_mr *n;
	struct vhost_iotlb_map *map;
	u32 pperm = U16_MAX;	/* permission of the current run */
	u64 last = U64_MAX;
	u64 ps = U64_MAX;	/* start of the current run; U64_MAX: no run yet */
	u64 pe = U64_MAX;	/* exclusive end of the current run */
	u64 start = 0;
	int err = 0;
	int nnuls;

	INIT_LIST_HEAD(&mr->head);
	for (map = vhost_iotlb_itree_first(iotlb, start, last); map;
	     map = vhost_iotlb_itree_next(map, start, last)) {
		start = map->start;
		if (pe == map->start && pperm == map->perm) {
			/* Contiguous with the same permission: extend the run. */
			pe = map->last + 1;
		} else {
			if (ps != U64_MAX) {
				if (pe < map->start) {
					/* We have a hole in the map. Check how
					 * many null keys are required to fill it.
					 */
					nnuls = MLX5_DIV_ROUND_UP_POW2(map->start - pe,
								       LOG_MAX_KLM_SIZE);
					mr->num_klms += nnuls;
				}
				/* Flush the finished run into direct MRs. */
				err = add_direct_chain(mvdev, mr, ps, pe - ps, pperm, iotlb);
				if (err)
					goto err_chain;
			}
			/* Start a new run at this map. */
			ps = map->start;
			pe = map->last + 1;
			pperm = map->perm;
		}
	}
	/* Flush the last run. */
	err = add_direct_chain(mvdev, mr, ps, pe - ps, pperm, iotlb);
	if (err)
		goto err_chain;

	err = create_direct_keys(mvdev, mr);
	if (err)
		goto err_chain;

	/* Create the memory key that defines the guest's address space. This
	 * memory key refers to the direct keys that contain the MTT
	 * translations
	 */
	err = create_indirect_key(mvdev, mr);
	if (err)
		goto err_chain;

	mr->user_mr = true;
	return 0;

err_chain:
	list_for_each_entry_safe_reverse(dmr, n, &mr->head, list) {
		list_del_init(&dmr->list);
		unmap_direct_mr(mvdev, dmr);
		kfree(dmr);
	}
	return err;
}
562 
563 static int create_dma_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
564 {
565 	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
566 	void *mkc;
567 	u32 *in;
568 	int err;
569 
570 	in = kzalloc(inlen, GFP_KERNEL);
571 	if (!in)
572 		return -ENOMEM;
573 
574 	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
575 
576 	MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA);
577 	MLX5_SET(mkc, mkc, length64, 1);
578 	MLX5_SET(mkc, mkc, lw, 1);
579 	MLX5_SET(mkc, mkc, lr, 1);
580 	MLX5_SET(mkc, mkc, pd, mvdev->res.pdn);
581 	MLX5_SET(mkc, mkc, qpn, 0xffffff);
582 
583 	err = mlx5_vdpa_create_mkey(mvdev, &mr->mkey, in, inlen);
584 	if (!err)
585 		mr->user_mr = false;
586 
587 	kfree(in);
588 	return err;
589 }
590 
/* Destroy the memory key stored in mr->mkey (the key that create_dma_mr()
 * creates for a DMA MR).
 */
static void destroy_dma_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
{
	mlx5_vdpa_destroy_mkey(mvdev, mr->mkey);
}
595 
596 static int dup_iotlb(struct vhost_iotlb *dst, struct vhost_iotlb *src)
597 {
598 	struct vhost_iotlb_map *map;
599 	u64 start = 0, last = ULLONG_MAX;
600 	int err;
601 
602 	if (dst == src)
603 		return -EINVAL;
604 
605 	if (!src) {
606 		err = vhost_iotlb_add_range(dst, start, last, start, VHOST_ACCESS_RW);
607 		return err;
608 	}
609 
610 	for (map = vhost_iotlb_itree_first(src, start, last); map;
611 		map = vhost_iotlb_itree_next(map, start, last)) {
612 		err = vhost_iotlb_add_range(dst, map->start, map->last,
613 					    map->addr, map->perm);
614 		if (err)
615 			return err;
616 	}
617 	return 0;
618 }
619 
/* Remove all mappings from @iotlb by deleting the whole address range. */
static void prune_iotlb(struct vhost_iotlb *iotlb)
{
	vhost_iotlb_del_range(iotlb, 0, ULLONG_MAX);
}
624 
625 static void destroy_user_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
626 {
627 	struct mlx5_vdpa_direct_mr *dmr;
628 	struct mlx5_vdpa_direct_mr *n;
629 
630 	destroy_indirect_key(mvdev, mr);
631 	destroy_direct_keys(mvdev, mr);
632 	list_for_each_entry_safe_reverse(dmr, n, &mr->head, list) {
633 		list_del_init(&dmr->list);
634 		unmap_direct_mr(mvdev, dmr);
635 		kfree(dmr);
636 	}
637 }
638 
639 static void _mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
640 {
641 	if (WARN_ON(!mr))
642 		return;
643 
644 	if (mr->user_mr)
645 		destroy_user_mr(mvdev, mr);
646 	else
647 		destroy_dma_mr(mvdev, mr);
648 
649 	vhost_iotlb_free(mr->iotlb);
650 
651 	list_del(&mr->mr_list);
652 
653 	kfree(mr);
654 }
655 
/* Delay, in milliseconds, before the MR garbage collection work runs.
 *
 * There can be multiple .set_map() operations in quick succession.
 * This large delay is a simple way to prevent the MR cleanup from blocking
 * .set_map() MR creation in this scenario.
 */
#define MLX5_VDPA_MR_GC_TRIGGER_MS 2000
661 
662 static void mlx5_vdpa_mr_gc_handler(struct work_struct *work)
663 {
664 	struct mlx5_vdpa_mr_resources *mres;
665 	struct mlx5_vdpa_mr *mr, *tmp;
666 	struct mlx5_vdpa_dev *mvdev;
667 
668 	mres = container_of(work, struct mlx5_vdpa_mr_resources, gc_dwork_ent.work);
669 
670 	if (atomic_read(&mres->shutdown)) {
671 		mutex_lock(&mres->lock);
672 	} else if (!mutex_trylock(&mres->lock)) {
673 		queue_delayed_work(mres->wq_gc, &mres->gc_dwork_ent,
674 				   msecs_to_jiffies(MLX5_VDPA_MR_GC_TRIGGER_MS));
675 		return;
676 	}
677 
678 	mvdev = container_of(mres, struct mlx5_vdpa_dev, mres);
679 
680 	list_for_each_entry_safe(mr, tmp, &mres->mr_gc_list_head, mr_list) {
681 		_mlx5_vdpa_destroy_mr(mvdev, mr);
682 	}
683 
684 	mutex_unlock(&mres->lock);
685 }
686 
687 static void _mlx5_vdpa_put_mr(struct mlx5_vdpa_dev *mvdev,
688 			      struct mlx5_vdpa_mr *mr)
689 {
690 	struct mlx5_vdpa_mr_resources *mres = &mvdev->mres;
691 
692 	if (!mr)
693 		return;
694 
695 	if (refcount_dec_and_test(&mr->refcount)) {
696 		list_move_tail(&mr->mr_list, &mres->mr_gc_list_head);
697 		queue_delayed_work(mres->wq_gc, &mres->gc_dwork_ent,
698 				   msecs_to_jiffies(MLX5_VDPA_MR_GC_TRIGGER_MS));
699 	}
700 }
701 
702 void mlx5_vdpa_put_mr(struct mlx5_vdpa_dev *mvdev,
703 		      struct mlx5_vdpa_mr *mr)
704 {
705 	mutex_lock(&mvdev->mres.lock);
706 	_mlx5_vdpa_put_mr(mvdev, mr);
707 	mutex_unlock(&mvdev->mres.lock);
708 }
709 
710 static void _mlx5_vdpa_get_mr(struct mlx5_vdpa_dev *mvdev,
711 			      struct mlx5_vdpa_mr *mr)
712 {
713 	if (!mr)
714 		return;
715 
716 	refcount_inc(&mr->refcount);
717 }
718 
719 void mlx5_vdpa_get_mr(struct mlx5_vdpa_dev *mvdev,
720 		      struct mlx5_vdpa_mr *mr)
721 {
722 	mutex_lock(&mvdev->mres.lock);
723 	_mlx5_vdpa_get_mr(mvdev, mr);
724 	mutex_unlock(&mvdev->mres.lock);
725 }
726 
727 void mlx5_vdpa_update_mr(struct mlx5_vdpa_dev *mvdev,
728 			 struct mlx5_vdpa_mr *new_mr,
729 			 unsigned int asid)
730 {
731 	struct mlx5_vdpa_mr *old_mr = mvdev->mres.mr[asid];
732 
733 	mutex_lock(&mvdev->mres.lock);
734 
735 	_mlx5_vdpa_put_mr(mvdev, old_mr);
736 	mvdev->mres.mr[asid] = new_mr;
737 
738 	mutex_unlock(&mvdev->mres.lock);
739 }
740 
741 static void mlx5_vdpa_show_mr_leaks(struct mlx5_vdpa_dev *mvdev)
742 {
743 	struct mlx5_vdpa_mr *mr;
744 
745 	mutex_lock(&mvdev->mres.lock);
746 
747 	list_for_each_entry(mr, &mvdev->mres.mr_list_head, mr_list) {
748 
749 		mlx5_vdpa_warn(mvdev, "mkey still alive after resource delete: "
750 				      "mr: %p, mkey: 0x%x, refcount: %u\n",
751 				       mr, mr->mkey, refcount_read(&mr->refcount));
752 	}
753 
754 	mutex_unlock(&mvdev->mres.lock);
755 
756 }
757 
758 void mlx5_vdpa_clean_mrs(struct mlx5_vdpa_dev *mvdev)
759 {
760 	if (!mvdev->res.valid)
761 		return;
762 
763 	for (int i = 0; i < MLX5_VDPA_NUM_AS; i++)
764 		mlx5_vdpa_update_mr(mvdev, NULL, i);
765 
766 	prune_iotlb(mvdev->cvq.iotlb);
767 
768 	mlx5_vdpa_show_mr_leaks(mvdev);
769 }
770 
771 static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev,
772 				struct mlx5_vdpa_mr *mr,
773 				struct vhost_iotlb *iotlb)
774 {
775 	int err;
776 
777 	if (iotlb)
778 		err = create_user_mr(mvdev, mr, iotlb);
779 	else
780 		err = create_dma_mr(mvdev, mr);
781 
782 	if (err)
783 		return err;
784 
785 	mr->iotlb = vhost_iotlb_alloc(0, 0);
786 	if (!mr->iotlb) {
787 		err = -ENOMEM;
788 		goto err_mr;
789 	}
790 
791 	err = dup_iotlb(mr->iotlb, iotlb);
792 	if (err)
793 		goto err_iotlb;
794 
795 	list_add_tail(&mr->mr_list, &mvdev->mres.mr_list_head);
796 
797 	return 0;
798 
799 err_iotlb:
800 	vhost_iotlb_free(mr->iotlb);
801 
802 err_mr:
803 	if (iotlb)
804 		destroy_user_mr(mvdev, mr);
805 	else
806 		destroy_dma_mr(mvdev, mr);
807 
808 	return err;
809 }
810 
811 struct mlx5_vdpa_mr *mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev,
812 					 struct vhost_iotlb *iotlb)
813 {
814 	struct mlx5_vdpa_mr *mr;
815 	int err;
816 
817 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
818 	if (!mr)
819 		return ERR_PTR(-ENOMEM);
820 
821 	mutex_lock(&mvdev->mres.lock);
822 	err = _mlx5_vdpa_create_mr(mvdev, mr, iotlb);
823 	mutex_unlock(&mvdev->mres.lock);
824 
825 	if (err)
826 		goto out_err;
827 
828 	refcount_set(&mr->refcount, 1);
829 
830 	return mr;
831 
832 out_err:
833 	kfree(mr);
834 	return ERR_PTR(err);
835 }
836 
837 int mlx5_vdpa_update_cvq_iotlb(struct mlx5_vdpa_dev *mvdev,
838 				struct vhost_iotlb *iotlb,
839 				unsigned int asid)
840 {
841 	int err;
842 
843 	if (mvdev->mres.group2asid[MLX5_VDPA_CVQ_GROUP] != asid)
844 		return 0;
845 
846 	spin_lock(&mvdev->cvq.iommu_lock);
847 
848 	prune_iotlb(mvdev->cvq.iotlb);
849 	err = dup_iotlb(mvdev->cvq.iotlb, iotlb);
850 
851 	spin_unlock(&mvdev->cvq.iommu_lock);
852 
853 	return err;
854 }
855 
856 int mlx5_vdpa_create_dma_mr(struct mlx5_vdpa_dev *mvdev)
857 {
858 	struct mlx5_vdpa_mr *mr;
859 
860 	mr = mlx5_vdpa_create_mr(mvdev, NULL);
861 	if (IS_ERR(mr))
862 		return PTR_ERR(mr);
863 
864 	mlx5_vdpa_update_mr(mvdev, mr, 0);
865 
866 	return mlx5_vdpa_update_cvq_iotlb(mvdev, NULL, 0);
867 }
868 
869 int mlx5_vdpa_reset_mr(struct mlx5_vdpa_dev *mvdev, unsigned int asid)
870 {
871 	if (asid >= MLX5_VDPA_NUM_AS)
872 		return -EINVAL;
873 
874 	mlx5_vdpa_update_mr(mvdev, NULL, asid);
875 
876 	if (asid == 0 && MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
877 		if (mlx5_vdpa_create_dma_mr(mvdev))
878 			mlx5_vdpa_warn(mvdev, "create DMA MR failed\n");
879 	} else {
880 		mlx5_vdpa_update_cvq_iotlb(mvdev, NULL, asid);
881 	}
882 
883 	return 0;
884 }
885 
886 int mlx5_vdpa_init_mr_resources(struct mlx5_vdpa_dev *mvdev)
887 {
888 	struct mlx5_vdpa_mr_resources *mres = &mvdev->mres;
889 
890 	mres->wq_gc = create_singlethread_workqueue("mlx5_vdpa_mr_gc");
891 	if (!mres->wq_gc)
892 		return -ENOMEM;
893 
894 	INIT_DELAYED_WORK(&mres->gc_dwork_ent, mlx5_vdpa_mr_gc_handler);
895 
896 	mutex_init(&mres->lock);
897 
898 	INIT_LIST_HEAD(&mres->mr_list_head);
899 	INIT_LIST_HEAD(&mres->mr_gc_list_head);
900 
901 	return 0;
902 }
903 
904 void mlx5_vdpa_destroy_mr_resources(struct mlx5_vdpa_dev *mvdev)
905 {
906 	struct mlx5_vdpa_mr_resources *mres = &mvdev->mres;
907 
908 	atomic_set(&mres->shutdown, 1);
909 
910 	flush_delayed_work(&mres->gc_dwork_ent);
911 	destroy_workqueue(mres->wq_gc);
912 	mres->wq_gc = NULL;
913 	mutex_destroy(&mres->lock);
914 }
915