xref: /linux/fs/smb/smbdirect/mr.c (revision 0fc8f6200d2313278fbf4539bbab74677c685531)
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *   Copyright (C) 2017, Microsoft Corporation.
 *   Copyright (c) 2025, Stefan Metzmacher
 */

#include "internal.h"

/*
 * Allocate the MRs used for RDMA reads/writes.
 * The number of MRs will not exceed the hardware capability announced in
 * responder_resources. All MRs are kept in mr_io.all.list. An MR can be
 * recovered after it is used; recovery is done in smbd_mr_recovery_work.
 * The contents of the list entries change as MRs are used and recovered
 * for I/O, but the list links themselves do not change.
 */
int smbdirect_connection_create_mr_list(struct smbdirect_socket *sc)
{
	const struct smbdirect_socket_parameters *sp = &sc->parameters;
	struct smbdirect_mr_io *mr;
	int ret;
	u32 i;

	if (sp->responder_resources == 0) {
		smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR,
			"responder_resources negotiated as 0\n");
		return -EINVAL;
	}

	/* Allocate more MRs (2x) than hardware responder_resources */
	for (i = 0; i < sp->responder_resources * 2; i++) {
		mr = kzalloc_obj(*mr);
		if (!mr) {
			ret = -ENOMEM;
			goto kzalloc_mr_failed;
		}

		kref_init(&mr->kref);
		mutex_init(&mr->mutex);

		mr->mr = ib_alloc_mr(sc->ib.pd,
				     sc->mr_io.type,
				     sp->max_frmr_depth);
		if (IS_ERR(mr->mr)) {
			ret = PTR_ERR(mr->mr);
			smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR,
				"ib_alloc_mr failed ret=%d (%1pe) type=0x%x max_frmr_depth=%u\n",
				ret, SMBDIRECT_DEBUG_ERR_PTR(ret),
				sc->mr_io.type, sp->max_frmr_depth);
			goto ib_alloc_mr_failed;
		}

		mr->sgt.sgl = kzalloc_objs(struct scatterlist, sp->max_frmr_depth);
		if (!mr->sgt.sgl) {
			ret = -ENOMEM;
			smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR,
				"failed to allocate sgl, max_frmr_depth=%u\n",
				sp->max_frmr_depth);
			goto kcalloc_sgl_failed;
		}

		mr->state = SMBDIRECT_MR_READY;
		mr->socket = sc;

		list_add_tail(&mr->list, &sc->mr_io.all.list);
		atomic_inc(&sc->mr_io.ready.count);
	}

	return 0;

kcalloc_sgl_failed:
	ib_dereg_mr(mr->mr);
ib_alloc_mr_failed:
	mutex_destroy(&mr->mutex);
	kfree(mr);
kzalloc_mr_failed:
	smbdirect_connection_destroy_mr_list(sc);
	return ret;
}
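
/*
 * A note on the expected lifecycle (summarizing the functions in this
 * file, not adding guarantees): smbdirect_connection_create_mr_list()
 * runs while the connection is being set up and
 * smbdirect_connection_destroy_mr_list() during teardown; in between,
 * the I/O paths take MRs via smbdirect_connection_register_mr_io() and
 * return them via smbdirect_connection_deregister_mr_io().
 */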

static void smbdirect_mr_io_disable_locked(struct smbdirect_mr_io *mr)
{
	struct smbdirect_socket *sc = mr->socket;

	lockdep_assert_held(&mr->mutex);

	if (mr->state == SMBDIRECT_MR_DISABLED)
		return;

	if (mr->mr)
		ib_dereg_mr(mr->mr);
	if (mr->sgt.nents)
		ib_dma_unmap_sg(sc->ib.dev, mr->sgt.sgl, mr->sgt.nents, mr->dir);
	kfree(mr->sgt.sgl);

	mr->mr = NULL;
	mr->sgt.sgl = NULL;
	mr->sgt.nents = 0;

	mr->state = SMBDIRECT_MR_DISABLED;
}

static void smbdirect_mr_io_free_locked(struct kref *kref)
{
	struct smbdirect_mr_io *mr =
		container_of(kref, struct smbdirect_mr_io, kref);

	lockdep_assert_held(&mr->mutex);

	/*
	 * smbdirect_mr_io_disable_locked() should already have been called!
	 */
	if (WARN_ON_ONCE(mr->state != SMBDIRECT_MR_DISABLED))
		smbdirect_mr_io_disable_locked(mr);

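	/*
	 * This is the release function of the last reference, so no
	 * other task can be blocked on the mutex; drop it before
	 * destroying it and freeing the object.
	 */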
	mutex_unlock(&mr->mutex);
	mutex_destroy(&mr->mutex);
	kfree(mr);
}

void smbdirect_connection_destroy_mr_list(struct smbdirect_socket *sc)
{
	struct smbdirect_mr_io *mr, *tmp;
	LIST_HEAD(all_list);
	unsigned long flags;

	spin_lock_irqsave(&sc->mr_io.all.lock, flags);
	list_splice_tail_init(&sc->mr_io.all.list, &all_list);
	spin_unlock_irqrestore(&sc->mr_io.all.lock, flags);

	list_for_each_entry_safe(mr, tmp, &all_list, list) {
		mutex_lock(&mr->mutex);

		smbdirect_mr_io_disable_locked(mr);
		list_del(&mr->list);
		mr->socket = NULL;

		/*
		 * No kref_put_mutex() as the mutex is already locked.
		 *
		 * If kref_put() returned 1, smbdirect_mr_io_free_locked()
		 * ran, unlocked the mutex and the mr is gone.
		 *
		 * If kref_put() returned 0 we know that
		 * smbdirect_mr_io_free_locked() didn't run, neither by us
		 * nor by anyone else, as we still hold the mutex, so we
		 * need to unlock it ourselves.
		 *
		 * If the mr is still registered it is left dangling
		 * (detached from the connection), waiting for
		 * smbdirect_connection_deregister_mr_io() to be called
		 * in order to free the memory.
		 */
		if (!kref_put(&mr->kref, smbdirect_mr_io_free_locked))
			mutex_unlock(&mr->mutex);
	}
}

/*
 * Get an MR from the list. This function waits until there is at least one
 * MR available. Several CPUs may be issuing I/O and trying to get an MR at
 * the same time; sc->mr_io.all.lock protects against that race.
 */
static struct smbdirect_mr_io *
smbdirect_connection_get_mr_io(struct smbdirect_socket *sc)
{
	struct smbdirect_mr_io *mr;
	unsigned long flags;
	int ret;

again:
	ret = wait_event_interruptible(sc->mr_io.ready.wait_queue,
				       atomic_read(&sc->mr_io.ready.count) ||
				       sc->status != SMBDIRECT_SOCKET_CONNECTED);
	if (ret) {
		smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR,
			"wait_event_interruptible ret=%d (%1pe)\n",
			ret, SMBDIRECT_DEBUG_ERR_PTR(ret));
		return NULL;
	}

	if (sc->status != SMBDIRECT_SOCKET_CONNECTED) {
		smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR,
			"sc->status=%s sc->first_error=%1pe\n",
			smbdirect_socket_status_string(sc->status),
			SMBDIRECT_DEBUG_ERR_PTR(sc->first_error));
		return NULL;
	}

	spin_lock_irqsave(&sc->mr_io.all.lock, flags);
	list_for_each_entry(mr, &sc->mr_io.all.list, list) {
		if (mr->state == SMBDIRECT_MR_READY) {
			mr->state = SMBDIRECT_MR_REGISTERED;
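			/*
			 * This reference is handed over to the caller
			 * and dropped again via
			 * smbdirect_connection_deregister_mr_io().
			 */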
			kref_get(&mr->kref);
			spin_unlock_irqrestore(&sc->mr_io.all.lock, flags);
			atomic_dec(&sc->mr_io.ready.count);
			atomic_inc(&sc->mr_io.used.count);
			return mr;
		}
	}

	spin_unlock_irqrestore(&sc->mr_io.all.lock, flags);
	/*
	 * We can fail to get an MR here if another task acquired the last
	 * ready one just before us. If that is the case, go back to waiting.
	 */
	goto again;
}

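/*
 * Completion handler for IB_WR_REG_MR: there is nothing to do on success,
 * and a failure tears down the connection (see also the ordering note in
 * smbdirect_connection_register_mr_io()).
 */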
static void smbdirect_connection_mr_io_register_done(struct ib_cq *cq, struct ib_wc *wc)
{
	struct smbdirect_mr_io *mr =
		container_of(wc->wr_cqe, struct smbdirect_mr_io, cqe);
	struct smbdirect_socket *sc = mr->socket;

	if (wc->status != IB_WC_SUCCESS) {
		smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR,
			"wc->status=%s opcode=%d\n",
			ib_wc_status_msg(wc->status), wc->opcode);
		smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED);
	}
}

static void smbdirect_connection_mr_io_local_inv_done(struct ib_cq *cq, struct ib_wc *wc)
{
	struct smbdirect_mr_io *mr =
		container_of(wc->wr_cqe, struct smbdirect_mr_io, cqe);
	struct smbdirect_socket *sc = mr->socket;

	mr->state = SMBDIRECT_MR_INVALIDATED;
	if (wc->status != IB_WC_SUCCESS) {
		smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR,
			"invalidate failed status=%s\n",
			ib_wc_status_msg(wc->status));
		smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED);
	}
	complete(&mr->invalidate_done);
}

/*
 * Transcribe the pages from an iterator into an MR scatterlist.
 */
static int smbdirect_iter_to_sgt(struct iov_iter *iter,
				 struct sg_table *sgt,
				 unsigned int max_sg)
{
	int ret;

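	/*
	 * Clear the whole scatterlist so that entries beyond the ones
	 * filled in by extract_iter_to_sg() are in a known state.
	 */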
	memset(sgt->sgl, 0, max_sg * sizeof(struct scatterlist));

	ret = extract_iter_to_sg(iter, iov_iter_count(iter), sgt, max_sg, 0);
	WARN_ON(ret < 0);
	if (sgt->nents > 0)
		sg_mark_end(&sgt->sgl[sgt->nents - 1]);

	return ret;
}

/*
 * Register memory for RDMA read/write.
 * iter: the buffer to register memory with
 * writing: true if this is an RDMA write (SMB read), false for an RDMA read
 * need_invalidate: true if this MR needs to be locally invalidated after I/O
 * Returns the registered MR, or NULL on failure.
 */
struct smbdirect_mr_io *
smbdirect_connection_register_mr_io(struct smbdirect_socket *sc,
				    struct iov_iter *iter,
				    bool writing,
				    bool need_invalidate)
{
	const struct smbdirect_socket_parameters *sp = &sc->parameters;
	struct smbdirect_mr_io *mr;
	int ret, num_pages;
	struct ib_reg_wr *reg_wr;

	num_pages = iov_iter_npages(iter, sp->max_frmr_depth + 1);
	if (num_pages > sp->max_frmr_depth) {
		smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR,
			"num_pages=%d max_frmr_depth=%d\n",
			num_pages, sp->max_frmr_depth);
		WARN_ON_ONCE(1);
		return NULL;
	}

	mr = smbdirect_connection_get_mr_io(sc);
	if (!mr) {
		smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR,
			"smbdirect_connection_get_mr_io returning NULL\n");
		return NULL;
	}

	mutex_lock(&mr->mutex);

	mr->dir = writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
	mr->need_invalidate = need_invalidate;
	mr->sgt.nents = 0;
	mr->sgt.orig_nents = 0;

	smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_INFO,
		"num_pages=%u count=%zu depth=%u\n",
		num_pages, iov_iter_count(iter), sp->max_frmr_depth);
	smbdirect_iter_to_sgt(iter, &mr->sgt, sp->max_frmr_depth);

	ret = ib_dma_map_sg(sc->ib.dev, mr->sgt.sgl, mr->sgt.nents, mr->dir);
	if (!ret) {
		smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR,
			"ib_dma_map_sg num_pages=%u dir=%x failed\n",
			num_pages, mr->dir);
		/* ib_dma_map_sg() reports failure as 0, turn it into an errno */
		ret = -EIO;
		goto dma_map_error;
	}

	ret = ib_map_mr_sg(mr->mr, mr->sgt.sgl, mr->sgt.nents, NULL, PAGE_SIZE);
	if (ret != mr->sgt.nents) {
		smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR,
			"ib_map_mr_sg failed ret=%d nents=%u\n",
			ret, mr->sgt.nents);
		/* a short mapping is not a negative errno, turn it into one */
		if (ret >= 0)
			ret = -EIO;
		goto map_mr_error;
	}

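	/*
	 * Build the fast registration work request. Incrementing the key
	 * portion of the rkey makes sure a stale remote reference to a
	 * previous registration of this MR can no longer match.
	 */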
	ib_update_fast_reg_key(mr->mr, ib_inc_rkey(mr->mr->rkey));
	reg_wr = &mr->wr;
	reg_wr->wr.opcode = IB_WR_REG_MR;
	mr->cqe.done = smbdirect_connection_mr_io_register_done;
	reg_wr->wr.wr_cqe = &mr->cqe;
	reg_wr->wr.num_sge = 0;
	reg_wr->wr.send_flags = IB_SEND_SIGNALED;
	reg_wr->mr = mr->mr;
	reg_wr->key = mr->mr->rkey;
	reg_wr->access = writing ?
			IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
			IB_ACCESS_REMOTE_READ;

	/*
	 * There is no need to wait for completion of the ib_post_send
	 * of IB_WR_REG_MR. The hardware enforces a barrier and ordering
	 * against the next ib_post_send, when we actually send I/O to
	 * the remote peer.
	 */
	ret = ib_post_send(sc->ib.qp, &reg_wr->wr, NULL);
	if (!ret) {
		/*
		 * smbdirect_connection_get_mr_io() gave us a reference
		 * via kref_get(&mr->kref); we keep it and let the caller
		 * drop it again via
		 * smbdirect_connection_deregister_mr_io().
		 */
		mutex_unlock(&mr->mutex);
		return mr;
	}

	smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR,
		"ib_post_send failed ret=%d (%1pe) reg_wr->key=0x%x\n",
		ret, SMBDIRECT_DEBUG_ERR_PTR(ret), reg_wr->key);

map_mr_error:
	ib_dma_unmap_sg(sc->ib.dev, mr->sgt.sgl, mr->sgt.nents, mr->dir);

dma_map_error:
	mr->sgt.nents = 0;
	mr->state = SMBDIRECT_MR_ERROR;
	atomic_dec(&sc->mr_io.used.count);

	smbdirect_socket_schedule_cleanup(sc, ret);

	/*
	 * smbdirect_connection_get_mr_io() gave us a reference
	 * via kref_get(&mr->kref); we need to drop it again
	 * on error.
	 *
	 * No kref_put_mutex() as the mutex is already locked.
	 *
	 * If kref_put() returned 1, smbdirect_mr_io_free_locked()
	 * ran, unlocked the mutex and the mr is gone.
	 *
	 * If kref_put() returned 0 we know that
	 * smbdirect_mr_io_free_locked() didn't run, neither by us
	 * nor by anyone else, as we still hold the mutex, so we
	 * need to unlock it ourselves.
	 */
	if (!kref_put(&mr->kref, smbdirect_mr_io_free_locked))
		mutex_unlock(&mr->mutex);
	return NULL;
}
__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_connection_register_mr_io);
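
/*
 * A typical call sequence (a sketch of how the pieces fit together, not
 * additional API guarantees): register the buffer above, publish the
 * resulting offset/token/length to the peer via
 * smbdirect_mr_io_fill_buffer_descriptor(), and once the peer has
 * finished its RDMA read/write, call
 * smbdirect_connection_deregister_mr_io() to invalidate and recycle
 * the MR.
 */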

void smbdirect_mr_io_fill_buffer_descriptor(struct smbdirect_mr_io *mr,
					    struct smbdirect_buffer_descriptor_v1 *v1)
{
	mutex_lock(&mr->mutex);
	if (mr->state == SMBDIRECT_MR_REGISTERED) {
		v1->offset = cpu_to_le64(mr->mr->iova);
		v1->token = cpu_to_le32(mr->mr->rkey);
		v1->length = cpu_to_le32(mr->mr->length);
	} else {
		v1->offset = cpu_to_le64(U64_MAX);
		v1->token = cpu_to_le32(U32_MAX);
		v1->length = cpu_to_le32(U32_MAX);
	}
	mutex_unlock(&mr->mutex);
}
__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_mr_io_fill_buffer_descriptor);

/*
 * Deregister an MR after I/O is done.
 * This function may block if remote invalidation is not used and we have
 * to locally invalidate the buffer, to prevent the remote peer from
 * modifying the data after the upper layer has consumed it.
 */
void smbdirect_connection_deregister_mr_io(struct smbdirect_mr_io *mr)
{
	struct smbdirect_socket *sc = mr->socket;
	int ret = 0;

lock_again:
	mutex_lock(&mr->mutex);
	if (mr->state == SMBDIRECT_MR_DISABLED)
		goto put_kref;

	if (sc->status != SMBDIRECT_SOCKET_CONNECTED) {
		smbdirect_mr_io_disable_locked(mr);
		goto put_kref;
	}

	if (mr->need_invalidate) {
		struct ib_send_wr *wr = &mr->inv_wr;

		/* Need to finish local invalidation before returning */
		wr->opcode = IB_WR_LOCAL_INV;
		mr->cqe.done = smbdirect_connection_mr_io_local_inv_done;
		wr->wr_cqe = &mr->cqe;
		wr->num_sge = 0;
		wr->ex.invalidate_rkey = mr->mr->rkey;
		wr->send_flags = IB_SEND_SIGNALED;

		init_completion(&mr->invalidate_done);
		ret = ib_post_send(sc->ib.qp, wr, NULL);
		if (ret) {
			smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR,
				"ib_post_send failed ret=%d (%1pe)\n",
				ret, SMBDIRECT_DEBUG_ERR_PTR(ret));
			smbdirect_mr_io_disable_locked(mr);
			smbdirect_socket_schedule_cleanup(sc, ret);
			goto done;
		}

		/*
		 * We still hold a reference to the mr,
		 * so we can unlock while waiting.
		 */
		mutex_unlock(&mr->mutex);
		wait_for_completion(&mr->invalidate_done);
		mr->need_invalidate = false;
		goto lock_again;
	} else {
		/*
		 * The peer already performed remote invalidation; just
		 * mark the MR SMBDIRECT_MR_INVALIDATED, it is unmapped
		 * and made ready for the next use below.
		 */
		mr->state = SMBDIRECT_MR_INVALIDATED;
	}

	if (mr->sgt.nents) {
		ib_dma_unmap_sg(sc->ib.dev, mr->sgt.sgl, mr->sgt.nents, mr->dir);
		mr->sgt.nents = 0;
	}

	WARN_ONCE(mr->state != SMBDIRECT_MR_INVALIDATED,
		  "mr->state[%u] != SMBDIRECT_MR_INVALIDATED[%u]\n",
		  mr->state, SMBDIRECT_MR_INVALIDATED);
	mr->state = SMBDIRECT_MR_READY;
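	/*
	 * Only the 0 -> 1 transition needs a wakeup; waiters in
	 * smbdirect_connection_get_mr_io() re-check the count themselves.
	 */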
	if (atomic_inc_return(&sc->mr_io.ready.count) == 1)
		wake_up(&sc->mr_io.ready.wait_queue);

done:
	atomic_dec(&sc->mr_io.used.count);

put_kref:
	/*
	 * No kref_put_mutex() as the mutex is already locked.
	 *
	 * If kref_put() returned 1, smbdirect_mr_io_free_locked()
	 * ran, unlocked the mutex and the mr is gone.
	 *
	 * If kref_put() returned 0 we know that
	 * smbdirect_mr_io_free_locked() didn't run, neither by us
	 * nor by anyone else, as we still hold the mutex, so we
	 * need to unlock and keep the mr in SMBDIRECT_MR_READY or
	 * SMBDIRECT_MR_DISABLED state.
	 */
	if (!kref_put(&mr->kref, smbdirect_mr_io_free_locked))
		mutex_unlock(&mr->mutex);
}
__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_connection_deregister_mr_io);
494