// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2017, Microsoft Corporation.
 * Copyright (c) 2025, Stefan Metzmacher
 */

#include "internal.h"

/*
 * Allocate the MRs used for RDMA read/write.
 * The number of MRs will not exceed the hardware capability negotiated
 * in responder_resources. All MRs are kept in mr_list. An MR can be
 * recovered after it is used; recovery is done in smbd_mr_recovery_work.
 * The content of a list entry changes as MRs are used and recovered for
 * I/O, but the list links do not change.
 */
int smbdirect_connection_create_mr_list(struct smbdirect_socket *sc)
{
	const struct smbdirect_socket_parameters *sp = &sc->parameters;
	struct smbdirect_mr_io *mr;
	int ret;
	u32 i;

	if (sp->responder_resources == 0) {
		smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR,
				      "responder_resources negotiated as 0\n");
		return -EINVAL;
	}

	/* Allocate more MRs (2x) than hardware responder_resources */
	for (i = 0; i < sp->responder_resources * 2; i++) {
		mr = kzalloc_obj(*mr);
		if (!mr) {
			ret = -ENOMEM;
			goto kzalloc_mr_failed;
		}

		kref_init(&mr->kref);
		mutex_init(&mr->mutex);

		mr->mr = ib_alloc_mr(sc->ib.pd,
				     sc->mr_io.type,
				     sp->max_frmr_depth);
		if (IS_ERR(mr->mr)) {
			ret = PTR_ERR(mr->mr);
			smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR,
					      "ib_alloc_mr failed ret=%d (%1pe) type=0x%x max_frmr_depth=%u\n",
					      ret, SMBDIRECT_DEBUG_ERR_PTR(ret),
					      sc->mr_io.type, sp->max_frmr_depth);
			goto ib_alloc_mr_failed;
		}
		mr->sgt.sgl = kzalloc_objs(struct scatterlist, sp->max_frmr_depth);
		if (!mr->sgt.sgl) {
			ret = -ENOMEM;
			smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR,
					      "failed to allocate sgl, max_frmr_depth=%u\n",
					      sp->max_frmr_depth);
			goto kcalloc_sgl_failed;
		}
		mr->state = SMBDIRECT_MR_READY;
		mr->socket = sc;

		list_add_tail(&mr->list, &sc->mr_io.all.list);
		atomic_inc(&sc->mr_io.ready.count);
	}

	return 0;

kcalloc_sgl_failed:
	ib_dereg_mr(mr->mr);
ib_alloc_mr_failed:
	mutex_destroy(&mr->mutex);
	kfree(mr);
kzalloc_mr_failed:
	smbdirect_connection_destroy_mr_list(sc);
	return ret;
}

static void smbdirect_mr_io_disable_locked(struct smbdirect_mr_io *mr)
{
	struct smbdirect_socket *sc = mr->socket;

	lockdep_assert_held(&mr->mutex);

	if (mr->state == SMBDIRECT_MR_DISABLED)
		return;

	if (mr->mr)
		ib_dereg_mr(mr->mr);
	if (mr->sgt.nents)
		ib_dma_unmap_sg(sc->ib.dev, mr->sgt.sgl, mr->sgt.nents, mr->dir);
	kfree(mr->sgt.sgl);

	mr->mr = NULL;
	mr->sgt.sgl = NULL;
	mr->sgt.nents = 0;

	mr->state = SMBDIRECT_MR_DISABLED;
}

static void smbdirect_mr_io_free_locked(struct kref *kref)
{
	struct smbdirect_mr_io *mr =
		container_of(kref, struct smbdirect_mr_io, kref);

	lockdep_assert_held(&mr->mutex);

	/*
	 * smbdirect_mr_io_disable_locked() should already have been called!
	 */
	if (WARN_ON_ONCE(mr->state != SMBDIRECT_MR_DISABLED))
		smbdirect_mr_io_disable_locked(mr);

	mutex_unlock(&mr->mutex);
	mutex_destroy(&mr->mutex);
	kfree(mr);
}

void smbdirect_connection_destroy_mr_list(struct smbdirect_socket *sc)
{
	struct smbdirect_mr_io *mr, *tmp;
	LIST_HEAD(all_list);
	unsigned long flags;

	spin_lock_irqsave(&sc->mr_io.all.lock, flags);
	list_splice_tail_init(&sc->mr_io.all.list, &all_list);
	spin_unlock_irqrestore(&sc->mr_io.all.lock, flags);

	list_for_each_entry_safe(mr, tmp, &all_list, list) {
		mutex_lock(&mr->mutex);

		smbdirect_mr_io_disable_locked(mr);
		list_del(&mr->list);
		mr->socket = NULL;

		/*
		 * No kref_put_mutex() as it's already locked.
		 *
		 * If smbdirect_mr_io_free_locked() was called,
		 * the mutex was unlocked and mr is gone;
		 * in that case kref_put() returned 1.
		 *
		 * If kref_put() returned 0 we know that
		 * smbdirect_mr_io_free_locked() didn't
		 * run. Not by us nor by anyone else, as we
		 * still hold the mutex, so we need to unlock.
		 *
		 * If the mr is still registered it will
		 * be dangling (detached from the connection),
		 * waiting for smbd_deregister_mr() to be
		 * called in order to free the memory.
		 */
		if (!kref_put(&mr->kref, smbdirect_mr_io_free_locked))
			mutex_unlock(&mr->mutex);
	}
}
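
/*
 * A minimal sketch (illustrative only, not built as part of this file)
 * of the put-under-mutex pattern used above and in the register and
 * deregister paths below, assuming an object with a kref and a mutex
 * whose release callback unlocks and destroys the mutex before freeing,
 * as smbdirect_mr_io_free_locked() does:
 *
 *	mutex_lock(&mr->mutex);
 *	...
 *	if (!kref_put(&mr->kref, smbdirect_mr_io_free_locked))
 *		mutex_unlock(&mr->mutex);
 */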

/*
 * Get an MR from mr_list. This function waits until there is at least
 * one MR available in the list. Several CPUs may issue I/O and try to
 * get an MR at the same time; the list lock (sc->mr_io.all.lock)
 * protects against that.
 */
static struct smbdirect_mr_io *
smbdirect_connection_get_mr_io(struct smbdirect_socket *sc)
{
	struct smbdirect_mr_io *mr;
	unsigned long flags;
	int ret;

again:
	ret = wait_event_interruptible(sc->mr_io.ready.wait_queue,
				       atomic_read(&sc->mr_io.ready.count) ||
				       sc->status != SMBDIRECT_SOCKET_CONNECTED);
	if (ret) {
		smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR,
				      "wait_event_interruptible ret=%d (%1pe)\n",
				      ret, SMBDIRECT_DEBUG_ERR_PTR(ret));
		return NULL;
	}

	if (sc->status != SMBDIRECT_SOCKET_CONNECTED) {
		smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR,
				      "sc->status=%s sc->first_error=%1pe\n",
				      smbdirect_socket_status_string(sc->status),
				      SMBDIRECT_DEBUG_ERR_PTR(sc->first_error));
		return NULL;
	}

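	/*
	 * Scan the list for a ready MR; the list lock serializes
	 * concurrent getters so each READY MR is handed out only once.
	 */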
	spin_lock_irqsave(&sc->mr_io.all.lock, flags);
	list_for_each_entry(mr, &sc->mr_io.all.list, list) {
		if (mr->state == SMBDIRECT_MR_READY) {
			mr->state = SMBDIRECT_MR_REGISTERED;
			kref_get(&mr->kref);
			spin_unlock_irqrestore(&sc->mr_io.all.lock, flags);
			atomic_dec(&sc->mr_io.ready.count);
			atomic_inc(&sc->mr_io.used.count);
			return mr;
		}
	}

	spin_unlock_irqrestore(&sc->mr_io.all.lock, flags);
	/*
	 * We may fail to find a ready MR because other processes grabbed
	 * the available MRs first. If that is the case, retry.
	 */
	goto again;
}

static void smbdirect_connection_mr_io_register_done(struct ib_cq *cq, struct ib_wc *wc)
{
	struct smbdirect_mr_io *mr =
		container_of(wc->wr_cqe, struct smbdirect_mr_io, cqe);
	struct smbdirect_socket *sc = mr->socket;

	if (wc->status != IB_WC_SUCCESS) {
		smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR,
				      "wc->status=%s opcode=%d\n",
				      ib_wc_status_msg(wc->status), wc->opcode);
		smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED);
	}
}

static void smbdirect_connection_mr_io_local_inv_done(struct ib_cq *cq, struct ib_wc *wc)
{
	struct smbdirect_mr_io *mr =
		container_of(wc->wr_cqe, struct smbdirect_mr_io, cqe);
	struct smbdirect_socket *sc = mr->socket;

	mr->state = SMBDIRECT_MR_INVALIDATED;
	if (wc->status != IB_WC_SUCCESS) {
		smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR,
				      "invalidate failed status=%s\n",
				      ib_wc_status_msg(wc->status));
		smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED);
	}
	complete(&mr->invalidate_done);
}

/*
 * Transcribe the pages from an iterator into an MR scatterlist.
 */
static int smbdirect_iter_to_sgt(struct iov_iter *iter,
				 struct sg_table *sgt,
				 unsigned int max_sg)
{
	int ret;

	memset(sgt->sgl, 0, max_sg * sizeof(struct scatterlist));

	ret = extract_iter_to_sg(iter, iov_iter_count(iter), sgt, max_sg, 0);
	WARN_ON(ret < 0);
	if (sgt->nents > 0)
		sg_mark_end(&sgt->sgl[sgt->nents - 1]);

	return ret;
}

/*
 * Register memory for RDMA read/write.
 * iter: the buffer to register memory with
 * writing: true if this is an RDMA write (SMB read), false for RDMA read
 * need_invalidate: true if this MR needs to be locally invalidated after I/O
 * Return value: the registered MR, or NULL on failure.
 */
struct smbdirect_mr_io *
smbdirect_connection_register_mr_io(struct smbdirect_socket *sc,
				    struct iov_iter *iter,
				    bool writing,
				    bool need_invalidate)
{
	const struct smbdirect_socket_parameters *sp = &sc->parameters;
	struct smbdirect_mr_io *mr;
	int ret, num_pages, num_mapped;
	struct ib_reg_wr *reg_wr;

	num_pages = iov_iter_npages(iter, sp->max_frmr_depth + 1);
	if (num_pages > sp->max_frmr_depth) {
		smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR,
				      "num_pages=%d max_frmr_depth=%d\n",
				      num_pages, sp->max_frmr_depth);
		WARN_ON_ONCE(1);
		return NULL;
	}

	mr = smbdirect_connection_get_mr_io(sc);
	if (!mr) {
		smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR,
				      "smbdirect_connection_get_mr_io returning NULL\n");
		return NULL;
	}

	mutex_lock(&mr->mutex);

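	/*
	 * An RDMA write (SMB read) means the remote peer writes into
	 * this buffer, so the device transfers data into memory
	 * (DMA_FROM_DEVICE); an RDMA read (SMB write) transfers data
	 * out of memory (DMA_TO_DEVICE).
	 */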
	mr->dir = writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
	mr->need_invalidate = need_invalidate;
	mr->sgt.nents = 0;
	mr->sgt.orig_nents = 0;

	smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_INFO,
			      "num_pages=%u count=%zu depth=%u\n",
			      num_pages, iov_iter_count(iter), sp->max_frmr_depth);
	smbdirect_iter_to_sgt(iter, &mr->sgt, sp->max_frmr_depth);

	num_mapped = ib_dma_map_sg(sc->ib.dev, mr->sgt.sgl, mr->sgt.nents, mr->dir);
	if (!num_mapped) {
		smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR,
				      "ib_dma_map_sg num_pages=%u dir=%x num_mapped=%d\n",
				      num_pages, mr->dir, num_mapped);
		ret = -EIO;
		goto dma_map_error;
	}

	ret = ib_map_mr_sg(mr->mr, mr->sgt.sgl, num_mapped, NULL, PAGE_SIZE);
	if (ret != num_mapped) {
		smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR,
				      "ib_map_mr_sg failed ret = %d num_mapped = %u\n",
				      ret, num_mapped);
		if (ret >= 0)
			ret = -EIO;
		goto map_mr_error;
	}

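	/*
	 * Bump the consumer-owned key byte of the rkey so a stale rkey
	 * from an earlier registration of this MR can no longer match.
	 */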
	ib_update_fast_reg_key(mr->mr, ib_inc_rkey(mr->mr->rkey));
	reg_wr = &mr->wr;
	reg_wr->wr.opcode = IB_WR_REG_MR;
	mr->cqe.done = smbdirect_connection_mr_io_register_done;
	reg_wr->wr.wr_cqe = &mr->cqe;
	reg_wr->wr.num_sge = 0;
	reg_wr->wr.send_flags = IB_SEND_SIGNALED;
	reg_wr->mr = mr->mr;
	reg_wr->key = mr->mr->rkey;
	reg_wr->access = writing ?
		IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
		IB_ACCESS_REMOTE_READ;

	/*
	 * There is no need to wait for completion of the IB_WR_REG_MR
	 * ib_post_send. Hardware enforces a barrier and ordering on the
	 * next ib_post_send, when we actually send I/O to the remote peer.
	 */
	ret = ib_post_send(sc->ib.qp, &reg_wr->wr, NULL);
	if (!ret) {
		/*
		 * smbdirect_connection_get_mr_io() gave us a reference
		 * via kref_get(&mr->kref); we keep that and let
		 * the caller use smbdirect_connection_deregister_mr_io()
		 * to drop it again.
		 */
		mutex_unlock(&mr->mutex);
		return mr;
	}

	smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR,
			      "ib_post_send failed ret=%d (%1pe) reg_wr->key=0x%x\n",
			      ret, SMBDIRECT_DEBUG_ERR_PTR(ret), reg_wr->key);

map_mr_error:
	ib_dma_unmap_sg(sc->ib.dev, mr->sgt.sgl, mr->sgt.nents, mr->dir);

dma_map_error:
	mr->sgt.nents = 0;
	mr->state = SMBDIRECT_MR_ERROR;
	atomic_dec(&sc->mr_io.used.count);

	smbdirect_socket_schedule_cleanup(sc, ret);

	/*
	 * smbdirect_connection_get_mr_io() gave us a reference
	 * via kref_get(&mr->kref); we need to drop it again
	 * on error.
	 *
	 * No kref_put_mutex() as it's already locked.
	 *
	 * If smbdirect_mr_io_free_locked() was called,
	 * the mutex was unlocked and mr is gone;
	 * in that case kref_put() returned 1.
	 *
	 * If kref_put() returned 0 we know that
	 * smbdirect_mr_io_free_locked() didn't
	 * run. Not by us nor by anyone else, as we
	 * still hold the mutex, so we need to unlock.
	 */
	if (!kref_put(&mr->kref, smbdirect_mr_io_free_locked))
		mutex_unlock(&mr->mutex);
	return NULL;
}
EXPORT_SYMBOL_GPL(smbdirect_connection_register_mr_io);

void smbdirect_mr_io_fill_buffer_descriptor(struct smbdirect_mr_io *mr,
					    struct smbdirect_buffer_descriptor_v1 *v1)
{
	mutex_lock(&mr->mutex);
	if (mr->state == SMBDIRECT_MR_REGISTERED) {
		v1->offset = cpu_to_le64(mr->mr->iova);
		v1->token = cpu_to_le32(mr->mr->rkey);
		v1->length = cpu_to_le32(mr->mr->length);
	} else {
		v1->offset = cpu_to_le64(U64_MAX);
		v1->token = cpu_to_le32(U32_MAX);
		v1->length = cpu_to_le32(U32_MAX);
	}
	mutex_unlock(&mr->mutex);
}
EXPORT_SYMBOL_GPL(smbdirect_mr_io_fill_buffer_descriptor);
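
/*
 * A minimal usage sketch of the exported MR API (hypothetical caller,
 * illustrative only, not part of this file), assuming a connected
 * socket `sc` and a prepared iov_iter `iter` for an SMB read:
 *
 *	struct smbdirect_mr_io *mr;
 *	struct smbdirect_buffer_descriptor_v1 v1;
 *
 *	mr = smbdirect_connection_register_mr_io(sc, &iter, true, true);
 *	if (!mr)
 *		return -EAGAIN;
 *	smbdirect_mr_io_fill_buffer_descriptor(mr, &v1);
 *	... place v1 in the request, send it and wait for the I/O ...
 *	smbdirect_connection_deregister_mr_io(mr);
 */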

/*
 * Deregister an MR after the I/O is done.
 * This function may wait if remote invalidation is not used
 * and we have to locally invalidate the buffer to prevent the data from
 * being modified by the remote peer after the upper layer consumes it.
 */
void smbdirect_connection_deregister_mr_io(struct smbdirect_mr_io *mr)
{
	struct smbdirect_socket *sc = mr->socket;
	int ret = 0;

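	/*
	 * The mutex is dropped below while waiting for a local
	 * invalidation to complete, so re-take it and re-check the
	 * MR state afterwards.
	 */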
lock_again:
	mutex_lock(&mr->mutex);
	if (mr->state == SMBDIRECT_MR_DISABLED)
		goto put_kref;

	if (sc->status != SMBDIRECT_SOCKET_CONNECTED) {
		smbdirect_mr_io_disable_locked(mr);
		goto put_kref;
	}

	if (mr->need_invalidate) {
		struct ib_send_wr *wr = &mr->inv_wr;

		/* Need to finish local invalidation before returning */
		wr->opcode = IB_WR_LOCAL_INV;
		mr->cqe.done = smbdirect_connection_mr_io_local_inv_done;
		wr->wr_cqe = &mr->cqe;
		wr->num_sge = 0;
		wr->ex.invalidate_rkey = mr->mr->rkey;
		wr->send_flags = IB_SEND_SIGNALED;

		init_completion(&mr->invalidate_done);
		ret = ib_post_send(sc->ib.qp, wr, NULL);
		if (ret) {
			smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR,
					      "ib_post_send failed ret=%d (%1pe)\n",
					      ret, SMBDIRECT_DEBUG_ERR_PTR(ret));
			smbdirect_mr_io_disable_locked(mr);
			smbdirect_socket_schedule_cleanup(sc, ret);
			goto done;
		}

		/*
		 * We still hold the reference to mr
		 * so we can unlock while waiting.
		 */
		mutex_unlock(&mr->mutex);
		wait_for_completion(&mr->invalidate_done);
		mr->need_invalidate = false;
		goto lock_again;
	} else
		/*
		 * For remote invalidation, just mark the MR
		 * SMBDIRECT_MR_INVALIDATED; it is made ready again for
		 * the next use below.
		 */
		mr->state = SMBDIRECT_MR_INVALIDATED;

	if (mr->sgt.nents) {
		ib_dma_unmap_sg(sc->ib.dev, mr->sgt.sgl, mr->sgt.nents, mr->dir);
		mr->sgt.nents = 0;
	}

	WARN_ONCE(mr->state != SMBDIRECT_MR_INVALIDATED,
		  "mr->state[%u] != SMBDIRECT_MR_INVALIDATED[%u]\n",
		  mr->state, SMBDIRECT_MR_INVALIDATED);
	mr->state = SMBDIRECT_MR_READY;
	if (atomic_inc_return(&sc->mr_io.ready.count) == 1)
		wake_up(&sc->mr_io.ready.wait_queue);

done:
	atomic_dec(&sc->mr_io.used.count);

put_kref:
	/*
	 * No kref_put_mutex() as it's already locked.
	 *
	 * If smbdirect_mr_io_free_locked() was called,
	 * the mutex was unlocked and mr is gone;
	 * in that case kref_put() returned 1.
	 *
	 * If kref_put() returned 0 we know that
	 * smbdirect_mr_io_free_locked() didn't
	 * run. Not by us nor by anyone else, as we
	 * still hold the mutex, so we need to unlock
	 * and keep the mr in SMBDIRECT_MR_READY or
	 * SMBDIRECT_MR_ERROR state.
	 */
	if (!kref_put(&mr->kref, smbdirect_mr_io_free_locked))
		mutex_unlock(&mr->mutex);
}
EXPORT_SYMBOL_GPL(smbdirect_connection_deregister_mr_io);