// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2017, Microsoft Corporation.
 * Copyright (c) 2025, Stefan Metzmacher
 */

#include "internal.h"

/*
 * Allocate the MRs used for RDMA read/write.
 * The number of in-flight MRs is bounded by the responder_resources
 * negotiated with the peer; twice as many MRs are allocated so that used
 * MRs can be recovered while new I/O proceeds. All MRs are kept in
 * sc->mr_io.all.list and are made ready again in
 * smbdirect_connection_deregister_mr_io() after use. The content of a list
 * entry changes as MRs are used and recovered for I/O, but the list links
 * do not change.
 */
int smbdirect_connection_create_mr_list(struct smbdirect_socket *sc)
{
	const struct smbdirect_socket_parameters *sp = &sc->parameters;
	struct smbdirect_mr_io *mr;
	int ret;
	u32 i;

	if (sp->responder_resources == 0) {
		smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR,
				      "responder_resources negotiated as 0\n");
		return -EINVAL;
	}

	/* Allocate twice as many MRs as the negotiated responder_resources */
	for (i = 0; i < sp->responder_resources * 2; i++) {
		mr = kzalloc(sizeof(*mr), GFP_KERNEL);
		if (!mr) {
			ret = -ENOMEM;
			goto kzalloc_mr_failed;
		}

		kref_init(&mr->kref);
		mutex_init(&mr->mutex);

		mr->mr = ib_alloc_mr(sc->ib.pd,
				     sc->mr_io.type,
				     sp->max_frmr_depth);
		if (IS_ERR(mr->mr)) {
			ret = PTR_ERR(mr->mr);
			smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR,
					      "ib_alloc_mr failed ret=%d (%1pe) type=0x%x max_frmr_depth=%u\n",
					      ret, SMBDIRECT_DEBUG_ERR_PTR(ret),
					      sc->mr_io.type, sp->max_frmr_depth);
			goto ib_alloc_mr_failed;
		}

		mr->sgt.sgl = kcalloc(sp->max_frmr_depth,
				      sizeof(struct scatterlist),
				      GFP_KERNEL);
		if (!mr->sgt.sgl) {
			ret = -ENOMEM;
			smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR,
					      "failed to allocate sgl, max_frmr_depth=%u\n",
					      sp->max_frmr_depth);
			goto kcalloc_sgl_failed;
		}

		mr->state = SMBDIRECT_MR_READY;
		mr->socket = sc;

		list_add_tail(&mr->list, &sc->mr_io.all.list);
		atomic_inc(&sc->mr_io.ready.count);
	}

	return 0;

kcalloc_sgl_failed:
	ib_dereg_mr(mr->mr);
ib_alloc_mr_failed:
	mutex_destroy(&mr->mutex);
	kfree(mr);
kzalloc_mr_failed:
	smbdirect_connection_destroy_mr_list(sc);
	return ret;
}

static void smbdirect_mr_io_disable_locked(struct smbdirect_mr_io *mr)
{
	struct smbdirect_socket *sc = mr->socket;

	lockdep_assert_held(&mr->mutex);

	if (mr->state == SMBDIRECT_MR_DISABLED)
		return;

	if (mr->mr)
		ib_dereg_mr(mr->mr);
	if (mr->sgt.nents)
		ib_dma_unmap_sg(sc->ib.dev, mr->sgt.sgl, mr->sgt.nents, mr->dir);
	kfree(mr->sgt.sgl);

	mr->mr = NULL;
	mr->sgt.sgl = NULL;
	mr->sgt.nents = 0;

	mr->state = SMBDIRECT_MR_DISABLED;
}

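/*
 * Final kref release callback.
 *
 * The last reference is always dropped with mr->mutex held; this
 * callback then unlocks and destroys the mutex before freeing the
 * object, which is why the callers below use plain kref_put() under
 * the lock instead of kref_put_mutex().
 */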
static void smbdirect_mr_io_free_locked(struct kref *kref)
{
	struct smbdirect_mr_io *mr =
		container_of(kref, struct smbdirect_mr_io, kref);

	lockdep_assert_held(&mr->mutex);

	/*
	 * smbdirect_mr_io_disable_locked() should already be called!
	 */
	if (WARN_ON_ONCE(mr->state != SMBDIRECT_MR_DISABLED))
		smbdirect_mr_io_disable_locked(mr);

	mutex_unlock(&mr->mutex);
	mutex_destroy(&mr->mutex);
	kfree(mr);
}

void smbdirect_connection_destroy_mr_list(struct smbdirect_socket *sc)
{
	struct smbdirect_mr_io *mr, *tmp;
	LIST_HEAD(all_list);
	unsigned long flags;

	spin_lock_irqsave(&sc->mr_io.all.lock, flags);
	list_splice_tail_init(&sc->mr_io.all.list, &all_list);
	spin_unlock_irqrestore(&sc->mr_io.all.lock, flags);

	list_for_each_entry_safe(mr, tmp, &all_list, list) {
		mutex_lock(&mr->mutex);

		smbdirect_mr_io_disable_locked(mr);
		list_del(&mr->list);
		mr->socket = NULL;

		/*
		 * No kref_put_mutex() as it's already locked.
		 *
		 * If kref_put() returned 1, then
		 * smbdirect_mr_io_free_locked() ran, the mutex
		 * was unlocked and mr is gone.
		 *
		 * If kref_put() returned 0 we know that
		 * smbdirect_mr_io_free_locked() didn't
		 * run. Not by us nor by anyone else, as we
		 * still hold the mutex, so we need to unlock.
		 *
		 * If the mr is still registered it will
		 * be dangling (detached from the connection,
		 * waiting for smbdirect_connection_deregister_mr_io()
		 * to be called in order to free the memory).
		 */
		if (!kref_put(&mr->kref, smbdirect_mr_io_free_locked))
			mutex_unlock(&mr->mutex);
	}
}

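/*
 * MR lifecycle overview (as implemented below):
 *
 *   SMBDIRECT_MR_READY       - available in sc->mr_io.all.list
 *   SMBDIRECT_MR_REGISTERED  - handed out by smbdirect_connection_get_mr_io()
 *   SMBDIRECT_MR_INVALIDATED - invalidated locally or by the remote peer
 *   SMBDIRECT_MR_READY       - made reusable in ..._deregister_mr_io()
 *   SMBDIRECT_MR_ERROR       - registration failed, connection is torn down
 *   SMBDIRECT_MR_DISABLED    - terminal, all resources released
 */
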
/*
 * Get an MR from sc->mr_io.all.list. This function waits until there is at
 * least one MR available in the list, or until the connection drops, in
 * which case it returns NULL. Several CPUs issuing I/O may try to get an
 * MR at the same time; sc->mr_io.all.lock is used to protect against that.
 */
static struct smbdirect_mr_io *
smbdirect_connection_get_mr_io(struct smbdirect_socket *sc)
{
	struct smbdirect_mr_io *mr;
	unsigned long flags;
	int ret;

again:
	ret = wait_event_interruptible(sc->mr_io.ready.wait_queue,
				       atomic_read(&sc->mr_io.ready.count) ||
				       sc->status != SMBDIRECT_SOCKET_CONNECTED);
	if (ret) {
		smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR,
				      "wait_event_interruptible ret=%d (%1pe)\n",
				      ret, SMBDIRECT_DEBUG_ERR_PTR(ret));
		return NULL;
	}

	if (sc->status != SMBDIRECT_SOCKET_CONNECTED) {
		smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR,
				      "sc->status=%s sc->first_error=%1pe\n",
				      smbdirect_socket_status_string(sc->status),
				      SMBDIRECT_DEBUG_ERR_PTR(sc->first_error));
		return NULL;
	}

	spin_lock_irqsave(&sc->mr_io.all.lock, flags);
	list_for_each_entry(mr, &sc->mr_io.all.list, list) {
		if (mr->state == SMBDIRECT_MR_READY) {
			mr->state = SMBDIRECT_MR_REGISTERED;
			kref_get(&mr->kref);
			spin_unlock_irqrestore(&sc->mr_io.all.lock, flags);
			atomic_dec(&sc->mr_io.ready.count);
			atomic_inc(&sc->mr_io.used.count);
			return mr;
		}
	}
	spin_unlock_irqrestore(&sc->mr_io.all.lock, flags);

	/*
	 * We may fail to find a ready MR because other tasks raced us
	 * for one at the same time. If that is the case, wait and retry.
	 */
	goto again;
}

static void smbdirect_connection_mr_io_register_done(struct ib_cq *cq, struct ib_wc *wc)
{
	struct smbdirect_mr_io *mr =
		container_of(wc->wr_cqe, struct smbdirect_mr_io, cqe);
	struct smbdirect_socket *sc = mr->socket;

	if (wc->status != IB_WC_SUCCESS) {
		smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR,
				      "wc->status=%s opcode=%d\n",
				      ib_wc_status_msg(wc->status), wc->opcode);
		smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED);
	}
}

static void smbdirect_connection_mr_io_local_inv_done(struct ib_cq *cq, struct ib_wc *wc)
{
	struct smbdirect_mr_io *mr =
		container_of(wc->wr_cqe, struct smbdirect_mr_io, cqe);
	struct smbdirect_socket *sc = mr->socket;

	mr->state = SMBDIRECT_MR_INVALIDATED;

	if (wc->status != IB_WC_SUCCESS) {
		smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR,
				      "invalidate failed status=%s\n",
				      ib_wc_status_msg(wc->status));
		smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED);
	}
	complete(&mr->invalidate_done);
}

/*
 * Transcribe the pages from an iterator into an MR scatterlist.
 */
static int smbdirect_iter_to_sgt(struct iov_iter *iter,
				 struct sg_table *sgt,
				 unsigned int max_sg)
{
	int ret;

	memset(sgt->sgl, 0, max_sg * sizeof(struct scatterlist));

	ret = extract_iter_to_sg(iter, iov_iter_count(iter), sgt, max_sg, 0);
	WARN_ON(ret < 0);
	if (sgt->nents > 0)
		sg_mark_end(&sgt->sgl[sgt->nents - 1]);

	return ret;
}

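/*
 * Note: extract_iter_to_sg() advances the iterator, updates sgt->nents
 * and returns the number of SG entries used (or a negative error).
 * smbdirect_connection_register_mr_io() below checks the page count
 * against max_frmr_depth before calling this helper, so the scatterlist
 * allocated in smbdirect_connection_create_mr_list() is large enough.
 */
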
/*
 * Register memory for RDMA read/write.
 * iter: the buffer to register memory with
 * writing: true if this is an RDMA write (SMB read), false for RDMA read
 * need_invalidate: true if this MR needs to be locally invalidated after I/O
 * return value: the registered MR, or NULL on failure
 */
struct smbdirect_mr_io *
smbdirect_connection_register_mr_io(struct smbdirect_socket *sc,
				    struct iov_iter *iter,
				    bool writing,
				    bool need_invalidate)
{
	const struct smbdirect_socket_parameters *sp = &sc->parameters;
	struct smbdirect_mr_io *mr;
	int ret, num_pages;
	struct ib_reg_wr *reg_wr;

	num_pages = iov_iter_npages(iter, sp->max_frmr_depth + 1);
	if (num_pages > sp->max_frmr_depth) {
		smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR,
				      "num_pages=%d max_frmr_depth=%d\n",
				      num_pages, sp->max_frmr_depth);
		WARN_ON_ONCE(1);
		return NULL;
	}

	mr = smbdirect_connection_get_mr_io(sc);
	if (!mr) {
		smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR,
				      "smbdirect_connection_get_mr_io returning NULL\n");
		return NULL;
	}

	mutex_lock(&mr->mutex);

	mr->dir = writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
	mr->need_invalidate = need_invalidate;
	mr->sgt.nents = 0;
	mr->sgt.orig_nents = 0;

	smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_INFO,
			      "num_pages=%u count=%zu depth=%u\n",
			      num_pages, iov_iter_count(iter), sp->max_frmr_depth);
	smbdirect_iter_to_sgt(iter, &mr->sgt, sp->max_frmr_depth);

	/* ib_dma_map_sg() returns 0 on failure */
	ret = ib_dma_map_sg(sc->ib.dev, mr->sgt.sgl, mr->sgt.nents, mr->dir);
	if (!ret) {
		smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR,
				      "ib_dma_map_sg num_pages=%u dir=%x ret=%d (%1pe)\n",
				      num_pages, mr->dir, ret, SMBDIRECT_DEBUG_ERR_PTR(ret));
		goto dma_map_error;
	}

	ret = ib_map_mr_sg(mr->mr, mr->sgt.sgl, mr->sgt.nents, NULL, PAGE_SIZE);
	if (ret != mr->sgt.nents) {
		smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR,
				      "ib_map_mr_sg failed ret = %d nents = %u\n",
				      ret, mr->sgt.nents);
		/* a short mapping is a failure without an errno */
		if (ret >= 0)
			ret = -EINVAL;
		goto map_mr_error;
	}

	/* Bump the rkey so a stale key from a previous use cannot match */
	ib_update_fast_reg_key(mr->mr, ib_inc_rkey(mr->mr->rkey));
	reg_wr = &mr->wr;
	reg_wr->wr.opcode = IB_WR_REG_MR;
	mr->cqe.done = smbdirect_connection_mr_io_register_done;
	reg_wr->wr.wr_cqe = &mr->cqe;
	reg_wr->wr.num_sge = 0;
	reg_wr->wr.send_flags = IB_SEND_SIGNALED;
	reg_wr->mr = mr->mr;
	reg_wr->key = mr->mr->rkey;
	reg_wr->access = writing ?
		IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
		IB_ACCESS_REMOTE_READ;

	/*
	 * There is no need to wait for completion of ib_post_send
	 * on IB_WR_REG_MR. Hardware enforces a barrier and order of execution
	 * on the next ib_post_send when we actually send I/O to the remote peer.
	 */
	ret = ib_post_send(sc->ib.qp, &reg_wr->wr, NULL);
	if (!ret) {
		/*
		 * smbdirect_connection_get_mr_io() gave us a reference
		 * via kref_get(&mr->kref), we keep that and let
		 * the caller use smbdirect_connection_deregister_mr_io()
		 * to remove it again.
		 */
		mutex_unlock(&mr->mutex);
		return mr;
	}

	smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR,
			      "ib_post_send failed ret=%d (%1pe) reg_wr->key=0x%x\n",
			      ret, SMBDIRECT_DEBUG_ERR_PTR(ret), reg_wr->key);

map_mr_error:
	ib_dma_unmap_sg(sc->ib.dev, mr->sgt.sgl, mr->sgt.nents, mr->dir);

dma_map_error:
	mr->sgt.nents = 0;
	mr->state = SMBDIRECT_MR_ERROR;
	atomic_dec(&sc->mr_io.used.count);

	smbdirect_socket_schedule_cleanup(sc, ret);

	/*
	 * smbdirect_connection_get_mr_io() gave us a reference
	 * via kref_get(&mr->kref), we need to remove it again
	 * on error.
	 *
	 * No kref_put_mutex() as it's already locked.
	 *
	 * If kref_put() returned 1, then
	 * smbdirect_mr_io_free_locked() ran, the mutex
	 * was unlocked and mr is gone.
	 *
	 * If kref_put() returned 0 we know that
	 * smbdirect_mr_io_free_locked() didn't
	 * run. Not by us nor by anyone else, as we
	 * still hold the mutex, so we need to unlock.
	 */
	if (!kref_put(&mr->kref, smbdirect_mr_io_free_locked))
		mutex_unlock(&mr->mutex);
	return NULL;
}
__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_connection_register_mr_io);

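/*
 * Fill a buffer descriptor v1 (offset/token/length) describing the
 * registered MR, so it can be sent to the peer to request RDMA
 * read/write access. If the MR is not (or no longer) registered, the
 * descriptor is filled with all-ones sentinel values that no valid
 * registration would produce.
 */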
void smbdirect_mr_io_fill_buffer_descriptor(struct smbdirect_mr_io *mr,
					    struct smbdirect_buffer_descriptor_v1 *v1)
{
	mutex_lock(&mr->mutex);
	if (mr->state == SMBDIRECT_MR_REGISTERED) {
		v1->offset = cpu_to_le64(mr->mr->iova);
		v1->token = cpu_to_le32(mr->mr->rkey);
		v1->length = cpu_to_le32(mr->mr->length);
	} else {
		v1->offset = cpu_to_le64(U64_MAX);
		v1->token = cpu_to_le32(U32_MAX);
		v1->length = cpu_to_le32(U32_MAX);
	}
	mutex_unlock(&mr->mutex);
}
__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_mr_io_fill_buffer_descriptor);

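/*
 * Note on invalidation: if the peer invalidates the MR remotely
 * (e.g. via send-with-invalidate), the caller passes
 * need_invalidate=false at registration time and
 * smbdirect_connection_deregister_mr_io() below skips the
 * IB_WR_LOCAL_INV round trip.
 */
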
/*
 * Deregister an MR after I/O is done.
 * This function may wait if remote invalidation is not used
 * and we have to locally invalidate the buffer to prevent the remote peer
 * from modifying the data after the upper layer has consumed it.
 */
void smbdirect_connection_deregister_mr_io(struct smbdirect_mr_io *mr)
{
	struct smbdirect_socket *sc = mr->socket;
	int ret = 0;

lock_again:
	mutex_lock(&mr->mutex);
	if (mr->state == SMBDIRECT_MR_DISABLED)
		goto put_kref;

	if (sc->status != SMBDIRECT_SOCKET_CONNECTED) {
		smbdirect_mr_io_disable_locked(mr);
		goto put_kref;
	}

	if (mr->need_invalidate) {
		struct ib_send_wr *wr = &mr->inv_wr;

		/* Need to finish local invalidation before returning */
		wr->opcode = IB_WR_LOCAL_INV;
		mr->cqe.done = smbdirect_connection_mr_io_local_inv_done;
		wr->wr_cqe = &mr->cqe;
		wr->num_sge = 0;
		wr->ex.invalidate_rkey = mr->mr->rkey;
		wr->send_flags = IB_SEND_SIGNALED;

		init_completion(&mr->invalidate_done);
		ret = ib_post_send(sc->ib.qp, wr, NULL);
		if (ret) {
			smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR,
					      "ib_post_send failed ret=%d (%1pe)\n",
					      ret, SMBDIRECT_DEBUG_ERR_PTR(ret));
			smbdirect_mr_io_disable_locked(mr);
			smbdirect_socket_schedule_cleanup(sc, ret);
			goto done;
		}

		/*
		 * We still hold the reference to mr
		 * so we can unlock while waiting.
		 */
		mutex_unlock(&mr->mutex);
		wait_for_completion(&mr->invalidate_done);
		mr->need_invalidate = false;
		goto lock_again;
	} else {
		/*
		 * The remote peer already invalidated the MR for us,
		 * so just mark it SMBDIRECT_MR_INVALIDATED and fall
		 * through to make it ready for the next use.
		 */
		mr->state = SMBDIRECT_MR_INVALIDATED;
	}

	if (mr->sgt.nents) {
		ib_dma_unmap_sg(sc->ib.dev, mr->sgt.sgl, mr->sgt.nents, mr->dir);
		mr->sgt.nents = 0;
	}

	WARN_ONCE(mr->state != SMBDIRECT_MR_INVALIDATED,
		  "mr->state[%u] != SMBDIRECT_MR_INVALIDATED[%u]\n",
		  mr->state, SMBDIRECT_MR_INVALIDATED);
	mr->state = SMBDIRECT_MR_READY;
	if (atomic_inc_return(&sc->mr_io.ready.count) == 1)
		wake_up(&sc->mr_io.ready.wait_queue);

done:
	atomic_dec(&sc->mr_io.used.count);

put_kref:
	/*
	 * No kref_put_mutex() as it's already locked.
	 *
	 * If kref_put() returned 1, then
	 * smbdirect_mr_io_free_locked() ran, the mutex
	 * was unlocked and mr is gone.
	 *
	 * If kref_put() returned 0 we know that
	 * smbdirect_mr_io_free_locked() didn't
	 * run. Not by us nor by anyone else, as we
	 * still hold the mutex, so we need to unlock
	 * and keep the mr in SMBDIRECT_MR_READY or
	 * SMBDIRECT_MR_ERROR state.
	 */
	if (!kref_put(&mr->kref, smbdirect_mr_io_free_locked))
		mutex_unlock(&mr->mutex);
}
__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_connection_deregister_mr_io);