1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Copyright (C) 2017, Microsoft Corporation. 4 * Copyright (c) 2025, Stefan Metzmacher 5 */ 6 7 #include "internal.h" 8 9 /* 10 * Allocate MRs used for RDMA read/write 11 * The number of MRs will not exceed hardware capability in responder_resources 12 * All MRs are kept in mr_list. The MR can be recovered after it's used 13 * Recovery is done in smbd_mr_recovery_work. The content of list entry changes 14 * as MRs are used and recovered for I/O, but the list links will not change 15 */ 16 int smbdirect_connection_create_mr_list(struct smbdirect_socket *sc) 17 { 18 const struct smbdirect_socket_parameters *sp = &sc->parameters; 19 struct smbdirect_mr_io *mr; 20 int ret; 21 u32 i; 22 23 if (sp->responder_resources == 0) { 24 smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR, 25 "responder_resources negotiated as 0\n"); 26 return -EINVAL; 27 } 28 29 /* Allocate more MRs (2x) than hardware responder_resources */ 30 for (i = 0; i < sp->responder_resources * 2; i++) { 31 mr = kzalloc_obj(*mr); 32 if (!mr) { 33 ret = -ENOMEM; 34 goto kzalloc_mr_failed; 35 } 36 37 kref_init(&mr->kref); 38 mutex_init(&mr->mutex); 39 40 mr->mr = ib_alloc_mr(sc->ib.pd, 41 sc->mr_io.type, 42 sp->max_frmr_depth); 43 if (IS_ERR(mr->mr)) { 44 ret = PTR_ERR(mr->mr); 45 smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR, 46 "ib_alloc_mr failed ret=%d (%1pe) type=0x%x max_frmr_depth=%u\n", 47 ret, SMBDIRECT_DEBUG_ERR_PTR(ret), 48 sc->mr_io.type, sp->max_frmr_depth); 49 goto ib_alloc_mr_failed; 50 } 51 mr->sgt.sgl = kzalloc_objs(struct scatterlist, sp->max_frmr_depth); 52 if (!mr->sgt.sgl) { 53 ret = -ENOMEM; 54 smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR, 55 "failed to allocate sgl, max_frmr_depth=%u\n", 56 sp->max_frmr_depth); 57 goto kcalloc_sgl_failed; 58 } 59 mr->state = SMBDIRECT_MR_READY; 60 mr->socket = sc; 61 62 list_add_tail(&mr->list, &sc->mr_io.all.list); 63 atomic_inc(&sc->mr_io.ready.count); 64 } 65 66 return 0; 67 68 kcalloc_sgl_failed: 69 ib_dereg_mr(mr->mr); 70 ib_alloc_mr_failed: 71 mutex_destroy(&mr->mutex); 72 kfree(mr); 73 kzalloc_mr_failed: 74 smbdirect_connection_destroy_mr_list(sc); 75 return ret; 76 } 77 78 static void smbdirect_mr_io_disable_locked(struct smbdirect_mr_io *mr) 79 { 80 struct smbdirect_socket *sc = mr->socket; 81 82 lockdep_assert_held(&mr->mutex); 83 84 if (mr->state == SMBDIRECT_MR_DISABLED) 85 return; 86 87 if (mr->mr) 88 ib_dereg_mr(mr->mr); 89 if (mr->sgt.nents) 90 ib_dma_unmap_sg(sc->ib.dev, mr->sgt.sgl, mr->sgt.nents, mr->dir); 91 kfree(mr->sgt.sgl); 92 93 mr->mr = NULL; 94 mr->sgt.sgl = NULL; 95 mr->sgt.nents = 0; 96 97 mr->state = SMBDIRECT_MR_DISABLED; 98 } 99 100 static void smbdirect_mr_io_free_locked(struct kref *kref) 101 { 102 struct smbdirect_mr_io *mr = 103 container_of(kref, struct smbdirect_mr_io, kref); 104 105 lockdep_assert_held(&mr->mutex); 106 107 /* 108 * smbdirect_mr_io_disable_locked() should already be called! 109 */ 110 if (WARN_ON_ONCE(mr->state != SMBDIRECT_MR_DISABLED)) 111 smbdirect_mr_io_disable_locked(mr); 112 113 mutex_unlock(&mr->mutex); 114 mutex_destroy(&mr->mutex); 115 kfree(mr); 116 } 117 118 void smbdirect_connection_destroy_mr_list(struct smbdirect_socket *sc) 119 { 120 struct smbdirect_mr_io *mr, *tmp; 121 LIST_HEAD(all_list); 122 unsigned long flags; 123 124 spin_lock_irqsave(&sc->mr_io.all.lock, flags); 125 list_splice_tail_init(&sc->mr_io.all.list, &all_list); 126 spin_unlock_irqrestore(&sc->mr_io.all.lock, flags); 127 128 list_for_each_entry_safe(mr, tmp, &all_list, list) { 129 mutex_lock(&mr->mutex); 130 131 smbdirect_mr_io_disable_locked(mr); 132 list_del(&mr->list); 133 mr->socket = NULL; 134 135 /* 136 * No kref_put_mutex() as it's already locked. 137 * 138 * If smbdirect_mr_io_free_locked() is called 139 * and the mutex is unlocked and mr is gone, 140 * in that case kref_put() returned 1. 141 * 142 * If kref_put() returned 0 we know that 143 * smbdirect_mr_io_free_locked() didn't 144 * run. Not by us nor by anyone else, as we 145 * still hold the mutex, so we need to unlock. 146 * 147 * If the mr is still registered it will 148 * be dangling (detached from the connection 149 * waiting for smbd_deregister_mr() to be 150 * called in order to free the memory. 151 */ 152 if (!kref_put(&mr->kref, smbdirect_mr_io_free_locked)) 153 mutex_unlock(&mr->mutex); 154 } 155 } 156 157 /* 158 * Get a MR from mr_list. This function waits until there is at least one MR 159 * available in the list. There may be several CPUs issuing I/O trying to get MR 160 * at the same time, mr_list_lock is used to protect this situation. 161 */ 162 static struct smbdirect_mr_io * 163 smbdirect_connection_get_mr_io(struct smbdirect_socket *sc) 164 { 165 struct smbdirect_mr_io *mr; 166 unsigned long flags; 167 int ret; 168 169 again: 170 ret = wait_event_interruptible(sc->mr_io.ready.wait_queue, 171 atomic_read(&sc->mr_io.ready.count) || 172 sc->status != SMBDIRECT_SOCKET_CONNECTED); 173 if (ret) { 174 smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR, 175 "wait_event_interruptible ret=%d (%1pe)\n", 176 ret, SMBDIRECT_DEBUG_ERR_PTR(ret)); 177 return NULL; 178 } 179 180 if (sc->status != SMBDIRECT_SOCKET_CONNECTED) { 181 smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR, 182 "sc->status=%s sc->first_error=%1pe\n", 183 smbdirect_socket_status_string(sc->status), 184 SMBDIRECT_DEBUG_ERR_PTR(sc->first_error)); 185 return NULL; 186 } 187 188 spin_lock_irqsave(&sc->mr_io.all.lock, flags); 189 list_for_each_entry(mr, &sc->mr_io.all.list, list) { 190 if (mr->state == SMBDIRECT_MR_READY) { 191 mr->state = SMBDIRECT_MR_REGISTERED; 192 kref_get(&mr->kref); 193 spin_unlock_irqrestore(&sc->mr_io.all.lock, flags); 194 atomic_dec(&sc->mr_io.ready.count); 195 atomic_inc(&sc->mr_io.used.count); 196 return mr; 197 } 198 } 199 200 spin_unlock_irqrestore(&sc->mr_io.all.lock, flags); 201 /* 202 * It is possible that we could fail to get MR because other processes may 203 * try to acquire a MR at the same time. If this is the case, retry it. 204 */ 205 goto again; 206 } 207 208 static void smbdirect_connection_mr_io_register_done(struct ib_cq *cq, struct ib_wc *wc) 209 { 210 struct smbdirect_mr_io *mr = 211 container_of(wc->wr_cqe, struct smbdirect_mr_io, cqe); 212 struct smbdirect_socket *sc = mr->socket; 213 214 if (wc->status != IB_WC_SUCCESS) { 215 smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR, 216 "wc->status=%s opcode=%d\n", 217 ib_wc_status_msg(wc->status), wc->opcode); 218 smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); 219 } 220 } 221 222 static void smbdirect_connection_mr_io_local_inv_done(struct ib_cq *cq, struct ib_wc *wc) 223 { 224 struct smbdirect_mr_io *mr = 225 container_of(wc->wr_cqe, struct smbdirect_mr_io, cqe); 226 struct smbdirect_socket *sc = mr->socket; 227 228 mr->state = SMBDIRECT_MR_INVALIDATED; 229 if (wc->status != IB_WC_SUCCESS) { 230 smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR, 231 "invalidate failed status=%s\n", 232 ib_wc_status_msg(wc->status)); 233 smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); 234 } 235 complete(&mr->invalidate_done); 236 } 237 238 /* 239 * Transcribe the pages from an iterator into an MR scatterlist. 240 */ 241 static int smbdirect_iter_to_sgt(struct iov_iter *iter, 242 struct sg_table *sgt, 243 unsigned int max_sg) 244 { 245 int ret; 246 247 memset(sgt->sgl, 0, max_sg * sizeof(struct scatterlist)); 248 249 ret = extract_iter_to_sg(iter, iov_iter_count(iter), sgt, max_sg, 0); 250 WARN_ON(ret < 0); 251 if (sgt->nents > 0) 252 sg_mark_end(&sgt->sgl[sgt->nents - 1]); 253 254 return ret; 255 } 256 257 /* 258 * Register memory for RDMA read/write 259 * iter: the buffer to register memory with 260 * writing: true if this is a RDMA write (SMB read), false for RDMA read 261 * need_invalidate: true if this MR needs to be locally invalidated after I/O 262 * return value: the MR registered, NULL if failed. 263 */ 264 struct smbdirect_mr_io * 265 smbdirect_connection_register_mr_io(struct smbdirect_socket *sc, 266 struct iov_iter *iter, 267 bool writing, 268 bool need_invalidate) 269 { 270 const struct smbdirect_socket_parameters *sp = &sc->parameters; 271 struct smbdirect_mr_io *mr; 272 int ret, num_pages, num_mapped; 273 struct ib_reg_wr *reg_wr; 274 275 num_pages = iov_iter_npages(iter, sp->max_frmr_depth + 1); 276 if (num_pages > sp->max_frmr_depth) { 277 smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR, 278 "num_pages=%d max_frmr_depth=%d\n", 279 num_pages, sp->max_frmr_depth); 280 WARN_ON_ONCE(1); 281 return NULL; 282 } 283 284 mr = smbdirect_connection_get_mr_io(sc); 285 if (!mr) { 286 smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR, 287 "smbdirect_connection_get_mr_io returning NULL\n"); 288 return NULL; 289 } 290 291 mutex_lock(&mr->mutex); 292 293 mr->dir = writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE; 294 mr->need_invalidate = need_invalidate; 295 mr->sgt.nents = 0; 296 mr->sgt.orig_nents = 0; 297 298 smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_INFO, 299 "num_pages=%u count=%zu depth=%u\n", 300 num_pages, iov_iter_count(iter), sp->max_frmr_depth); 301 smbdirect_iter_to_sgt(iter, &mr->sgt, sp->max_frmr_depth); 302 303 num_mapped = ib_dma_map_sg(sc->ib.dev, mr->sgt.sgl, mr->sgt.nents, mr->dir); 304 if (!num_mapped) { 305 smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR, 306 "ib_dma_map_sg num_pages=%u dir=%x num_mapped=%d\n", 307 num_pages, mr->dir, num_mapped); 308 ret = -EIO; 309 goto dma_map_error; 310 } 311 312 ret = ib_map_mr_sg(mr->mr, mr->sgt.sgl, num_mapped, NULL, PAGE_SIZE); 313 if (ret != num_mapped) { 314 smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR, 315 "ib_map_mr_sg failed ret = %d num_mapped = %u\n", 316 ret, num_mapped); 317 if (ret >= 0) 318 ret = -EIO; 319 goto map_mr_error; 320 } 321 322 ib_update_fast_reg_key(mr->mr, ib_inc_rkey(mr->mr->rkey)); 323 reg_wr = &mr->wr; 324 reg_wr->wr.opcode = IB_WR_REG_MR; 325 mr->cqe.done = smbdirect_connection_mr_io_register_done; 326 reg_wr->wr.wr_cqe = &mr->cqe; 327 reg_wr->wr.num_sge = 0; 328 reg_wr->wr.send_flags = IB_SEND_SIGNALED; 329 reg_wr->mr = mr->mr; 330 reg_wr->key = mr->mr->rkey; 331 reg_wr->access = writing ? 332 IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : 333 IB_ACCESS_REMOTE_READ; 334 335 /* 336 * There is no need for waiting for complemtion on ib_post_send 337 * on IB_WR_REG_MR. Hardware enforces a barrier and order of execution 338 * on the next ib_post_send when we actually send I/O to remote peer 339 */ 340 ret = ib_post_send(sc->ib.qp, ®_wr->wr, NULL); 341 if (!ret) { 342 /* 343 * smbdirect_connection_get_mr_io() gave us a reference 344 * via kref_get(&mr->kref), we keep that and let 345 * the caller use smbdirect_connection_deregister_mr_io() 346 * to remove it again. 347 */ 348 mutex_unlock(&mr->mutex); 349 return mr; 350 } 351 352 smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR, 353 "ib_post_send failed ret=%d (%1pe) reg_wr->key=0x%x\n", 354 ret, SMBDIRECT_DEBUG_ERR_PTR(ret), reg_wr->key); 355 356 map_mr_error: 357 ib_dma_unmap_sg(sc->ib.dev, mr->sgt.sgl, mr->sgt.nents, mr->dir); 358 359 dma_map_error: 360 mr->sgt.nents = 0; 361 mr->state = SMBDIRECT_MR_ERROR; 362 atomic_dec(&sc->mr_io.used.count); 363 364 smbdirect_socket_schedule_cleanup(sc, ret); 365 366 /* 367 * smbdirect_connection_get_mr_io() gave us a reference 368 * via kref_get(&mr->kref), we need to remove it again 369 * on error. 370 * 371 * No kref_put_mutex() as it's already locked. 372 * 373 * If smbdirect_mr_io_free_locked() is called 374 * and the mutex is unlocked and mr is gone, 375 * in that case kref_put() returned 1. 376 * 377 * If kref_put() returned 0 we know that 378 * smbdirect_mr_io_free_locked() didn't 379 * run. Not by us nor by anyone else, as we 380 * still hold the mutex, so we need to unlock. 381 */ 382 if (!kref_put(&mr->kref, smbdirect_mr_io_free_locked)) 383 mutex_unlock(&mr->mutex); 384 return NULL; 385 } 386 EXPORT_SYMBOL_GPL(smbdirect_connection_register_mr_io); 387 388 void smbdirect_mr_io_fill_buffer_descriptor(struct smbdirect_mr_io *mr, 389 struct smbdirect_buffer_descriptor_v1 *v1) 390 { 391 mutex_lock(&mr->mutex); 392 if (mr->state == SMBDIRECT_MR_REGISTERED) { 393 v1->offset = cpu_to_le64(mr->mr->iova); 394 v1->token = cpu_to_le32(mr->mr->rkey); 395 v1->length = cpu_to_le32(mr->mr->length); 396 } else { 397 v1->offset = cpu_to_le64(U64_MAX); 398 v1->token = cpu_to_le32(U32_MAX); 399 v1->length = cpu_to_le32(U32_MAX); 400 } 401 mutex_unlock(&mr->mutex); 402 } 403 EXPORT_SYMBOL_GPL(smbdirect_mr_io_fill_buffer_descriptor); 404 405 /* 406 * Deregister a MR after I/O is done 407 * This function may wait if remote invalidation is not used 408 * and we have to locally invalidate the buffer to prevent data is being 409 * modified by remote peer after upper layer consumes it 410 */ 411 void smbdirect_connection_deregister_mr_io(struct smbdirect_mr_io *mr) 412 { 413 struct smbdirect_socket *sc = mr->socket; 414 int ret = 0; 415 416 lock_again: 417 mutex_lock(&mr->mutex); 418 if (mr->state == SMBDIRECT_MR_DISABLED) 419 goto put_kref; 420 421 if (sc->status != SMBDIRECT_SOCKET_CONNECTED) { 422 smbdirect_mr_io_disable_locked(mr); 423 goto put_kref; 424 } 425 426 if (mr->need_invalidate) { 427 struct ib_send_wr *wr = &mr->inv_wr; 428 429 /* Need to finish local invalidation before returning */ 430 wr->opcode = IB_WR_LOCAL_INV; 431 mr->cqe.done = smbdirect_connection_mr_io_local_inv_done; 432 wr->wr_cqe = &mr->cqe; 433 wr->num_sge = 0; 434 wr->ex.invalidate_rkey = mr->mr->rkey; 435 wr->send_flags = IB_SEND_SIGNALED; 436 437 init_completion(&mr->invalidate_done); 438 ret = ib_post_send(sc->ib.qp, wr, NULL); 439 if (ret) { 440 smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR, 441 "ib_post_send failed ret=%d (%1pe)\n", 442 ret, SMBDIRECT_DEBUG_ERR_PTR(ret)); 443 smbdirect_mr_io_disable_locked(mr); 444 smbdirect_socket_schedule_cleanup(sc, ret); 445 goto done; 446 } 447 448 /* 449 * We still hold the reference to mr 450 * so we can unlock while waiting. 451 */ 452 mutex_unlock(&mr->mutex); 453 wait_for_completion(&mr->invalidate_done); 454 mr->need_invalidate = false; 455 goto lock_again; 456 } else 457 /* 458 * For remote invalidation, just set it to SMBDIRECT_MR_INVALIDATED 459 * and defer to mr_recovery_work to recover the MR for next use 460 */ 461 mr->state = SMBDIRECT_MR_INVALIDATED; 462 463 if (mr->sgt.nents) { 464 ib_dma_unmap_sg(sc->ib.dev, mr->sgt.sgl, mr->sgt.nents, mr->dir); 465 mr->sgt.nents = 0; 466 } 467 468 WARN_ONCE(mr->state != SMBDIRECT_MR_INVALIDATED, 469 "mr->state[%u] != SMBDIRECT_MR_INVALIDATED[%u]\n", 470 mr->state, SMBDIRECT_MR_INVALIDATED); 471 mr->state = SMBDIRECT_MR_READY; 472 if (atomic_inc_return(&sc->mr_io.ready.count) == 1) 473 wake_up(&sc->mr_io.ready.wait_queue); 474 475 done: 476 atomic_dec(&sc->mr_io.used.count); 477 478 put_kref: 479 /* 480 * No kref_put_mutex() as it's already locked. 481 * 482 * If smbdirect_mr_io_free_locked() is called 483 * and the mutex is unlocked and mr is gone, 484 * in that case kref_put() returned 1. 485 * 486 * If kref_put() returned 0 we know that 487 * smbdirect_mr_io_free_locked() didn't 488 * run. Not by us nor by anyone else, as we 489 * still hold the mutex, so we need to unlock 490 * and keep the mr in SMBDIRECT_MR_READY or 491 * SMBDIRECT_MR_ERROR state. 492 */ 493 if (!kref_put(&mr->kref, smbdirect_mr_io_free_locked)) 494 mutex_unlock(&mr->mutex); 495 } 496 EXPORT_SYMBOL_GPL(smbdirect_connection_deregister_mr_io); 497