1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Copyright (C) 2017, Microsoft Corporation. 4 * Copyright (C) 2018, LG Electronics. 5 * 6 * Author(s): Long Li <longli@microsoft.com>, 7 * Hyunchul Lee <hyc.lee@gmail.com> 8 */ 9 10 #define SUBMOD_NAME "smb_direct" 11 12 #include <linux/kthread.h> 13 #include <linux/list.h> 14 #include <linux/mempool.h> 15 #include <linux/highmem.h> 16 #include <linux/scatterlist.h> 17 #include <linux/string_choices.h> 18 #include <rdma/ib_verbs.h> 19 #include <rdma/rdma_cm.h> 20 #include <rdma/rw.h> 21 22 #include "glob.h" 23 #include "connection.h" 24 #include "smb_common.h" 25 #include "../common/smb2status.h" 26 #include "transport_rdma.h" 27 28 #define SMB_DIRECT_PORT_IWARP 5445 29 #define SMB_DIRECT_PORT_INFINIBAND 445 30 31 #define SMB_DIRECT_VERSION_LE cpu_to_le16(0x0100) 32 33 /* SMB_DIRECT negotiation timeout in seconds */ 34 #define SMB_DIRECT_NEGOTIATE_TIMEOUT 120 35 36 #define SMB_DIRECT_MAX_SEND_SGES 6 37 #define SMB_DIRECT_MAX_RECV_SGES 1 38 39 /* 40 * Default maximum number of RDMA read/write outstanding on this connection 41 * This value is possibly decreased during QP creation on hardware limit 42 */ 43 #define SMB_DIRECT_CM_INITIATOR_DEPTH 8 44 45 /* Maximum number of retries on data transfer operations */ 46 #define SMB_DIRECT_CM_RETRY 6 47 /* No need to retry on Receiver Not Ready since SMB_DIRECT manages credits */ 48 #define SMB_DIRECT_CM_RNR_RETRY 0 49 50 /* 51 * User configurable initial values per SMB_DIRECT transport connection 52 * as defined in [MS-SMBD] 3.1.1.1 53 * Those may change after a SMB_DIRECT negotiation 54 */ 55 56 /* Set 445 port to SMB Direct port by default */ 57 static int smb_direct_port = SMB_DIRECT_PORT_INFINIBAND; 58 59 /* The local peer's maximum number of credits to grant to the peer */ 60 static int smb_direct_receive_credit_max = 255; 61 62 /* The remote peer's credit request of local peer */ 63 static int smb_direct_send_credit_target = 255; 64 65 /* The maximum single message size can be sent to remote peer */ 66 static int smb_direct_max_send_size = 1364; 67 68 /* The maximum fragmented upper-layer payload receive size supported */ 69 static int smb_direct_max_fragmented_recv_size = 1024 * 1024; 70 71 /* The maximum single-message size which can be received */ 72 static int smb_direct_max_receive_size = 1364; 73 74 static int smb_direct_max_read_write_size = SMBD_DEFAULT_IOSIZE; 75 76 static LIST_HEAD(smb_direct_device_list); 77 static DEFINE_RWLOCK(smb_direct_device_lock); 78 79 struct smb_direct_device { 80 struct ib_device *ib_dev; 81 struct list_head list; 82 }; 83 84 static struct smb_direct_listener { 85 struct rdma_cm_id *cm_id; 86 } smb_direct_listener; 87 88 static struct workqueue_struct *smb_direct_wq; 89 90 enum smb_direct_status { 91 SMB_DIRECT_CS_NEW = 0, 92 SMB_DIRECT_CS_CONNECTED, 93 SMB_DIRECT_CS_DISCONNECTING, 94 SMB_DIRECT_CS_DISCONNECTED, 95 }; 96 97 struct smb_direct_transport { 98 struct ksmbd_transport transport; 99 100 enum smb_direct_status status; 101 bool full_packet_received; 102 wait_queue_head_t wait_status; 103 104 struct rdma_cm_id *cm_id; 105 struct ib_cq *send_cq; 106 struct ib_cq *recv_cq; 107 struct ib_pd *pd; 108 struct ib_qp *qp; 109 110 int max_send_size; 111 int max_recv_size; 112 int max_fragmented_send_size; 113 int max_fragmented_recv_size; 114 int max_rdma_rw_size; 115 116 spinlock_t reassembly_queue_lock; 117 struct list_head reassembly_queue; 118 int reassembly_data_length; 119 int reassembly_queue_length; 120 int first_entry_offset; 121 wait_queue_head_t wait_reassembly_queue; 122 123 spinlock_t receive_credit_lock; 124 int recv_credits; 125 int count_avail_recvmsg; 126 int recv_credit_max; 127 int recv_credit_target; 128 129 spinlock_t recvmsg_queue_lock; 130 struct list_head recvmsg_queue; 131 132 int send_credit_target; 133 atomic_t send_credits; 134 spinlock_t lock_new_recv_credits; 135 int new_recv_credits; 136 int max_rw_credits; 137 int pages_per_rw_credit; 138 atomic_t rw_credits; 139 140 wait_queue_head_t wait_send_credits; 141 wait_queue_head_t wait_rw_credits; 142 143 mempool_t *sendmsg_mempool; 144 struct kmem_cache *sendmsg_cache; 145 mempool_t *recvmsg_mempool; 146 struct kmem_cache *recvmsg_cache; 147 148 wait_queue_head_t wait_send_pending; 149 atomic_t send_pending; 150 151 struct delayed_work post_recv_credits_work; 152 struct work_struct send_immediate_work; 153 struct work_struct disconnect_work; 154 155 bool negotiation_requested; 156 }; 157 158 #define KSMBD_TRANS(t) ((struct ksmbd_transport *)&((t)->transport)) 159 #define SMBD_TRANS(t) ((struct smb_direct_transport *)container_of(t, \ 160 struct smb_direct_transport, transport)) 161 enum { 162 SMB_DIRECT_MSG_NEGOTIATE_REQ = 0, 163 SMB_DIRECT_MSG_DATA_TRANSFER 164 }; 165 166 static const struct ksmbd_transport_ops ksmbd_smb_direct_transport_ops; 167 168 struct smb_direct_send_ctx { 169 struct list_head msg_list; 170 int wr_cnt; 171 bool need_invalidate_rkey; 172 unsigned int remote_key; 173 }; 174 175 struct smb_direct_sendmsg { 176 struct smb_direct_transport *transport; 177 struct ib_send_wr wr; 178 struct list_head list; 179 int num_sge; 180 struct ib_sge sge[SMB_DIRECT_MAX_SEND_SGES]; 181 struct ib_cqe cqe; 182 u8 packet[]; 183 }; 184 185 struct smb_direct_recvmsg { 186 struct smb_direct_transport *transport; 187 struct list_head list; 188 int type; 189 struct ib_sge sge; 190 struct ib_cqe cqe; 191 bool first_segment; 192 u8 packet[]; 193 }; 194 195 struct smb_direct_rdma_rw_msg { 196 struct smb_direct_transport *t; 197 struct ib_cqe cqe; 198 int status; 199 struct completion *completion; 200 struct list_head list; 201 struct rdma_rw_ctx rw_ctx; 202 struct sg_table sgt; 203 struct scatterlist sg_list[]; 204 }; 205 206 void init_smbd_max_io_size(unsigned int sz) 207 { 208 sz = clamp_val(sz, SMBD_MIN_IOSIZE, SMBD_MAX_IOSIZE); 209 smb_direct_max_read_write_size = sz; 210 } 211 212 unsigned int get_smbd_max_read_write_size(void) 213 { 214 return smb_direct_max_read_write_size; 215 } 216 217 static inline int get_buf_page_count(void *buf, int size) 218 { 219 return DIV_ROUND_UP((uintptr_t)buf + size, PAGE_SIZE) - 220 (uintptr_t)buf / PAGE_SIZE; 221 } 222 223 static void smb_direct_destroy_pools(struct smb_direct_transport *transport); 224 static void smb_direct_post_recv_credits(struct work_struct *work); 225 static int smb_direct_post_send_data(struct smb_direct_transport *t, 226 struct smb_direct_send_ctx *send_ctx, 227 struct kvec *iov, int niov, 228 int remaining_data_length); 229 230 static inline struct smb_direct_transport * 231 smb_trans_direct_transfort(struct ksmbd_transport *t) 232 { 233 return container_of(t, struct smb_direct_transport, transport); 234 } 235 236 static inline void 237 *smb_direct_recvmsg_payload(struct smb_direct_recvmsg *recvmsg) 238 { 239 return (void *)recvmsg->packet; 240 } 241 242 static inline bool is_receive_credit_post_required(int receive_credits, 243 int avail_recvmsg_count) 244 { 245 return receive_credits <= (smb_direct_receive_credit_max >> 3) && 246 avail_recvmsg_count >= (receive_credits >> 2); 247 } 248 249 static struct 250 smb_direct_recvmsg *get_free_recvmsg(struct smb_direct_transport *t) 251 { 252 struct smb_direct_recvmsg *recvmsg = NULL; 253 254 spin_lock(&t->recvmsg_queue_lock); 255 if (!list_empty(&t->recvmsg_queue)) { 256 recvmsg = list_first_entry(&t->recvmsg_queue, 257 struct smb_direct_recvmsg, 258 list); 259 list_del(&recvmsg->list); 260 } 261 spin_unlock(&t->recvmsg_queue_lock); 262 return recvmsg; 263 } 264 265 static void put_recvmsg(struct smb_direct_transport *t, 266 struct smb_direct_recvmsg *recvmsg) 267 { 268 if (likely(recvmsg->sge.length != 0)) { 269 ib_dma_unmap_single(t->cm_id->device, 270 recvmsg->sge.addr, 271 recvmsg->sge.length, 272 DMA_FROM_DEVICE); 273 recvmsg->sge.length = 0; 274 } 275 276 spin_lock(&t->recvmsg_queue_lock); 277 list_add(&recvmsg->list, &t->recvmsg_queue); 278 spin_unlock(&t->recvmsg_queue_lock); 279 } 280 281 static void enqueue_reassembly(struct smb_direct_transport *t, 282 struct smb_direct_recvmsg *recvmsg, 283 int data_length) 284 { 285 spin_lock(&t->reassembly_queue_lock); 286 list_add_tail(&recvmsg->list, &t->reassembly_queue); 287 t->reassembly_queue_length++; 288 /* 289 * Make sure reassembly_data_length is updated after list and 290 * reassembly_queue_length are updated. On the dequeue side 291 * reassembly_data_length is checked without a lock to determine 292 * if reassembly_queue_length and list is up to date 293 */ 294 virt_wmb(); 295 t->reassembly_data_length += data_length; 296 spin_unlock(&t->reassembly_queue_lock); 297 } 298 299 static struct smb_direct_recvmsg *get_first_reassembly(struct smb_direct_transport *t) 300 { 301 if (!list_empty(&t->reassembly_queue)) 302 return list_first_entry(&t->reassembly_queue, 303 struct smb_direct_recvmsg, list); 304 else 305 return NULL; 306 } 307 308 static void smb_direct_disconnect_rdma_work(struct work_struct *work) 309 { 310 struct smb_direct_transport *t = 311 container_of(work, struct smb_direct_transport, 312 disconnect_work); 313 314 if (t->status == SMB_DIRECT_CS_CONNECTED) { 315 t->status = SMB_DIRECT_CS_DISCONNECTING; 316 rdma_disconnect(t->cm_id); 317 } 318 } 319 320 static void 321 smb_direct_disconnect_rdma_connection(struct smb_direct_transport *t) 322 { 323 if (t->status == SMB_DIRECT_CS_CONNECTED) 324 queue_work(smb_direct_wq, &t->disconnect_work); 325 } 326 327 static void smb_direct_send_immediate_work(struct work_struct *work) 328 { 329 struct smb_direct_transport *t = container_of(work, 330 struct smb_direct_transport, send_immediate_work); 331 332 if (t->status != SMB_DIRECT_CS_CONNECTED) 333 return; 334 335 smb_direct_post_send_data(t, NULL, NULL, 0, 0); 336 } 337 338 static struct smb_direct_transport *alloc_transport(struct rdma_cm_id *cm_id) 339 { 340 struct smb_direct_transport *t; 341 struct ksmbd_conn *conn; 342 343 t = kzalloc(sizeof(*t), KSMBD_DEFAULT_GFP); 344 if (!t) 345 return NULL; 346 347 t->cm_id = cm_id; 348 cm_id->context = t; 349 350 t->status = SMB_DIRECT_CS_NEW; 351 init_waitqueue_head(&t->wait_status); 352 353 spin_lock_init(&t->reassembly_queue_lock); 354 INIT_LIST_HEAD(&t->reassembly_queue); 355 t->reassembly_data_length = 0; 356 t->reassembly_queue_length = 0; 357 init_waitqueue_head(&t->wait_reassembly_queue); 358 init_waitqueue_head(&t->wait_send_credits); 359 init_waitqueue_head(&t->wait_rw_credits); 360 361 spin_lock_init(&t->receive_credit_lock); 362 spin_lock_init(&t->recvmsg_queue_lock); 363 INIT_LIST_HEAD(&t->recvmsg_queue); 364 365 init_waitqueue_head(&t->wait_send_pending); 366 atomic_set(&t->send_pending, 0); 367 368 spin_lock_init(&t->lock_new_recv_credits); 369 370 INIT_DELAYED_WORK(&t->post_recv_credits_work, 371 smb_direct_post_recv_credits); 372 INIT_WORK(&t->send_immediate_work, smb_direct_send_immediate_work); 373 INIT_WORK(&t->disconnect_work, smb_direct_disconnect_rdma_work); 374 375 conn = ksmbd_conn_alloc(); 376 if (!conn) 377 goto err; 378 conn->transport = KSMBD_TRANS(t); 379 KSMBD_TRANS(t)->conn = conn; 380 KSMBD_TRANS(t)->ops = &ksmbd_smb_direct_transport_ops; 381 return t; 382 err: 383 kfree(t); 384 return NULL; 385 } 386 387 static void smb_direct_free_transport(struct ksmbd_transport *kt) 388 { 389 kfree(SMBD_TRANS(kt)); 390 } 391 392 static void free_transport(struct smb_direct_transport *t) 393 { 394 struct smb_direct_recvmsg *recvmsg; 395 396 wake_up_interruptible(&t->wait_send_credits); 397 398 ksmbd_debug(RDMA, "wait for all send posted to IB to finish\n"); 399 wait_event(t->wait_send_pending, 400 atomic_read(&t->send_pending) == 0); 401 402 cancel_work_sync(&t->disconnect_work); 403 cancel_delayed_work_sync(&t->post_recv_credits_work); 404 cancel_work_sync(&t->send_immediate_work); 405 406 if (t->qp) { 407 ib_drain_qp(t->qp); 408 ib_mr_pool_destroy(t->qp, &t->qp->rdma_mrs); 409 t->qp = NULL; 410 rdma_destroy_qp(t->cm_id); 411 } 412 413 ksmbd_debug(RDMA, "drain the reassembly queue\n"); 414 do { 415 spin_lock(&t->reassembly_queue_lock); 416 recvmsg = get_first_reassembly(t); 417 if (recvmsg) { 418 list_del(&recvmsg->list); 419 spin_unlock(&t->reassembly_queue_lock); 420 put_recvmsg(t, recvmsg); 421 } else { 422 spin_unlock(&t->reassembly_queue_lock); 423 } 424 } while (recvmsg); 425 t->reassembly_data_length = 0; 426 427 if (t->send_cq) 428 ib_free_cq(t->send_cq); 429 if (t->recv_cq) 430 ib_free_cq(t->recv_cq); 431 if (t->pd) 432 ib_dealloc_pd(t->pd); 433 if (t->cm_id) 434 rdma_destroy_id(t->cm_id); 435 436 smb_direct_destroy_pools(t); 437 ksmbd_conn_free(KSMBD_TRANS(t)->conn); 438 } 439 440 static struct smb_direct_sendmsg 441 *smb_direct_alloc_sendmsg(struct smb_direct_transport *t) 442 { 443 struct smb_direct_sendmsg *msg; 444 445 msg = mempool_alloc(t->sendmsg_mempool, KSMBD_DEFAULT_GFP); 446 if (!msg) 447 return ERR_PTR(-ENOMEM); 448 msg->transport = t; 449 INIT_LIST_HEAD(&msg->list); 450 msg->num_sge = 0; 451 return msg; 452 } 453 454 static void smb_direct_free_sendmsg(struct smb_direct_transport *t, 455 struct smb_direct_sendmsg *msg) 456 { 457 int i; 458 459 if (msg->num_sge > 0) { 460 ib_dma_unmap_single(t->cm_id->device, 461 msg->sge[0].addr, msg->sge[0].length, 462 DMA_TO_DEVICE); 463 for (i = 1; i < msg->num_sge; i++) 464 ib_dma_unmap_page(t->cm_id->device, 465 msg->sge[i].addr, msg->sge[i].length, 466 DMA_TO_DEVICE); 467 } 468 mempool_free(msg, t->sendmsg_mempool); 469 } 470 471 static int smb_direct_check_recvmsg(struct smb_direct_recvmsg *recvmsg) 472 { 473 switch (recvmsg->type) { 474 case SMB_DIRECT_MSG_DATA_TRANSFER: { 475 struct smb_direct_data_transfer *req = 476 (struct smb_direct_data_transfer *)recvmsg->packet; 477 struct smb2_hdr *hdr = (struct smb2_hdr *)(recvmsg->packet 478 + le32_to_cpu(req->data_offset)); 479 ksmbd_debug(RDMA, 480 "CreditGranted: %u, CreditRequested: %u, DataLength: %u, RemainingDataLength: %u, SMB: %x, Command: %u\n", 481 le16_to_cpu(req->credits_granted), 482 le16_to_cpu(req->credits_requested), 483 req->data_length, req->remaining_data_length, 484 hdr->ProtocolId, hdr->Command); 485 break; 486 } 487 case SMB_DIRECT_MSG_NEGOTIATE_REQ: { 488 struct smb_direct_negotiate_req *req = 489 (struct smb_direct_negotiate_req *)recvmsg->packet; 490 ksmbd_debug(RDMA, 491 "MinVersion: %u, MaxVersion: %u, CreditRequested: %u, MaxSendSize: %u, MaxRecvSize: %u, MaxFragmentedSize: %u\n", 492 le16_to_cpu(req->min_version), 493 le16_to_cpu(req->max_version), 494 le16_to_cpu(req->credits_requested), 495 le32_to_cpu(req->preferred_send_size), 496 le32_to_cpu(req->max_receive_size), 497 le32_to_cpu(req->max_fragmented_size)); 498 if (le16_to_cpu(req->min_version) > 0x0100 || 499 le16_to_cpu(req->max_version) < 0x0100) 500 return -EOPNOTSUPP; 501 if (le16_to_cpu(req->credits_requested) <= 0 || 502 le32_to_cpu(req->max_receive_size) <= 128 || 503 le32_to_cpu(req->max_fragmented_size) <= 504 128 * 1024) 505 return -ECONNABORTED; 506 507 break; 508 } 509 default: 510 return -EINVAL; 511 } 512 return 0; 513 } 514 515 static void recv_done(struct ib_cq *cq, struct ib_wc *wc) 516 { 517 struct smb_direct_recvmsg *recvmsg; 518 struct smb_direct_transport *t; 519 520 recvmsg = container_of(wc->wr_cqe, struct smb_direct_recvmsg, cqe); 521 t = recvmsg->transport; 522 523 if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_RECV) { 524 put_recvmsg(t, recvmsg); 525 if (wc->status != IB_WC_WR_FLUSH_ERR) { 526 pr_err("Recv error. status='%s (%d)' opcode=%d\n", 527 ib_wc_status_msg(wc->status), wc->status, 528 wc->opcode); 529 smb_direct_disconnect_rdma_connection(t); 530 } 531 return; 532 } 533 534 ksmbd_debug(RDMA, "Recv completed. status='%s (%d)', opcode=%d\n", 535 ib_wc_status_msg(wc->status), wc->status, 536 wc->opcode); 537 538 ib_dma_sync_single_for_cpu(wc->qp->device, recvmsg->sge.addr, 539 recvmsg->sge.length, DMA_FROM_DEVICE); 540 541 switch (recvmsg->type) { 542 case SMB_DIRECT_MSG_NEGOTIATE_REQ: 543 if (wc->byte_len < sizeof(struct smb_direct_negotiate_req)) { 544 put_recvmsg(t, recvmsg); 545 smb_direct_disconnect_rdma_connection(t); 546 return; 547 } 548 t->negotiation_requested = true; 549 t->full_packet_received = true; 550 t->status = SMB_DIRECT_CS_CONNECTED; 551 enqueue_reassembly(t, recvmsg, 0); 552 wake_up_interruptible(&t->wait_status); 553 return; 554 case SMB_DIRECT_MSG_DATA_TRANSFER: { 555 struct smb_direct_data_transfer *data_transfer = 556 (struct smb_direct_data_transfer *)recvmsg->packet; 557 unsigned int data_length; 558 int avail_recvmsg_count, receive_credits; 559 560 if (wc->byte_len < 561 offsetof(struct smb_direct_data_transfer, padding)) { 562 put_recvmsg(t, recvmsg); 563 smb_direct_disconnect_rdma_connection(t); 564 return; 565 } 566 567 data_length = le32_to_cpu(data_transfer->data_length); 568 if (data_length) { 569 if (wc->byte_len < sizeof(struct smb_direct_data_transfer) + 570 (u64)data_length) { 571 put_recvmsg(t, recvmsg); 572 smb_direct_disconnect_rdma_connection(t); 573 return; 574 } 575 576 if (t->full_packet_received) 577 recvmsg->first_segment = true; 578 579 if (le32_to_cpu(data_transfer->remaining_data_length)) 580 t->full_packet_received = false; 581 else 582 t->full_packet_received = true; 583 584 spin_lock(&t->receive_credit_lock); 585 receive_credits = --(t->recv_credits); 586 avail_recvmsg_count = t->count_avail_recvmsg; 587 spin_unlock(&t->receive_credit_lock); 588 } else { 589 spin_lock(&t->receive_credit_lock); 590 receive_credits = --(t->recv_credits); 591 avail_recvmsg_count = ++(t->count_avail_recvmsg); 592 spin_unlock(&t->receive_credit_lock); 593 } 594 595 t->recv_credit_target = 596 le16_to_cpu(data_transfer->credits_requested); 597 atomic_add(le16_to_cpu(data_transfer->credits_granted), 598 &t->send_credits); 599 600 if (le16_to_cpu(data_transfer->flags) & 601 SMB_DIRECT_RESPONSE_REQUESTED) 602 queue_work(smb_direct_wq, &t->send_immediate_work); 603 604 if (atomic_read(&t->send_credits) > 0) 605 wake_up_interruptible(&t->wait_send_credits); 606 607 if (is_receive_credit_post_required(receive_credits, avail_recvmsg_count)) 608 mod_delayed_work(smb_direct_wq, 609 &t->post_recv_credits_work, 0); 610 611 if (data_length) { 612 enqueue_reassembly(t, recvmsg, (int)data_length); 613 wake_up_interruptible(&t->wait_reassembly_queue); 614 } else 615 put_recvmsg(t, recvmsg); 616 617 return; 618 } 619 } 620 621 /* 622 * This is an internal error! 623 */ 624 WARN_ON_ONCE(recvmsg->type != SMB_DIRECT_MSG_DATA_TRANSFER); 625 put_recvmsg(t, recvmsg); 626 smb_direct_disconnect_rdma_connection(t); 627 } 628 629 static int smb_direct_post_recv(struct smb_direct_transport *t, 630 struct smb_direct_recvmsg *recvmsg) 631 { 632 struct ib_recv_wr wr; 633 int ret; 634 635 recvmsg->sge.addr = ib_dma_map_single(t->cm_id->device, 636 recvmsg->packet, t->max_recv_size, 637 DMA_FROM_DEVICE); 638 ret = ib_dma_mapping_error(t->cm_id->device, recvmsg->sge.addr); 639 if (ret) 640 return ret; 641 recvmsg->sge.length = t->max_recv_size; 642 recvmsg->sge.lkey = t->pd->local_dma_lkey; 643 recvmsg->cqe.done = recv_done; 644 645 wr.wr_cqe = &recvmsg->cqe; 646 wr.next = NULL; 647 wr.sg_list = &recvmsg->sge; 648 wr.num_sge = 1; 649 650 ret = ib_post_recv(t->qp, &wr, NULL); 651 if (ret) { 652 pr_err("Can't post recv: %d\n", ret); 653 ib_dma_unmap_single(t->cm_id->device, 654 recvmsg->sge.addr, recvmsg->sge.length, 655 DMA_FROM_DEVICE); 656 recvmsg->sge.length = 0; 657 smb_direct_disconnect_rdma_connection(t); 658 return ret; 659 } 660 return ret; 661 } 662 663 static int smb_direct_read(struct ksmbd_transport *t, char *buf, 664 unsigned int size, int unused) 665 { 666 struct smb_direct_recvmsg *recvmsg; 667 struct smb_direct_data_transfer *data_transfer; 668 int to_copy, to_read, data_read, offset; 669 u32 data_length, remaining_data_length, data_offset; 670 int rc; 671 struct smb_direct_transport *st = smb_trans_direct_transfort(t); 672 673 again: 674 if (st->status != SMB_DIRECT_CS_CONNECTED) { 675 pr_err("disconnected\n"); 676 return -ENOTCONN; 677 } 678 679 /* 680 * No need to hold the reassembly queue lock all the time as we are 681 * the only one reading from the front of the queue. The transport 682 * may add more entries to the back of the queue at the same time 683 */ 684 if (st->reassembly_data_length >= size) { 685 int queue_length; 686 int queue_removed = 0; 687 688 /* 689 * Need to make sure reassembly_data_length is read before 690 * reading reassembly_queue_length and calling 691 * get_first_reassembly. This call is lock free 692 * as we never read at the end of the queue which are being 693 * updated in SOFTIRQ as more data is received 694 */ 695 virt_rmb(); 696 queue_length = st->reassembly_queue_length; 697 data_read = 0; 698 to_read = size; 699 offset = st->first_entry_offset; 700 while (data_read < size) { 701 recvmsg = get_first_reassembly(st); 702 data_transfer = smb_direct_recvmsg_payload(recvmsg); 703 data_length = le32_to_cpu(data_transfer->data_length); 704 remaining_data_length = 705 le32_to_cpu(data_transfer->remaining_data_length); 706 data_offset = le32_to_cpu(data_transfer->data_offset); 707 708 /* 709 * The upper layer expects RFC1002 length at the 710 * beginning of the payload. Return it to indicate 711 * the total length of the packet. This minimize the 712 * change to upper layer packet processing logic. This 713 * will be eventually remove when an intermediate 714 * transport layer is added 715 */ 716 if (recvmsg->first_segment && size == 4) { 717 unsigned int rfc1002_len = 718 data_length + remaining_data_length; 719 *((__be32 *)buf) = cpu_to_be32(rfc1002_len); 720 data_read = 4; 721 recvmsg->first_segment = false; 722 ksmbd_debug(RDMA, 723 "returning rfc1002 length %d\n", 724 rfc1002_len); 725 goto read_rfc1002_done; 726 } 727 728 to_copy = min_t(int, data_length - offset, to_read); 729 memcpy(buf + data_read, (char *)data_transfer + data_offset + offset, 730 to_copy); 731 732 /* move on to the next buffer? */ 733 if (to_copy == data_length - offset) { 734 queue_length--; 735 /* 736 * No need to lock if we are not at the 737 * end of the queue 738 */ 739 if (queue_length) { 740 list_del(&recvmsg->list); 741 } else { 742 spin_lock_irq(&st->reassembly_queue_lock); 743 list_del(&recvmsg->list); 744 spin_unlock_irq(&st->reassembly_queue_lock); 745 } 746 queue_removed++; 747 put_recvmsg(st, recvmsg); 748 offset = 0; 749 } else { 750 offset += to_copy; 751 } 752 753 to_read -= to_copy; 754 data_read += to_copy; 755 } 756 757 spin_lock_irq(&st->reassembly_queue_lock); 758 st->reassembly_data_length -= data_read; 759 st->reassembly_queue_length -= queue_removed; 760 spin_unlock_irq(&st->reassembly_queue_lock); 761 762 spin_lock(&st->receive_credit_lock); 763 st->count_avail_recvmsg += queue_removed; 764 if (is_receive_credit_post_required(st->recv_credits, st->count_avail_recvmsg)) { 765 spin_unlock(&st->receive_credit_lock); 766 mod_delayed_work(smb_direct_wq, 767 &st->post_recv_credits_work, 0); 768 } else { 769 spin_unlock(&st->receive_credit_lock); 770 } 771 772 st->first_entry_offset = offset; 773 ksmbd_debug(RDMA, 774 "returning to thread data_read=%d reassembly_data_length=%d first_entry_offset=%d\n", 775 data_read, st->reassembly_data_length, 776 st->first_entry_offset); 777 read_rfc1002_done: 778 return data_read; 779 } 780 781 ksmbd_debug(RDMA, "wait_event on more data\n"); 782 rc = wait_event_interruptible(st->wait_reassembly_queue, 783 st->reassembly_data_length >= size || 784 st->status != SMB_DIRECT_CS_CONNECTED); 785 if (rc) 786 return -EINTR; 787 788 goto again; 789 } 790 791 static void smb_direct_post_recv_credits(struct work_struct *work) 792 { 793 struct smb_direct_transport *t = container_of(work, 794 struct smb_direct_transport, post_recv_credits_work.work); 795 struct smb_direct_recvmsg *recvmsg; 796 int receive_credits, credits = 0; 797 int ret; 798 799 spin_lock(&t->receive_credit_lock); 800 receive_credits = t->recv_credits; 801 spin_unlock(&t->receive_credit_lock); 802 803 if (receive_credits < t->recv_credit_target) { 804 while (true) { 805 recvmsg = get_free_recvmsg(t); 806 if (!recvmsg) 807 break; 808 809 recvmsg->type = SMB_DIRECT_MSG_DATA_TRANSFER; 810 recvmsg->first_segment = false; 811 812 ret = smb_direct_post_recv(t, recvmsg); 813 if (ret) { 814 pr_err("Can't post recv: %d\n", ret); 815 put_recvmsg(t, recvmsg); 816 break; 817 } 818 credits++; 819 } 820 } 821 822 spin_lock(&t->receive_credit_lock); 823 t->recv_credits += credits; 824 t->count_avail_recvmsg -= credits; 825 spin_unlock(&t->receive_credit_lock); 826 827 spin_lock(&t->lock_new_recv_credits); 828 t->new_recv_credits += credits; 829 spin_unlock(&t->lock_new_recv_credits); 830 831 if (credits) 832 queue_work(smb_direct_wq, &t->send_immediate_work); 833 } 834 835 static void send_done(struct ib_cq *cq, struct ib_wc *wc) 836 { 837 struct smb_direct_sendmsg *sendmsg, *sibling; 838 struct smb_direct_transport *t; 839 struct list_head *pos, *prev, *end; 840 841 sendmsg = container_of(wc->wr_cqe, struct smb_direct_sendmsg, cqe); 842 t = sendmsg->transport; 843 844 ksmbd_debug(RDMA, "Send completed. status='%s (%d)', opcode=%d\n", 845 ib_wc_status_msg(wc->status), wc->status, 846 wc->opcode); 847 848 if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_SEND) { 849 pr_err("Send error. status='%s (%d)', opcode=%d\n", 850 ib_wc_status_msg(wc->status), wc->status, 851 wc->opcode); 852 smb_direct_disconnect_rdma_connection(t); 853 } 854 855 if (atomic_dec_and_test(&t->send_pending)) 856 wake_up(&t->wait_send_pending); 857 858 /* iterate and free the list of messages in reverse. the list's head 859 * is invalid. 860 */ 861 for (pos = &sendmsg->list, prev = pos->prev, end = sendmsg->list.next; 862 prev != end; pos = prev, prev = prev->prev) { 863 sibling = container_of(pos, struct smb_direct_sendmsg, list); 864 smb_direct_free_sendmsg(t, sibling); 865 } 866 867 sibling = container_of(pos, struct smb_direct_sendmsg, list); 868 smb_direct_free_sendmsg(t, sibling); 869 } 870 871 static int manage_credits_prior_sending(struct smb_direct_transport *t) 872 { 873 int new_credits; 874 875 spin_lock(&t->lock_new_recv_credits); 876 new_credits = t->new_recv_credits; 877 t->new_recv_credits = 0; 878 spin_unlock(&t->lock_new_recv_credits); 879 880 return new_credits; 881 } 882 883 static int smb_direct_post_send(struct smb_direct_transport *t, 884 struct ib_send_wr *wr) 885 { 886 int ret; 887 888 atomic_inc(&t->send_pending); 889 ret = ib_post_send(t->qp, wr, NULL); 890 if (ret) { 891 pr_err("failed to post send: %d\n", ret); 892 if (atomic_dec_and_test(&t->send_pending)) 893 wake_up(&t->wait_send_pending); 894 smb_direct_disconnect_rdma_connection(t); 895 } 896 return ret; 897 } 898 899 static void smb_direct_send_ctx_init(struct smb_direct_transport *t, 900 struct smb_direct_send_ctx *send_ctx, 901 bool need_invalidate_rkey, 902 unsigned int remote_key) 903 { 904 INIT_LIST_HEAD(&send_ctx->msg_list); 905 send_ctx->wr_cnt = 0; 906 send_ctx->need_invalidate_rkey = need_invalidate_rkey; 907 send_ctx->remote_key = remote_key; 908 } 909 910 static int smb_direct_flush_send_list(struct smb_direct_transport *t, 911 struct smb_direct_send_ctx *send_ctx, 912 bool is_last) 913 { 914 struct smb_direct_sendmsg *first, *last; 915 int ret; 916 917 if (list_empty(&send_ctx->msg_list)) 918 return 0; 919 920 first = list_first_entry(&send_ctx->msg_list, 921 struct smb_direct_sendmsg, 922 list); 923 last = list_last_entry(&send_ctx->msg_list, 924 struct smb_direct_sendmsg, 925 list); 926 927 last->wr.send_flags = IB_SEND_SIGNALED; 928 last->wr.wr_cqe = &last->cqe; 929 if (is_last && send_ctx->need_invalidate_rkey) { 930 last->wr.opcode = IB_WR_SEND_WITH_INV; 931 last->wr.ex.invalidate_rkey = send_ctx->remote_key; 932 } 933 934 ret = smb_direct_post_send(t, &first->wr); 935 if (!ret) { 936 smb_direct_send_ctx_init(t, send_ctx, 937 send_ctx->need_invalidate_rkey, 938 send_ctx->remote_key); 939 } else { 940 atomic_add(send_ctx->wr_cnt, &t->send_credits); 941 wake_up(&t->wait_send_credits); 942 list_for_each_entry_safe(first, last, &send_ctx->msg_list, 943 list) { 944 smb_direct_free_sendmsg(t, first); 945 } 946 } 947 return ret; 948 } 949 950 static int wait_for_credits(struct smb_direct_transport *t, 951 wait_queue_head_t *waitq, atomic_t *total_credits, 952 int needed) 953 { 954 int ret; 955 956 do { 957 if (atomic_sub_return(needed, total_credits) >= 0) 958 return 0; 959 960 atomic_add(needed, total_credits); 961 ret = wait_event_interruptible(*waitq, 962 atomic_read(total_credits) >= needed || 963 t->status != SMB_DIRECT_CS_CONNECTED); 964 965 if (t->status != SMB_DIRECT_CS_CONNECTED) 966 return -ENOTCONN; 967 else if (ret < 0) 968 return ret; 969 } while (true); 970 } 971 972 static int wait_for_send_credits(struct smb_direct_transport *t, 973 struct smb_direct_send_ctx *send_ctx) 974 { 975 int ret; 976 977 if (send_ctx && 978 (send_ctx->wr_cnt >= 16 || atomic_read(&t->send_credits) <= 1)) { 979 ret = smb_direct_flush_send_list(t, send_ctx, false); 980 if (ret) 981 return ret; 982 } 983 984 return wait_for_credits(t, &t->wait_send_credits, &t->send_credits, 1); 985 } 986 987 static int wait_for_rw_credits(struct smb_direct_transport *t, int credits) 988 { 989 return wait_for_credits(t, &t->wait_rw_credits, &t->rw_credits, credits); 990 } 991 992 static int calc_rw_credits(struct smb_direct_transport *t, 993 char *buf, unsigned int len) 994 { 995 return DIV_ROUND_UP(get_buf_page_count(buf, len), 996 t->pages_per_rw_credit); 997 } 998 999 static int smb_direct_create_header(struct smb_direct_transport *t, 1000 int size, int remaining_data_length, 1001 struct smb_direct_sendmsg **sendmsg_out) 1002 { 1003 struct smb_direct_sendmsg *sendmsg; 1004 struct smb_direct_data_transfer *packet; 1005 int header_length; 1006 int ret; 1007 1008 sendmsg = smb_direct_alloc_sendmsg(t); 1009 if (IS_ERR(sendmsg)) 1010 return PTR_ERR(sendmsg); 1011 1012 /* Fill in the packet header */ 1013 packet = (struct smb_direct_data_transfer *)sendmsg->packet; 1014 packet->credits_requested = cpu_to_le16(t->send_credit_target); 1015 packet->credits_granted = cpu_to_le16(manage_credits_prior_sending(t)); 1016 1017 packet->flags = 0; 1018 packet->reserved = 0; 1019 if (!size) 1020 packet->data_offset = 0; 1021 else 1022 packet->data_offset = cpu_to_le32(24); 1023 packet->data_length = cpu_to_le32(size); 1024 packet->remaining_data_length = cpu_to_le32(remaining_data_length); 1025 packet->padding = 0; 1026 1027 ksmbd_debug(RDMA, 1028 "credits_requested=%d credits_granted=%d data_offset=%d data_length=%d remaining_data_length=%d\n", 1029 le16_to_cpu(packet->credits_requested), 1030 le16_to_cpu(packet->credits_granted), 1031 le32_to_cpu(packet->data_offset), 1032 le32_to_cpu(packet->data_length), 1033 le32_to_cpu(packet->remaining_data_length)); 1034 1035 /* Map the packet to DMA */ 1036 header_length = sizeof(struct smb_direct_data_transfer); 1037 /* If this is a packet without payload, don't send padding */ 1038 if (!size) 1039 header_length = 1040 offsetof(struct smb_direct_data_transfer, padding); 1041 1042 sendmsg->sge[0].addr = ib_dma_map_single(t->cm_id->device, 1043 (void *)packet, 1044 header_length, 1045 DMA_TO_DEVICE); 1046 ret = ib_dma_mapping_error(t->cm_id->device, sendmsg->sge[0].addr); 1047 if (ret) { 1048 smb_direct_free_sendmsg(t, sendmsg); 1049 return ret; 1050 } 1051 1052 sendmsg->num_sge = 1; 1053 sendmsg->sge[0].length = header_length; 1054 sendmsg->sge[0].lkey = t->pd->local_dma_lkey; 1055 1056 *sendmsg_out = sendmsg; 1057 return 0; 1058 } 1059 1060 static int get_sg_list(void *buf, int size, struct scatterlist *sg_list, int nentries) 1061 { 1062 bool high = is_vmalloc_addr(buf); 1063 struct page *page; 1064 int offset, len; 1065 int i = 0; 1066 1067 if (size <= 0 || nentries < get_buf_page_count(buf, size)) 1068 return -EINVAL; 1069 1070 offset = offset_in_page(buf); 1071 buf -= offset; 1072 while (size > 0) { 1073 len = min_t(int, PAGE_SIZE - offset, size); 1074 if (high) 1075 page = vmalloc_to_page(buf); 1076 else 1077 page = kmap_to_page(buf); 1078 1079 if (!sg_list) 1080 return -EINVAL; 1081 sg_set_page(sg_list, page, len, offset); 1082 sg_list = sg_next(sg_list); 1083 1084 buf += PAGE_SIZE; 1085 size -= len; 1086 offset = 0; 1087 i++; 1088 } 1089 return i; 1090 } 1091 1092 static int get_mapped_sg_list(struct ib_device *device, void *buf, int size, 1093 struct scatterlist *sg_list, int nentries, 1094 enum dma_data_direction dir) 1095 { 1096 int npages; 1097 1098 npages = get_sg_list(buf, size, sg_list, nentries); 1099 if (npages < 0) 1100 return -EINVAL; 1101 return ib_dma_map_sg(device, sg_list, npages, dir); 1102 } 1103 1104 static int post_sendmsg(struct smb_direct_transport *t, 1105 struct smb_direct_send_ctx *send_ctx, 1106 struct smb_direct_sendmsg *msg) 1107 { 1108 int i; 1109 1110 for (i = 0; i < msg->num_sge; i++) 1111 ib_dma_sync_single_for_device(t->cm_id->device, 1112 msg->sge[i].addr, msg->sge[i].length, 1113 DMA_TO_DEVICE); 1114 1115 msg->cqe.done = send_done; 1116 msg->wr.opcode = IB_WR_SEND; 1117 msg->wr.sg_list = &msg->sge[0]; 1118 msg->wr.num_sge = msg->num_sge; 1119 msg->wr.next = NULL; 1120 1121 if (send_ctx) { 1122 msg->wr.wr_cqe = NULL; 1123 msg->wr.send_flags = 0; 1124 if (!list_empty(&send_ctx->msg_list)) { 1125 struct smb_direct_sendmsg *last; 1126 1127 last = list_last_entry(&send_ctx->msg_list, 1128 struct smb_direct_sendmsg, 1129 list); 1130 last->wr.next = &msg->wr; 1131 } 1132 list_add_tail(&msg->list, &send_ctx->msg_list); 1133 send_ctx->wr_cnt++; 1134 return 0; 1135 } 1136 1137 msg->wr.wr_cqe = &msg->cqe; 1138 msg->wr.send_flags = IB_SEND_SIGNALED; 1139 return smb_direct_post_send(t, &msg->wr); 1140 } 1141 1142 static int smb_direct_post_send_data(struct smb_direct_transport *t, 1143 struct smb_direct_send_ctx *send_ctx, 1144 struct kvec *iov, int niov, 1145 int remaining_data_length) 1146 { 1147 int i, j, ret; 1148 struct smb_direct_sendmsg *msg; 1149 int data_length; 1150 struct scatterlist sg[SMB_DIRECT_MAX_SEND_SGES - 1]; 1151 1152 ret = wait_for_send_credits(t, send_ctx); 1153 if (ret) 1154 return ret; 1155 1156 data_length = 0; 1157 for (i = 0; i < niov; i++) 1158 data_length += iov[i].iov_len; 1159 1160 ret = smb_direct_create_header(t, data_length, remaining_data_length, 1161 &msg); 1162 if (ret) { 1163 atomic_inc(&t->send_credits); 1164 return ret; 1165 } 1166 1167 for (i = 0; i < niov; i++) { 1168 struct ib_sge *sge; 1169 int sg_cnt; 1170 1171 sg_init_table(sg, SMB_DIRECT_MAX_SEND_SGES - 1); 1172 sg_cnt = get_mapped_sg_list(t->cm_id->device, 1173 iov[i].iov_base, iov[i].iov_len, 1174 sg, SMB_DIRECT_MAX_SEND_SGES - 1, 1175 DMA_TO_DEVICE); 1176 if (sg_cnt <= 0) { 1177 pr_err("failed to map buffer\n"); 1178 ret = -ENOMEM; 1179 goto err; 1180 } else if (sg_cnt + msg->num_sge > SMB_DIRECT_MAX_SEND_SGES) { 1181 pr_err("buffer not fitted into sges\n"); 1182 ret = -E2BIG; 1183 ib_dma_unmap_sg(t->cm_id->device, sg, sg_cnt, 1184 DMA_TO_DEVICE); 1185 goto err; 1186 } 1187 1188 for (j = 0; j < sg_cnt; j++) { 1189 sge = &msg->sge[msg->num_sge]; 1190 sge->addr = sg_dma_address(&sg[j]); 1191 sge->length = sg_dma_len(&sg[j]); 1192 sge->lkey = t->pd->local_dma_lkey; 1193 msg->num_sge++; 1194 } 1195 } 1196 1197 ret = post_sendmsg(t, send_ctx, msg); 1198 if (ret) 1199 goto err; 1200 return 0; 1201 err: 1202 smb_direct_free_sendmsg(t, msg); 1203 atomic_inc(&t->send_credits); 1204 return ret; 1205 } 1206 1207 static int smb_direct_writev(struct ksmbd_transport *t, 1208 struct kvec *iov, int niovs, int buflen, 1209 bool need_invalidate, unsigned int remote_key) 1210 { 1211 struct smb_direct_transport *st = smb_trans_direct_transfort(t); 1212 int remaining_data_length; 1213 int start, i, j; 1214 int max_iov_size = st->max_send_size - 1215 sizeof(struct smb_direct_data_transfer); 1216 int ret; 1217 struct kvec vec; 1218 struct smb_direct_send_ctx send_ctx; 1219 1220 if (st->status != SMB_DIRECT_CS_CONNECTED) 1221 return -ENOTCONN; 1222 1223 //FIXME: skip RFC1002 header.. 1224 buflen -= 4; 1225 1226 remaining_data_length = buflen; 1227 ksmbd_debug(RDMA, "Sending smb (RDMA): smb_len=%u\n", buflen); 1228 1229 smb_direct_send_ctx_init(st, &send_ctx, need_invalidate, remote_key); 1230 start = i = 1; 1231 buflen = 0; 1232 while (true) { 1233 buflen += iov[i].iov_len; 1234 if (buflen > max_iov_size) { 1235 if (i > start) { 1236 remaining_data_length -= 1237 (buflen - iov[i].iov_len); 1238 ret = smb_direct_post_send_data(st, &send_ctx, 1239 &iov[start], i - start, 1240 remaining_data_length); 1241 if (ret) 1242 goto done; 1243 } else { 1244 /* iov[start] is too big, break it */ 1245 int nvec = (buflen + max_iov_size - 1) / 1246 max_iov_size; 1247 1248 for (j = 0; j < nvec; j++) { 1249 vec.iov_base = 1250 (char *)iov[start].iov_base + 1251 j * max_iov_size; 1252 vec.iov_len = 1253 min_t(int, max_iov_size, 1254 buflen - max_iov_size * j); 1255 remaining_data_length -= vec.iov_len; 1256 ret = smb_direct_post_send_data(st, &send_ctx, &vec, 1, 1257 remaining_data_length); 1258 if (ret) 1259 goto done; 1260 } 1261 i++; 1262 if (i == niovs) 1263 break; 1264 } 1265 start = i; 1266 buflen = 0; 1267 } else { 1268 i++; 1269 if (i == niovs) { 1270 /* send out all remaining vecs */ 1271 remaining_data_length -= buflen; 1272 ret = smb_direct_post_send_data(st, &send_ctx, 1273 &iov[start], i - start, 1274 remaining_data_length); 1275 if (ret) 1276 goto done; 1277 break; 1278 } 1279 } 1280 } 1281 1282 done: 1283 ret = smb_direct_flush_send_list(st, &send_ctx, true); 1284 1285 /* 1286 * As an optimization, we don't wait for individual I/O to finish 1287 * before sending the next one. 1288 * Send them all and wait for pending send count to get to 0 1289 * that means all the I/Os have been out and we are good to return 1290 */ 1291 1292 wait_event(st->wait_send_pending, 1293 atomic_read(&st->send_pending) == 0); 1294 return ret; 1295 } 1296 1297 static void smb_direct_free_rdma_rw_msg(struct smb_direct_transport *t, 1298 struct smb_direct_rdma_rw_msg *msg, 1299 enum dma_data_direction dir) 1300 { 1301 rdma_rw_ctx_destroy(&msg->rw_ctx, t->qp, t->qp->port, 1302 msg->sgt.sgl, msg->sgt.nents, dir); 1303 sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE); 1304 kfree(msg); 1305 } 1306 1307 static void read_write_done(struct ib_cq *cq, struct ib_wc *wc, 1308 enum dma_data_direction dir) 1309 { 1310 struct smb_direct_rdma_rw_msg *msg = container_of(wc->wr_cqe, 1311 struct smb_direct_rdma_rw_msg, cqe); 1312 struct smb_direct_transport *t = msg->t; 1313 1314 if (wc->status != IB_WC_SUCCESS) { 1315 msg->status = -EIO; 1316 pr_err("read/write error. opcode = %d, status = %s(%d)\n", 1317 wc->opcode, ib_wc_status_msg(wc->status), wc->status); 1318 if (wc->status != IB_WC_WR_FLUSH_ERR) 1319 smb_direct_disconnect_rdma_connection(t); 1320 } 1321 1322 complete(msg->completion); 1323 } 1324 1325 static void read_done(struct ib_cq *cq, struct ib_wc *wc) 1326 { 1327 read_write_done(cq, wc, DMA_FROM_DEVICE); 1328 } 1329 1330 static void write_done(struct ib_cq *cq, struct ib_wc *wc) 1331 { 1332 read_write_done(cq, wc, DMA_TO_DEVICE); 1333 } 1334 1335 static int smb_direct_rdma_xmit(struct smb_direct_transport *t, 1336 void *buf, int buf_len, 1337 struct smb2_buffer_desc_v1 *desc, 1338 unsigned int desc_len, 1339 bool is_read) 1340 { 1341 struct smb_direct_rdma_rw_msg *msg, *next_msg; 1342 int i, ret; 1343 DECLARE_COMPLETION_ONSTACK(completion); 1344 struct ib_send_wr *first_wr; 1345 LIST_HEAD(msg_list); 1346 char *desc_buf; 1347 int credits_needed; 1348 unsigned int desc_buf_len, desc_num = 0; 1349 1350 if (t->status != SMB_DIRECT_CS_CONNECTED) 1351 return -ENOTCONN; 1352 1353 if (buf_len > t->max_rdma_rw_size) 1354 return -EINVAL; 1355 1356 /* calculate needed credits */ 1357 credits_needed = 0; 1358 desc_buf = buf; 1359 for (i = 0; i < desc_len / sizeof(*desc); i++) { 1360 if (!buf_len) 1361 break; 1362 1363 desc_buf_len = le32_to_cpu(desc[i].length); 1364 if (!desc_buf_len) 1365 return -EINVAL; 1366 1367 if (desc_buf_len > buf_len) { 1368 desc_buf_len = buf_len; 1369 desc[i].length = cpu_to_le32(desc_buf_len); 1370 buf_len = 0; 1371 } 1372 1373 credits_needed += calc_rw_credits(t, desc_buf, desc_buf_len); 1374 desc_buf += desc_buf_len; 1375 buf_len -= desc_buf_len; 1376 desc_num++; 1377 } 1378 1379 ksmbd_debug(RDMA, "RDMA %s, len %#x, needed credits %#x\n", 1380 str_read_write(is_read), buf_len, credits_needed); 1381 1382 ret = wait_for_rw_credits(t, credits_needed); 1383 if (ret < 0) 1384 return ret; 1385 1386 /* build rdma_rw_ctx for each descriptor */ 1387 desc_buf = buf; 1388 for (i = 0; i < desc_num; i++) { 1389 msg = kzalloc(struct_size(msg, sg_list, SG_CHUNK_SIZE), 1390 KSMBD_DEFAULT_GFP); 1391 if (!msg) { 1392 ret = -ENOMEM; 1393 goto out; 1394 } 1395 1396 desc_buf_len = le32_to_cpu(desc[i].length); 1397 1398 msg->t = t; 1399 msg->cqe.done = is_read ? read_done : write_done; 1400 msg->completion = &completion; 1401 1402 msg->sgt.sgl = &msg->sg_list[0]; 1403 ret = sg_alloc_table_chained(&msg->sgt, 1404 get_buf_page_count(desc_buf, desc_buf_len), 1405 msg->sg_list, SG_CHUNK_SIZE); 1406 if (ret) { 1407 kfree(msg); 1408 ret = -ENOMEM; 1409 goto out; 1410 } 1411 1412 ret = get_sg_list(desc_buf, desc_buf_len, 1413 msg->sgt.sgl, msg->sgt.orig_nents); 1414 if (ret < 0) { 1415 sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE); 1416 kfree(msg); 1417 goto out; 1418 } 1419 1420 ret = rdma_rw_ctx_init(&msg->rw_ctx, t->qp, t->qp->port, 1421 msg->sgt.sgl, 1422 get_buf_page_count(desc_buf, desc_buf_len), 1423 0, 1424 le64_to_cpu(desc[i].offset), 1425 le32_to_cpu(desc[i].token), 1426 is_read ? DMA_FROM_DEVICE : DMA_TO_DEVICE); 1427 if (ret < 0) { 1428 pr_err("failed to init rdma_rw_ctx: %d\n", ret); 1429 sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE); 1430 kfree(msg); 1431 goto out; 1432 } 1433 1434 list_add_tail(&msg->list, &msg_list); 1435 desc_buf += desc_buf_len; 1436 } 1437 1438 /* concatenate work requests of rdma_rw_ctxs */ 1439 first_wr = NULL; 1440 list_for_each_entry_reverse(msg, &msg_list, list) { 1441 first_wr = rdma_rw_ctx_wrs(&msg->rw_ctx, t->qp, t->qp->port, 1442 &msg->cqe, first_wr); 1443 } 1444 1445 ret = ib_post_send(t->qp, first_wr, NULL); 1446 if (ret) { 1447 pr_err("failed to post send wr for RDMA R/W: %d\n", ret); 1448 goto out; 1449 } 1450 1451 msg = list_last_entry(&msg_list, struct smb_direct_rdma_rw_msg, list); 1452 wait_for_completion(&completion); 1453 ret = msg->status; 1454 out: 1455 list_for_each_entry_safe(msg, next_msg, &msg_list, list) { 1456 list_del(&msg->list); 1457 smb_direct_free_rdma_rw_msg(t, msg, 1458 is_read ? DMA_FROM_DEVICE : DMA_TO_DEVICE); 1459 } 1460 atomic_add(credits_needed, &t->rw_credits); 1461 wake_up(&t->wait_rw_credits); 1462 return ret; 1463 } 1464 1465 static int smb_direct_rdma_write(struct ksmbd_transport *t, 1466 void *buf, unsigned int buflen, 1467 struct smb2_buffer_desc_v1 *desc, 1468 unsigned int desc_len) 1469 { 1470 return smb_direct_rdma_xmit(smb_trans_direct_transfort(t), buf, buflen, 1471 desc, desc_len, false); 1472 } 1473 1474 static int smb_direct_rdma_read(struct ksmbd_transport *t, 1475 void *buf, unsigned int buflen, 1476 struct smb2_buffer_desc_v1 *desc, 1477 unsigned int desc_len) 1478 { 1479 return smb_direct_rdma_xmit(smb_trans_direct_transfort(t), buf, buflen, 1480 desc, desc_len, true); 1481 } 1482 1483 static void smb_direct_disconnect(struct ksmbd_transport *t) 1484 { 1485 struct smb_direct_transport *st = smb_trans_direct_transfort(t); 1486 1487 ksmbd_debug(RDMA, "Disconnecting cm_id=%p\n", st->cm_id); 1488 1489 smb_direct_disconnect_rdma_work(&st->disconnect_work); 1490 wait_event_interruptible(st->wait_status, 1491 st->status == SMB_DIRECT_CS_DISCONNECTED); 1492 free_transport(st); 1493 } 1494 1495 static void smb_direct_shutdown(struct ksmbd_transport *t) 1496 { 1497 struct smb_direct_transport *st = smb_trans_direct_transfort(t); 1498 1499 ksmbd_debug(RDMA, "smb-direct shutdown cm_id=%p\n", st->cm_id); 1500 1501 smb_direct_disconnect_rdma_work(&st->disconnect_work); 1502 } 1503 1504 static int smb_direct_cm_handler(struct rdma_cm_id *cm_id, 1505 struct rdma_cm_event *event) 1506 { 1507 struct smb_direct_transport *t = cm_id->context; 1508 1509 ksmbd_debug(RDMA, "RDMA CM event. cm_id=%p event=%s (%d)\n", 1510 cm_id, rdma_event_msg(event->event), event->event); 1511 1512 switch (event->event) { 1513 case RDMA_CM_EVENT_ESTABLISHED: { 1514 t->status = SMB_DIRECT_CS_CONNECTED; 1515 wake_up_interruptible(&t->wait_status); 1516 break; 1517 } 1518 case RDMA_CM_EVENT_DEVICE_REMOVAL: 1519 case RDMA_CM_EVENT_DISCONNECTED: { 1520 ib_drain_qp(t->qp); 1521 1522 t->status = SMB_DIRECT_CS_DISCONNECTED; 1523 wake_up_interruptible(&t->wait_status); 1524 wake_up_interruptible(&t->wait_reassembly_queue); 1525 wake_up(&t->wait_send_credits); 1526 break; 1527 } 1528 case RDMA_CM_EVENT_CONNECT_ERROR: { 1529 t->status = SMB_DIRECT_CS_DISCONNECTED; 1530 wake_up_interruptible(&t->wait_status); 1531 break; 1532 } 1533 default: 1534 pr_err("Unexpected RDMA CM event. cm_id=%p, event=%s (%d)\n", 1535 cm_id, rdma_event_msg(event->event), 1536 event->event); 1537 break; 1538 } 1539 return 0; 1540 } 1541 1542 static void smb_direct_qpair_handler(struct ib_event *event, void *context) 1543 { 1544 struct smb_direct_transport *t = context; 1545 1546 ksmbd_debug(RDMA, "Received QP event. cm_id=%p, event=%s (%d)\n", 1547 t->cm_id, ib_event_msg(event->event), event->event); 1548 1549 switch (event->event) { 1550 case IB_EVENT_CQ_ERR: 1551 case IB_EVENT_QP_FATAL: 1552 smb_direct_disconnect_rdma_connection(t); 1553 break; 1554 default: 1555 break; 1556 } 1557 } 1558 1559 static int smb_direct_send_negotiate_response(struct smb_direct_transport *t, 1560 int failed) 1561 { 1562 struct smb_direct_sendmsg *sendmsg; 1563 struct smb_direct_negotiate_resp *resp; 1564 int ret; 1565 1566 sendmsg = smb_direct_alloc_sendmsg(t); 1567 if (IS_ERR(sendmsg)) 1568 return -ENOMEM; 1569 1570 resp = (struct smb_direct_negotiate_resp *)sendmsg->packet; 1571 if (failed) { 1572 memset(resp, 0, sizeof(*resp)); 1573 resp->min_version = cpu_to_le16(0x0100); 1574 resp->max_version = cpu_to_le16(0x0100); 1575 resp->status = STATUS_NOT_SUPPORTED; 1576 } else { 1577 resp->status = STATUS_SUCCESS; 1578 resp->min_version = SMB_DIRECT_VERSION_LE; 1579 resp->max_version = SMB_DIRECT_VERSION_LE; 1580 resp->negotiated_version = SMB_DIRECT_VERSION_LE; 1581 resp->reserved = 0; 1582 resp->credits_requested = 1583 cpu_to_le16(t->send_credit_target); 1584 resp->credits_granted = cpu_to_le16(manage_credits_prior_sending(t)); 1585 resp->max_readwrite_size = cpu_to_le32(t->max_rdma_rw_size); 1586 resp->preferred_send_size = cpu_to_le32(t->max_send_size); 1587 resp->max_receive_size = cpu_to_le32(t->max_recv_size); 1588 resp->max_fragmented_size = 1589 cpu_to_le32(t->max_fragmented_recv_size); 1590 } 1591 1592 sendmsg->sge[0].addr = ib_dma_map_single(t->cm_id->device, 1593 (void *)resp, sizeof(*resp), 1594 DMA_TO_DEVICE); 1595 ret = ib_dma_mapping_error(t->cm_id->device, sendmsg->sge[0].addr); 1596 if (ret) { 1597 smb_direct_free_sendmsg(t, sendmsg); 1598 return ret; 1599 } 1600 1601 sendmsg->num_sge = 1; 1602 sendmsg->sge[0].length = sizeof(*resp); 1603 sendmsg->sge[0].lkey = t->pd->local_dma_lkey; 1604 1605 ret = post_sendmsg(t, NULL, sendmsg); 1606 if (ret) { 1607 smb_direct_free_sendmsg(t, sendmsg); 1608 return ret; 1609 } 1610 1611 wait_event(t->wait_send_pending, 1612 atomic_read(&t->send_pending) == 0); 1613 return 0; 1614 } 1615 1616 static int smb_direct_accept_client(struct smb_direct_transport *t) 1617 { 1618 struct rdma_conn_param conn_param; 1619 struct ib_port_immutable port_immutable; 1620 u32 ird_ord_hdr[2]; 1621 int ret; 1622 1623 memset(&conn_param, 0, sizeof(conn_param)); 1624 conn_param.initiator_depth = min_t(u8, t->cm_id->device->attrs.max_qp_rd_atom, 1625 SMB_DIRECT_CM_INITIATOR_DEPTH); 1626 conn_param.responder_resources = 0; 1627 1628 t->cm_id->device->ops.get_port_immutable(t->cm_id->device, 1629 t->cm_id->port_num, 1630 &port_immutable); 1631 if (port_immutable.core_cap_flags & RDMA_CORE_PORT_IWARP) { 1632 ird_ord_hdr[0] = conn_param.responder_resources; 1633 ird_ord_hdr[1] = 1; 1634 conn_param.private_data = ird_ord_hdr; 1635 conn_param.private_data_len = sizeof(ird_ord_hdr); 1636 } else { 1637 conn_param.private_data = NULL; 1638 conn_param.private_data_len = 0; 1639 } 1640 conn_param.retry_count = SMB_DIRECT_CM_RETRY; 1641 conn_param.rnr_retry_count = SMB_DIRECT_CM_RNR_RETRY; 1642 conn_param.flow_control = 0; 1643 1644 ret = rdma_accept(t->cm_id, &conn_param); 1645 if (ret) { 1646 pr_err("error at rdma_accept: %d\n", ret); 1647 return ret; 1648 } 1649 return 0; 1650 } 1651 1652 static int smb_direct_prepare_negotiation(struct smb_direct_transport *t) 1653 { 1654 int ret; 1655 struct smb_direct_recvmsg *recvmsg; 1656 1657 recvmsg = get_free_recvmsg(t); 1658 if (!recvmsg) 1659 return -ENOMEM; 1660 recvmsg->type = SMB_DIRECT_MSG_NEGOTIATE_REQ; 1661 1662 ret = smb_direct_post_recv(t, recvmsg); 1663 if (ret) { 1664 pr_err("Can't post recv: %d\n", ret); 1665 goto out_err; 1666 } 1667 1668 t->negotiation_requested = false; 1669 ret = smb_direct_accept_client(t); 1670 if (ret) { 1671 pr_err("Can't accept client\n"); 1672 goto out_err; 1673 } 1674 1675 smb_direct_post_recv_credits(&t->post_recv_credits_work.work); 1676 return 0; 1677 out_err: 1678 put_recvmsg(t, recvmsg); 1679 return ret; 1680 } 1681 1682 static unsigned int smb_direct_get_max_fr_pages(struct smb_direct_transport *t) 1683 { 1684 return min_t(unsigned int, 1685 t->cm_id->device->attrs.max_fast_reg_page_list_len, 1686 256); 1687 } 1688 1689 static int smb_direct_init_params(struct smb_direct_transport *t, 1690 struct ib_qp_cap *cap) 1691 { 1692 struct ib_device *device = t->cm_id->device; 1693 int max_send_sges, max_rw_wrs, max_send_wrs; 1694 unsigned int max_sge_per_wr, wrs_per_credit; 1695 1696 /* need 3 more sge. because a SMB_DIRECT header, SMB2 header, 1697 * SMB2 response could be mapped. 1698 */ 1699 t->max_send_size = smb_direct_max_send_size; 1700 max_send_sges = DIV_ROUND_UP(t->max_send_size, PAGE_SIZE) + 3; 1701 if (max_send_sges > SMB_DIRECT_MAX_SEND_SGES) { 1702 pr_err("max_send_size %d is too large\n", t->max_send_size); 1703 return -EINVAL; 1704 } 1705 1706 /* Calculate the number of work requests for RDMA R/W. 1707 * The maximum number of pages which can be registered 1708 * with one Memory region can be transferred with one 1709 * R/W credit. And at least 4 work requests for each credit 1710 * are needed for MR registration, RDMA R/W, local & remote 1711 * MR invalidation. 1712 */ 1713 t->max_rdma_rw_size = smb_direct_max_read_write_size; 1714 t->pages_per_rw_credit = smb_direct_get_max_fr_pages(t); 1715 t->max_rw_credits = DIV_ROUND_UP(t->max_rdma_rw_size, 1716 (t->pages_per_rw_credit - 1) * 1717 PAGE_SIZE); 1718 1719 max_sge_per_wr = min_t(unsigned int, device->attrs.max_send_sge, 1720 device->attrs.max_sge_rd); 1721 max_sge_per_wr = max_t(unsigned int, max_sge_per_wr, 1722 max_send_sges); 1723 wrs_per_credit = max_t(unsigned int, 4, 1724 DIV_ROUND_UP(t->pages_per_rw_credit, 1725 max_sge_per_wr) + 1); 1726 max_rw_wrs = t->max_rw_credits * wrs_per_credit; 1727 1728 max_send_wrs = smb_direct_send_credit_target + max_rw_wrs; 1729 if (max_send_wrs > device->attrs.max_cqe || 1730 max_send_wrs > device->attrs.max_qp_wr) { 1731 pr_err("consider lowering send_credit_target = %d\n", 1732 smb_direct_send_credit_target); 1733 pr_err("Possible CQE overrun, device reporting max_cqe %d max_qp_wr %d\n", 1734 device->attrs.max_cqe, device->attrs.max_qp_wr); 1735 return -EINVAL; 1736 } 1737 1738 if (smb_direct_receive_credit_max > device->attrs.max_cqe || 1739 smb_direct_receive_credit_max > device->attrs.max_qp_wr) { 1740 pr_err("consider lowering receive_credit_max = %d\n", 1741 smb_direct_receive_credit_max); 1742 pr_err("Possible CQE overrun, device reporting max_cpe %d max_qp_wr %d\n", 1743 device->attrs.max_cqe, device->attrs.max_qp_wr); 1744 return -EINVAL; 1745 } 1746 1747 if (device->attrs.max_recv_sge < SMB_DIRECT_MAX_RECV_SGES) { 1748 pr_err("warning: device max_recv_sge = %d too small\n", 1749 device->attrs.max_recv_sge); 1750 return -EINVAL; 1751 } 1752 1753 t->recv_credits = 0; 1754 t->count_avail_recvmsg = 0; 1755 1756 t->recv_credit_max = smb_direct_receive_credit_max; 1757 t->recv_credit_target = 10; 1758 t->new_recv_credits = 0; 1759 1760 t->send_credit_target = smb_direct_send_credit_target; 1761 atomic_set(&t->send_credits, 0); 1762 atomic_set(&t->rw_credits, t->max_rw_credits); 1763 1764 t->max_send_size = smb_direct_max_send_size; 1765 t->max_recv_size = smb_direct_max_receive_size; 1766 t->max_fragmented_recv_size = smb_direct_max_fragmented_recv_size; 1767 1768 cap->max_send_wr = max_send_wrs; 1769 cap->max_recv_wr = t->recv_credit_max; 1770 cap->max_send_sge = max_sge_per_wr; 1771 cap->max_recv_sge = SMB_DIRECT_MAX_RECV_SGES; 1772 cap->max_inline_data = 0; 1773 cap->max_rdma_ctxs = t->max_rw_credits; 1774 return 0; 1775 } 1776 1777 static void smb_direct_destroy_pools(struct smb_direct_transport *t) 1778 { 1779 struct smb_direct_recvmsg *recvmsg; 1780 1781 while ((recvmsg = get_free_recvmsg(t))) 1782 mempool_free(recvmsg, t->recvmsg_mempool); 1783 1784 mempool_destroy(t->recvmsg_mempool); 1785 t->recvmsg_mempool = NULL; 1786 1787 kmem_cache_destroy(t->recvmsg_cache); 1788 t->recvmsg_cache = NULL; 1789 1790 mempool_destroy(t->sendmsg_mempool); 1791 t->sendmsg_mempool = NULL; 1792 1793 kmem_cache_destroy(t->sendmsg_cache); 1794 t->sendmsg_cache = NULL; 1795 } 1796 1797 static int smb_direct_create_pools(struct smb_direct_transport *t) 1798 { 1799 char name[80]; 1800 int i; 1801 struct smb_direct_recvmsg *recvmsg; 1802 1803 snprintf(name, sizeof(name), "smb_direct_rqst_pool_%p", t); 1804 t->sendmsg_cache = kmem_cache_create(name, 1805 sizeof(struct smb_direct_sendmsg) + 1806 sizeof(struct smb_direct_negotiate_resp), 1807 0, SLAB_HWCACHE_ALIGN, NULL); 1808 if (!t->sendmsg_cache) 1809 return -ENOMEM; 1810 1811 t->sendmsg_mempool = mempool_create(t->send_credit_target, 1812 mempool_alloc_slab, mempool_free_slab, 1813 t->sendmsg_cache); 1814 if (!t->sendmsg_mempool) 1815 goto err; 1816 1817 snprintf(name, sizeof(name), "smb_direct_resp_%p", t); 1818 t->recvmsg_cache = kmem_cache_create(name, 1819 sizeof(struct smb_direct_recvmsg) + 1820 t->max_recv_size, 1821 0, SLAB_HWCACHE_ALIGN, NULL); 1822 if (!t->recvmsg_cache) 1823 goto err; 1824 1825 t->recvmsg_mempool = 1826 mempool_create(t->recv_credit_max, mempool_alloc_slab, 1827 mempool_free_slab, t->recvmsg_cache); 1828 if (!t->recvmsg_mempool) 1829 goto err; 1830 1831 INIT_LIST_HEAD(&t->recvmsg_queue); 1832 1833 for (i = 0; i < t->recv_credit_max; i++) { 1834 recvmsg = mempool_alloc(t->recvmsg_mempool, KSMBD_DEFAULT_GFP); 1835 if (!recvmsg) 1836 goto err; 1837 recvmsg->transport = t; 1838 recvmsg->sge.length = 0; 1839 list_add(&recvmsg->list, &t->recvmsg_queue); 1840 } 1841 t->count_avail_recvmsg = t->recv_credit_max; 1842 1843 return 0; 1844 err: 1845 smb_direct_destroy_pools(t); 1846 return -ENOMEM; 1847 } 1848 1849 static int smb_direct_create_qpair(struct smb_direct_transport *t, 1850 struct ib_qp_cap *cap) 1851 { 1852 int ret; 1853 struct ib_qp_init_attr qp_attr; 1854 int pages_per_rw; 1855 1856 t->pd = ib_alloc_pd(t->cm_id->device, 0); 1857 if (IS_ERR(t->pd)) { 1858 pr_err("Can't create RDMA PD\n"); 1859 ret = PTR_ERR(t->pd); 1860 t->pd = NULL; 1861 return ret; 1862 } 1863 1864 t->send_cq = ib_alloc_cq(t->cm_id->device, t, 1865 smb_direct_send_credit_target + cap->max_rdma_ctxs, 1866 0, IB_POLL_WORKQUEUE); 1867 if (IS_ERR(t->send_cq)) { 1868 pr_err("Can't create RDMA send CQ\n"); 1869 ret = PTR_ERR(t->send_cq); 1870 t->send_cq = NULL; 1871 goto err; 1872 } 1873 1874 t->recv_cq = ib_alloc_cq(t->cm_id->device, t, 1875 t->recv_credit_max, 0, IB_POLL_WORKQUEUE); 1876 if (IS_ERR(t->recv_cq)) { 1877 pr_err("Can't create RDMA recv CQ\n"); 1878 ret = PTR_ERR(t->recv_cq); 1879 t->recv_cq = NULL; 1880 goto err; 1881 } 1882 1883 memset(&qp_attr, 0, sizeof(qp_attr)); 1884 qp_attr.event_handler = smb_direct_qpair_handler; 1885 qp_attr.qp_context = t; 1886 qp_attr.cap = *cap; 1887 qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR; 1888 qp_attr.qp_type = IB_QPT_RC; 1889 qp_attr.send_cq = t->send_cq; 1890 qp_attr.recv_cq = t->recv_cq; 1891 qp_attr.port_num = ~0; 1892 1893 ret = rdma_create_qp(t->cm_id, t->pd, &qp_attr); 1894 if (ret) { 1895 pr_err("Can't create RDMA QP: %d\n", ret); 1896 goto err; 1897 } 1898 1899 t->qp = t->cm_id->qp; 1900 t->cm_id->event_handler = smb_direct_cm_handler; 1901 1902 pages_per_rw = DIV_ROUND_UP(t->max_rdma_rw_size, PAGE_SIZE) + 1; 1903 if (pages_per_rw > t->cm_id->device->attrs.max_sgl_rd) { 1904 ret = ib_mr_pool_init(t->qp, &t->qp->rdma_mrs, 1905 t->max_rw_credits, IB_MR_TYPE_MEM_REG, 1906 t->pages_per_rw_credit, 0); 1907 if (ret) { 1908 pr_err("failed to init mr pool count %d pages %d\n", 1909 t->max_rw_credits, t->pages_per_rw_credit); 1910 goto err; 1911 } 1912 } 1913 1914 return 0; 1915 err: 1916 if (t->qp) { 1917 t->qp = NULL; 1918 rdma_destroy_qp(t->cm_id); 1919 } 1920 if (t->recv_cq) { 1921 ib_destroy_cq(t->recv_cq); 1922 t->recv_cq = NULL; 1923 } 1924 if (t->send_cq) { 1925 ib_destroy_cq(t->send_cq); 1926 t->send_cq = NULL; 1927 } 1928 if (t->pd) { 1929 ib_dealloc_pd(t->pd); 1930 t->pd = NULL; 1931 } 1932 return ret; 1933 } 1934 1935 static int smb_direct_prepare(struct ksmbd_transport *t) 1936 { 1937 struct smb_direct_transport *st = smb_trans_direct_transfort(t); 1938 struct smb_direct_recvmsg *recvmsg; 1939 struct smb_direct_negotiate_req *req; 1940 int ret; 1941 1942 ksmbd_debug(RDMA, "Waiting for SMB_DIRECT negotiate request\n"); 1943 ret = wait_event_interruptible_timeout(st->wait_status, 1944 st->negotiation_requested || 1945 st->status == SMB_DIRECT_CS_DISCONNECTED, 1946 SMB_DIRECT_NEGOTIATE_TIMEOUT * HZ); 1947 if (ret <= 0 || st->status == SMB_DIRECT_CS_DISCONNECTED) 1948 return ret < 0 ? ret : -ETIMEDOUT; 1949 1950 recvmsg = get_first_reassembly(st); 1951 if (!recvmsg) 1952 return -ECONNABORTED; 1953 1954 ret = smb_direct_check_recvmsg(recvmsg); 1955 if (ret == -ECONNABORTED) 1956 goto out; 1957 1958 req = (struct smb_direct_negotiate_req *)recvmsg->packet; 1959 st->max_recv_size = min_t(int, st->max_recv_size, 1960 le32_to_cpu(req->preferred_send_size)); 1961 st->max_send_size = min_t(int, st->max_send_size, 1962 le32_to_cpu(req->max_receive_size)); 1963 st->max_fragmented_send_size = 1964 le32_to_cpu(req->max_fragmented_size); 1965 st->max_fragmented_recv_size = 1966 (st->recv_credit_max * st->max_recv_size) / 2; 1967 1968 ret = smb_direct_send_negotiate_response(st, ret); 1969 out: 1970 spin_lock_irq(&st->reassembly_queue_lock); 1971 st->reassembly_queue_length--; 1972 list_del(&recvmsg->list); 1973 spin_unlock_irq(&st->reassembly_queue_lock); 1974 put_recvmsg(st, recvmsg); 1975 1976 return ret; 1977 } 1978 1979 static int smb_direct_connect(struct smb_direct_transport *st) 1980 { 1981 int ret; 1982 struct ib_qp_cap qp_cap; 1983 1984 ret = smb_direct_init_params(st, &qp_cap); 1985 if (ret) { 1986 pr_err("Can't configure RDMA parameters\n"); 1987 return ret; 1988 } 1989 1990 ret = smb_direct_create_pools(st); 1991 if (ret) { 1992 pr_err("Can't init RDMA pool: %d\n", ret); 1993 return ret; 1994 } 1995 1996 ret = smb_direct_create_qpair(st, &qp_cap); 1997 if (ret) { 1998 pr_err("Can't accept RDMA client: %d\n", ret); 1999 return ret; 2000 } 2001 2002 ret = smb_direct_prepare_negotiation(st); 2003 if (ret) { 2004 pr_err("Can't negotiate: %d\n", ret); 2005 return ret; 2006 } 2007 return 0; 2008 } 2009 2010 static bool rdma_frwr_is_supported(struct ib_device_attr *attrs) 2011 { 2012 if (!(attrs->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS)) 2013 return false; 2014 if (attrs->max_fast_reg_page_list_len == 0) 2015 return false; 2016 return true; 2017 } 2018 2019 static int smb_direct_handle_connect_request(struct rdma_cm_id *new_cm_id) 2020 { 2021 struct smb_direct_transport *t; 2022 struct task_struct *handler; 2023 int ret; 2024 2025 if (!rdma_frwr_is_supported(&new_cm_id->device->attrs)) { 2026 ksmbd_debug(RDMA, 2027 "Fast Registration Work Requests is not supported. device capabilities=%llx\n", 2028 new_cm_id->device->attrs.device_cap_flags); 2029 return -EPROTONOSUPPORT; 2030 } 2031 2032 t = alloc_transport(new_cm_id); 2033 if (!t) 2034 return -ENOMEM; 2035 2036 ret = smb_direct_connect(t); 2037 if (ret) 2038 goto out_err; 2039 2040 handler = kthread_run(ksmbd_conn_handler_loop, 2041 KSMBD_TRANS(t)->conn, "ksmbd:r%u", 2042 smb_direct_port); 2043 if (IS_ERR(handler)) { 2044 ret = PTR_ERR(handler); 2045 pr_err("Can't start thread\n"); 2046 goto out_err; 2047 } 2048 2049 return 0; 2050 out_err: 2051 free_transport(t); 2052 return ret; 2053 } 2054 2055 static int smb_direct_listen_handler(struct rdma_cm_id *cm_id, 2056 struct rdma_cm_event *event) 2057 { 2058 switch (event->event) { 2059 case RDMA_CM_EVENT_CONNECT_REQUEST: { 2060 int ret = smb_direct_handle_connect_request(cm_id); 2061 2062 if (ret) { 2063 pr_err("Can't create transport: %d\n", ret); 2064 return ret; 2065 } 2066 2067 ksmbd_debug(RDMA, "Received connection request. cm_id=%p\n", 2068 cm_id); 2069 break; 2070 } 2071 default: 2072 pr_err("Unexpected listen event. cm_id=%p, event=%s (%d)\n", 2073 cm_id, rdma_event_msg(event->event), event->event); 2074 break; 2075 } 2076 return 0; 2077 } 2078 2079 static int smb_direct_listen(int port) 2080 { 2081 int ret; 2082 struct rdma_cm_id *cm_id; 2083 struct sockaddr_in sin = { 2084 .sin_family = AF_INET, 2085 .sin_addr.s_addr = htonl(INADDR_ANY), 2086 .sin_port = htons(port), 2087 }; 2088 2089 cm_id = rdma_create_id(&init_net, smb_direct_listen_handler, 2090 &smb_direct_listener, RDMA_PS_TCP, IB_QPT_RC); 2091 if (IS_ERR(cm_id)) { 2092 pr_err("Can't create cm id: %ld\n", PTR_ERR(cm_id)); 2093 return PTR_ERR(cm_id); 2094 } 2095 2096 ret = rdma_bind_addr(cm_id, (struct sockaddr *)&sin); 2097 if (ret) { 2098 pr_err("Can't bind: %d\n", ret); 2099 goto err; 2100 } 2101 2102 smb_direct_listener.cm_id = cm_id; 2103 2104 ret = rdma_listen(cm_id, 10); 2105 if (ret) { 2106 pr_err("Can't listen: %d\n", ret); 2107 goto err; 2108 } 2109 return 0; 2110 err: 2111 smb_direct_listener.cm_id = NULL; 2112 rdma_destroy_id(cm_id); 2113 return ret; 2114 } 2115 2116 static int smb_direct_ib_client_add(struct ib_device *ib_dev) 2117 { 2118 struct smb_direct_device *smb_dev; 2119 2120 /* Set 5445 port if device type is iWARP(No IB) */ 2121 if (ib_dev->node_type != RDMA_NODE_IB_CA) 2122 smb_direct_port = SMB_DIRECT_PORT_IWARP; 2123 2124 if (!rdma_frwr_is_supported(&ib_dev->attrs)) 2125 return 0; 2126 2127 smb_dev = kzalloc(sizeof(*smb_dev), KSMBD_DEFAULT_GFP); 2128 if (!smb_dev) 2129 return -ENOMEM; 2130 smb_dev->ib_dev = ib_dev; 2131 2132 write_lock(&smb_direct_device_lock); 2133 list_add(&smb_dev->list, &smb_direct_device_list); 2134 write_unlock(&smb_direct_device_lock); 2135 2136 ksmbd_debug(RDMA, "ib device added: name %s\n", ib_dev->name); 2137 return 0; 2138 } 2139 2140 static void smb_direct_ib_client_remove(struct ib_device *ib_dev, 2141 void *client_data) 2142 { 2143 struct smb_direct_device *smb_dev, *tmp; 2144 2145 write_lock(&smb_direct_device_lock); 2146 list_for_each_entry_safe(smb_dev, tmp, &smb_direct_device_list, list) { 2147 if (smb_dev->ib_dev == ib_dev) { 2148 list_del(&smb_dev->list); 2149 kfree(smb_dev); 2150 break; 2151 } 2152 } 2153 write_unlock(&smb_direct_device_lock); 2154 } 2155 2156 static struct ib_client smb_direct_ib_client = { 2157 .name = "ksmbd_smb_direct_ib", 2158 .add = smb_direct_ib_client_add, 2159 .remove = smb_direct_ib_client_remove, 2160 }; 2161 2162 int ksmbd_rdma_init(void) 2163 { 2164 int ret; 2165 2166 smb_direct_listener.cm_id = NULL; 2167 2168 ret = ib_register_client(&smb_direct_ib_client); 2169 if (ret) { 2170 pr_err("failed to ib_register_client\n"); 2171 return ret; 2172 } 2173 2174 /* When a client is running out of send credits, the credits are 2175 * granted by the server's sending a packet using this queue. 2176 * This avoids the situation that a clients cannot send packets 2177 * for lack of credits 2178 */ 2179 smb_direct_wq = alloc_workqueue("ksmbd-smb_direct-wq", 2180 WQ_HIGHPRI | WQ_MEM_RECLAIM, 0); 2181 if (!smb_direct_wq) 2182 return -ENOMEM; 2183 2184 ret = smb_direct_listen(smb_direct_port); 2185 if (ret) { 2186 destroy_workqueue(smb_direct_wq); 2187 smb_direct_wq = NULL; 2188 pr_err("Can't listen: %d\n", ret); 2189 return ret; 2190 } 2191 2192 ksmbd_debug(RDMA, "init RDMA listener. cm_id=%p\n", 2193 smb_direct_listener.cm_id); 2194 return 0; 2195 } 2196 2197 void ksmbd_rdma_stop_listening(void) 2198 { 2199 if (!smb_direct_listener.cm_id) 2200 return; 2201 2202 ib_unregister_client(&smb_direct_ib_client); 2203 rdma_destroy_id(smb_direct_listener.cm_id); 2204 2205 smb_direct_listener.cm_id = NULL; 2206 } 2207 2208 void ksmbd_rdma_destroy(void) 2209 { 2210 if (smb_direct_wq) { 2211 destroy_workqueue(smb_direct_wq); 2212 smb_direct_wq = NULL; 2213 } 2214 } 2215 2216 bool ksmbd_rdma_capable_netdev(struct net_device *netdev) 2217 { 2218 struct smb_direct_device *smb_dev; 2219 int i; 2220 bool rdma_capable = false; 2221 2222 read_lock(&smb_direct_device_lock); 2223 list_for_each_entry(smb_dev, &smb_direct_device_list, list) { 2224 for (i = 0; i < smb_dev->ib_dev->phys_port_cnt; i++) { 2225 struct net_device *ndev; 2226 2227 ndev = ib_device_get_netdev(smb_dev->ib_dev, i + 1); 2228 if (!ndev) 2229 continue; 2230 2231 if (ndev == netdev) { 2232 dev_put(ndev); 2233 rdma_capable = true; 2234 goto out; 2235 } 2236 dev_put(ndev); 2237 } 2238 } 2239 out: 2240 read_unlock(&smb_direct_device_lock); 2241 2242 if (rdma_capable == false) { 2243 struct ib_device *ibdev; 2244 2245 ibdev = ib_device_get_by_netdev(netdev, RDMA_DRIVER_UNKNOWN); 2246 if (ibdev) { 2247 rdma_capable = rdma_frwr_is_supported(&ibdev->attrs); 2248 ib_device_put(ibdev); 2249 } 2250 } 2251 2252 ksmbd_debug(RDMA, "netdev(%s) rdma capable : %s\n", 2253 netdev->name, str_true_false(rdma_capable)); 2254 2255 return rdma_capable; 2256 } 2257 2258 static const struct ksmbd_transport_ops ksmbd_smb_direct_transport_ops = { 2259 .prepare = smb_direct_prepare, 2260 .disconnect = smb_direct_disconnect, 2261 .shutdown = smb_direct_shutdown, 2262 .writev = smb_direct_writev, 2263 .read = smb_direct_read, 2264 .rdma_read = smb_direct_rdma_read, 2265 .rdma_write = smb_direct_rdma_write, 2266 .free_transport = smb_direct_free_transport, 2267 }; 2268