1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Copyright (C) 2017, Microsoft Corporation. 4 * Copyright (C) 2018, LG Electronics. 5 * 6 * Author(s): Long Li <longli@microsoft.com>, 7 * Hyunchul Lee <hyc.lee@gmail.com> 8 */ 9 10 #define SUBMOD_NAME "smb_direct" 11 12 #include <linux/kthread.h> 13 #include <linux/list.h> 14 #include <linux/mempool.h> 15 #include <linux/highmem.h> 16 #include <linux/scatterlist.h> 17 #include <linux/string_choices.h> 18 #include <rdma/ib_verbs.h> 19 #include <rdma/rdma_cm.h> 20 #include <rdma/rw.h> 21 22 #include "glob.h" 23 #include "connection.h" 24 #include "smb_common.h" 25 #include "../common/smb2status.h" 26 #include "transport_rdma.h" 27 28 #define SMB_DIRECT_PORT_IWARP 5445 29 #define SMB_DIRECT_PORT_INFINIBAND 445 30 31 #define SMB_DIRECT_VERSION_LE cpu_to_le16(0x0100) 32 33 /* SMB_DIRECT negotiation timeout in seconds */ 34 #define SMB_DIRECT_NEGOTIATE_TIMEOUT 120 35 36 #define SMB_DIRECT_MAX_SEND_SGES 6 37 #define SMB_DIRECT_MAX_RECV_SGES 1 38 39 /* 40 * Default maximum number of RDMA read/write outstanding on this connection 41 * This value is possibly decreased during QP creation on hardware limit 42 */ 43 #define SMB_DIRECT_CM_INITIATOR_DEPTH 8 44 45 /* Maximum number of retries on data transfer operations */ 46 #define SMB_DIRECT_CM_RETRY 6 47 /* No need to retry on Receiver Not Ready since SMB_DIRECT manages credits */ 48 #define SMB_DIRECT_CM_RNR_RETRY 0 49 50 /* 51 * User configurable initial values per SMB_DIRECT transport connection 52 * as defined in [MS-SMBD] 3.1.1.1 53 * Those may change after a SMB_DIRECT negotiation 54 */ 55 56 /* Set 445 port to SMB Direct port by default */ 57 static int smb_direct_port = SMB_DIRECT_PORT_INFINIBAND; 58 59 /* The local peer's maximum number of credits to grant to the peer */ 60 static int smb_direct_receive_credit_max = 255; 61 62 /* The remote peer's credit request of local peer */ 63 static int smb_direct_send_credit_target = 255; 64 65 /* The maximum single message size can be sent to remote peer */ 66 static int smb_direct_max_send_size = 1364; 67 68 /* The maximum fragmented upper-layer payload receive size supported */ 69 static int smb_direct_max_fragmented_recv_size = 1024 * 1024; 70 71 /* The maximum single-message size which can be received */ 72 static int smb_direct_max_receive_size = 1364; 73 74 static int smb_direct_max_read_write_size = SMBD_DEFAULT_IOSIZE; 75 76 static LIST_HEAD(smb_direct_device_list); 77 static DEFINE_RWLOCK(smb_direct_device_lock); 78 79 struct smb_direct_device { 80 struct ib_device *ib_dev; 81 struct list_head list; 82 }; 83 84 static struct smb_direct_listener { 85 struct rdma_cm_id *cm_id; 86 } smb_direct_listener; 87 88 static struct workqueue_struct *smb_direct_wq; 89 90 enum smb_direct_status { 91 SMB_DIRECT_CS_NEW = 0, 92 SMB_DIRECT_CS_CONNECTED, 93 SMB_DIRECT_CS_DISCONNECTING, 94 SMB_DIRECT_CS_DISCONNECTED, 95 }; 96 97 struct smb_direct_transport { 98 struct ksmbd_transport transport; 99 100 enum smb_direct_status status; 101 bool full_packet_received; 102 wait_queue_head_t wait_status; 103 104 struct rdma_cm_id *cm_id; 105 struct ib_cq *send_cq; 106 struct ib_cq *recv_cq; 107 struct ib_pd *pd; 108 struct ib_qp *qp; 109 110 int max_send_size; 111 int max_recv_size; 112 int max_fragmented_send_size; 113 int max_fragmented_recv_size; 114 int max_rdma_rw_size; 115 116 spinlock_t reassembly_queue_lock; 117 struct list_head reassembly_queue; 118 int reassembly_data_length; 119 int reassembly_queue_length; 120 int first_entry_offset; 121 
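	/*
	 * Note (annotation): the reassembly fields above hold received
	 * data-transfer fragments until smb_direct_read() consumes them;
	 * reassembly_data_length is read locklessly on the consumer side,
	 * ordered against the producer by a virt_wmb()/virt_rmb() pair.
	 */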
wait_queue_head_t wait_reassembly_queue; 122 123 spinlock_t receive_credit_lock; 124 int recv_credits; 125 int count_avail_recvmsg; 126 int recv_credit_max; 127 int recv_credit_target; 128 129 spinlock_t recvmsg_queue_lock; 130 struct list_head recvmsg_queue; 131 132 spinlock_t empty_recvmsg_queue_lock; 133 struct list_head empty_recvmsg_queue; 134 135 int send_credit_target; 136 atomic_t send_credits; 137 spinlock_t lock_new_recv_credits; 138 int new_recv_credits; 139 int max_rw_credits; 140 int pages_per_rw_credit; 141 atomic_t rw_credits; 142 143 wait_queue_head_t wait_send_credits; 144 wait_queue_head_t wait_rw_credits; 145 146 mempool_t *sendmsg_mempool; 147 struct kmem_cache *sendmsg_cache; 148 mempool_t *recvmsg_mempool; 149 struct kmem_cache *recvmsg_cache; 150 151 wait_queue_head_t wait_send_pending; 152 atomic_t send_pending; 153 154 struct delayed_work post_recv_credits_work; 155 struct work_struct send_immediate_work; 156 struct work_struct disconnect_work; 157 158 bool negotiation_requested; 159 }; 160 161 #define KSMBD_TRANS(t) ((struct ksmbd_transport *)&((t)->transport)) 162 #define SMBD_TRANS(t) ((struct smb_direct_transport *)container_of(t, \ 163 struct smb_direct_transport, transport)) 164 enum { 165 SMB_DIRECT_MSG_NEGOTIATE_REQ = 0, 166 SMB_DIRECT_MSG_DATA_TRANSFER 167 }; 168 169 static const struct ksmbd_transport_ops ksmbd_smb_direct_transport_ops; 170 171 struct smb_direct_send_ctx { 172 struct list_head msg_list; 173 int wr_cnt; 174 bool need_invalidate_rkey; 175 unsigned int remote_key; 176 }; 177 178 struct smb_direct_sendmsg { 179 struct smb_direct_transport *transport; 180 struct ib_send_wr wr; 181 struct list_head list; 182 int num_sge; 183 struct ib_sge sge[SMB_DIRECT_MAX_SEND_SGES]; 184 struct ib_cqe cqe; 185 u8 packet[]; 186 }; 187 188 struct smb_direct_recvmsg { 189 struct smb_direct_transport *transport; 190 struct list_head list; 191 int type; 192 struct ib_sge sge; 193 struct ib_cqe cqe; 194 bool first_segment; 195 u8 packet[]; 196 }; 197 198 struct smb_direct_rdma_rw_msg { 199 struct smb_direct_transport *t; 200 struct ib_cqe cqe; 201 int status; 202 struct completion *completion; 203 struct list_head list; 204 struct rdma_rw_ctx rw_ctx; 205 struct sg_table sgt; 206 struct scatterlist sg_list[]; 207 }; 208 209 void init_smbd_max_io_size(unsigned int sz) 210 { 211 sz = clamp_val(sz, SMBD_MIN_IOSIZE, SMBD_MAX_IOSIZE); 212 smb_direct_max_read_write_size = sz; 213 } 214 215 unsigned int get_smbd_max_read_write_size(void) 216 { 217 return smb_direct_max_read_write_size; 218 } 219 220 static inline int get_buf_page_count(void *buf, int size) 221 { 222 return DIV_ROUND_UP((uintptr_t)buf + size, PAGE_SIZE) - 223 (uintptr_t)buf / PAGE_SIZE; 224 } 225 226 static void smb_direct_destroy_pools(struct smb_direct_transport *transport); 227 static void smb_direct_post_recv_credits(struct work_struct *work); 228 static int smb_direct_post_send_data(struct smb_direct_transport *t, 229 struct smb_direct_send_ctx *send_ctx, 230 struct kvec *iov, int niov, 231 int remaining_data_length); 232 233 static inline struct smb_direct_transport * 234 smb_trans_direct_transfort(struct ksmbd_transport *t) 235 { 236 return container_of(t, struct smb_direct_transport, transport); 237 } 238 239 static inline void 240 *smb_direct_recvmsg_payload(struct smb_direct_recvmsg *recvmsg) 241 { 242 return (void *)recvmsg->packet; 243 } 244 245 static inline bool is_receive_credit_post_required(int receive_credits, 246 int avail_recvmsg_count) 247 { 248 return receive_credits <= 
(smb_direct_receive_credit_max >> 3) && 249 avail_recvmsg_count >= (receive_credits >> 2); 250 } 251 252 static struct 253 smb_direct_recvmsg *get_free_recvmsg(struct smb_direct_transport *t) 254 { 255 struct smb_direct_recvmsg *recvmsg = NULL; 256 257 spin_lock(&t->recvmsg_queue_lock); 258 if (!list_empty(&t->recvmsg_queue)) { 259 recvmsg = list_first_entry(&t->recvmsg_queue, 260 struct smb_direct_recvmsg, 261 list); 262 list_del(&recvmsg->list); 263 } 264 spin_unlock(&t->recvmsg_queue_lock); 265 return recvmsg; 266 } 267 268 static void put_recvmsg(struct smb_direct_transport *t, 269 struct smb_direct_recvmsg *recvmsg) 270 { 271 ib_dma_unmap_single(t->cm_id->device, recvmsg->sge.addr, 272 recvmsg->sge.length, DMA_FROM_DEVICE); 273 274 spin_lock(&t->recvmsg_queue_lock); 275 list_add(&recvmsg->list, &t->recvmsg_queue); 276 spin_unlock(&t->recvmsg_queue_lock); 277 } 278 279 static struct 280 smb_direct_recvmsg *get_empty_recvmsg(struct smb_direct_transport *t) 281 { 282 struct smb_direct_recvmsg *recvmsg = NULL; 283 284 spin_lock(&t->empty_recvmsg_queue_lock); 285 if (!list_empty(&t->empty_recvmsg_queue)) { 286 recvmsg = list_first_entry(&t->empty_recvmsg_queue, 287 struct smb_direct_recvmsg, list); 288 list_del(&recvmsg->list); 289 } 290 spin_unlock(&t->empty_recvmsg_queue_lock); 291 return recvmsg; 292 } 293 294 static void put_empty_recvmsg(struct smb_direct_transport *t, 295 struct smb_direct_recvmsg *recvmsg) 296 { 297 ib_dma_unmap_single(t->cm_id->device, recvmsg->sge.addr, 298 recvmsg->sge.length, DMA_FROM_DEVICE); 299 300 spin_lock(&t->empty_recvmsg_queue_lock); 301 list_add_tail(&recvmsg->list, &t->empty_recvmsg_queue); 302 spin_unlock(&t->empty_recvmsg_queue_lock); 303 } 304 305 static void enqueue_reassembly(struct smb_direct_transport *t, 306 struct smb_direct_recvmsg *recvmsg, 307 int data_length) 308 { 309 spin_lock(&t->reassembly_queue_lock); 310 list_add_tail(&recvmsg->list, &t->reassembly_queue); 311 t->reassembly_queue_length++; 312 /* 313 * Make sure reassembly_data_length is updated after list and 314 * reassembly_queue_length are updated. 
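* (The virt_wmb() below pairs with the virt_rmb() on the dequeue path in smb_direct_read().)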
On the dequeue side 315 * reassembly_data_length is checked without a lock to determine 316 * if reassembly_queue_length and list is up to date 317 */ 318 virt_wmb(); 319 t->reassembly_data_length += data_length; 320 spin_unlock(&t->reassembly_queue_lock); 321 } 322 323 static struct smb_direct_recvmsg *get_first_reassembly(struct smb_direct_transport *t) 324 { 325 if (!list_empty(&t->reassembly_queue)) 326 return list_first_entry(&t->reassembly_queue, 327 struct smb_direct_recvmsg, list); 328 else 329 return NULL; 330 } 331 332 static void smb_direct_disconnect_rdma_work(struct work_struct *work) 333 { 334 struct smb_direct_transport *t = 335 container_of(work, struct smb_direct_transport, 336 disconnect_work); 337 338 if (t->status == SMB_DIRECT_CS_CONNECTED) { 339 t->status = SMB_DIRECT_CS_DISCONNECTING; 340 rdma_disconnect(t->cm_id); 341 } 342 } 343 344 static void 345 smb_direct_disconnect_rdma_connection(struct smb_direct_transport *t) 346 { 347 if (t->status == SMB_DIRECT_CS_CONNECTED) 348 queue_work(smb_direct_wq, &t->disconnect_work); 349 } 350 351 static void smb_direct_send_immediate_work(struct work_struct *work) 352 { 353 struct smb_direct_transport *t = container_of(work, 354 struct smb_direct_transport, send_immediate_work); 355 356 if (t->status != SMB_DIRECT_CS_CONNECTED) 357 return; 358 359 smb_direct_post_send_data(t, NULL, NULL, 0, 0); 360 } 361 362 static struct smb_direct_transport *alloc_transport(struct rdma_cm_id *cm_id) 363 { 364 struct smb_direct_transport *t; 365 struct ksmbd_conn *conn; 366 367 t = kzalloc(sizeof(*t), KSMBD_DEFAULT_GFP); 368 if (!t) 369 return NULL; 370 371 t->cm_id = cm_id; 372 cm_id->context = t; 373 374 t->status = SMB_DIRECT_CS_NEW; 375 init_waitqueue_head(&t->wait_status); 376 377 spin_lock_init(&t->reassembly_queue_lock); 378 INIT_LIST_HEAD(&t->reassembly_queue); 379 t->reassembly_data_length = 0; 380 t->reassembly_queue_length = 0; 381 init_waitqueue_head(&t->wait_reassembly_queue); 382 init_waitqueue_head(&t->wait_send_credits); 383 init_waitqueue_head(&t->wait_rw_credits); 384 385 spin_lock_init(&t->receive_credit_lock); 386 spin_lock_init(&t->recvmsg_queue_lock); 387 INIT_LIST_HEAD(&t->recvmsg_queue); 388 389 spin_lock_init(&t->empty_recvmsg_queue_lock); 390 INIT_LIST_HEAD(&t->empty_recvmsg_queue); 391 392 init_waitqueue_head(&t->wait_send_pending); 393 atomic_set(&t->send_pending, 0); 394 395 spin_lock_init(&t->lock_new_recv_credits); 396 397 INIT_DELAYED_WORK(&t->post_recv_credits_work, 398 smb_direct_post_recv_credits); 399 INIT_WORK(&t->send_immediate_work, smb_direct_send_immediate_work); 400 INIT_WORK(&t->disconnect_work, smb_direct_disconnect_rdma_work); 401 402 conn = ksmbd_conn_alloc(); 403 if (!conn) 404 goto err; 405 conn->transport = KSMBD_TRANS(t); 406 KSMBD_TRANS(t)->conn = conn; 407 KSMBD_TRANS(t)->ops = &ksmbd_smb_direct_transport_ops; 408 return t; 409 err: 410 kfree(t); 411 return NULL; 412 } 413 414 static void smb_direct_free_transport(struct ksmbd_transport *kt) 415 { 416 kfree(SMBD_TRANS(kt)); 417 } 418 419 static void free_transport(struct smb_direct_transport *t) 420 { 421 struct smb_direct_recvmsg *recvmsg; 422 423 wake_up_interruptible(&t->wait_send_credits); 424 425 ksmbd_debug(RDMA, "wait for all send posted to IB to finish\n"); 426 wait_event(t->wait_send_pending, 427 atomic_read(&t->send_pending) == 0); 428 429 cancel_work_sync(&t->disconnect_work); 430 cancel_delayed_work_sync(&t->post_recv_credits_work); 431 cancel_work_sync(&t->send_immediate_work); 432 433 if (t->qp) { 434 ib_drain_qp(t->qp); 435 
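		/*
		 * Draining the QP first guarantees that no completions are
		 * still in flight when the MR pool and the QP itself are
		 * destroyed below.
		 */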
ib_mr_pool_destroy(t->qp, &t->qp->rdma_mrs); 436 ib_destroy_qp(t->qp); 437 } 438 439 ksmbd_debug(RDMA, "drain the reassembly queue\n"); 440 do { 441 spin_lock(&t->reassembly_queue_lock); 442 recvmsg = get_first_reassembly(t); 443 if (recvmsg) { 444 list_del(&recvmsg->list); 445 spin_unlock(&t->reassembly_queue_lock); 446 put_recvmsg(t, recvmsg); 447 } else { 448 spin_unlock(&t->reassembly_queue_lock); 449 } 450 } while (recvmsg); 451 t->reassembly_data_length = 0; 452 453 if (t->send_cq) 454 ib_free_cq(t->send_cq); 455 if (t->recv_cq) 456 ib_free_cq(t->recv_cq); 457 if (t->pd) 458 ib_dealloc_pd(t->pd); 459 if (t->cm_id) 460 rdma_destroy_id(t->cm_id); 461 462 smb_direct_destroy_pools(t); 463 ksmbd_conn_free(KSMBD_TRANS(t)->conn); 464 } 465 466 static struct smb_direct_sendmsg 467 *smb_direct_alloc_sendmsg(struct smb_direct_transport *t) 468 { 469 struct smb_direct_sendmsg *msg; 470 471 msg = mempool_alloc(t->sendmsg_mempool, KSMBD_DEFAULT_GFP); 472 if (!msg) 473 return ERR_PTR(-ENOMEM); 474 msg->transport = t; 475 INIT_LIST_HEAD(&msg->list); 476 msg->num_sge = 0; 477 return msg; 478 } 479 480 static void smb_direct_free_sendmsg(struct smb_direct_transport *t, 481 struct smb_direct_sendmsg *msg) 482 { 483 int i; 484 485 if (msg->num_sge > 0) { 486 ib_dma_unmap_single(t->cm_id->device, 487 msg->sge[0].addr, msg->sge[0].length, 488 DMA_TO_DEVICE); 489 for (i = 1; i < msg->num_sge; i++) 490 ib_dma_unmap_page(t->cm_id->device, 491 msg->sge[i].addr, msg->sge[i].length, 492 DMA_TO_DEVICE); 493 } 494 mempool_free(msg, t->sendmsg_mempool); 495 } 496 497 static int smb_direct_check_recvmsg(struct smb_direct_recvmsg *recvmsg) 498 { 499 switch (recvmsg->type) { 500 case SMB_DIRECT_MSG_DATA_TRANSFER: { 501 struct smb_direct_data_transfer *req = 502 (struct smb_direct_data_transfer *)recvmsg->packet; 503 struct smb2_hdr *hdr = (struct smb2_hdr *)(recvmsg->packet 504 + le32_to_cpu(req->data_offset)); 505 ksmbd_debug(RDMA, 506 "CreditGranted: %u, CreditRequested: %u, DataLength: %u, RemainingDataLength: %u, SMB: %x, Command: %u\n", 507 le16_to_cpu(req->credits_granted), 508 le16_to_cpu(req->credits_requested), 509 req->data_length, req->remaining_data_length, 510 hdr->ProtocolId, hdr->Command); 511 break; 512 } 513 case SMB_DIRECT_MSG_NEGOTIATE_REQ: { 514 struct smb_direct_negotiate_req *req = 515 (struct smb_direct_negotiate_req *)recvmsg->packet; 516 ksmbd_debug(RDMA, 517 "MinVersion: %u, MaxVersion: %u, CreditRequested: %u, MaxSendSize: %u, MaxRecvSize: %u, MaxFragmentedSize: %u\n", 518 le16_to_cpu(req->min_version), 519 le16_to_cpu(req->max_version), 520 le16_to_cpu(req->credits_requested), 521 le32_to_cpu(req->preferred_send_size), 522 le32_to_cpu(req->max_receive_size), 523 le32_to_cpu(req->max_fragmented_size)); 524 if (le16_to_cpu(req->min_version) > 0x0100 || 525 le16_to_cpu(req->max_version) < 0x0100) 526 return -EOPNOTSUPP; 527 if (le16_to_cpu(req->credits_requested) <= 0 || 528 le32_to_cpu(req->max_receive_size) <= 128 || 529 le32_to_cpu(req->max_fragmented_size) <= 530 128 * 1024) 531 return -ECONNABORTED; 532 533 break; 534 } 535 default: 536 return -EINVAL; 537 } 538 return 0; 539 } 540 541 static void recv_done(struct ib_cq *cq, struct ib_wc *wc) 542 { 543 struct smb_direct_recvmsg *recvmsg; 544 struct smb_direct_transport *t; 545 546 recvmsg = container_of(wc->wr_cqe, struct smb_direct_recvmsg, cqe); 547 t = recvmsg->transport; 548 549 if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_RECV) { 550 if (wc->status != IB_WC_WR_FLUSH_ERR) { 551 pr_err("Recv error. 
status='%s (%d)' opcode=%d\n", 552 ib_wc_status_msg(wc->status), wc->status, 553 wc->opcode); 554 smb_direct_disconnect_rdma_connection(t); 555 } 556 put_empty_recvmsg(t, recvmsg); 557 return; 558 } 559 560 ksmbd_debug(RDMA, "Recv completed. status='%s (%d)', opcode=%d\n", 561 ib_wc_status_msg(wc->status), wc->status, 562 wc->opcode); 563 564 ib_dma_sync_single_for_cpu(wc->qp->device, recvmsg->sge.addr, 565 recvmsg->sge.length, DMA_FROM_DEVICE); 566 567 switch (recvmsg->type) { 568 case SMB_DIRECT_MSG_NEGOTIATE_REQ: 569 if (wc->byte_len < sizeof(struct smb_direct_negotiate_req)) { 570 put_empty_recvmsg(t, recvmsg); 571 return; 572 } 573 t->negotiation_requested = true; 574 t->full_packet_received = true; 575 t->status = SMB_DIRECT_CS_CONNECTED; 576 enqueue_reassembly(t, recvmsg, 0); 577 wake_up_interruptible(&t->wait_status); 578 break; 579 case SMB_DIRECT_MSG_DATA_TRANSFER: { 580 struct smb_direct_data_transfer *data_transfer = 581 (struct smb_direct_data_transfer *)recvmsg->packet; 582 unsigned int data_length; 583 int avail_recvmsg_count, receive_credits; 584 585 if (wc->byte_len < 586 offsetof(struct smb_direct_data_transfer, padding)) { 587 put_empty_recvmsg(t, recvmsg); 588 return; 589 } 590 591 data_length = le32_to_cpu(data_transfer->data_length); 592 if (data_length) { 593 if (wc->byte_len < sizeof(struct smb_direct_data_transfer) + 594 (u64)data_length) { 595 put_empty_recvmsg(t, recvmsg); 596 return; 597 } 598 599 if (t->full_packet_received) 600 recvmsg->first_segment = true; 601 602 if (le32_to_cpu(data_transfer->remaining_data_length)) 603 t->full_packet_received = false; 604 else 605 t->full_packet_received = true; 606 607 enqueue_reassembly(t, recvmsg, (int)data_length); 608 wake_up_interruptible(&t->wait_reassembly_queue); 609 610 spin_lock(&t->receive_credit_lock); 611 receive_credits = --(t->recv_credits); 612 avail_recvmsg_count = t->count_avail_recvmsg; 613 spin_unlock(&t->receive_credit_lock); 614 } else { 615 put_empty_recvmsg(t, recvmsg); 616 617 spin_lock(&t->receive_credit_lock); 618 receive_credits = --(t->recv_credits); 619 avail_recvmsg_count = ++(t->count_avail_recvmsg); 620 spin_unlock(&t->receive_credit_lock); 621 } 622 623 t->recv_credit_target = 624 le16_to_cpu(data_transfer->credits_requested); 625 atomic_add(le16_to_cpu(data_transfer->credits_granted), 626 &t->send_credits); 627 628 if (le16_to_cpu(data_transfer->flags) & 629 SMB_DIRECT_RESPONSE_REQUESTED) 630 queue_work(smb_direct_wq, &t->send_immediate_work); 631 632 if (atomic_read(&t->send_credits) > 0) 633 wake_up_interruptible(&t->wait_send_credits); 634 635 if (is_receive_credit_post_required(receive_credits, avail_recvmsg_count)) 636 mod_delayed_work(smb_direct_wq, 637 &t->post_recv_credits_work, 0); 638 break; 639 } 640 default: 641 break; 642 } 643 } 644 645 static int smb_direct_post_recv(struct smb_direct_transport *t, 646 struct smb_direct_recvmsg *recvmsg) 647 { 648 struct ib_recv_wr wr; 649 int ret; 650 651 recvmsg->sge.addr = ib_dma_map_single(t->cm_id->device, 652 recvmsg->packet, t->max_recv_size, 653 DMA_FROM_DEVICE); 654 ret = ib_dma_mapping_error(t->cm_id->device, recvmsg->sge.addr); 655 if (ret) 656 return ret; 657 recvmsg->sge.length = t->max_recv_size; 658 recvmsg->sge.lkey = t->pd->local_dma_lkey; 659 recvmsg->cqe.done = recv_done; 660 661 wr.wr_cqe = &recvmsg->cqe; 662 wr.next = NULL; 663 wr.sg_list = &recvmsg->sge; 664 wr.num_sge = 1; 665 666 ret = ib_post_recv(t->qp, &wr, NULL); 667 if (ret) { 668 pr_err("Can't post recv: %d\n", ret); 669 ib_dma_unmap_single(t->cm_id->device, 
				    recvmsg->sge.addr, recvmsg->sge.length,
				    DMA_FROM_DEVICE);
		smb_direct_disconnect_rdma_connection(t);
		return ret;
	}
	return ret;
}

static int smb_direct_read(struct ksmbd_transport *t, char *buf,
			   unsigned int size, int unused)
{
	struct smb_direct_recvmsg *recvmsg;
	struct smb_direct_data_transfer *data_transfer;
	int to_copy, to_read, data_read, offset;
	u32 data_length, remaining_data_length, data_offset;
	int rc;
	struct smb_direct_transport *st = smb_trans_direct_transfort(t);

again:
	if (st->status != SMB_DIRECT_CS_CONNECTED) {
		pr_err("disconnected\n");
		return -ENOTCONN;
	}

	/*
	 * No need to hold the reassembly queue lock all the time as we are
	 * the only one reading from the front of the queue. The transport
	 * may add more entries to the back of the queue at the same time.
	 */
	if (st->reassembly_data_length >= size) {
		int queue_length;
		int queue_removed = 0;

		/*
		 * Need to make sure reassembly_data_length is read before
		 * reading reassembly_queue_length and calling
		 * get_first_reassembly. This access is lock free as we never
		 * read the end of the queue, which is updated in softirq
		 * context as more data is received.
		 */
		virt_rmb();
		queue_length = st->reassembly_queue_length;
		data_read = 0;
		to_read = size;
		offset = st->first_entry_offset;
		while (data_read < size) {
			recvmsg = get_first_reassembly(st);
			data_transfer = smb_direct_recvmsg_payload(recvmsg);
			data_length = le32_to_cpu(data_transfer->data_length);
			remaining_data_length =
				le32_to_cpu(data_transfer->remaining_data_length);
			data_offset = le32_to_cpu(data_transfer->data_offset);

			/*
			 * The upper layer expects an RFC 1002 length at the
			 * beginning of the payload. Return it to indicate the
			 * total length of the packet. This minimizes the
			 * changes to the upper-layer packet processing logic
			 * and will eventually be removed when an intermediate
			 * transport layer is added.
			 */
			if (recvmsg->first_segment && size == 4) {
				unsigned int rfc1002_len =
					data_length + remaining_data_length;
				*((__be32 *)buf) = cpu_to_be32(rfc1002_len);
				data_read = 4;
				recvmsg->first_segment = false;
				ksmbd_debug(RDMA,
					    "returning rfc1002 length %d\n",
					    rfc1002_len);
				goto read_rfc1002_done;
			}

			to_copy = min_t(int, data_length - offset, to_read);
			memcpy(buf + data_read,
			       (char *)data_transfer + data_offset + offset,
			       to_copy);

			/* move on to the next buffer?
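			 * We do once the current recvmsg has been fully
			 * consumed, i.e. offset has reached data_length.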
*/ 748 if (to_copy == data_length - offset) { 749 queue_length--; 750 /* 751 * No need to lock if we are not at the 752 * end of the queue 753 */ 754 if (queue_length) { 755 list_del(&recvmsg->list); 756 } else { 757 spin_lock_irq(&st->reassembly_queue_lock); 758 list_del(&recvmsg->list); 759 spin_unlock_irq(&st->reassembly_queue_lock); 760 } 761 queue_removed++; 762 put_recvmsg(st, recvmsg); 763 offset = 0; 764 } else { 765 offset += to_copy; 766 } 767 768 to_read -= to_copy; 769 data_read += to_copy; 770 } 771 772 spin_lock_irq(&st->reassembly_queue_lock); 773 st->reassembly_data_length -= data_read; 774 st->reassembly_queue_length -= queue_removed; 775 spin_unlock_irq(&st->reassembly_queue_lock); 776 777 spin_lock(&st->receive_credit_lock); 778 st->count_avail_recvmsg += queue_removed; 779 if (is_receive_credit_post_required(st->recv_credits, st->count_avail_recvmsg)) { 780 spin_unlock(&st->receive_credit_lock); 781 mod_delayed_work(smb_direct_wq, 782 &st->post_recv_credits_work, 0); 783 } else { 784 spin_unlock(&st->receive_credit_lock); 785 } 786 787 st->first_entry_offset = offset; 788 ksmbd_debug(RDMA, 789 "returning to thread data_read=%d reassembly_data_length=%d first_entry_offset=%d\n", 790 data_read, st->reassembly_data_length, 791 st->first_entry_offset); 792 read_rfc1002_done: 793 return data_read; 794 } 795 796 ksmbd_debug(RDMA, "wait_event on more data\n"); 797 rc = wait_event_interruptible(st->wait_reassembly_queue, 798 st->reassembly_data_length >= size || 799 st->status != SMB_DIRECT_CS_CONNECTED); 800 if (rc) 801 return -EINTR; 802 803 goto again; 804 } 805 806 static void smb_direct_post_recv_credits(struct work_struct *work) 807 { 808 struct smb_direct_transport *t = container_of(work, 809 struct smb_direct_transport, post_recv_credits_work.work); 810 struct smb_direct_recvmsg *recvmsg; 811 int receive_credits, credits = 0; 812 int ret; 813 int use_free = 1; 814 815 spin_lock(&t->receive_credit_lock); 816 receive_credits = t->recv_credits; 817 spin_unlock(&t->receive_credit_lock); 818 819 if (receive_credits < t->recv_credit_target) { 820 while (true) { 821 if (use_free) 822 recvmsg = get_free_recvmsg(t); 823 else 824 recvmsg = get_empty_recvmsg(t); 825 if (!recvmsg) { 826 if (use_free) { 827 use_free = 0; 828 continue; 829 } else { 830 break; 831 } 832 } 833 834 recvmsg->type = SMB_DIRECT_MSG_DATA_TRANSFER; 835 recvmsg->first_segment = false; 836 837 ret = smb_direct_post_recv(t, recvmsg); 838 if (ret) { 839 pr_err("Can't post recv: %d\n", ret); 840 put_recvmsg(t, recvmsg); 841 break; 842 } 843 credits++; 844 } 845 } 846 847 spin_lock(&t->receive_credit_lock); 848 t->recv_credits += credits; 849 t->count_avail_recvmsg -= credits; 850 spin_unlock(&t->receive_credit_lock); 851 852 spin_lock(&t->lock_new_recv_credits); 853 t->new_recv_credits += credits; 854 spin_unlock(&t->lock_new_recv_credits); 855 856 if (credits) 857 queue_work(smb_direct_wq, &t->send_immediate_work); 858 } 859 860 static void send_done(struct ib_cq *cq, struct ib_wc *wc) 861 { 862 struct smb_direct_sendmsg *sendmsg, *sibling; 863 struct smb_direct_transport *t; 864 struct list_head *pos, *prev, *end; 865 866 sendmsg = container_of(wc->wr_cqe, struct smb_direct_sendmsg, cqe); 867 t = sendmsg->transport; 868 869 ksmbd_debug(RDMA, "Send completed. status='%s (%d)', opcode=%d\n", 870 ib_wc_status_msg(wc->status), wc->status, 871 wc->opcode); 872 873 if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_SEND) { 874 pr_err("Send error. 
status='%s (%d)', opcode=%d\n", 875 ib_wc_status_msg(wc->status), wc->status, 876 wc->opcode); 877 smb_direct_disconnect_rdma_connection(t); 878 } 879 880 if (atomic_dec_and_test(&t->send_pending)) 881 wake_up(&t->wait_send_pending); 882 883 /* iterate and free the list of messages in reverse. the list's head 884 * is invalid. 885 */ 886 for (pos = &sendmsg->list, prev = pos->prev, end = sendmsg->list.next; 887 prev != end; pos = prev, prev = prev->prev) { 888 sibling = container_of(pos, struct smb_direct_sendmsg, list); 889 smb_direct_free_sendmsg(t, sibling); 890 } 891 892 sibling = container_of(pos, struct smb_direct_sendmsg, list); 893 smb_direct_free_sendmsg(t, sibling); 894 } 895 896 static int manage_credits_prior_sending(struct smb_direct_transport *t) 897 { 898 int new_credits; 899 900 spin_lock(&t->lock_new_recv_credits); 901 new_credits = t->new_recv_credits; 902 t->new_recv_credits = 0; 903 spin_unlock(&t->lock_new_recv_credits); 904 905 return new_credits; 906 } 907 908 static int smb_direct_post_send(struct smb_direct_transport *t, 909 struct ib_send_wr *wr) 910 { 911 int ret; 912 913 atomic_inc(&t->send_pending); 914 ret = ib_post_send(t->qp, wr, NULL); 915 if (ret) { 916 pr_err("failed to post send: %d\n", ret); 917 if (atomic_dec_and_test(&t->send_pending)) 918 wake_up(&t->wait_send_pending); 919 smb_direct_disconnect_rdma_connection(t); 920 } 921 return ret; 922 } 923 924 static void smb_direct_send_ctx_init(struct smb_direct_transport *t, 925 struct smb_direct_send_ctx *send_ctx, 926 bool need_invalidate_rkey, 927 unsigned int remote_key) 928 { 929 INIT_LIST_HEAD(&send_ctx->msg_list); 930 send_ctx->wr_cnt = 0; 931 send_ctx->need_invalidate_rkey = need_invalidate_rkey; 932 send_ctx->remote_key = remote_key; 933 } 934 935 static int smb_direct_flush_send_list(struct smb_direct_transport *t, 936 struct smb_direct_send_ctx *send_ctx, 937 bool is_last) 938 { 939 struct smb_direct_sendmsg *first, *last; 940 int ret; 941 942 if (list_empty(&send_ctx->msg_list)) 943 return 0; 944 945 first = list_first_entry(&send_ctx->msg_list, 946 struct smb_direct_sendmsg, 947 list); 948 last = list_last_entry(&send_ctx->msg_list, 949 struct smb_direct_sendmsg, 950 list); 951 952 last->wr.send_flags = IB_SEND_SIGNALED; 953 last->wr.wr_cqe = &last->cqe; 954 if (is_last && send_ctx->need_invalidate_rkey) { 955 last->wr.opcode = IB_WR_SEND_WITH_INV; 956 last->wr.ex.invalidate_rkey = send_ctx->remote_key; 957 } 958 959 ret = smb_direct_post_send(t, &first->wr); 960 if (!ret) { 961 smb_direct_send_ctx_init(t, send_ctx, 962 send_ctx->need_invalidate_rkey, 963 send_ctx->remote_key); 964 } else { 965 atomic_add(send_ctx->wr_cnt, &t->send_credits); 966 wake_up(&t->wait_send_credits); 967 list_for_each_entry_safe(first, last, &send_ctx->msg_list, 968 list) { 969 smb_direct_free_sendmsg(t, first); 970 } 971 } 972 return ret; 973 } 974 975 static int wait_for_credits(struct smb_direct_transport *t, 976 wait_queue_head_t *waitq, atomic_t *total_credits, 977 int needed) 978 { 979 int ret; 980 981 do { 982 if (atomic_sub_return(needed, total_credits) >= 0) 983 return 0; 984 985 atomic_add(needed, total_credits); 986 ret = wait_event_interruptible(*waitq, 987 atomic_read(total_credits) >= needed || 988 t->status != SMB_DIRECT_CS_CONNECTED); 989 990 if (t->status != SMB_DIRECT_CS_CONNECTED) 991 return -ENOTCONN; 992 else if (ret < 0) 993 return ret; 994 } while (true); 995 } 996 997 static int wait_for_send_credits(struct smb_direct_transport *t, 998 struct smb_direct_send_ctx *send_ctx) 999 { 1000 int ret; 
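	/*
	 * If the batched send list has grown large (16 WRs) or we are down to
	 * our last send credit, flush the pending list before blocking for a
	 * credit below so the peer can receive it and grant credits back.
	 */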
1001 1002 if (send_ctx && 1003 (send_ctx->wr_cnt >= 16 || atomic_read(&t->send_credits) <= 1)) { 1004 ret = smb_direct_flush_send_list(t, send_ctx, false); 1005 if (ret) 1006 return ret; 1007 } 1008 1009 return wait_for_credits(t, &t->wait_send_credits, &t->send_credits, 1); 1010 } 1011 1012 static int wait_for_rw_credits(struct smb_direct_transport *t, int credits) 1013 { 1014 return wait_for_credits(t, &t->wait_rw_credits, &t->rw_credits, credits); 1015 } 1016 1017 static int calc_rw_credits(struct smb_direct_transport *t, 1018 char *buf, unsigned int len) 1019 { 1020 return DIV_ROUND_UP(get_buf_page_count(buf, len), 1021 t->pages_per_rw_credit); 1022 } 1023 1024 static int smb_direct_create_header(struct smb_direct_transport *t, 1025 int size, int remaining_data_length, 1026 struct smb_direct_sendmsg **sendmsg_out) 1027 { 1028 struct smb_direct_sendmsg *sendmsg; 1029 struct smb_direct_data_transfer *packet; 1030 int header_length; 1031 int ret; 1032 1033 sendmsg = smb_direct_alloc_sendmsg(t); 1034 if (IS_ERR(sendmsg)) 1035 return PTR_ERR(sendmsg); 1036 1037 /* Fill in the packet header */ 1038 packet = (struct smb_direct_data_transfer *)sendmsg->packet; 1039 packet->credits_requested = cpu_to_le16(t->send_credit_target); 1040 packet->credits_granted = cpu_to_le16(manage_credits_prior_sending(t)); 1041 1042 packet->flags = 0; 1043 packet->reserved = 0; 1044 if (!size) 1045 packet->data_offset = 0; 1046 else 1047 packet->data_offset = cpu_to_le32(24); 1048 packet->data_length = cpu_to_le32(size); 1049 packet->remaining_data_length = cpu_to_le32(remaining_data_length); 1050 packet->padding = 0; 1051 1052 ksmbd_debug(RDMA, 1053 "credits_requested=%d credits_granted=%d data_offset=%d data_length=%d remaining_data_length=%d\n", 1054 le16_to_cpu(packet->credits_requested), 1055 le16_to_cpu(packet->credits_granted), 1056 le32_to_cpu(packet->data_offset), 1057 le32_to_cpu(packet->data_length), 1058 le32_to_cpu(packet->remaining_data_length)); 1059 1060 /* Map the packet to DMA */ 1061 header_length = sizeof(struct smb_direct_data_transfer); 1062 /* If this is a packet without payload, don't send padding */ 1063 if (!size) 1064 header_length = 1065 offsetof(struct smb_direct_data_transfer, padding); 1066 1067 sendmsg->sge[0].addr = ib_dma_map_single(t->cm_id->device, 1068 (void *)packet, 1069 header_length, 1070 DMA_TO_DEVICE); 1071 ret = ib_dma_mapping_error(t->cm_id->device, sendmsg->sge[0].addr); 1072 if (ret) { 1073 smb_direct_free_sendmsg(t, sendmsg); 1074 return ret; 1075 } 1076 1077 sendmsg->num_sge = 1; 1078 sendmsg->sge[0].length = header_length; 1079 sendmsg->sge[0].lkey = t->pd->local_dma_lkey; 1080 1081 *sendmsg_out = sendmsg; 1082 return 0; 1083 } 1084 1085 static int get_sg_list(void *buf, int size, struct scatterlist *sg_list, int nentries) 1086 { 1087 bool high = is_vmalloc_addr(buf); 1088 struct page *page; 1089 int offset, len; 1090 int i = 0; 1091 1092 if (size <= 0 || nentries < get_buf_page_count(buf, size)) 1093 return -EINVAL; 1094 1095 offset = offset_in_page(buf); 1096 buf -= offset; 1097 while (size > 0) { 1098 len = min_t(int, PAGE_SIZE - offset, size); 1099 if (high) 1100 page = vmalloc_to_page(buf); 1101 else 1102 page = kmap_to_page(buf); 1103 1104 if (!sg_list) 1105 return -EINVAL; 1106 sg_set_page(sg_list, page, len, offset); 1107 sg_list = sg_next(sg_list); 1108 1109 buf += PAGE_SIZE; 1110 size -= len; 1111 offset = 0; 1112 i++; 1113 } 1114 return i; 1115 } 1116 1117 static int get_mapped_sg_list(struct ib_device *device, void *buf, int size, 1118 struct 
scatterlist *sg_list, int nentries, 1119 enum dma_data_direction dir) 1120 { 1121 int npages; 1122 1123 npages = get_sg_list(buf, size, sg_list, nentries); 1124 if (npages < 0) 1125 return -EINVAL; 1126 return ib_dma_map_sg(device, sg_list, npages, dir); 1127 } 1128 1129 static int post_sendmsg(struct smb_direct_transport *t, 1130 struct smb_direct_send_ctx *send_ctx, 1131 struct smb_direct_sendmsg *msg) 1132 { 1133 int i; 1134 1135 for (i = 0; i < msg->num_sge; i++) 1136 ib_dma_sync_single_for_device(t->cm_id->device, 1137 msg->sge[i].addr, msg->sge[i].length, 1138 DMA_TO_DEVICE); 1139 1140 msg->cqe.done = send_done; 1141 msg->wr.opcode = IB_WR_SEND; 1142 msg->wr.sg_list = &msg->sge[0]; 1143 msg->wr.num_sge = msg->num_sge; 1144 msg->wr.next = NULL; 1145 1146 if (send_ctx) { 1147 msg->wr.wr_cqe = NULL; 1148 msg->wr.send_flags = 0; 1149 if (!list_empty(&send_ctx->msg_list)) { 1150 struct smb_direct_sendmsg *last; 1151 1152 last = list_last_entry(&send_ctx->msg_list, 1153 struct smb_direct_sendmsg, 1154 list); 1155 last->wr.next = &msg->wr; 1156 } 1157 list_add_tail(&msg->list, &send_ctx->msg_list); 1158 send_ctx->wr_cnt++; 1159 return 0; 1160 } 1161 1162 msg->wr.wr_cqe = &msg->cqe; 1163 msg->wr.send_flags = IB_SEND_SIGNALED; 1164 return smb_direct_post_send(t, &msg->wr); 1165 } 1166 1167 static int smb_direct_post_send_data(struct smb_direct_transport *t, 1168 struct smb_direct_send_ctx *send_ctx, 1169 struct kvec *iov, int niov, 1170 int remaining_data_length) 1171 { 1172 int i, j, ret; 1173 struct smb_direct_sendmsg *msg; 1174 int data_length; 1175 struct scatterlist sg[SMB_DIRECT_MAX_SEND_SGES - 1]; 1176 1177 ret = wait_for_send_credits(t, send_ctx); 1178 if (ret) 1179 return ret; 1180 1181 data_length = 0; 1182 for (i = 0; i < niov; i++) 1183 data_length += iov[i].iov_len; 1184 1185 ret = smb_direct_create_header(t, data_length, remaining_data_length, 1186 &msg); 1187 if (ret) { 1188 atomic_inc(&t->send_credits); 1189 return ret; 1190 } 1191 1192 for (i = 0; i < niov; i++) { 1193 struct ib_sge *sge; 1194 int sg_cnt; 1195 1196 sg_init_table(sg, SMB_DIRECT_MAX_SEND_SGES - 1); 1197 sg_cnt = get_mapped_sg_list(t->cm_id->device, 1198 iov[i].iov_base, iov[i].iov_len, 1199 sg, SMB_DIRECT_MAX_SEND_SGES - 1, 1200 DMA_TO_DEVICE); 1201 if (sg_cnt <= 0) { 1202 pr_err("failed to map buffer\n"); 1203 ret = -ENOMEM; 1204 goto err; 1205 } else if (sg_cnt + msg->num_sge > SMB_DIRECT_MAX_SEND_SGES) { 1206 pr_err("buffer not fitted into sges\n"); 1207 ret = -E2BIG; 1208 ib_dma_unmap_sg(t->cm_id->device, sg, sg_cnt, 1209 DMA_TO_DEVICE); 1210 goto err; 1211 } 1212 1213 for (j = 0; j < sg_cnt; j++) { 1214 sge = &msg->sge[msg->num_sge]; 1215 sge->addr = sg_dma_address(&sg[j]); 1216 sge->length = sg_dma_len(&sg[j]); 1217 sge->lkey = t->pd->local_dma_lkey; 1218 msg->num_sge++; 1219 } 1220 } 1221 1222 ret = post_sendmsg(t, send_ctx, msg); 1223 if (ret) 1224 goto err; 1225 return 0; 1226 err: 1227 smb_direct_free_sendmsg(t, msg); 1228 atomic_inc(&t->send_credits); 1229 return ret; 1230 } 1231 1232 static int smb_direct_writev(struct ksmbd_transport *t, 1233 struct kvec *iov, int niovs, int buflen, 1234 bool need_invalidate, unsigned int remote_key) 1235 { 1236 struct smb_direct_transport *st = smb_trans_direct_transfort(t); 1237 int remaining_data_length; 1238 int start, i, j; 1239 int max_iov_size = st->max_send_size - 1240 sizeof(struct smb_direct_data_transfer); 1241 int ret; 1242 struct kvec vec; 1243 struct smb_direct_send_ctx send_ctx; 1244 1245 if (st->status != SMB_DIRECT_CS_CONNECTED) 1246 return 
-ENOTCONN; 1247 1248 //FIXME: skip RFC1002 header.. 1249 buflen -= 4; 1250 1251 remaining_data_length = buflen; 1252 ksmbd_debug(RDMA, "Sending smb (RDMA): smb_len=%u\n", buflen); 1253 1254 smb_direct_send_ctx_init(st, &send_ctx, need_invalidate, remote_key); 1255 start = i = 1; 1256 buflen = 0; 1257 while (true) { 1258 buflen += iov[i].iov_len; 1259 if (buflen > max_iov_size) { 1260 if (i > start) { 1261 remaining_data_length -= 1262 (buflen - iov[i].iov_len); 1263 ret = smb_direct_post_send_data(st, &send_ctx, 1264 &iov[start], i - start, 1265 remaining_data_length); 1266 if (ret) 1267 goto done; 1268 } else { 1269 /* iov[start] is too big, break it */ 1270 int nvec = (buflen + max_iov_size - 1) / 1271 max_iov_size; 1272 1273 for (j = 0; j < nvec; j++) { 1274 vec.iov_base = 1275 (char *)iov[start].iov_base + 1276 j * max_iov_size; 1277 vec.iov_len = 1278 min_t(int, max_iov_size, 1279 buflen - max_iov_size * j); 1280 remaining_data_length -= vec.iov_len; 1281 ret = smb_direct_post_send_data(st, &send_ctx, &vec, 1, 1282 remaining_data_length); 1283 if (ret) 1284 goto done; 1285 } 1286 i++; 1287 if (i == niovs) 1288 break; 1289 } 1290 start = i; 1291 buflen = 0; 1292 } else { 1293 i++; 1294 if (i == niovs) { 1295 /* send out all remaining vecs */ 1296 remaining_data_length -= buflen; 1297 ret = smb_direct_post_send_data(st, &send_ctx, 1298 &iov[start], i - start, 1299 remaining_data_length); 1300 if (ret) 1301 goto done; 1302 break; 1303 } 1304 } 1305 } 1306 1307 done: 1308 ret = smb_direct_flush_send_list(st, &send_ctx, true); 1309 1310 /* 1311 * As an optimization, we don't wait for individual I/O to finish 1312 * before sending the next one. 1313 * Send them all and wait for pending send count to get to 0 1314 * that means all the I/Os have been out and we are good to return 1315 */ 1316 1317 wait_event(st->wait_send_pending, 1318 atomic_read(&st->send_pending) == 0); 1319 return ret; 1320 } 1321 1322 static void smb_direct_free_rdma_rw_msg(struct smb_direct_transport *t, 1323 struct smb_direct_rdma_rw_msg *msg, 1324 enum dma_data_direction dir) 1325 { 1326 rdma_rw_ctx_destroy(&msg->rw_ctx, t->qp, t->qp->port, 1327 msg->sgt.sgl, msg->sgt.nents, dir); 1328 sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE); 1329 kfree(msg); 1330 } 1331 1332 static void read_write_done(struct ib_cq *cq, struct ib_wc *wc, 1333 enum dma_data_direction dir) 1334 { 1335 struct smb_direct_rdma_rw_msg *msg = container_of(wc->wr_cqe, 1336 struct smb_direct_rdma_rw_msg, cqe); 1337 struct smb_direct_transport *t = msg->t; 1338 1339 if (wc->status != IB_WC_SUCCESS) { 1340 msg->status = -EIO; 1341 pr_err("read/write error. 
opcode = %d, status = %s(%d)\n", 1342 wc->opcode, ib_wc_status_msg(wc->status), wc->status); 1343 if (wc->status != IB_WC_WR_FLUSH_ERR) 1344 smb_direct_disconnect_rdma_connection(t); 1345 } 1346 1347 complete(msg->completion); 1348 } 1349 1350 static void read_done(struct ib_cq *cq, struct ib_wc *wc) 1351 { 1352 read_write_done(cq, wc, DMA_FROM_DEVICE); 1353 } 1354 1355 static void write_done(struct ib_cq *cq, struct ib_wc *wc) 1356 { 1357 read_write_done(cq, wc, DMA_TO_DEVICE); 1358 } 1359 1360 static int smb_direct_rdma_xmit(struct smb_direct_transport *t, 1361 void *buf, int buf_len, 1362 struct smb2_buffer_desc_v1 *desc, 1363 unsigned int desc_len, 1364 bool is_read) 1365 { 1366 struct smb_direct_rdma_rw_msg *msg, *next_msg; 1367 int i, ret; 1368 DECLARE_COMPLETION_ONSTACK(completion); 1369 struct ib_send_wr *first_wr; 1370 LIST_HEAD(msg_list); 1371 char *desc_buf; 1372 int credits_needed; 1373 unsigned int desc_buf_len, desc_num = 0; 1374 1375 if (t->status != SMB_DIRECT_CS_CONNECTED) 1376 return -ENOTCONN; 1377 1378 if (buf_len > t->max_rdma_rw_size) 1379 return -EINVAL; 1380 1381 /* calculate needed credits */ 1382 credits_needed = 0; 1383 desc_buf = buf; 1384 for (i = 0; i < desc_len / sizeof(*desc); i++) { 1385 if (!buf_len) 1386 break; 1387 1388 desc_buf_len = le32_to_cpu(desc[i].length); 1389 if (!desc_buf_len) 1390 return -EINVAL; 1391 1392 if (desc_buf_len > buf_len) { 1393 desc_buf_len = buf_len; 1394 desc[i].length = cpu_to_le32(desc_buf_len); 1395 buf_len = 0; 1396 } 1397 1398 credits_needed += calc_rw_credits(t, desc_buf, desc_buf_len); 1399 desc_buf += desc_buf_len; 1400 buf_len -= desc_buf_len; 1401 desc_num++; 1402 } 1403 1404 ksmbd_debug(RDMA, "RDMA %s, len %#x, needed credits %#x\n", 1405 str_read_write(is_read), buf_len, credits_needed); 1406 1407 ret = wait_for_rw_credits(t, credits_needed); 1408 if (ret < 0) 1409 return ret; 1410 1411 /* build rdma_rw_ctx for each descriptor */ 1412 desc_buf = buf; 1413 for (i = 0; i < desc_num; i++) { 1414 msg = kzalloc(struct_size(msg, sg_list, SG_CHUNK_SIZE), 1415 KSMBD_DEFAULT_GFP); 1416 if (!msg) { 1417 ret = -ENOMEM; 1418 goto out; 1419 } 1420 1421 desc_buf_len = le32_to_cpu(desc[i].length); 1422 1423 msg->t = t; 1424 msg->cqe.done = is_read ? read_done : write_done; 1425 msg->completion = &completion; 1426 1427 msg->sgt.sgl = &msg->sg_list[0]; 1428 ret = sg_alloc_table_chained(&msg->sgt, 1429 get_buf_page_count(desc_buf, desc_buf_len), 1430 msg->sg_list, SG_CHUNK_SIZE); 1431 if (ret) { 1432 kfree(msg); 1433 ret = -ENOMEM; 1434 goto out; 1435 } 1436 1437 ret = get_sg_list(desc_buf, desc_buf_len, 1438 msg->sgt.sgl, msg->sgt.orig_nents); 1439 if (ret < 0) { 1440 sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE); 1441 kfree(msg); 1442 goto out; 1443 } 1444 1445 ret = rdma_rw_ctx_init(&msg->rw_ctx, t->qp, t->qp->port, 1446 msg->sgt.sgl, 1447 get_buf_page_count(desc_buf, desc_buf_len), 1448 0, 1449 le64_to_cpu(desc[i].offset), 1450 le32_to_cpu(desc[i].token), 1451 is_read ? 
DMA_FROM_DEVICE : DMA_TO_DEVICE); 1452 if (ret < 0) { 1453 pr_err("failed to init rdma_rw_ctx: %d\n", ret); 1454 sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE); 1455 kfree(msg); 1456 goto out; 1457 } 1458 1459 list_add_tail(&msg->list, &msg_list); 1460 desc_buf += desc_buf_len; 1461 } 1462 1463 /* concatenate work requests of rdma_rw_ctxs */ 1464 first_wr = NULL; 1465 list_for_each_entry_reverse(msg, &msg_list, list) { 1466 first_wr = rdma_rw_ctx_wrs(&msg->rw_ctx, t->qp, t->qp->port, 1467 &msg->cqe, first_wr); 1468 } 1469 1470 ret = ib_post_send(t->qp, first_wr, NULL); 1471 if (ret) { 1472 pr_err("failed to post send wr for RDMA R/W: %d\n", ret); 1473 goto out; 1474 } 1475 1476 msg = list_last_entry(&msg_list, struct smb_direct_rdma_rw_msg, list); 1477 wait_for_completion(&completion); 1478 ret = msg->status; 1479 out: 1480 list_for_each_entry_safe(msg, next_msg, &msg_list, list) { 1481 list_del(&msg->list); 1482 smb_direct_free_rdma_rw_msg(t, msg, 1483 is_read ? DMA_FROM_DEVICE : DMA_TO_DEVICE); 1484 } 1485 atomic_add(credits_needed, &t->rw_credits); 1486 wake_up(&t->wait_rw_credits); 1487 return ret; 1488 } 1489 1490 static int smb_direct_rdma_write(struct ksmbd_transport *t, 1491 void *buf, unsigned int buflen, 1492 struct smb2_buffer_desc_v1 *desc, 1493 unsigned int desc_len) 1494 { 1495 return smb_direct_rdma_xmit(smb_trans_direct_transfort(t), buf, buflen, 1496 desc, desc_len, false); 1497 } 1498 1499 static int smb_direct_rdma_read(struct ksmbd_transport *t, 1500 void *buf, unsigned int buflen, 1501 struct smb2_buffer_desc_v1 *desc, 1502 unsigned int desc_len) 1503 { 1504 return smb_direct_rdma_xmit(smb_trans_direct_transfort(t), buf, buflen, 1505 desc, desc_len, true); 1506 } 1507 1508 static void smb_direct_disconnect(struct ksmbd_transport *t) 1509 { 1510 struct smb_direct_transport *st = smb_trans_direct_transfort(t); 1511 1512 ksmbd_debug(RDMA, "Disconnecting cm_id=%p\n", st->cm_id); 1513 1514 smb_direct_disconnect_rdma_work(&st->disconnect_work); 1515 wait_event_interruptible(st->wait_status, 1516 st->status == SMB_DIRECT_CS_DISCONNECTED); 1517 free_transport(st); 1518 } 1519 1520 static void smb_direct_shutdown(struct ksmbd_transport *t) 1521 { 1522 struct smb_direct_transport *st = smb_trans_direct_transfort(t); 1523 1524 ksmbd_debug(RDMA, "smb-direct shutdown cm_id=%p\n", st->cm_id); 1525 1526 smb_direct_disconnect_rdma_work(&st->disconnect_work); 1527 } 1528 1529 static int smb_direct_cm_handler(struct rdma_cm_id *cm_id, 1530 struct rdma_cm_event *event) 1531 { 1532 struct smb_direct_transport *t = cm_id->context; 1533 1534 ksmbd_debug(RDMA, "RDMA CM event. cm_id=%p event=%s (%d)\n", 1535 cm_id, rdma_event_msg(event->event), event->event); 1536 1537 switch (event->event) { 1538 case RDMA_CM_EVENT_ESTABLISHED: { 1539 t->status = SMB_DIRECT_CS_CONNECTED; 1540 wake_up_interruptible(&t->wait_status); 1541 break; 1542 } 1543 case RDMA_CM_EVENT_DEVICE_REMOVAL: 1544 case RDMA_CM_EVENT_DISCONNECTED: { 1545 ib_drain_qp(t->qp); 1546 1547 t->status = SMB_DIRECT_CS_DISCONNECTED; 1548 wake_up_interruptible(&t->wait_status); 1549 wake_up_interruptible(&t->wait_reassembly_queue); 1550 wake_up(&t->wait_send_credits); 1551 break; 1552 } 1553 case RDMA_CM_EVENT_CONNECT_ERROR: { 1554 t->status = SMB_DIRECT_CS_DISCONNECTED; 1555 wake_up_interruptible(&t->wait_status); 1556 break; 1557 } 1558 default: 1559 pr_err("Unexpected RDMA CM event. 
cm_id=%p, event=%s (%d)\n", 1560 cm_id, rdma_event_msg(event->event), 1561 event->event); 1562 break; 1563 } 1564 return 0; 1565 } 1566 1567 static void smb_direct_qpair_handler(struct ib_event *event, void *context) 1568 { 1569 struct smb_direct_transport *t = context; 1570 1571 ksmbd_debug(RDMA, "Received QP event. cm_id=%p, event=%s (%d)\n", 1572 t->cm_id, ib_event_msg(event->event), event->event); 1573 1574 switch (event->event) { 1575 case IB_EVENT_CQ_ERR: 1576 case IB_EVENT_QP_FATAL: 1577 smb_direct_disconnect_rdma_connection(t); 1578 break; 1579 default: 1580 break; 1581 } 1582 } 1583 1584 static int smb_direct_send_negotiate_response(struct smb_direct_transport *t, 1585 int failed) 1586 { 1587 struct smb_direct_sendmsg *sendmsg; 1588 struct smb_direct_negotiate_resp *resp; 1589 int ret; 1590 1591 sendmsg = smb_direct_alloc_sendmsg(t); 1592 if (IS_ERR(sendmsg)) 1593 return -ENOMEM; 1594 1595 resp = (struct smb_direct_negotiate_resp *)sendmsg->packet; 1596 if (failed) { 1597 memset(resp, 0, sizeof(*resp)); 1598 resp->min_version = cpu_to_le16(0x0100); 1599 resp->max_version = cpu_to_le16(0x0100); 1600 resp->status = STATUS_NOT_SUPPORTED; 1601 } else { 1602 resp->status = STATUS_SUCCESS; 1603 resp->min_version = SMB_DIRECT_VERSION_LE; 1604 resp->max_version = SMB_DIRECT_VERSION_LE; 1605 resp->negotiated_version = SMB_DIRECT_VERSION_LE; 1606 resp->reserved = 0; 1607 resp->credits_requested = 1608 cpu_to_le16(t->send_credit_target); 1609 resp->credits_granted = cpu_to_le16(manage_credits_prior_sending(t)); 1610 resp->max_readwrite_size = cpu_to_le32(t->max_rdma_rw_size); 1611 resp->preferred_send_size = cpu_to_le32(t->max_send_size); 1612 resp->max_receive_size = cpu_to_le32(t->max_recv_size); 1613 resp->max_fragmented_size = 1614 cpu_to_le32(t->max_fragmented_recv_size); 1615 } 1616 1617 sendmsg->sge[0].addr = ib_dma_map_single(t->cm_id->device, 1618 (void *)resp, sizeof(*resp), 1619 DMA_TO_DEVICE); 1620 ret = ib_dma_mapping_error(t->cm_id->device, sendmsg->sge[0].addr); 1621 if (ret) { 1622 smb_direct_free_sendmsg(t, sendmsg); 1623 return ret; 1624 } 1625 1626 sendmsg->num_sge = 1; 1627 sendmsg->sge[0].length = sizeof(*resp); 1628 sendmsg->sge[0].lkey = t->pd->local_dma_lkey; 1629 1630 ret = post_sendmsg(t, NULL, sendmsg); 1631 if (ret) { 1632 smb_direct_free_sendmsg(t, sendmsg); 1633 return ret; 1634 } 1635 1636 wait_event(t->wait_send_pending, 1637 atomic_read(&t->send_pending) == 0); 1638 return 0; 1639 } 1640 1641 static int smb_direct_accept_client(struct smb_direct_transport *t) 1642 { 1643 struct rdma_conn_param conn_param; 1644 struct ib_port_immutable port_immutable; 1645 u32 ird_ord_hdr[2]; 1646 int ret; 1647 1648 memset(&conn_param, 0, sizeof(conn_param)); 1649 conn_param.initiator_depth = min_t(u8, t->cm_id->device->attrs.max_qp_rd_atom, 1650 SMB_DIRECT_CM_INITIATOR_DEPTH); 1651 conn_param.responder_resources = 0; 1652 1653 t->cm_id->device->ops.get_port_immutable(t->cm_id->device, 1654 t->cm_id->port_num, 1655 &port_immutable); 1656 if (port_immutable.core_cap_flags & RDMA_CORE_PORT_IWARP) { 1657 ird_ord_hdr[0] = conn_param.responder_resources; 1658 ird_ord_hdr[1] = 1; 1659 conn_param.private_data = ird_ord_hdr; 1660 conn_param.private_data_len = sizeof(ird_ord_hdr); 1661 } else { 1662 conn_param.private_data = NULL; 1663 conn_param.private_data_len = 0; 1664 } 1665 conn_param.retry_count = SMB_DIRECT_CM_RETRY; 1666 conn_param.rnr_retry_count = SMB_DIRECT_CM_RNR_RETRY; 1667 conn_param.flow_control = 0; 1668 1669 ret = rdma_accept(t->cm_id, &conn_param); 1670 if (ret) { 
		pr_err("error at rdma_accept: %d\n", ret);
		return ret;
	}
	return 0;
}

static int smb_direct_prepare_negotiation(struct smb_direct_transport *t)
{
	int ret;
	struct smb_direct_recvmsg *recvmsg;

	recvmsg = get_free_recvmsg(t);
	if (!recvmsg)
		return -ENOMEM;
	recvmsg->type = SMB_DIRECT_MSG_NEGOTIATE_REQ;

	ret = smb_direct_post_recv(t, recvmsg);
	if (ret) {
		pr_err("Can't post recv: %d\n", ret);
		goto out_err;
	}

	t->negotiation_requested = false;
	ret = smb_direct_accept_client(t);
	if (ret) {
		pr_err("Can't accept client\n");
		goto out_err;
	}

	smb_direct_post_recv_credits(&t->post_recv_credits_work.work);
	return 0;
out_err:
	put_recvmsg(t, recvmsg);
	return ret;
}

static unsigned int smb_direct_get_max_fr_pages(struct smb_direct_transport *t)
{
	return min_t(unsigned int,
		     t->cm_id->device->attrs.max_fast_reg_page_list_len,
		     256);
}

static int smb_direct_init_params(struct smb_direct_transport *t,
				  struct ib_qp_cap *cap)
{
	struct ib_device *device = t->cm_id->device;
	int max_send_sges, max_rw_wrs, max_send_wrs;
	unsigned int max_sge_per_wr, wrs_per_credit;

	/* Three extra SGEs are needed because the SMB_DIRECT header, the
	 * SMB2 header and the SMB2 response may each be mapped separately.
	 */
	t->max_send_size = smb_direct_max_send_size;
	max_send_sges = DIV_ROUND_UP(t->max_send_size, PAGE_SIZE) + 3;
	if (max_send_sges > SMB_DIRECT_MAX_SEND_SGES) {
		pr_err("max_send_size %d is too large\n", t->max_send_size);
		return -EINVAL;
	}

	/* Calculate the number of work requests for RDMA R/W.
	 * One R/W credit covers as many pages as can be registered with a
	 * single memory region, and each credit needs at least four work
	 * requests: MR registration, the RDMA R/W itself, and local and
	 * remote MR invalidation.
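	 *
	 * As a rough worked example (assuming 4 KiB pages, a device allowing
	 * at least 256 fast-registration pages so pages_per_rw_credit = 256,
	 * and a max_rdma_rw_size of, say, 8 MiB): max_rw_credits =
	 * DIV_ROUND_UP(8 MiB, 255 * 4 KiB) = 9 credits, and with
	 * wrs_per_credit >= 4 that adds at least 36 work requests on top of
	 * the send credit target.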
1737 */ 1738 t->max_rdma_rw_size = smb_direct_max_read_write_size; 1739 t->pages_per_rw_credit = smb_direct_get_max_fr_pages(t); 1740 t->max_rw_credits = DIV_ROUND_UP(t->max_rdma_rw_size, 1741 (t->pages_per_rw_credit - 1) * 1742 PAGE_SIZE); 1743 1744 max_sge_per_wr = min_t(unsigned int, device->attrs.max_send_sge, 1745 device->attrs.max_sge_rd); 1746 max_sge_per_wr = max_t(unsigned int, max_sge_per_wr, 1747 max_send_sges); 1748 wrs_per_credit = max_t(unsigned int, 4, 1749 DIV_ROUND_UP(t->pages_per_rw_credit, 1750 max_sge_per_wr) + 1); 1751 max_rw_wrs = t->max_rw_credits * wrs_per_credit; 1752 1753 max_send_wrs = smb_direct_send_credit_target + max_rw_wrs; 1754 if (max_send_wrs > device->attrs.max_cqe || 1755 max_send_wrs > device->attrs.max_qp_wr) { 1756 pr_err("consider lowering send_credit_target = %d\n", 1757 smb_direct_send_credit_target); 1758 pr_err("Possible CQE overrun, device reporting max_cqe %d max_qp_wr %d\n", 1759 device->attrs.max_cqe, device->attrs.max_qp_wr); 1760 return -EINVAL; 1761 } 1762 1763 if (smb_direct_receive_credit_max > device->attrs.max_cqe || 1764 smb_direct_receive_credit_max > device->attrs.max_qp_wr) { 1765 pr_err("consider lowering receive_credit_max = %d\n", 1766 smb_direct_receive_credit_max); 1767 pr_err("Possible CQE overrun, device reporting max_cpe %d max_qp_wr %d\n", 1768 device->attrs.max_cqe, device->attrs.max_qp_wr); 1769 return -EINVAL; 1770 } 1771 1772 if (device->attrs.max_recv_sge < SMB_DIRECT_MAX_RECV_SGES) { 1773 pr_err("warning: device max_recv_sge = %d too small\n", 1774 device->attrs.max_recv_sge); 1775 return -EINVAL; 1776 } 1777 1778 t->recv_credits = 0; 1779 t->count_avail_recvmsg = 0; 1780 1781 t->recv_credit_max = smb_direct_receive_credit_max; 1782 t->recv_credit_target = 10; 1783 t->new_recv_credits = 0; 1784 1785 t->send_credit_target = smb_direct_send_credit_target; 1786 atomic_set(&t->send_credits, 0); 1787 atomic_set(&t->rw_credits, t->max_rw_credits); 1788 1789 t->max_send_size = smb_direct_max_send_size; 1790 t->max_recv_size = smb_direct_max_receive_size; 1791 t->max_fragmented_recv_size = smb_direct_max_fragmented_recv_size; 1792 1793 cap->max_send_wr = max_send_wrs; 1794 cap->max_recv_wr = t->recv_credit_max; 1795 cap->max_send_sge = max_sge_per_wr; 1796 cap->max_recv_sge = SMB_DIRECT_MAX_RECV_SGES; 1797 cap->max_inline_data = 0; 1798 cap->max_rdma_ctxs = t->max_rw_credits; 1799 return 0; 1800 } 1801 1802 static void smb_direct_destroy_pools(struct smb_direct_transport *t) 1803 { 1804 struct smb_direct_recvmsg *recvmsg; 1805 1806 while ((recvmsg = get_free_recvmsg(t))) 1807 mempool_free(recvmsg, t->recvmsg_mempool); 1808 while ((recvmsg = get_empty_recvmsg(t))) 1809 mempool_free(recvmsg, t->recvmsg_mempool); 1810 1811 mempool_destroy(t->recvmsg_mempool); 1812 t->recvmsg_mempool = NULL; 1813 1814 kmem_cache_destroy(t->recvmsg_cache); 1815 t->recvmsg_cache = NULL; 1816 1817 mempool_destroy(t->sendmsg_mempool); 1818 t->sendmsg_mempool = NULL; 1819 1820 kmem_cache_destroy(t->sendmsg_cache); 1821 t->sendmsg_cache = NULL; 1822 } 1823 1824 static int smb_direct_create_pools(struct smb_direct_transport *t) 1825 { 1826 char name[80]; 1827 int i; 1828 struct smb_direct_recvmsg *recvmsg; 1829 1830 snprintf(name, sizeof(name), "smb_direct_rqst_pool_%p", t); 1831 t->sendmsg_cache = kmem_cache_create(name, 1832 sizeof(struct smb_direct_sendmsg) + 1833 sizeof(struct smb_direct_negotiate_resp), 1834 0, SLAB_HWCACHE_ALIGN, NULL); 1835 if (!t->sendmsg_cache) 1836 return -ENOMEM; 1837 1838 t->sendmsg_mempool = 
mempool_create(t->send_credit_target, 1839 mempool_alloc_slab, mempool_free_slab, 1840 t->sendmsg_cache); 1841 if (!t->sendmsg_mempool) 1842 goto err; 1843 1844 snprintf(name, sizeof(name), "smb_direct_resp_%p", t); 1845 t->recvmsg_cache = kmem_cache_create(name, 1846 sizeof(struct smb_direct_recvmsg) + 1847 t->max_recv_size, 1848 0, SLAB_HWCACHE_ALIGN, NULL); 1849 if (!t->recvmsg_cache) 1850 goto err; 1851 1852 t->recvmsg_mempool = 1853 mempool_create(t->recv_credit_max, mempool_alloc_slab, 1854 mempool_free_slab, t->recvmsg_cache); 1855 if (!t->recvmsg_mempool) 1856 goto err; 1857 1858 INIT_LIST_HEAD(&t->recvmsg_queue); 1859 1860 for (i = 0; i < t->recv_credit_max; i++) { 1861 recvmsg = mempool_alloc(t->recvmsg_mempool, KSMBD_DEFAULT_GFP); 1862 if (!recvmsg) 1863 goto err; 1864 recvmsg->transport = t; 1865 list_add(&recvmsg->list, &t->recvmsg_queue); 1866 } 1867 t->count_avail_recvmsg = t->recv_credit_max; 1868 1869 return 0; 1870 err: 1871 smb_direct_destroy_pools(t); 1872 return -ENOMEM; 1873 } 1874 1875 static int smb_direct_create_qpair(struct smb_direct_transport *t, 1876 struct ib_qp_cap *cap) 1877 { 1878 int ret; 1879 struct ib_qp_init_attr qp_attr; 1880 int pages_per_rw; 1881 1882 t->pd = ib_alloc_pd(t->cm_id->device, 0); 1883 if (IS_ERR(t->pd)) { 1884 pr_err("Can't create RDMA PD\n"); 1885 ret = PTR_ERR(t->pd); 1886 t->pd = NULL; 1887 return ret; 1888 } 1889 1890 t->send_cq = ib_alloc_cq(t->cm_id->device, t, 1891 smb_direct_send_credit_target + cap->max_rdma_ctxs, 1892 0, IB_POLL_WORKQUEUE); 1893 if (IS_ERR(t->send_cq)) { 1894 pr_err("Can't create RDMA send CQ\n"); 1895 ret = PTR_ERR(t->send_cq); 1896 t->send_cq = NULL; 1897 goto err; 1898 } 1899 1900 t->recv_cq = ib_alloc_cq(t->cm_id->device, t, 1901 t->recv_credit_max, 0, IB_POLL_WORKQUEUE); 1902 if (IS_ERR(t->recv_cq)) { 1903 pr_err("Can't create RDMA recv CQ\n"); 1904 ret = PTR_ERR(t->recv_cq); 1905 t->recv_cq = NULL; 1906 goto err; 1907 } 1908 1909 memset(&qp_attr, 0, sizeof(qp_attr)); 1910 qp_attr.event_handler = smb_direct_qpair_handler; 1911 qp_attr.qp_context = t; 1912 qp_attr.cap = *cap; 1913 qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR; 1914 qp_attr.qp_type = IB_QPT_RC; 1915 qp_attr.send_cq = t->send_cq; 1916 qp_attr.recv_cq = t->recv_cq; 1917 qp_attr.port_num = ~0; 1918 1919 ret = rdma_create_qp(t->cm_id, t->pd, &qp_attr); 1920 if (ret) { 1921 pr_err("Can't create RDMA QP: %d\n", ret); 1922 goto err; 1923 } 1924 1925 t->qp = t->cm_id->qp; 1926 t->cm_id->event_handler = smb_direct_cm_handler; 1927 1928 pages_per_rw = DIV_ROUND_UP(t->max_rdma_rw_size, PAGE_SIZE) + 1; 1929 if (pages_per_rw > t->cm_id->device->attrs.max_sgl_rd) { 1930 ret = ib_mr_pool_init(t->qp, &t->qp->rdma_mrs, 1931 t->max_rw_credits, IB_MR_TYPE_MEM_REG, 1932 t->pages_per_rw_credit, 0); 1933 if (ret) { 1934 pr_err("failed to init mr pool count %d pages %d\n", 1935 t->max_rw_credits, t->pages_per_rw_credit); 1936 goto err; 1937 } 1938 } 1939 1940 return 0; 1941 err: 1942 if (t->qp) { 1943 ib_destroy_qp(t->qp); 1944 t->qp = NULL; 1945 } 1946 if (t->recv_cq) { 1947 ib_destroy_cq(t->recv_cq); 1948 t->recv_cq = NULL; 1949 } 1950 if (t->send_cq) { 1951 ib_destroy_cq(t->send_cq); 1952 t->send_cq = NULL; 1953 } 1954 if (t->pd) { 1955 ib_dealloc_pd(t->pd); 1956 t->pd = NULL; 1957 } 1958 return ret; 1959 } 1960 1961 static int smb_direct_prepare(struct ksmbd_transport *t) 1962 { 1963 struct smb_direct_transport *st = smb_trans_direct_transfort(t); 1964 struct smb_direct_recvmsg *recvmsg; 1965 struct smb_direct_negotiate_req *req; 1966 int ret; 1967 1968 
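	/*
	 * The negotiate request receive was posted in
	 * smb_direct_prepare_negotiation(); recv_done() moves it to the
	 * reassembly queue, sets negotiation_requested and wakes wait_status,
	 * which is what the wait below is waiting for.
	 */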
	ksmbd_debug(RDMA, "Waiting for SMB_DIRECT negotiate request\n");
	ret = wait_event_interruptible_timeout(st->wait_status,
					       st->negotiation_requested ||
					       st->status == SMB_DIRECT_CS_DISCONNECTED,
					       SMB_DIRECT_NEGOTIATE_TIMEOUT * HZ);
	if (ret <= 0 || st->status == SMB_DIRECT_CS_DISCONNECTED)
		return ret < 0 ? ret : -ETIMEDOUT;

	recvmsg = get_first_reassembly(st);
	if (!recvmsg)
		return -ECONNABORTED;

	ret = smb_direct_check_recvmsg(recvmsg);
	if (ret == -ECONNABORTED)
		goto out;

	req = (struct smb_direct_negotiate_req *)recvmsg->packet;
	st->max_recv_size = min_t(int, st->max_recv_size,
				  le32_to_cpu(req->preferred_send_size));
	st->max_send_size = min_t(int, st->max_send_size,
				  le32_to_cpu(req->max_receive_size));
	st->max_fragmented_send_size =
		le32_to_cpu(req->max_fragmented_size);
	st->max_fragmented_recv_size =
		(st->recv_credit_max * st->max_recv_size) / 2;

	ret = smb_direct_send_negotiate_response(st, ret);
out:
	spin_lock_irq(&st->reassembly_queue_lock);
	st->reassembly_queue_length--;
	list_del(&recvmsg->list);
	spin_unlock_irq(&st->reassembly_queue_lock);
	put_recvmsg(st, recvmsg);

	return ret;
}

static int smb_direct_connect(struct smb_direct_transport *st)
{
	int ret;
	struct ib_qp_cap qp_cap;

	ret = smb_direct_init_params(st, &qp_cap);
	if (ret) {
		pr_err("Can't configure RDMA parameters\n");
		return ret;
	}

	ret = smb_direct_create_pools(st);
	if (ret) {
		pr_err("Can't init RDMA pool: %d\n", ret);
		return ret;
	}

	ret = smb_direct_create_qpair(st, &qp_cap);
	if (ret) {
		pr_err("Can't accept RDMA client: %d\n", ret);
		return ret;
	}

	ret = smb_direct_prepare_negotiation(st);
	if (ret) {
		pr_err("Can't negotiate: %d\n", ret);
		return ret;
	}
	return 0;
}

static bool rdma_frwr_is_supported(struct ib_device_attr *attrs)
{
	if (!(attrs->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS))
		return false;
	if (attrs->max_fast_reg_page_list_len == 0)
		return false;
	return true;
}

static int smb_direct_handle_connect_request(struct rdma_cm_id *new_cm_id)
{
	struct smb_direct_transport *t;
	struct task_struct *handler;
	int ret;

	if (!rdma_frwr_is_supported(&new_cm_id->device->attrs)) {
		ksmbd_debug(RDMA,
			    "Fast Registration Work Requests are not supported. device capabilities=%llx\n",
			    new_cm_id->device->attrs.device_cap_flags);
		return -EPROTONOSUPPORT;
	}

	t = alloc_transport(new_cm_id);
	if (!t)
		return -ENOMEM;

	ret = smb_direct_connect(t);
	if (ret)
		goto out_err;

	handler = kthread_run(ksmbd_conn_handler_loop,
			      KSMBD_TRANS(t)->conn, "ksmbd:r%u",
			      smb_direct_port);
	if (IS_ERR(handler)) {
		ret = PTR_ERR(handler);
		pr_err("Can't start thread\n");
		goto out_err;
	}

	return 0;
out_err:
	free_transport(t);
	return ret;
}

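/*
 * rdma_cm callback bound to the listening cm_id: a CONNECT_REQUEST event
 * spawns a new transport and its connection-handler thread via
 * smb_direct_handle_connect_request(); every other event is only logged.
 */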
static int smb_direct_listen_handler(struct rdma_cm_id *cm_id,
				     struct rdma_cm_event *event)
{
	switch (event->event) {
	case RDMA_CM_EVENT_CONNECT_REQUEST: {
		int ret = smb_direct_handle_connect_request(cm_id);

		if (ret) {
			pr_err("Can't create transport: %d\n", ret);
			return ret;
		}

		ksmbd_debug(RDMA, "Received connection request. cm_id=%p\n",
			    cm_id);
		break;
	}
	default:
		pr_err("Unexpected listen event. cm_id=%p, event=%s (%d)\n",
		       cm_id, rdma_event_msg(event->event), event->event);
		break;
	}
	return 0;
}

static int smb_direct_listen(int port)
{
	int ret;
	struct rdma_cm_id *cm_id;
	struct sockaddr_in sin = {
		.sin_family		= AF_INET,
		.sin_addr.s_addr	= htonl(INADDR_ANY),
		.sin_port		= htons(port),
	};

	cm_id = rdma_create_id(&init_net, smb_direct_listen_handler,
			       &smb_direct_listener, RDMA_PS_TCP, IB_QPT_RC);
	if (IS_ERR(cm_id)) {
		pr_err("Can't create cm id: %ld\n", PTR_ERR(cm_id));
		return PTR_ERR(cm_id);
	}

	ret = rdma_bind_addr(cm_id, (struct sockaddr *)&sin);
	if (ret) {
		pr_err("Can't bind: %d\n", ret);
		goto err;
	}

	smb_direct_listener.cm_id = cm_id;

	ret = rdma_listen(cm_id, 10);
	if (ret) {
		pr_err("Can't listen: %d\n", ret);
		goto err;
	}
	return 0;
err:
	smb_direct_listener.cm_id = NULL;
	rdma_destroy_id(cm_id);
	return ret;
}

static int smb_direct_ib_client_add(struct ib_device *ib_dev)
{
	struct smb_direct_device *smb_dev;

	/* Use the iWARP port (5445) if the device is not an InfiniBand CA */
	if (ib_dev->node_type != RDMA_NODE_IB_CA)
		smb_direct_port = SMB_DIRECT_PORT_IWARP;

	if (!rdma_frwr_is_supported(&ib_dev->attrs))
		return 0;

	smb_dev = kzalloc(sizeof(*smb_dev), KSMBD_DEFAULT_GFP);
	if (!smb_dev)
		return -ENOMEM;
	smb_dev->ib_dev = ib_dev;

	write_lock(&smb_direct_device_lock);
	list_add(&smb_dev->list, &smb_direct_device_list);
	write_unlock(&smb_direct_device_lock);

	ksmbd_debug(RDMA, "ib device added: name %s\n", ib_dev->name);
	return 0;
}

static void smb_direct_ib_client_remove(struct ib_device *ib_dev,
					void *client_data)
{
	struct smb_direct_device *smb_dev, *tmp;

	write_lock(&smb_direct_device_lock);
	list_for_each_entry_safe(smb_dev, tmp, &smb_direct_device_list, list) {
		if (smb_dev->ib_dev == ib_dev) {
			list_del(&smb_dev->list);
			kfree(smb_dev);
			break;
		}
	}
	write_unlock(&smb_direct_device_lock);
}

static struct ib_client smb_direct_ib_client = {
	.name	= "ksmbd_smb_direct_ib",
	.add	= smb_direct_ib_client_add,
	.remove	= smb_direct_ib_client_remove,
};

int ksmbd_rdma_init(void)
{
	int ret;

	smb_direct_listener.cm_id = NULL;

	ret = ib_register_client(&smb_direct_ib_client);
	if (ret) {
		pr_err("failed to ib_register_client\n");
		return ret;
	}

	/* When a client is running out of send credits, the server grants
	 * more credits by sending a packet through this workqueue.  This
	 * avoids the situation where a client cannot send packets for lack
	 * of credits.
	 */
	smb_direct_wq = alloc_workqueue("ksmbd-smb_direct-wq",
					WQ_HIGHPRI | WQ_MEM_RECLAIM, 0);
	if (!smb_direct_wq)
		return -ENOMEM;

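	/*
	 * Start the RDMA CM listener on smb_direct_port.  Note that
	 * smb_direct_ib_client_add() switches the port to
	 * SMB_DIRECT_PORT_IWARP when it sees a non-InfiniBand device, and
	 * ib_register_client() above runs that callback for devices already
	 * registered, so the port choice has normally been settled by now.
	 */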
	ret = smb_direct_listen(smb_direct_port);
	if (ret) {
		destroy_workqueue(smb_direct_wq);
		smb_direct_wq = NULL;
		pr_err("Can't listen: %d\n", ret);
		return ret;
	}

	ksmbd_debug(RDMA, "init RDMA listener. cm_id=%p\n",
		    smb_direct_listener.cm_id);
	return 0;
}

void ksmbd_rdma_destroy(void)
{
	if (!smb_direct_listener.cm_id)
		return;

	ib_unregister_client(&smb_direct_ib_client);
	rdma_destroy_id(smb_direct_listener.cm_id);

	smb_direct_listener.cm_id = NULL;

	if (smb_direct_wq) {
		destroy_workqueue(smb_direct_wq);
		smb_direct_wq = NULL;
	}
}

bool ksmbd_rdma_capable_netdev(struct net_device *netdev)
{
	struct smb_direct_device *smb_dev;
	int i;
	bool rdma_capable = false;

	read_lock(&smb_direct_device_lock);
	list_for_each_entry(smb_dev, &smb_direct_device_list, list) {
		for (i = 0; i < smb_dev->ib_dev->phys_port_cnt; i++) {
			struct net_device *ndev;

			ndev = ib_device_get_netdev(smb_dev->ib_dev, i + 1);
			if (!ndev)
				continue;

			if (ndev == netdev) {
				dev_put(ndev);
				rdma_capable = true;
				goto out;
			}
			dev_put(ndev);
		}
	}
out:
	read_unlock(&smb_direct_device_lock);

	if (!rdma_capable) {
		struct ib_device *ibdev;

		ibdev = ib_device_get_by_netdev(netdev, RDMA_DRIVER_UNKNOWN);
		if (ibdev) {
			rdma_capable = rdma_frwr_is_supported(&ibdev->attrs);
			ib_device_put(ibdev);
		}
	}

	ksmbd_debug(RDMA, "netdev(%s) rdma capable : %s\n",
		    netdev->name, str_true_false(rdma_capable));

	return rdma_capable;
}

static const struct ksmbd_transport_ops ksmbd_smb_direct_transport_ops = {
	.prepare	= smb_direct_prepare,
	.disconnect	= smb_direct_disconnect,
	.shutdown	= smb_direct_shutdown,
	.writev		= smb_direct_writev,
	.read		= smb_direct_read,
	.rdma_read	= smb_direct_rdma_read,
	.rdma_write	= smb_direct_rdma_write,
	.free_transport	= smb_direct_free_transport,
};