// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *   Copyright (C) 2017, Microsoft Corporation.
 *   Copyright (C) 2018, LG Electronics.
 *
 *   Author(s): Long Li <longli@microsoft.com>,
 *              Hyunchul Lee <hyc.lee@gmail.com>
 */

#define SUBMOD_NAME "smb_direct"

#include <linux/kthread.h>
#include <linux/list.h>
#include <linux/mempool.h>
#include <linux/highmem.h>
#include <linux/scatterlist.h>
#include <linux/string_choices.h>
#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>
#include <rdma/rw.h>

#include "glob.h"
#include "connection.h"
#include "smb_common.h"
#include "../common/smb2status.h"
#include "transport_rdma.h"

#define SMB_DIRECT_PORT_IWARP		5445
#define SMB_DIRECT_PORT_INFINIBAND	445

#define SMB_DIRECT_VERSION_LE		cpu_to_le16(0x0100)

/* SMB_DIRECT negotiation timeout in seconds */
#define SMB_DIRECT_NEGOTIATE_TIMEOUT	120

#define SMB_DIRECT_MAX_SEND_SGES	6
#define SMB_DIRECT_MAX_RECV_SGES	1

/*
 * Default maximum number of outstanding RDMA read/write operations on this
 * connection. This value may be decreased during QP creation to honor
 * hardware limits.
 */
#define SMB_DIRECT_CM_INITIATOR_DEPTH	8

/* Maximum number of retries on data transfer operations */
#define SMB_DIRECT_CM_RETRY		6
/* No need to retry on Receiver Not Ready since SMB_DIRECT manages credits */
#define SMB_DIRECT_CM_RNR_RETRY		0

/*
 * User-configurable initial values per SMB_DIRECT transport connection,
 * as defined in [MS-SMBD] 3.1.1.1.
 * These may change after SMB_DIRECT negotiation.
 */

/* Use port 445 as the SMB Direct port by default */
static int smb_direct_port = SMB_DIRECT_PORT_INFINIBAND;

/* The local peer's maximum number of credits to grant to the peer */
static int smb_direct_receive_credit_max = 255;

/* The number of send credits the local peer requests from the remote peer */
static int smb_direct_send_credit_target = 255;

/* The maximum size of a single message that can be sent to the remote peer */
static int smb_direct_max_send_size = 1364;

/* The maximum fragmented upper-layer payload receive size supported */
static int smb_direct_max_fragmented_recv_size = 1024 * 1024;

/* The maximum size of a single message that can be received */
static int smb_direct_max_receive_size = 1364;

static int smb_direct_max_read_write_size = SMBD_DEFAULT_IOSIZE;

static LIST_HEAD(smb_direct_device_list);
static DEFINE_RWLOCK(smb_direct_device_lock);

struct smb_direct_device {
        struct ib_device *ib_dev;
        struct list_head list;
};

static struct smb_direct_listener {
        struct rdma_cm_id *cm_id;
} smb_direct_listener;

static struct workqueue_struct *smb_direct_wq;

enum smb_direct_status {
        SMB_DIRECT_CS_NEW = 0,
        SMB_DIRECT_CS_CONNECTED,
        SMB_DIRECT_CS_DISCONNECTING,
        SMB_DIRECT_CS_DISCONNECTED,
};

struct smb_direct_transport {
        struct ksmbd_transport transport;

        enum smb_direct_status status;
        bool full_packet_received;
        wait_queue_head_t wait_status;

        struct rdma_cm_id *cm_id;
        struct ib_cq *send_cq;
        struct ib_cq *recv_cq;
        struct ib_pd *pd;
        struct ib_qp *qp;

        int max_send_size;
        int max_recv_size;
        int max_fragmented_send_size;
        int max_fragmented_recv_size;
        int max_rdma_rw_size;

        spinlock_t reassembly_queue_lock;
        struct list_head reassembly_queue;
        int reassembly_data_length;
        int reassembly_queue_length;
        int first_entry_offset;
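        /*
         * Readers sleep on this waitqueue in smb_direct_read() until
         * recv_done() has queued enough reassembly data or the transport
         * is disconnected.
         */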
wait_queue_head_t wait_reassembly_queue; 122 123 spinlock_t receive_credit_lock; 124 int recv_credits; 125 int count_avail_recvmsg; 126 int recv_credit_max; 127 int recv_credit_target; 128 129 spinlock_t recvmsg_queue_lock; 130 struct list_head recvmsg_queue; 131 132 spinlock_t empty_recvmsg_queue_lock; 133 struct list_head empty_recvmsg_queue; 134 135 int send_credit_target; 136 atomic_t send_credits; 137 spinlock_t lock_new_recv_credits; 138 int new_recv_credits; 139 int max_rw_credits; 140 int pages_per_rw_credit; 141 atomic_t rw_credits; 142 143 wait_queue_head_t wait_send_credits; 144 wait_queue_head_t wait_rw_credits; 145 146 mempool_t *sendmsg_mempool; 147 struct kmem_cache *sendmsg_cache; 148 mempool_t *recvmsg_mempool; 149 struct kmem_cache *recvmsg_cache; 150 151 wait_queue_head_t wait_send_pending; 152 atomic_t send_pending; 153 154 struct delayed_work post_recv_credits_work; 155 struct work_struct send_immediate_work; 156 struct work_struct disconnect_work; 157 158 bool negotiation_requested; 159 }; 160 161 #define KSMBD_TRANS(t) ((struct ksmbd_transport *)&((t)->transport)) 162 #define SMBD_TRANS(t) ((struct smb_direct_transport *)container_of(t, \ 163 struct smb_direct_transport, transport)) 164 enum { 165 SMB_DIRECT_MSG_NEGOTIATE_REQ = 0, 166 SMB_DIRECT_MSG_DATA_TRANSFER 167 }; 168 169 static const struct ksmbd_transport_ops ksmbd_smb_direct_transport_ops; 170 171 struct smb_direct_send_ctx { 172 struct list_head msg_list; 173 int wr_cnt; 174 bool need_invalidate_rkey; 175 unsigned int remote_key; 176 }; 177 178 struct smb_direct_sendmsg { 179 struct smb_direct_transport *transport; 180 struct ib_send_wr wr; 181 struct list_head list; 182 int num_sge; 183 struct ib_sge sge[SMB_DIRECT_MAX_SEND_SGES]; 184 struct ib_cqe cqe; 185 u8 packet[]; 186 }; 187 188 struct smb_direct_recvmsg { 189 struct smb_direct_transport *transport; 190 struct list_head list; 191 int type; 192 struct ib_sge sge; 193 struct ib_cqe cqe; 194 bool first_segment; 195 u8 packet[]; 196 }; 197 198 struct smb_direct_rdma_rw_msg { 199 struct smb_direct_transport *t; 200 struct ib_cqe cqe; 201 int status; 202 struct completion *completion; 203 struct list_head list; 204 struct rdma_rw_ctx rw_ctx; 205 struct sg_table sgt; 206 struct scatterlist sg_list[]; 207 }; 208 209 void init_smbd_max_io_size(unsigned int sz) 210 { 211 sz = clamp_val(sz, SMBD_MIN_IOSIZE, SMBD_MAX_IOSIZE); 212 smb_direct_max_read_write_size = sz; 213 } 214 215 unsigned int get_smbd_max_read_write_size(void) 216 { 217 return smb_direct_max_read_write_size; 218 } 219 220 static inline int get_buf_page_count(void *buf, int size) 221 { 222 return DIV_ROUND_UP((uintptr_t)buf + size, PAGE_SIZE) - 223 (uintptr_t)buf / PAGE_SIZE; 224 } 225 226 static void smb_direct_destroy_pools(struct smb_direct_transport *transport); 227 static void smb_direct_post_recv_credits(struct work_struct *work); 228 static int smb_direct_post_send_data(struct smb_direct_transport *t, 229 struct smb_direct_send_ctx *send_ctx, 230 struct kvec *iov, int niov, 231 int remaining_data_length); 232 233 static inline struct smb_direct_transport * 234 smb_trans_direct_transfort(struct ksmbd_transport *t) 235 { 236 return container_of(t, struct smb_direct_transport, transport); 237 } 238 239 static inline void 240 *smb_direct_recvmsg_payload(struct smb_direct_recvmsg *recvmsg) 241 { 242 return (void *)recvmsg->packet; 243 } 244 245 static inline bool is_receive_credit_post_required(int receive_credits, 246 int avail_recvmsg_count) 247 { 248 return receive_credits <= 
(smb_direct_receive_credit_max >> 3) && 249 avail_recvmsg_count >= (receive_credits >> 2); 250 } 251 252 static struct 253 smb_direct_recvmsg *get_free_recvmsg(struct smb_direct_transport *t) 254 { 255 struct smb_direct_recvmsg *recvmsg = NULL; 256 257 spin_lock(&t->recvmsg_queue_lock); 258 if (!list_empty(&t->recvmsg_queue)) { 259 recvmsg = list_first_entry(&t->recvmsg_queue, 260 struct smb_direct_recvmsg, 261 list); 262 list_del(&recvmsg->list); 263 } 264 spin_unlock(&t->recvmsg_queue_lock); 265 return recvmsg; 266 } 267 268 static void put_recvmsg(struct smb_direct_transport *t, 269 struct smb_direct_recvmsg *recvmsg) 270 { 271 ib_dma_unmap_single(t->cm_id->device, recvmsg->sge.addr, 272 recvmsg->sge.length, DMA_FROM_DEVICE); 273 274 spin_lock(&t->recvmsg_queue_lock); 275 list_add(&recvmsg->list, &t->recvmsg_queue); 276 spin_unlock(&t->recvmsg_queue_lock); 277 } 278 279 static struct 280 smb_direct_recvmsg *get_empty_recvmsg(struct smb_direct_transport *t) 281 { 282 struct smb_direct_recvmsg *recvmsg = NULL; 283 284 spin_lock(&t->empty_recvmsg_queue_lock); 285 if (!list_empty(&t->empty_recvmsg_queue)) { 286 recvmsg = list_first_entry(&t->empty_recvmsg_queue, 287 struct smb_direct_recvmsg, list); 288 list_del(&recvmsg->list); 289 } 290 spin_unlock(&t->empty_recvmsg_queue_lock); 291 return recvmsg; 292 } 293 294 static void put_empty_recvmsg(struct smb_direct_transport *t, 295 struct smb_direct_recvmsg *recvmsg) 296 { 297 ib_dma_unmap_single(t->cm_id->device, recvmsg->sge.addr, 298 recvmsg->sge.length, DMA_FROM_DEVICE); 299 300 spin_lock(&t->empty_recvmsg_queue_lock); 301 list_add_tail(&recvmsg->list, &t->empty_recvmsg_queue); 302 spin_unlock(&t->empty_recvmsg_queue_lock); 303 } 304 305 static void enqueue_reassembly(struct smb_direct_transport *t, 306 struct smb_direct_recvmsg *recvmsg, 307 int data_length) 308 { 309 spin_lock(&t->reassembly_queue_lock); 310 list_add_tail(&recvmsg->list, &t->reassembly_queue); 311 t->reassembly_queue_length++; 312 /* 313 * Make sure reassembly_data_length is updated after list and 314 * reassembly_queue_length are updated. 
On the dequeue side 315 * reassembly_data_length is checked without a lock to determine 316 * if reassembly_queue_length and list is up to date 317 */ 318 virt_wmb(); 319 t->reassembly_data_length += data_length; 320 spin_unlock(&t->reassembly_queue_lock); 321 } 322 323 static struct smb_direct_recvmsg *get_first_reassembly(struct smb_direct_transport *t) 324 { 325 if (!list_empty(&t->reassembly_queue)) 326 return list_first_entry(&t->reassembly_queue, 327 struct smb_direct_recvmsg, list); 328 else 329 return NULL; 330 } 331 332 static void smb_direct_disconnect_rdma_work(struct work_struct *work) 333 { 334 struct smb_direct_transport *t = 335 container_of(work, struct smb_direct_transport, 336 disconnect_work); 337 338 if (t->status == SMB_DIRECT_CS_CONNECTED) { 339 t->status = SMB_DIRECT_CS_DISCONNECTING; 340 rdma_disconnect(t->cm_id); 341 } 342 } 343 344 static void 345 smb_direct_disconnect_rdma_connection(struct smb_direct_transport *t) 346 { 347 if (t->status == SMB_DIRECT_CS_CONNECTED) 348 queue_work(smb_direct_wq, &t->disconnect_work); 349 } 350 351 static void smb_direct_send_immediate_work(struct work_struct *work) 352 { 353 struct smb_direct_transport *t = container_of(work, 354 struct smb_direct_transport, send_immediate_work); 355 356 if (t->status != SMB_DIRECT_CS_CONNECTED) 357 return; 358 359 smb_direct_post_send_data(t, NULL, NULL, 0, 0); 360 } 361 362 static struct smb_direct_transport *alloc_transport(struct rdma_cm_id *cm_id) 363 { 364 struct smb_direct_transport *t; 365 struct ksmbd_conn *conn; 366 367 t = kzalloc(sizeof(*t), KSMBD_DEFAULT_GFP); 368 if (!t) 369 return NULL; 370 371 t->cm_id = cm_id; 372 cm_id->context = t; 373 374 t->status = SMB_DIRECT_CS_NEW; 375 init_waitqueue_head(&t->wait_status); 376 377 spin_lock_init(&t->reassembly_queue_lock); 378 INIT_LIST_HEAD(&t->reassembly_queue); 379 t->reassembly_data_length = 0; 380 t->reassembly_queue_length = 0; 381 init_waitqueue_head(&t->wait_reassembly_queue); 382 init_waitqueue_head(&t->wait_send_credits); 383 init_waitqueue_head(&t->wait_rw_credits); 384 385 spin_lock_init(&t->receive_credit_lock); 386 spin_lock_init(&t->recvmsg_queue_lock); 387 INIT_LIST_HEAD(&t->recvmsg_queue); 388 389 spin_lock_init(&t->empty_recvmsg_queue_lock); 390 INIT_LIST_HEAD(&t->empty_recvmsg_queue); 391 392 init_waitqueue_head(&t->wait_send_pending); 393 atomic_set(&t->send_pending, 0); 394 395 spin_lock_init(&t->lock_new_recv_credits); 396 397 INIT_DELAYED_WORK(&t->post_recv_credits_work, 398 smb_direct_post_recv_credits); 399 INIT_WORK(&t->send_immediate_work, smb_direct_send_immediate_work); 400 INIT_WORK(&t->disconnect_work, smb_direct_disconnect_rdma_work); 401 402 conn = ksmbd_conn_alloc(); 403 if (!conn) 404 goto err; 405 conn->transport = KSMBD_TRANS(t); 406 KSMBD_TRANS(t)->conn = conn; 407 KSMBD_TRANS(t)->ops = &ksmbd_smb_direct_transport_ops; 408 return t; 409 err: 410 kfree(t); 411 return NULL; 412 } 413 414 static void smb_direct_free_transport(struct ksmbd_transport *kt) 415 { 416 kfree(SMBD_TRANS(kt)); 417 } 418 419 static void free_transport(struct smb_direct_transport *t) 420 { 421 struct smb_direct_recvmsg *recvmsg; 422 423 wake_up_interruptible(&t->wait_send_credits); 424 425 ksmbd_debug(RDMA, "wait for all send posted to IB to finish\n"); 426 wait_event(t->wait_send_pending, 427 atomic_read(&t->send_pending) == 0); 428 429 cancel_work_sync(&t->disconnect_work); 430 cancel_delayed_work_sync(&t->post_recv_credits_work); 431 cancel_work_sync(&t->send_immediate_work); 432 433 if (t->qp) { 434 ib_drain_qp(t->qp); 435 
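                /*
                 * The QP has been drained, so no outstanding work requests
                 * can still reference MRs from the pool; tear the MR pool
                 * down before the QP itself is destroyed.
                 */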
ib_mr_pool_destroy(t->qp, &t->qp->rdma_mrs); 436 t->qp = NULL; 437 rdma_destroy_qp(t->cm_id); 438 } 439 440 ksmbd_debug(RDMA, "drain the reassembly queue\n"); 441 do { 442 spin_lock(&t->reassembly_queue_lock); 443 recvmsg = get_first_reassembly(t); 444 if (recvmsg) { 445 list_del(&recvmsg->list); 446 spin_unlock(&t->reassembly_queue_lock); 447 put_recvmsg(t, recvmsg); 448 } else { 449 spin_unlock(&t->reassembly_queue_lock); 450 } 451 } while (recvmsg); 452 t->reassembly_data_length = 0; 453 454 if (t->send_cq) 455 ib_free_cq(t->send_cq); 456 if (t->recv_cq) 457 ib_free_cq(t->recv_cq); 458 if (t->pd) 459 ib_dealloc_pd(t->pd); 460 if (t->cm_id) 461 rdma_destroy_id(t->cm_id); 462 463 smb_direct_destroy_pools(t); 464 ksmbd_conn_free(KSMBD_TRANS(t)->conn); 465 } 466 467 static struct smb_direct_sendmsg 468 *smb_direct_alloc_sendmsg(struct smb_direct_transport *t) 469 { 470 struct smb_direct_sendmsg *msg; 471 472 msg = mempool_alloc(t->sendmsg_mempool, KSMBD_DEFAULT_GFP); 473 if (!msg) 474 return ERR_PTR(-ENOMEM); 475 msg->transport = t; 476 INIT_LIST_HEAD(&msg->list); 477 msg->num_sge = 0; 478 return msg; 479 } 480 481 static void smb_direct_free_sendmsg(struct smb_direct_transport *t, 482 struct smb_direct_sendmsg *msg) 483 { 484 int i; 485 486 if (msg->num_sge > 0) { 487 ib_dma_unmap_single(t->cm_id->device, 488 msg->sge[0].addr, msg->sge[0].length, 489 DMA_TO_DEVICE); 490 for (i = 1; i < msg->num_sge; i++) 491 ib_dma_unmap_page(t->cm_id->device, 492 msg->sge[i].addr, msg->sge[i].length, 493 DMA_TO_DEVICE); 494 } 495 mempool_free(msg, t->sendmsg_mempool); 496 } 497 498 static int smb_direct_check_recvmsg(struct smb_direct_recvmsg *recvmsg) 499 { 500 switch (recvmsg->type) { 501 case SMB_DIRECT_MSG_DATA_TRANSFER: { 502 struct smb_direct_data_transfer *req = 503 (struct smb_direct_data_transfer *)recvmsg->packet; 504 struct smb2_hdr *hdr = (struct smb2_hdr *)(recvmsg->packet 505 + le32_to_cpu(req->data_offset)); 506 ksmbd_debug(RDMA, 507 "CreditGranted: %u, CreditRequested: %u, DataLength: %u, RemainingDataLength: %u, SMB: %x, Command: %u\n", 508 le16_to_cpu(req->credits_granted), 509 le16_to_cpu(req->credits_requested), 510 req->data_length, req->remaining_data_length, 511 hdr->ProtocolId, hdr->Command); 512 break; 513 } 514 case SMB_DIRECT_MSG_NEGOTIATE_REQ: { 515 struct smb_direct_negotiate_req *req = 516 (struct smb_direct_negotiate_req *)recvmsg->packet; 517 ksmbd_debug(RDMA, 518 "MinVersion: %u, MaxVersion: %u, CreditRequested: %u, MaxSendSize: %u, MaxRecvSize: %u, MaxFragmentedSize: %u\n", 519 le16_to_cpu(req->min_version), 520 le16_to_cpu(req->max_version), 521 le16_to_cpu(req->credits_requested), 522 le32_to_cpu(req->preferred_send_size), 523 le32_to_cpu(req->max_receive_size), 524 le32_to_cpu(req->max_fragmented_size)); 525 if (le16_to_cpu(req->min_version) > 0x0100 || 526 le16_to_cpu(req->max_version) < 0x0100) 527 return -EOPNOTSUPP; 528 if (le16_to_cpu(req->credits_requested) <= 0 || 529 le32_to_cpu(req->max_receive_size) <= 128 || 530 le32_to_cpu(req->max_fragmented_size) <= 531 128 * 1024) 532 return -ECONNABORTED; 533 534 break; 535 } 536 default: 537 return -EINVAL; 538 } 539 return 0; 540 } 541 542 static void recv_done(struct ib_cq *cq, struct ib_wc *wc) 543 { 544 struct smb_direct_recvmsg *recvmsg; 545 struct smb_direct_transport *t; 546 547 recvmsg = container_of(wc->wr_cqe, struct smb_direct_recvmsg, cqe); 548 t = recvmsg->transport; 549 550 if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_RECV) { 551 if (wc->status != IB_WC_WR_FLUSH_ERR) { 552 pr_err("Recv error. 
status='%s (%d)' opcode=%d\n", 553 ib_wc_status_msg(wc->status), wc->status, 554 wc->opcode); 555 smb_direct_disconnect_rdma_connection(t); 556 } 557 put_empty_recvmsg(t, recvmsg); 558 return; 559 } 560 561 ksmbd_debug(RDMA, "Recv completed. status='%s (%d)', opcode=%d\n", 562 ib_wc_status_msg(wc->status), wc->status, 563 wc->opcode); 564 565 ib_dma_sync_single_for_cpu(wc->qp->device, recvmsg->sge.addr, 566 recvmsg->sge.length, DMA_FROM_DEVICE); 567 568 switch (recvmsg->type) { 569 case SMB_DIRECT_MSG_NEGOTIATE_REQ: 570 if (wc->byte_len < sizeof(struct smb_direct_negotiate_req)) { 571 put_empty_recvmsg(t, recvmsg); 572 return; 573 } 574 t->negotiation_requested = true; 575 t->full_packet_received = true; 576 t->status = SMB_DIRECT_CS_CONNECTED; 577 enqueue_reassembly(t, recvmsg, 0); 578 wake_up_interruptible(&t->wait_status); 579 break; 580 case SMB_DIRECT_MSG_DATA_TRANSFER: { 581 struct smb_direct_data_transfer *data_transfer = 582 (struct smb_direct_data_transfer *)recvmsg->packet; 583 unsigned int data_length; 584 int avail_recvmsg_count, receive_credits; 585 586 if (wc->byte_len < 587 offsetof(struct smb_direct_data_transfer, padding)) { 588 put_empty_recvmsg(t, recvmsg); 589 return; 590 } 591 592 data_length = le32_to_cpu(data_transfer->data_length); 593 if (data_length) { 594 if (wc->byte_len < sizeof(struct smb_direct_data_transfer) + 595 (u64)data_length) { 596 put_empty_recvmsg(t, recvmsg); 597 return; 598 } 599 600 if (t->full_packet_received) 601 recvmsg->first_segment = true; 602 603 if (le32_to_cpu(data_transfer->remaining_data_length)) 604 t->full_packet_received = false; 605 else 606 t->full_packet_received = true; 607 608 enqueue_reassembly(t, recvmsg, (int)data_length); 609 wake_up_interruptible(&t->wait_reassembly_queue); 610 611 spin_lock(&t->receive_credit_lock); 612 receive_credits = --(t->recv_credits); 613 avail_recvmsg_count = t->count_avail_recvmsg; 614 spin_unlock(&t->receive_credit_lock); 615 } else { 616 put_empty_recvmsg(t, recvmsg); 617 618 spin_lock(&t->receive_credit_lock); 619 receive_credits = --(t->recv_credits); 620 avail_recvmsg_count = ++(t->count_avail_recvmsg); 621 spin_unlock(&t->receive_credit_lock); 622 } 623 624 t->recv_credit_target = 625 le16_to_cpu(data_transfer->credits_requested); 626 atomic_add(le16_to_cpu(data_transfer->credits_granted), 627 &t->send_credits); 628 629 if (le16_to_cpu(data_transfer->flags) & 630 SMB_DIRECT_RESPONSE_REQUESTED) 631 queue_work(smb_direct_wq, &t->send_immediate_work); 632 633 if (atomic_read(&t->send_credits) > 0) 634 wake_up_interruptible(&t->wait_send_credits); 635 636 if (is_receive_credit_post_required(receive_credits, avail_recvmsg_count)) 637 mod_delayed_work(smb_direct_wq, 638 &t->post_recv_credits_work, 0); 639 break; 640 } 641 default: 642 break; 643 } 644 } 645 646 static int smb_direct_post_recv(struct smb_direct_transport *t, 647 struct smb_direct_recvmsg *recvmsg) 648 { 649 struct ib_recv_wr wr; 650 int ret; 651 652 recvmsg->sge.addr = ib_dma_map_single(t->cm_id->device, 653 recvmsg->packet, t->max_recv_size, 654 DMA_FROM_DEVICE); 655 ret = ib_dma_mapping_error(t->cm_id->device, recvmsg->sge.addr); 656 if (ret) 657 return ret; 658 recvmsg->sge.length = t->max_recv_size; 659 recvmsg->sge.lkey = t->pd->local_dma_lkey; 660 recvmsg->cqe.done = recv_done; 661 662 wr.wr_cqe = &recvmsg->cqe; 663 wr.next = NULL; 664 wr.sg_list = &recvmsg->sge; 665 wr.num_sge = 1; 666 667 ret = ib_post_recv(t->qp, &wr, NULL); 668 if (ret) { 669 pr_err("Can't post recv: %d\n", ret); 670 ib_dma_unmap_single(t->cm_id->device, 
                                    recvmsg->sge.addr, recvmsg->sge.length,
                                    DMA_FROM_DEVICE);
                smb_direct_disconnect_rdma_connection(t);
                return ret;
        }
        return ret;
}

static int smb_direct_read(struct ksmbd_transport *t, char *buf,
                           unsigned int size, int unused)
{
        struct smb_direct_recvmsg *recvmsg;
        struct smb_direct_data_transfer *data_transfer;
        int to_copy, to_read, data_read, offset;
        u32 data_length, remaining_data_length, data_offset;
        int rc;
        struct smb_direct_transport *st = smb_trans_direct_transfort(t);

again:
        if (st->status != SMB_DIRECT_CS_CONNECTED) {
                pr_err("disconnected\n");
                return -ENOTCONN;
        }

        /*
         * No need to hold the reassembly queue lock all the time as we are
         * the only one reading from the front of the queue. The transport
         * may add more entries to the back of the queue at the same time.
         */
        if (st->reassembly_data_length >= size) {
                int queue_length;
                int queue_removed = 0;

                /*
                 * Need to make sure reassembly_data_length is read before
                 * reading reassembly_queue_length and calling
                 * get_first_reassembly. This call is lock free
                 * as we never read the end of the queue, which is being
                 * updated in SOFTIRQ context as more data is received.
                 */
                virt_rmb();
                queue_length = st->reassembly_queue_length;
                data_read = 0;
                to_read = size;
                offset = st->first_entry_offset;
                while (data_read < size) {
                        recvmsg = get_first_reassembly(st);
                        data_transfer = smb_direct_recvmsg_payload(recvmsg);
                        data_length = le32_to_cpu(data_transfer->data_length);
                        remaining_data_length =
                                le32_to_cpu(data_transfer->remaining_data_length);
                        data_offset = le32_to_cpu(data_transfer->data_offset);

                        /*
                         * The upper layer expects an RFC1002 length at the
                         * beginning of the payload. Return it to indicate
                         * the total length of the packet. This minimizes the
                         * change to the upper layer packet processing logic
                         * and will eventually be removed when an intermediate
                         * transport layer is added.
                         */
                        if (recvmsg->first_segment && size == 4) {
                                unsigned int rfc1002_len =
                                        data_length + remaining_data_length;
                                *((__be32 *)buf) = cpu_to_be32(rfc1002_len);
                                data_read = 4;
                                recvmsg->first_segment = false;
                                ksmbd_debug(RDMA,
                                            "returning rfc1002 length %d\n",
                                            rfc1002_len);
                                goto read_rfc1002_done;
                        }

                        to_copy = min_t(int, data_length - offset, to_read);
                        memcpy(buf + data_read,
                               (char *)data_transfer + data_offset + offset,
                               to_copy);

                        /* move on to the next buffer?
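                         * If this recvmsg's payload has been fully consumed,
                         * return it to the free list via put_recvmsg() and
                         * count it in queue_removed so the receive-credit
                         * accounting below can be updated.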
*/ 749 if (to_copy == data_length - offset) { 750 queue_length--; 751 /* 752 * No need to lock if we are not at the 753 * end of the queue 754 */ 755 if (queue_length) { 756 list_del(&recvmsg->list); 757 } else { 758 spin_lock_irq(&st->reassembly_queue_lock); 759 list_del(&recvmsg->list); 760 spin_unlock_irq(&st->reassembly_queue_lock); 761 } 762 queue_removed++; 763 put_recvmsg(st, recvmsg); 764 offset = 0; 765 } else { 766 offset += to_copy; 767 } 768 769 to_read -= to_copy; 770 data_read += to_copy; 771 } 772 773 spin_lock_irq(&st->reassembly_queue_lock); 774 st->reassembly_data_length -= data_read; 775 st->reassembly_queue_length -= queue_removed; 776 spin_unlock_irq(&st->reassembly_queue_lock); 777 778 spin_lock(&st->receive_credit_lock); 779 st->count_avail_recvmsg += queue_removed; 780 if (is_receive_credit_post_required(st->recv_credits, st->count_avail_recvmsg)) { 781 spin_unlock(&st->receive_credit_lock); 782 mod_delayed_work(smb_direct_wq, 783 &st->post_recv_credits_work, 0); 784 } else { 785 spin_unlock(&st->receive_credit_lock); 786 } 787 788 st->first_entry_offset = offset; 789 ksmbd_debug(RDMA, 790 "returning to thread data_read=%d reassembly_data_length=%d first_entry_offset=%d\n", 791 data_read, st->reassembly_data_length, 792 st->first_entry_offset); 793 read_rfc1002_done: 794 return data_read; 795 } 796 797 ksmbd_debug(RDMA, "wait_event on more data\n"); 798 rc = wait_event_interruptible(st->wait_reassembly_queue, 799 st->reassembly_data_length >= size || 800 st->status != SMB_DIRECT_CS_CONNECTED); 801 if (rc) 802 return -EINTR; 803 804 goto again; 805 } 806 807 static void smb_direct_post_recv_credits(struct work_struct *work) 808 { 809 struct smb_direct_transport *t = container_of(work, 810 struct smb_direct_transport, post_recv_credits_work.work); 811 struct smb_direct_recvmsg *recvmsg; 812 int receive_credits, credits = 0; 813 int ret; 814 int use_free = 1; 815 816 spin_lock(&t->receive_credit_lock); 817 receive_credits = t->recv_credits; 818 spin_unlock(&t->receive_credit_lock); 819 820 if (receive_credits < t->recv_credit_target) { 821 while (true) { 822 if (use_free) 823 recvmsg = get_free_recvmsg(t); 824 else 825 recvmsg = get_empty_recvmsg(t); 826 if (!recvmsg) { 827 if (use_free) { 828 use_free = 0; 829 continue; 830 } else { 831 break; 832 } 833 } 834 835 recvmsg->type = SMB_DIRECT_MSG_DATA_TRANSFER; 836 recvmsg->first_segment = false; 837 838 ret = smb_direct_post_recv(t, recvmsg); 839 if (ret) { 840 pr_err("Can't post recv: %d\n", ret); 841 put_recvmsg(t, recvmsg); 842 break; 843 } 844 credits++; 845 } 846 } 847 848 spin_lock(&t->receive_credit_lock); 849 t->recv_credits += credits; 850 t->count_avail_recvmsg -= credits; 851 spin_unlock(&t->receive_credit_lock); 852 853 spin_lock(&t->lock_new_recv_credits); 854 t->new_recv_credits += credits; 855 spin_unlock(&t->lock_new_recv_credits); 856 857 if (credits) 858 queue_work(smb_direct_wq, &t->send_immediate_work); 859 } 860 861 static void send_done(struct ib_cq *cq, struct ib_wc *wc) 862 { 863 struct smb_direct_sendmsg *sendmsg, *sibling; 864 struct smb_direct_transport *t; 865 struct list_head *pos, *prev, *end; 866 867 sendmsg = container_of(wc->wr_cqe, struct smb_direct_sendmsg, cqe); 868 t = sendmsg->transport; 869 870 ksmbd_debug(RDMA, "Send completed. status='%s (%d)', opcode=%d\n", 871 ib_wc_status_msg(wc->status), wc->status, 872 wc->opcode); 873 874 if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_SEND) { 875 pr_err("Send error. 
status='%s (%d)', opcode=%d\n", 876 ib_wc_status_msg(wc->status), wc->status, 877 wc->opcode); 878 smb_direct_disconnect_rdma_connection(t); 879 } 880 881 if (atomic_dec_and_test(&t->send_pending)) 882 wake_up(&t->wait_send_pending); 883 884 /* iterate and free the list of messages in reverse. the list's head 885 * is invalid. 886 */ 887 for (pos = &sendmsg->list, prev = pos->prev, end = sendmsg->list.next; 888 prev != end; pos = prev, prev = prev->prev) { 889 sibling = container_of(pos, struct smb_direct_sendmsg, list); 890 smb_direct_free_sendmsg(t, sibling); 891 } 892 893 sibling = container_of(pos, struct smb_direct_sendmsg, list); 894 smb_direct_free_sendmsg(t, sibling); 895 } 896 897 static int manage_credits_prior_sending(struct smb_direct_transport *t) 898 { 899 int new_credits; 900 901 spin_lock(&t->lock_new_recv_credits); 902 new_credits = t->new_recv_credits; 903 t->new_recv_credits = 0; 904 spin_unlock(&t->lock_new_recv_credits); 905 906 return new_credits; 907 } 908 909 static int smb_direct_post_send(struct smb_direct_transport *t, 910 struct ib_send_wr *wr) 911 { 912 int ret; 913 914 atomic_inc(&t->send_pending); 915 ret = ib_post_send(t->qp, wr, NULL); 916 if (ret) { 917 pr_err("failed to post send: %d\n", ret); 918 if (atomic_dec_and_test(&t->send_pending)) 919 wake_up(&t->wait_send_pending); 920 smb_direct_disconnect_rdma_connection(t); 921 } 922 return ret; 923 } 924 925 static void smb_direct_send_ctx_init(struct smb_direct_transport *t, 926 struct smb_direct_send_ctx *send_ctx, 927 bool need_invalidate_rkey, 928 unsigned int remote_key) 929 { 930 INIT_LIST_HEAD(&send_ctx->msg_list); 931 send_ctx->wr_cnt = 0; 932 send_ctx->need_invalidate_rkey = need_invalidate_rkey; 933 send_ctx->remote_key = remote_key; 934 } 935 936 static int smb_direct_flush_send_list(struct smb_direct_transport *t, 937 struct smb_direct_send_ctx *send_ctx, 938 bool is_last) 939 { 940 struct smb_direct_sendmsg *first, *last; 941 int ret; 942 943 if (list_empty(&send_ctx->msg_list)) 944 return 0; 945 946 first = list_first_entry(&send_ctx->msg_list, 947 struct smb_direct_sendmsg, 948 list); 949 last = list_last_entry(&send_ctx->msg_list, 950 struct smb_direct_sendmsg, 951 list); 952 953 last->wr.send_flags = IB_SEND_SIGNALED; 954 last->wr.wr_cqe = &last->cqe; 955 if (is_last && send_ctx->need_invalidate_rkey) { 956 last->wr.opcode = IB_WR_SEND_WITH_INV; 957 last->wr.ex.invalidate_rkey = send_ctx->remote_key; 958 } 959 960 ret = smb_direct_post_send(t, &first->wr); 961 if (!ret) { 962 smb_direct_send_ctx_init(t, send_ctx, 963 send_ctx->need_invalidate_rkey, 964 send_ctx->remote_key); 965 } else { 966 atomic_add(send_ctx->wr_cnt, &t->send_credits); 967 wake_up(&t->wait_send_credits); 968 list_for_each_entry_safe(first, last, &send_ctx->msg_list, 969 list) { 970 smb_direct_free_sendmsg(t, first); 971 } 972 } 973 return ret; 974 } 975 976 static int wait_for_credits(struct smb_direct_transport *t, 977 wait_queue_head_t *waitq, atomic_t *total_credits, 978 int needed) 979 { 980 int ret; 981 982 do { 983 if (atomic_sub_return(needed, total_credits) >= 0) 984 return 0; 985 986 atomic_add(needed, total_credits); 987 ret = wait_event_interruptible(*waitq, 988 atomic_read(total_credits) >= needed || 989 t->status != SMB_DIRECT_CS_CONNECTED); 990 991 if (t->status != SMB_DIRECT_CS_CONNECTED) 992 return -ENOTCONN; 993 else if (ret < 0) 994 return ret; 995 } while (true); 996 } 997 998 static int wait_for_send_credits(struct smb_direct_transport *t, 999 struct smb_direct_send_ctx *send_ctx) 1000 { 1001 int ret; 
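        /*
         * If a batched send context is in use, flush it before sleeping for
         * a credit: once it has accumulated many work requests (16) or send
         * credits are nearly exhausted, posting the chain now lets the
         * messages that are already built (and any credits they grant) reach
         * the peer instead of being held back.
         */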
1002 1003 if (send_ctx && 1004 (send_ctx->wr_cnt >= 16 || atomic_read(&t->send_credits) <= 1)) { 1005 ret = smb_direct_flush_send_list(t, send_ctx, false); 1006 if (ret) 1007 return ret; 1008 } 1009 1010 return wait_for_credits(t, &t->wait_send_credits, &t->send_credits, 1); 1011 } 1012 1013 static int wait_for_rw_credits(struct smb_direct_transport *t, int credits) 1014 { 1015 return wait_for_credits(t, &t->wait_rw_credits, &t->rw_credits, credits); 1016 } 1017 1018 static int calc_rw_credits(struct smb_direct_transport *t, 1019 char *buf, unsigned int len) 1020 { 1021 return DIV_ROUND_UP(get_buf_page_count(buf, len), 1022 t->pages_per_rw_credit); 1023 } 1024 1025 static int smb_direct_create_header(struct smb_direct_transport *t, 1026 int size, int remaining_data_length, 1027 struct smb_direct_sendmsg **sendmsg_out) 1028 { 1029 struct smb_direct_sendmsg *sendmsg; 1030 struct smb_direct_data_transfer *packet; 1031 int header_length; 1032 int ret; 1033 1034 sendmsg = smb_direct_alloc_sendmsg(t); 1035 if (IS_ERR(sendmsg)) 1036 return PTR_ERR(sendmsg); 1037 1038 /* Fill in the packet header */ 1039 packet = (struct smb_direct_data_transfer *)sendmsg->packet; 1040 packet->credits_requested = cpu_to_le16(t->send_credit_target); 1041 packet->credits_granted = cpu_to_le16(manage_credits_prior_sending(t)); 1042 1043 packet->flags = 0; 1044 packet->reserved = 0; 1045 if (!size) 1046 packet->data_offset = 0; 1047 else 1048 packet->data_offset = cpu_to_le32(24); 1049 packet->data_length = cpu_to_le32(size); 1050 packet->remaining_data_length = cpu_to_le32(remaining_data_length); 1051 packet->padding = 0; 1052 1053 ksmbd_debug(RDMA, 1054 "credits_requested=%d credits_granted=%d data_offset=%d data_length=%d remaining_data_length=%d\n", 1055 le16_to_cpu(packet->credits_requested), 1056 le16_to_cpu(packet->credits_granted), 1057 le32_to_cpu(packet->data_offset), 1058 le32_to_cpu(packet->data_length), 1059 le32_to_cpu(packet->remaining_data_length)); 1060 1061 /* Map the packet to DMA */ 1062 header_length = sizeof(struct smb_direct_data_transfer); 1063 /* If this is a packet without payload, don't send padding */ 1064 if (!size) 1065 header_length = 1066 offsetof(struct smb_direct_data_transfer, padding); 1067 1068 sendmsg->sge[0].addr = ib_dma_map_single(t->cm_id->device, 1069 (void *)packet, 1070 header_length, 1071 DMA_TO_DEVICE); 1072 ret = ib_dma_mapping_error(t->cm_id->device, sendmsg->sge[0].addr); 1073 if (ret) { 1074 smb_direct_free_sendmsg(t, sendmsg); 1075 return ret; 1076 } 1077 1078 sendmsg->num_sge = 1; 1079 sendmsg->sge[0].length = header_length; 1080 sendmsg->sge[0].lkey = t->pd->local_dma_lkey; 1081 1082 *sendmsg_out = sendmsg; 1083 return 0; 1084 } 1085 1086 static int get_sg_list(void *buf, int size, struct scatterlist *sg_list, int nentries) 1087 { 1088 bool high = is_vmalloc_addr(buf); 1089 struct page *page; 1090 int offset, len; 1091 int i = 0; 1092 1093 if (size <= 0 || nentries < get_buf_page_count(buf, size)) 1094 return -EINVAL; 1095 1096 offset = offset_in_page(buf); 1097 buf -= offset; 1098 while (size > 0) { 1099 len = min_t(int, PAGE_SIZE - offset, size); 1100 if (high) 1101 page = vmalloc_to_page(buf); 1102 else 1103 page = kmap_to_page(buf); 1104 1105 if (!sg_list) 1106 return -EINVAL; 1107 sg_set_page(sg_list, page, len, offset); 1108 sg_list = sg_next(sg_list); 1109 1110 buf += PAGE_SIZE; 1111 size -= len; 1112 offset = 0; 1113 i++; 1114 } 1115 return i; 1116 } 1117 1118 static int get_mapped_sg_list(struct ib_device *device, void *buf, int size, 1119 struct 
scatterlist *sg_list, int nentries, 1120 enum dma_data_direction dir) 1121 { 1122 int npages; 1123 1124 npages = get_sg_list(buf, size, sg_list, nentries); 1125 if (npages < 0) 1126 return -EINVAL; 1127 return ib_dma_map_sg(device, sg_list, npages, dir); 1128 } 1129 1130 static int post_sendmsg(struct smb_direct_transport *t, 1131 struct smb_direct_send_ctx *send_ctx, 1132 struct smb_direct_sendmsg *msg) 1133 { 1134 int i; 1135 1136 for (i = 0; i < msg->num_sge; i++) 1137 ib_dma_sync_single_for_device(t->cm_id->device, 1138 msg->sge[i].addr, msg->sge[i].length, 1139 DMA_TO_DEVICE); 1140 1141 msg->cqe.done = send_done; 1142 msg->wr.opcode = IB_WR_SEND; 1143 msg->wr.sg_list = &msg->sge[0]; 1144 msg->wr.num_sge = msg->num_sge; 1145 msg->wr.next = NULL; 1146 1147 if (send_ctx) { 1148 msg->wr.wr_cqe = NULL; 1149 msg->wr.send_flags = 0; 1150 if (!list_empty(&send_ctx->msg_list)) { 1151 struct smb_direct_sendmsg *last; 1152 1153 last = list_last_entry(&send_ctx->msg_list, 1154 struct smb_direct_sendmsg, 1155 list); 1156 last->wr.next = &msg->wr; 1157 } 1158 list_add_tail(&msg->list, &send_ctx->msg_list); 1159 send_ctx->wr_cnt++; 1160 return 0; 1161 } 1162 1163 msg->wr.wr_cqe = &msg->cqe; 1164 msg->wr.send_flags = IB_SEND_SIGNALED; 1165 return smb_direct_post_send(t, &msg->wr); 1166 } 1167 1168 static int smb_direct_post_send_data(struct smb_direct_transport *t, 1169 struct smb_direct_send_ctx *send_ctx, 1170 struct kvec *iov, int niov, 1171 int remaining_data_length) 1172 { 1173 int i, j, ret; 1174 struct smb_direct_sendmsg *msg; 1175 int data_length; 1176 struct scatterlist sg[SMB_DIRECT_MAX_SEND_SGES - 1]; 1177 1178 ret = wait_for_send_credits(t, send_ctx); 1179 if (ret) 1180 return ret; 1181 1182 data_length = 0; 1183 for (i = 0; i < niov; i++) 1184 data_length += iov[i].iov_len; 1185 1186 ret = smb_direct_create_header(t, data_length, remaining_data_length, 1187 &msg); 1188 if (ret) { 1189 atomic_inc(&t->send_credits); 1190 return ret; 1191 } 1192 1193 for (i = 0; i < niov; i++) { 1194 struct ib_sge *sge; 1195 int sg_cnt; 1196 1197 sg_init_table(sg, SMB_DIRECT_MAX_SEND_SGES - 1); 1198 sg_cnt = get_mapped_sg_list(t->cm_id->device, 1199 iov[i].iov_base, iov[i].iov_len, 1200 sg, SMB_DIRECT_MAX_SEND_SGES - 1, 1201 DMA_TO_DEVICE); 1202 if (sg_cnt <= 0) { 1203 pr_err("failed to map buffer\n"); 1204 ret = -ENOMEM; 1205 goto err; 1206 } else if (sg_cnt + msg->num_sge > SMB_DIRECT_MAX_SEND_SGES) { 1207 pr_err("buffer not fitted into sges\n"); 1208 ret = -E2BIG; 1209 ib_dma_unmap_sg(t->cm_id->device, sg, sg_cnt, 1210 DMA_TO_DEVICE); 1211 goto err; 1212 } 1213 1214 for (j = 0; j < sg_cnt; j++) { 1215 sge = &msg->sge[msg->num_sge]; 1216 sge->addr = sg_dma_address(&sg[j]); 1217 sge->length = sg_dma_len(&sg[j]); 1218 sge->lkey = t->pd->local_dma_lkey; 1219 msg->num_sge++; 1220 } 1221 } 1222 1223 ret = post_sendmsg(t, send_ctx, msg); 1224 if (ret) 1225 goto err; 1226 return 0; 1227 err: 1228 smb_direct_free_sendmsg(t, msg); 1229 atomic_inc(&t->send_credits); 1230 return ret; 1231 } 1232 1233 static int smb_direct_writev(struct ksmbd_transport *t, 1234 struct kvec *iov, int niovs, int buflen, 1235 bool need_invalidate, unsigned int remote_key) 1236 { 1237 struct smb_direct_transport *st = smb_trans_direct_transfort(t); 1238 int remaining_data_length; 1239 int start, i, j; 1240 int max_iov_size = st->max_send_size - 1241 sizeof(struct smb_direct_data_transfer); 1242 int ret; 1243 struct kvec vec; 1244 struct smb_direct_send_ctx send_ctx; 1245 1246 if (st->status != SMB_DIRECT_CS_CONNECTED) 1247 return 
-ENOTCONN; 1248 1249 //FIXME: skip RFC1002 header.. 1250 buflen -= 4; 1251 1252 remaining_data_length = buflen; 1253 ksmbd_debug(RDMA, "Sending smb (RDMA): smb_len=%u\n", buflen); 1254 1255 smb_direct_send_ctx_init(st, &send_ctx, need_invalidate, remote_key); 1256 start = i = 1; 1257 buflen = 0; 1258 while (true) { 1259 buflen += iov[i].iov_len; 1260 if (buflen > max_iov_size) { 1261 if (i > start) { 1262 remaining_data_length -= 1263 (buflen - iov[i].iov_len); 1264 ret = smb_direct_post_send_data(st, &send_ctx, 1265 &iov[start], i - start, 1266 remaining_data_length); 1267 if (ret) 1268 goto done; 1269 } else { 1270 /* iov[start] is too big, break it */ 1271 int nvec = (buflen + max_iov_size - 1) / 1272 max_iov_size; 1273 1274 for (j = 0; j < nvec; j++) { 1275 vec.iov_base = 1276 (char *)iov[start].iov_base + 1277 j * max_iov_size; 1278 vec.iov_len = 1279 min_t(int, max_iov_size, 1280 buflen - max_iov_size * j); 1281 remaining_data_length -= vec.iov_len; 1282 ret = smb_direct_post_send_data(st, &send_ctx, &vec, 1, 1283 remaining_data_length); 1284 if (ret) 1285 goto done; 1286 } 1287 i++; 1288 if (i == niovs) 1289 break; 1290 } 1291 start = i; 1292 buflen = 0; 1293 } else { 1294 i++; 1295 if (i == niovs) { 1296 /* send out all remaining vecs */ 1297 remaining_data_length -= buflen; 1298 ret = smb_direct_post_send_data(st, &send_ctx, 1299 &iov[start], i - start, 1300 remaining_data_length); 1301 if (ret) 1302 goto done; 1303 break; 1304 } 1305 } 1306 } 1307 1308 done: 1309 ret = smb_direct_flush_send_list(st, &send_ctx, true); 1310 1311 /* 1312 * As an optimization, we don't wait for individual I/O to finish 1313 * before sending the next one. 1314 * Send them all and wait for pending send count to get to 0 1315 * that means all the I/Os have been out and we are good to return 1316 */ 1317 1318 wait_event(st->wait_send_pending, 1319 atomic_read(&st->send_pending) == 0); 1320 return ret; 1321 } 1322 1323 static void smb_direct_free_rdma_rw_msg(struct smb_direct_transport *t, 1324 struct smb_direct_rdma_rw_msg *msg, 1325 enum dma_data_direction dir) 1326 { 1327 rdma_rw_ctx_destroy(&msg->rw_ctx, t->qp, t->qp->port, 1328 msg->sgt.sgl, msg->sgt.nents, dir); 1329 sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE); 1330 kfree(msg); 1331 } 1332 1333 static void read_write_done(struct ib_cq *cq, struct ib_wc *wc, 1334 enum dma_data_direction dir) 1335 { 1336 struct smb_direct_rdma_rw_msg *msg = container_of(wc->wr_cqe, 1337 struct smb_direct_rdma_rw_msg, cqe); 1338 struct smb_direct_transport *t = msg->t; 1339 1340 if (wc->status != IB_WC_SUCCESS) { 1341 msg->status = -EIO; 1342 pr_err("read/write error. 
opcode = %d, status = %s(%d)\n", 1343 wc->opcode, ib_wc_status_msg(wc->status), wc->status); 1344 if (wc->status != IB_WC_WR_FLUSH_ERR) 1345 smb_direct_disconnect_rdma_connection(t); 1346 } 1347 1348 complete(msg->completion); 1349 } 1350 1351 static void read_done(struct ib_cq *cq, struct ib_wc *wc) 1352 { 1353 read_write_done(cq, wc, DMA_FROM_DEVICE); 1354 } 1355 1356 static void write_done(struct ib_cq *cq, struct ib_wc *wc) 1357 { 1358 read_write_done(cq, wc, DMA_TO_DEVICE); 1359 } 1360 1361 static int smb_direct_rdma_xmit(struct smb_direct_transport *t, 1362 void *buf, int buf_len, 1363 struct smb2_buffer_desc_v1 *desc, 1364 unsigned int desc_len, 1365 bool is_read) 1366 { 1367 struct smb_direct_rdma_rw_msg *msg, *next_msg; 1368 int i, ret; 1369 DECLARE_COMPLETION_ONSTACK(completion); 1370 struct ib_send_wr *first_wr; 1371 LIST_HEAD(msg_list); 1372 char *desc_buf; 1373 int credits_needed; 1374 unsigned int desc_buf_len, desc_num = 0; 1375 1376 if (t->status != SMB_DIRECT_CS_CONNECTED) 1377 return -ENOTCONN; 1378 1379 if (buf_len > t->max_rdma_rw_size) 1380 return -EINVAL; 1381 1382 /* calculate needed credits */ 1383 credits_needed = 0; 1384 desc_buf = buf; 1385 for (i = 0; i < desc_len / sizeof(*desc); i++) { 1386 if (!buf_len) 1387 break; 1388 1389 desc_buf_len = le32_to_cpu(desc[i].length); 1390 if (!desc_buf_len) 1391 return -EINVAL; 1392 1393 if (desc_buf_len > buf_len) { 1394 desc_buf_len = buf_len; 1395 desc[i].length = cpu_to_le32(desc_buf_len); 1396 buf_len = 0; 1397 } 1398 1399 credits_needed += calc_rw_credits(t, desc_buf, desc_buf_len); 1400 desc_buf += desc_buf_len; 1401 buf_len -= desc_buf_len; 1402 desc_num++; 1403 } 1404 1405 ksmbd_debug(RDMA, "RDMA %s, len %#x, needed credits %#x\n", 1406 str_read_write(is_read), buf_len, credits_needed); 1407 1408 ret = wait_for_rw_credits(t, credits_needed); 1409 if (ret < 0) 1410 return ret; 1411 1412 /* build rdma_rw_ctx for each descriptor */ 1413 desc_buf = buf; 1414 for (i = 0; i < desc_num; i++) { 1415 msg = kzalloc(struct_size(msg, sg_list, SG_CHUNK_SIZE), 1416 KSMBD_DEFAULT_GFP); 1417 if (!msg) { 1418 ret = -ENOMEM; 1419 goto out; 1420 } 1421 1422 desc_buf_len = le32_to_cpu(desc[i].length); 1423 1424 msg->t = t; 1425 msg->cqe.done = is_read ? read_done : write_done; 1426 msg->completion = &completion; 1427 1428 msg->sgt.sgl = &msg->sg_list[0]; 1429 ret = sg_alloc_table_chained(&msg->sgt, 1430 get_buf_page_count(desc_buf, desc_buf_len), 1431 msg->sg_list, SG_CHUNK_SIZE); 1432 if (ret) { 1433 kfree(msg); 1434 ret = -ENOMEM; 1435 goto out; 1436 } 1437 1438 ret = get_sg_list(desc_buf, desc_buf_len, 1439 msg->sgt.sgl, msg->sgt.orig_nents); 1440 if (ret < 0) { 1441 sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE); 1442 kfree(msg); 1443 goto out; 1444 } 1445 1446 ret = rdma_rw_ctx_init(&msg->rw_ctx, t->qp, t->qp->port, 1447 msg->sgt.sgl, 1448 get_buf_page_count(desc_buf, desc_buf_len), 1449 0, 1450 le64_to_cpu(desc[i].offset), 1451 le32_to_cpu(desc[i].token), 1452 is_read ? 
DMA_FROM_DEVICE : DMA_TO_DEVICE); 1453 if (ret < 0) { 1454 pr_err("failed to init rdma_rw_ctx: %d\n", ret); 1455 sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE); 1456 kfree(msg); 1457 goto out; 1458 } 1459 1460 list_add_tail(&msg->list, &msg_list); 1461 desc_buf += desc_buf_len; 1462 } 1463 1464 /* concatenate work requests of rdma_rw_ctxs */ 1465 first_wr = NULL; 1466 list_for_each_entry_reverse(msg, &msg_list, list) { 1467 first_wr = rdma_rw_ctx_wrs(&msg->rw_ctx, t->qp, t->qp->port, 1468 &msg->cqe, first_wr); 1469 } 1470 1471 ret = ib_post_send(t->qp, first_wr, NULL); 1472 if (ret) { 1473 pr_err("failed to post send wr for RDMA R/W: %d\n", ret); 1474 goto out; 1475 } 1476 1477 msg = list_last_entry(&msg_list, struct smb_direct_rdma_rw_msg, list); 1478 wait_for_completion(&completion); 1479 ret = msg->status; 1480 out: 1481 list_for_each_entry_safe(msg, next_msg, &msg_list, list) { 1482 list_del(&msg->list); 1483 smb_direct_free_rdma_rw_msg(t, msg, 1484 is_read ? DMA_FROM_DEVICE : DMA_TO_DEVICE); 1485 } 1486 atomic_add(credits_needed, &t->rw_credits); 1487 wake_up(&t->wait_rw_credits); 1488 return ret; 1489 } 1490 1491 static int smb_direct_rdma_write(struct ksmbd_transport *t, 1492 void *buf, unsigned int buflen, 1493 struct smb2_buffer_desc_v1 *desc, 1494 unsigned int desc_len) 1495 { 1496 return smb_direct_rdma_xmit(smb_trans_direct_transfort(t), buf, buflen, 1497 desc, desc_len, false); 1498 } 1499 1500 static int smb_direct_rdma_read(struct ksmbd_transport *t, 1501 void *buf, unsigned int buflen, 1502 struct smb2_buffer_desc_v1 *desc, 1503 unsigned int desc_len) 1504 { 1505 return smb_direct_rdma_xmit(smb_trans_direct_transfort(t), buf, buflen, 1506 desc, desc_len, true); 1507 } 1508 1509 static void smb_direct_disconnect(struct ksmbd_transport *t) 1510 { 1511 struct smb_direct_transport *st = smb_trans_direct_transfort(t); 1512 1513 ksmbd_debug(RDMA, "Disconnecting cm_id=%p\n", st->cm_id); 1514 1515 smb_direct_disconnect_rdma_work(&st->disconnect_work); 1516 wait_event_interruptible(st->wait_status, 1517 st->status == SMB_DIRECT_CS_DISCONNECTED); 1518 free_transport(st); 1519 } 1520 1521 static void smb_direct_shutdown(struct ksmbd_transport *t) 1522 { 1523 struct smb_direct_transport *st = smb_trans_direct_transfort(t); 1524 1525 ksmbd_debug(RDMA, "smb-direct shutdown cm_id=%p\n", st->cm_id); 1526 1527 smb_direct_disconnect_rdma_work(&st->disconnect_work); 1528 } 1529 1530 static int smb_direct_cm_handler(struct rdma_cm_id *cm_id, 1531 struct rdma_cm_event *event) 1532 { 1533 struct smb_direct_transport *t = cm_id->context; 1534 1535 ksmbd_debug(RDMA, "RDMA CM event. cm_id=%p event=%s (%d)\n", 1536 cm_id, rdma_event_msg(event->event), event->event); 1537 1538 switch (event->event) { 1539 case RDMA_CM_EVENT_ESTABLISHED: { 1540 t->status = SMB_DIRECT_CS_CONNECTED; 1541 wake_up_interruptible(&t->wait_status); 1542 break; 1543 } 1544 case RDMA_CM_EVENT_DEVICE_REMOVAL: 1545 case RDMA_CM_EVENT_DISCONNECTED: { 1546 ib_drain_qp(t->qp); 1547 1548 t->status = SMB_DIRECT_CS_DISCONNECTED; 1549 wake_up_interruptible(&t->wait_status); 1550 wake_up_interruptible(&t->wait_reassembly_queue); 1551 wake_up(&t->wait_send_credits); 1552 break; 1553 } 1554 case RDMA_CM_EVENT_CONNECT_ERROR: { 1555 t->status = SMB_DIRECT_CS_DISCONNECTED; 1556 wake_up_interruptible(&t->wait_status); 1557 break; 1558 } 1559 default: 1560 pr_err("Unexpected RDMA CM event. 
cm_id=%p, event=%s (%d)\n", 1561 cm_id, rdma_event_msg(event->event), 1562 event->event); 1563 break; 1564 } 1565 return 0; 1566 } 1567 1568 static void smb_direct_qpair_handler(struct ib_event *event, void *context) 1569 { 1570 struct smb_direct_transport *t = context; 1571 1572 ksmbd_debug(RDMA, "Received QP event. cm_id=%p, event=%s (%d)\n", 1573 t->cm_id, ib_event_msg(event->event), event->event); 1574 1575 switch (event->event) { 1576 case IB_EVENT_CQ_ERR: 1577 case IB_EVENT_QP_FATAL: 1578 smb_direct_disconnect_rdma_connection(t); 1579 break; 1580 default: 1581 break; 1582 } 1583 } 1584 1585 static int smb_direct_send_negotiate_response(struct smb_direct_transport *t, 1586 int failed) 1587 { 1588 struct smb_direct_sendmsg *sendmsg; 1589 struct smb_direct_negotiate_resp *resp; 1590 int ret; 1591 1592 sendmsg = smb_direct_alloc_sendmsg(t); 1593 if (IS_ERR(sendmsg)) 1594 return -ENOMEM; 1595 1596 resp = (struct smb_direct_negotiate_resp *)sendmsg->packet; 1597 if (failed) { 1598 memset(resp, 0, sizeof(*resp)); 1599 resp->min_version = cpu_to_le16(0x0100); 1600 resp->max_version = cpu_to_le16(0x0100); 1601 resp->status = STATUS_NOT_SUPPORTED; 1602 } else { 1603 resp->status = STATUS_SUCCESS; 1604 resp->min_version = SMB_DIRECT_VERSION_LE; 1605 resp->max_version = SMB_DIRECT_VERSION_LE; 1606 resp->negotiated_version = SMB_DIRECT_VERSION_LE; 1607 resp->reserved = 0; 1608 resp->credits_requested = 1609 cpu_to_le16(t->send_credit_target); 1610 resp->credits_granted = cpu_to_le16(manage_credits_prior_sending(t)); 1611 resp->max_readwrite_size = cpu_to_le32(t->max_rdma_rw_size); 1612 resp->preferred_send_size = cpu_to_le32(t->max_send_size); 1613 resp->max_receive_size = cpu_to_le32(t->max_recv_size); 1614 resp->max_fragmented_size = 1615 cpu_to_le32(t->max_fragmented_recv_size); 1616 } 1617 1618 sendmsg->sge[0].addr = ib_dma_map_single(t->cm_id->device, 1619 (void *)resp, sizeof(*resp), 1620 DMA_TO_DEVICE); 1621 ret = ib_dma_mapping_error(t->cm_id->device, sendmsg->sge[0].addr); 1622 if (ret) { 1623 smb_direct_free_sendmsg(t, sendmsg); 1624 return ret; 1625 } 1626 1627 sendmsg->num_sge = 1; 1628 sendmsg->sge[0].length = sizeof(*resp); 1629 sendmsg->sge[0].lkey = t->pd->local_dma_lkey; 1630 1631 ret = post_sendmsg(t, NULL, sendmsg); 1632 if (ret) { 1633 smb_direct_free_sendmsg(t, sendmsg); 1634 return ret; 1635 } 1636 1637 wait_event(t->wait_send_pending, 1638 atomic_read(&t->send_pending) == 0); 1639 return 0; 1640 } 1641 1642 static int smb_direct_accept_client(struct smb_direct_transport *t) 1643 { 1644 struct rdma_conn_param conn_param; 1645 struct ib_port_immutable port_immutable; 1646 u32 ird_ord_hdr[2]; 1647 int ret; 1648 1649 memset(&conn_param, 0, sizeof(conn_param)); 1650 conn_param.initiator_depth = min_t(u8, t->cm_id->device->attrs.max_qp_rd_atom, 1651 SMB_DIRECT_CM_INITIATOR_DEPTH); 1652 conn_param.responder_resources = 0; 1653 1654 t->cm_id->device->ops.get_port_immutable(t->cm_id->device, 1655 t->cm_id->port_num, 1656 &port_immutable); 1657 if (port_immutable.core_cap_flags & RDMA_CORE_PORT_IWARP) { 1658 ird_ord_hdr[0] = conn_param.responder_resources; 1659 ird_ord_hdr[1] = 1; 1660 conn_param.private_data = ird_ord_hdr; 1661 conn_param.private_data_len = sizeof(ird_ord_hdr); 1662 } else { 1663 conn_param.private_data = NULL; 1664 conn_param.private_data_len = 0; 1665 } 1666 conn_param.retry_count = SMB_DIRECT_CM_RETRY; 1667 conn_param.rnr_retry_count = SMB_DIRECT_CM_RNR_RETRY; 1668 conn_param.flow_control = 0; 1669 1670 ret = rdma_accept(t->cm_id, &conn_param); 1671 if (ret) { 
                pr_err("error at rdma_accept: %d\n", ret);
                return ret;
        }
        return 0;
}

static int smb_direct_prepare_negotiation(struct smb_direct_transport *t)
{
        int ret;
        struct smb_direct_recvmsg *recvmsg;

        recvmsg = get_free_recvmsg(t);
        if (!recvmsg)
                return -ENOMEM;
        recvmsg->type = SMB_DIRECT_MSG_NEGOTIATE_REQ;

        ret = smb_direct_post_recv(t, recvmsg);
        if (ret) {
                pr_err("Can't post recv: %d\n", ret);
                goto out_err;
        }

        t->negotiation_requested = false;
        ret = smb_direct_accept_client(t);
        if (ret) {
                pr_err("Can't accept client\n");
                goto out_err;
        }

        smb_direct_post_recv_credits(&t->post_recv_credits_work.work);
        return 0;
out_err:
        put_recvmsg(t, recvmsg);
        return ret;
}

static unsigned int smb_direct_get_max_fr_pages(struct smb_direct_transport *t)
{
        return min_t(unsigned int,
                     t->cm_id->device->attrs.max_fast_reg_page_list_len,
                     256);
}

static int smb_direct_init_params(struct smb_direct_transport *t,
                                  struct ib_qp_cap *cap)
{
        struct ib_device *device = t->cm_id->device;
        int max_send_sges, max_rw_wrs, max_send_wrs;
        unsigned int max_sge_per_wr, wrs_per_credit;

        /* Three extra SGEs are needed because the SMB_DIRECT header, the
         * SMB2 header and the SMB2 response could each be mapped separately.
         */
        t->max_send_size = smb_direct_max_send_size;
        max_send_sges = DIV_ROUND_UP(t->max_send_size, PAGE_SIZE) + 3;
        if (max_send_sges > SMB_DIRECT_MAX_SEND_SGES) {
                pr_err("max_send_size %d is too large\n", t->max_send_size);
                return -EINVAL;
        }

        /* Calculate the number of work requests for RDMA R/W.
         * One R/W credit can transfer at most the number of pages that
         * can be registered with a single memory region, and each credit
         * needs at least 4 work requests: MR registration, the RDMA R/W
         * itself, and local & remote MR invalidation.
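         *
         * As a rough illustration (assuming 4 KiB pages and a device that
         * allows the full 256 pages per MR), one credit covers up to
         * (256 - 1) * 4 KiB of payload (one page is kept in reserve for a
         * buffer that is not page aligned), so an 8 MiB max_rdma_rw_size
         * would need DIV_ROUND_UP(8 MiB, 255 * 4 KiB) = 9 R/W credits.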
1738 */ 1739 t->max_rdma_rw_size = smb_direct_max_read_write_size; 1740 t->pages_per_rw_credit = smb_direct_get_max_fr_pages(t); 1741 t->max_rw_credits = DIV_ROUND_UP(t->max_rdma_rw_size, 1742 (t->pages_per_rw_credit - 1) * 1743 PAGE_SIZE); 1744 1745 max_sge_per_wr = min_t(unsigned int, device->attrs.max_send_sge, 1746 device->attrs.max_sge_rd); 1747 max_sge_per_wr = max_t(unsigned int, max_sge_per_wr, 1748 max_send_sges); 1749 wrs_per_credit = max_t(unsigned int, 4, 1750 DIV_ROUND_UP(t->pages_per_rw_credit, 1751 max_sge_per_wr) + 1); 1752 max_rw_wrs = t->max_rw_credits * wrs_per_credit; 1753 1754 max_send_wrs = smb_direct_send_credit_target + max_rw_wrs; 1755 if (max_send_wrs > device->attrs.max_cqe || 1756 max_send_wrs > device->attrs.max_qp_wr) { 1757 pr_err("consider lowering send_credit_target = %d\n", 1758 smb_direct_send_credit_target); 1759 pr_err("Possible CQE overrun, device reporting max_cqe %d max_qp_wr %d\n", 1760 device->attrs.max_cqe, device->attrs.max_qp_wr); 1761 return -EINVAL; 1762 } 1763 1764 if (smb_direct_receive_credit_max > device->attrs.max_cqe || 1765 smb_direct_receive_credit_max > device->attrs.max_qp_wr) { 1766 pr_err("consider lowering receive_credit_max = %d\n", 1767 smb_direct_receive_credit_max); 1768 pr_err("Possible CQE overrun, device reporting max_cpe %d max_qp_wr %d\n", 1769 device->attrs.max_cqe, device->attrs.max_qp_wr); 1770 return -EINVAL; 1771 } 1772 1773 if (device->attrs.max_recv_sge < SMB_DIRECT_MAX_RECV_SGES) { 1774 pr_err("warning: device max_recv_sge = %d too small\n", 1775 device->attrs.max_recv_sge); 1776 return -EINVAL; 1777 } 1778 1779 t->recv_credits = 0; 1780 t->count_avail_recvmsg = 0; 1781 1782 t->recv_credit_max = smb_direct_receive_credit_max; 1783 t->recv_credit_target = 10; 1784 t->new_recv_credits = 0; 1785 1786 t->send_credit_target = smb_direct_send_credit_target; 1787 atomic_set(&t->send_credits, 0); 1788 atomic_set(&t->rw_credits, t->max_rw_credits); 1789 1790 t->max_send_size = smb_direct_max_send_size; 1791 t->max_recv_size = smb_direct_max_receive_size; 1792 t->max_fragmented_recv_size = smb_direct_max_fragmented_recv_size; 1793 1794 cap->max_send_wr = max_send_wrs; 1795 cap->max_recv_wr = t->recv_credit_max; 1796 cap->max_send_sge = max_sge_per_wr; 1797 cap->max_recv_sge = SMB_DIRECT_MAX_RECV_SGES; 1798 cap->max_inline_data = 0; 1799 cap->max_rdma_ctxs = t->max_rw_credits; 1800 return 0; 1801 } 1802 1803 static void smb_direct_destroy_pools(struct smb_direct_transport *t) 1804 { 1805 struct smb_direct_recvmsg *recvmsg; 1806 1807 while ((recvmsg = get_free_recvmsg(t))) 1808 mempool_free(recvmsg, t->recvmsg_mempool); 1809 while ((recvmsg = get_empty_recvmsg(t))) 1810 mempool_free(recvmsg, t->recvmsg_mempool); 1811 1812 mempool_destroy(t->recvmsg_mempool); 1813 t->recvmsg_mempool = NULL; 1814 1815 kmem_cache_destroy(t->recvmsg_cache); 1816 t->recvmsg_cache = NULL; 1817 1818 mempool_destroy(t->sendmsg_mempool); 1819 t->sendmsg_mempool = NULL; 1820 1821 kmem_cache_destroy(t->sendmsg_cache); 1822 t->sendmsg_cache = NULL; 1823 } 1824 1825 static int smb_direct_create_pools(struct smb_direct_transport *t) 1826 { 1827 char name[80]; 1828 int i; 1829 struct smb_direct_recvmsg *recvmsg; 1830 1831 snprintf(name, sizeof(name), "smb_direct_rqst_pool_%p", t); 1832 t->sendmsg_cache = kmem_cache_create(name, 1833 sizeof(struct smb_direct_sendmsg) + 1834 sizeof(struct smb_direct_negotiate_resp), 1835 0, SLAB_HWCACHE_ALIGN, NULL); 1836 if (!t->sendmsg_cache) 1837 return -ENOMEM; 1838 1839 t->sendmsg_mempool = 
mempool_create(t->send_credit_target, 1840 mempool_alloc_slab, mempool_free_slab, 1841 t->sendmsg_cache); 1842 if (!t->sendmsg_mempool) 1843 goto err; 1844 1845 snprintf(name, sizeof(name), "smb_direct_resp_%p", t); 1846 t->recvmsg_cache = kmem_cache_create(name, 1847 sizeof(struct smb_direct_recvmsg) + 1848 t->max_recv_size, 1849 0, SLAB_HWCACHE_ALIGN, NULL); 1850 if (!t->recvmsg_cache) 1851 goto err; 1852 1853 t->recvmsg_mempool = 1854 mempool_create(t->recv_credit_max, mempool_alloc_slab, 1855 mempool_free_slab, t->recvmsg_cache); 1856 if (!t->recvmsg_mempool) 1857 goto err; 1858 1859 INIT_LIST_HEAD(&t->recvmsg_queue); 1860 1861 for (i = 0; i < t->recv_credit_max; i++) { 1862 recvmsg = mempool_alloc(t->recvmsg_mempool, KSMBD_DEFAULT_GFP); 1863 if (!recvmsg) 1864 goto err; 1865 recvmsg->transport = t; 1866 list_add(&recvmsg->list, &t->recvmsg_queue); 1867 } 1868 t->count_avail_recvmsg = t->recv_credit_max; 1869 1870 return 0; 1871 err: 1872 smb_direct_destroy_pools(t); 1873 return -ENOMEM; 1874 } 1875 1876 static int smb_direct_create_qpair(struct smb_direct_transport *t, 1877 struct ib_qp_cap *cap) 1878 { 1879 int ret; 1880 struct ib_qp_init_attr qp_attr; 1881 int pages_per_rw; 1882 1883 t->pd = ib_alloc_pd(t->cm_id->device, 0); 1884 if (IS_ERR(t->pd)) { 1885 pr_err("Can't create RDMA PD\n"); 1886 ret = PTR_ERR(t->pd); 1887 t->pd = NULL; 1888 return ret; 1889 } 1890 1891 t->send_cq = ib_alloc_cq(t->cm_id->device, t, 1892 smb_direct_send_credit_target + cap->max_rdma_ctxs, 1893 0, IB_POLL_WORKQUEUE); 1894 if (IS_ERR(t->send_cq)) { 1895 pr_err("Can't create RDMA send CQ\n"); 1896 ret = PTR_ERR(t->send_cq); 1897 t->send_cq = NULL; 1898 goto err; 1899 } 1900 1901 t->recv_cq = ib_alloc_cq(t->cm_id->device, t, 1902 t->recv_credit_max, 0, IB_POLL_WORKQUEUE); 1903 if (IS_ERR(t->recv_cq)) { 1904 pr_err("Can't create RDMA recv CQ\n"); 1905 ret = PTR_ERR(t->recv_cq); 1906 t->recv_cq = NULL; 1907 goto err; 1908 } 1909 1910 memset(&qp_attr, 0, sizeof(qp_attr)); 1911 qp_attr.event_handler = smb_direct_qpair_handler; 1912 qp_attr.qp_context = t; 1913 qp_attr.cap = *cap; 1914 qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR; 1915 qp_attr.qp_type = IB_QPT_RC; 1916 qp_attr.send_cq = t->send_cq; 1917 qp_attr.recv_cq = t->recv_cq; 1918 qp_attr.port_num = ~0; 1919 1920 ret = rdma_create_qp(t->cm_id, t->pd, &qp_attr); 1921 if (ret) { 1922 pr_err("Can't create RDMA QP: %d\n", ret); 1923 goto err; 1924 } 1925 1926 t->qp = t->cm_id->qp; 1927 t->cm_id->event_handler = smb_direct_cm_handler; 1928 1929 pages_per_rw = DIV_ROUND_UP(t->max_rdma_rw_size, PAGE_SIZE) + 1; 1930 if (pages_per_rw > t->cm_id->device->attrs.max_sgl_rd) { 1931 ret = ib_mr_pool_init(t->qp, &t->qp->rdma_mrs, 1932 t->max_rw_credits, IB_MR_TYPE_MEM_REG, 1933 t->pages_per_rw_credit, 0); 1934 if (ret) { 1935 pr_err("failed to init mr pool count %d pages %d\n", 1936 t->max_rw_credits, t->pages_per_rw_credit); 1937 goto err; 1938 } 1939 } 1940 1941 return 0; 1942 err: 1943 if (t->qp) { 1944 t->qp = NULL; 1945 rdma_destroy_qp(t->cm_id); 1946 } 1947 if (t->recv_cq) { 1948 ib_destroy_cq(t->recv_cq); 1949 t->recv_cq = NULL; 1950 } 1951 if (t->send_cq) { 1952 ib_destroy_cq(t->send_cq); 1953 t->send_cq = NULL; 1954 } 1955 if (t->pd) { 1956 ib_dealloc_pd(t->pd); 1957 t->pd = NULL; 1958 } 1959 return ret; 1960 } 1961 1962 static int smb_direct_prepare(struct ksmbd_transport *t) 1963 { 1964 struct smb_direct_transport *st = smb_trans_direct_transfort(t); 1965 struct smb_direct_recvmsg *recvmsg; 1966 struct smb_direct_negotiate_req *req; 1967 int ret; 1968 1969 
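        /*
         * smb_direct_prepare_negotiation() has already posted a receive for
         * the client's SMB_DIRECT negotiate request and accepted the RDMA
         * connection. Wait here until recv_done() flags that request (or the
         * connection drops), then validate it and send the negotiate
         * response.
         */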
	ksmbd_debug(RDMA, "Waiting for SMB_DIRECT negotiate request\n");
	ret = wait_event_interruptible_timeout(st->wait_status,
					       st->negotiation_requested ||
					       st->status == SMB_DIRECT_CS_DISCONNECTED,
					       SMB_DIRECT_NEGOTIATE_TIMEOUT * HZ);
	if (ret <= 0 || st->status == SMB_DIRECT_CS_DISCONNECTED)
		return ret < 0 ? ret : -ETIMEDOUT;

	recvmsg = get_first_reassembly(st);
	if (!recvmsg)
		return -ECONNABORTED;

	ret = smb_direct_check_recvmsg(recvmsg);
	if (ret == -ECONNABORTED)
		goto out;

	req = (struct smb_direct_negotiate_req *)recvmsg->packet;
	st->max_recv_size = min_t(int, st->max_recv_size,
				  le32_to_cpu(req->preferred_send_size));
	st->max_send_size = min_t(int, st->max_send_size,
				  le32_to_cpu(req->max_receive_size));
	st->max_fragmented_send_size =
		le32_to_cpu(req->max_fragmented_size);
	st->max_fragmented_recv_size =
		(st->recv_credit_max * st->max_recv_size) / 2;

	ret = smb_direct_send_negotiate_response(st, ret);
out:
	spin_lock_irq(&st->reassembly_queue_lock);
	st->reassembly_queue_length--;
	list_del(&recvmsg->list);
	spin_unlock_irq(&st->reassembly_queue_lock);
	put_recvmsg(st, recvmsg);

	return ret;
}

static int smb_direct_connect(struct smb_direct_transport *st)
{
	int ret;
	struct ib_qp_cap qp_cap;

	ret = smb_direct_init_params(st, &qp_cap);
	if (ret) {
		pr_err("Can't configure RDMA parameters\n");
		return ret;
	}

	ret = smb_direct_create_pools(st);
	if (ret) {
		pr_err("Can't init RDMA pool: %d\n", ret);
		return ret;
	}

	ret = smb_direct_create_qpair(st, &qp_cap);
	if (ret) {
		pr_err("Can't accept RDMA client: %d\n", ret);
		return ret;
	}

	ret = smb_direct_prepare_negotiation(st);
	if (ret) {
		pr_err("Can't negotiate: %d\n", ret);
		return ret;
	}
	return 0;
}

static bool rdma_frwr_is_supported(struct ib_device_attr *attrs)
{
	if (!(attrs->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS))
		return false;
	if (attrs->max_fast_reg_page_list_len == 0)
		return false;
	return true;
}

static int smb_direct_handle_connect_request(struct rdma_cm_id *new_cm_id)
{
	struct smb_direct_transport *t;
	struct task_struct *handler;
	int ret;

	if (!rdma_frwr_is_supported(&new_cm_id->device->attrs)) {
		ksmbd_debug(RDMA,
			    "Fast Registration Work Requests are not supported. device capabilities=%llx\n",
			    new_cm_id->device->attrs.device_cap_flags);
		return -EPROTONOSUPPORT;
	}

	t = alloc_transport(new_cm_id);
	if (!t)
		return -ENOMEM;

	ret = smb_direct_connect(t);
	if (ret)
		goto out_err;

	handler = kthread_run(ksmbd_conn_handler_loop,
			      KSMBD_TRANS(t)->conn, "ksmbd:r%u",
			      smb_direct_port);
	if (IS_ERR(handler)) {
		ret = PTR_ERR(handler);
		pr_err("Can't start thread\n");
		goto out_err;
	}

	return 0;
out_err:
	free_transport(t);
	return ret;
}

static int smb_direct_listen_handler(struct rdma_cm_id *cm_id,
				     struct rdma_cm_event *event)
{
	switch (event->event) {
	case RDMA_CM_EVENT_CONNECT_REQUEST: {
		int ret = smb_direct_handle_connect_request(cm_id);

		if (ret) {
			pr_err("Can't create transport: %d\n", ret);
			return ret;
		}

		ksmbd_debug(RDMA, "Received connection request. cm_id=%p\n",
			    cm_id);
		break;
	}
	default:
		pr_err("Unexpected listen event. cm_id=%p, event=%s (%d)\n",
		       cm_id, rdma_event_msg(event->event), event->event);
		break;
	}
	return 0;
}

static int smb_direct_listen(int port)
{
	int ret;
	struct rdma_cm_id *cm_id;
	struct sockaddr_in sin = {
		.sin_family = AF_INET,
		.sin_addr.s_addr = htonl(INADDR_ANY),
		.sin_port = htons(port),
	};

	cm_id = rdma_create_id(&init_net, smb_direct_listen_handler,
			       &smb_direct_listener, RDMA_PS_TCP, IB_QPT_RC);
	if (IS_ERR(cm_id)) {
		pr_err("Can't create cm id: %ld\n", PTR_ERR(cm_id));
		return PTR_ERR(cm_id);
	}

	ret = rdma_bind_addr(cm_id, (struct sockaddr *)&sin);
	if (ret) {
		pr_err("Can't bind: %d\n", ret);
		goto err;
	}

	smb_direct_listener.cm_id = cm_id;

	ret = rdma_listen(cm_id, 10);
	if (ret) {
		pr_err("Can't listen: %d\n", ret);
		goto err;
	}
	return 0;
err:
	smb_direct_listener.cm_id = NULL;
	rdma_destroy_id(cm_id);
	return ret;
}

static int smb_direct_ib_client_add(struct ib_device *ib_dev)
{
	struct smb_direct_device *smb_dev;

	/* Use port 5445 if the device type is iWARP (not InfiniBand) */
	if (ib_dev->node_type != RDMA_NODE_IB_CA)
		smb_direct_port = SMB_DIRECT_PORT_IWARP;

	if (!rdma_frwr_is_supported(&ib_dev->attrs))
		return 0;

	smb_dev = kzalloc(sizeof(*smb_dev), KSMBD_DEFAULT_GFP);
	if (!smb_dev)
		return -ENOMEM;
	smb_dev->ib_dev = ib_dev;

	write_lock(&smb_direct_device_lock);
	list_add(&smb_dev->list, &smb_direct_device_list);
	write_unlock(&smb_direct_device_lock);

	ksmbd_debug(RDMA, "ib device added: name %s\n", ib_dev->name);
	return 0;
}

static void smb_direct_ib_client_remove(struct ib_device *ib_dev,
					void *client_data)
{
	struct smb_direct_device *smb_dev, *tmp;

	write_lock(&smb_direct_device_lock);
	list_for_each_entry_safe(smb_dev, tmp, &smb_direct_device_list, list) {
		if (smb_dev->ib_dev == ib_dev) {
			list_del(&smb_dev->list);
			kfree(smb_dev);
			break;
		}
	}
	write_unlock(&smb_direct_device_lock);
}

static struct ib_client smb_direct_ib_client = {
	.name = "ksmbd_smb_direct_ib",
	.add = smb_direct_ib_client_add,
	.remove = smb_direct_ib_client_remove,
};

int ksmbd_rdma_init(void)
{
	int ret;

	smb_direct_listener.cm_id = NULL;

	ret = ib_register_client(&smb_direct_ib_client);
	if (ret) {
		pr_err("failed to ib_register_client\n");
		return ret;
	}

	/* When a client runs out of send credits, the server grants new
	 * credits by sending a packet through this queue. This avoids the
	 * situation in which a client cannot send packets for lack of
	 * credits.
	 */
	smb_direct_wq = alloc_workqueue("ksmbd-smb_direct-wq",
					WQ_HIGHPRI | WQ_MEM_RECLAIM, 0);
	if (!smb_direct_wq)
		return -ENOMEM;

	ret = smb_direct_listen(smb_direct_port);
	if (ret) {
		destroy_workqueue(smb_direct_wq);
		smb_direct_wq = NULL;
		pr_err("Can't listen: %d\n", ret);
		return ret;
	}

	ksmbd_debug(RDMA, "init RDMA listener. cm_id=%p\n",
		    smb_direct_listener.cm_id);
	return 0;
}

void ksmbd_rdma_destroy(void)
{
	if (!smb_direct_listener.cm_id)
		return;

	ib_unregister_client(&smb_direct_ib_client);
	rdma_destroy_id(smb_direct_listener.cm_id);

	smb_direct_listener.cm_id = NULL;

	if (smb_direct_wq) {
		destroy_workqueue(smb_direct_wq);
		smb_direct_wq = NULL;
	}
}

bool ksmbd_rdma_capable_netdev(struct net_device *netdev)
{
	struct smb_direct_device *smb_dev;
	int i;
	bool rdma_capable = false;

	read_lock(&smb_direct_device_lock);
	list_for_each_entry(smb_dev, &smb_direct_device_list, list) {
		for (i = 0; i < smb_dev->ib_dev->phys_port_cnt; i++) {
			struct net_device *ndev;

			ndev = ib_device_get_netdev(smb_dev->ib_dev, i + 1);
			if (!ndev)
				continue;

			if (ndev == netdev) {
				dev_put(ndev);
				rdma_capable = true;
				goto out;
			}
			dev_put(ndev);
		}
	}
out:
	read_unlock(&smb_direct_device_lock);

	if (!rdma_capable) {
		struct ib_device *ibdev;

		ibdev = ib_device_get_by_netdev(netdev, RDMA_DRIVER_UNKNOWN);
		if (ibdev) {
			rdma_capable = rdma_frwr_is_supported(&ibdev->attrs);
			ib_device_put(ibdev);
		}
	}

	ksmbd_debug(RDMA, "netdev(%s) rdma capable : %s\n",
		    netdev->name, str_true_false(rdma_capable));

	return rdma_capable;
}

static const struct ksmbd_transport_ops ksmbd_smb_direct_transport_ops = {
	.prepare = smb_direct_prepare,
	.disconnect = smb_direct_disconnect,
	.shutdown = smb_direct_shutdown,
	.writev = smb_direct_writev,
	.read = smb_direct_read,
	.rdma_read = smb_direct_rdma_read,
	.rdma_write = smb_direct_rdma_write,
	.free_transport = smb_direct_free_transport,
};