1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Copyright (C) 2017, Microsoft Corporation. 4 * Copyright (C) 2018, LG Electronics. 5 * 6 * Author(s): Long Li <longli@microsoft.com>, 7 * Hyunchul Lee <hyc.lee@gmail.com> 8 */ 9 10 #define SUBMOD_NAME "smb_direct" 11 12 #include <linux/kthread.h> 13 #include <linux/list.h> 14 #include <linux/mempool.h> 15 #include <linux/highmem.h> 16 #include <linux/scatterlist.h> 17 #include <linux/string_choices.h> 18 #include <rdma/ib_verbs.h> 19 #include <rdma/rdma_cm.h> 20 #include <rdma/rw.h> 21 22 #include "glob.h" 23 #include "connection.h" 24 #include "smb_common.h" 25 #include "../common/smb2status.h" 26 #include "transport_rdma.h" 27 28 #define SMB_DIRECT_PORT_IWARP 5445 29 #define SMB_DIRECT_PORT_INFINIBAND 445 30 31 #define SMB_DIRECT_VERSION_LE cpu_to_le16(0x0100) 32 33 /* SMB_DIRECT negotiation timeout in seconds */ 34 #define SMB_DIRECT_NEGOTIATE_TIMEOUT 120 35 36 #define SMB_DIRECT_MAX_SEND_SGES 6 37 #define SMB_DIRECT_MAX_RECV_SGES 1 38 39 /* 40 * Default maximum number of RDMA read/write outstanding on this connection 41 * This value is possibly decreased during QP creation on hardware limit 42 */ 43 #define SMB_DIRECT_CM_INITIATOR_DEPTH 8 44 45 /* Maximum number of retries on data transfer operations */ 46 #define SMB_DIRECT_CM_RETRY 6 47 /* No need to retry on Receiver Not Ready since SMB_DIRECT manages credits */ 48 #define SMB_DIRECT_CM_RNR_RETRY 0 49 50 /* 51 * User configurable initial values per SMB_DIRECT transport connection 52 * as defined in [MS-SMBD] 3.1.1.1 53 * Those may change after a SMB_DIRECT negotiation 54 */ 55 56 /* Set 445 port to SMB Direct port by default */ 57 static int smb_direct_port = SMB_DIRECT_PORT_INFINIBAND; 58 59 /* The local peer's maximum number of credits to grant to the peer */ 60 static int smb_direct_receive_credit_max = 255; 61 62 /* The remote peer's credit request of local peer */ 63 static int smb_direct_send_credit_target = 255; 64 65 /* The maximum single message size can be sent to remote peer */ 66 static int smb_direct_max_send_size = 1364; 67 68 /* The maximum fragmented upper-layer payload receive size supported */ 69 static int smb_direct_max_fragmented_recv_size = 1024 * 1024; 70 71 /* The maximum single-message size which can be received */ 72 static int smb_direct_max_receive_size = 1364; 73 74 static int smb_direct_max_read_write_size = SMBD_DEFAULT_IOSIZE; 75 76 static LIST_HEAD(smb_direct_device_list); 77 static DEFINE_RWLOCK(smb_direct_device_lock); 78 79 struct smb_direct_device { 80 struct ib_device *ib_dev; 81 struct list_head list; 82 }; 83 84 static struct smb_direct_listener { 85 struct rdma_cm_id *cm_id; 86 } smb_direct_listener; 87 88 static struct workqueue_struct *smb_direct_wq; 89 90 enum smb_direct_status { 91 SMB_DIRECT_CS_NEW = 0, 92 SMB_DIRECT_CS_CONNECTED, 93 SMB_DIRECT_CS_DISCONNECTING, 94 SMB_DIRECT_CS_DISCONNECTED, 95 }; 96 97 struct smb_direct_transport { 98 struct ksmbd_transport transport; 99 100 enum smb_direct_status status; 101 bool full_packet_received; 102 wait_queue_head_t wait_status; 103 104 struct rdma_cm_id *cm_id; 105 struct ib_cq *send_cq; 106 struct ib_cq *recv_cq; 107 struct ib_pd *pd; 108 struct ib_qp *qp; 109 110 int max_send_size; 111 int max_recv_size; 112 int max_fragmented_send_size; 113 int max_fragmented_recv_size; 114 int max_rdma_rw_size; 115 116 spinlock_t reassembly_queue_lock; 117 struct list_head reassembly_queue; 118 int reassembly_data_length; 119 int reassembly_queue_length; 120 int first_entry_offset; 121 
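	/*
	 * Note (descriptive, based on the code in this file): the reassembly
	 * fields above are appended to under reassembly_queue_lock (see
	 * enqueue_reassembly()), while smb_direct_read() consumes entries from
	 * the head of the list; first_entry_offset records how much of that
	 * head entry has already been copied out to the caller.
	 */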
wait_queue_head_t wait_reassembly_queue; 122 123 spinlock_t receive_credit_lock; 124 int recv_credits; 125 int count_avail_recvmsg; 126 int recv_credit_max; 127 int recv_credit_target; 128 129 spinlock_t recvmsg_queue_lock; 130 struct list_head recvmsg_queue; 131 132 int send_credit_target; 133 atomic_t send_credits; 134 spinlock_t lock_new_recv_credits; 135 int new_recv_credits; 136 int max_rw_credits; 137 int pages_per_rw_credit; 138 atomic_t rw_credits; 139 140 wait_queue_head_t wait_send_credits; 141 wait_queue_head_t wait_rw_credits; 142 143 mempool_t *sendmsg_mempool; 144 struct kmem_cache *sendmsg_cache; 145 mempool_t *recvmsg_mempool; 146 struct kmem_cache *recvmsg_cache; 147 148 wait_queue_head_t wait_send_pending; 149 atomic_t send_pending; 150 151 struct delayed_work post_recv_credits_work; 152 struct work_struct send_immediate_work; 153 struct work_struct disconnect_work; 154 155 bool negotiation_requested; 156 }; 157 158 #define KSMBD_TRANS(t) ((struct ksmbd_transport *)&((t)->transport)) 159 #define SMBD_TRANS(t) ((struct smb_direct_transport *)container_of(t, \ 160 struct smb_direct_transport, transport)) 161 enum { 162 SMB_DIRECT_MSG_NEGOTIATE_REQ = 0, 163 SMB_DIRECT_MSG_DATA_TRANSFER 164 }; 165 166 static const struct ksmbd_transport_ops ksmbd_smb_direct_transport_ops; 167 168 struct smb_direct_send_ctx { 169 struct list_head msg_list; 170 int wr_cnt; 171 bool need_invalidate_rkey; 172 unsigned int remote_key; 173 }; 174 175 struct smb_direct_sendmsg { 176 struct smb_direct_transport *transport; 177 struct ib_send_wr wr; 178 struct list_head list; 179 int num_sge; 180 struct ib_sge sge[SMB_DIRECT_MAX_SEND_SGES]; 181 struct ib_cqe cqe; 182 u8 packet[]; 183 }; 184 185 struct smb_direct_recvmsg { 186 struct smb_direct_transport *transport; 187 struct list_head list; 188 int type; 189 struct ib_sge sge; 190 struct ib_cqe cqe; 191 bool first_segment; 192 u8 packet[]; 193 }; 194 195 struct smb_direct_rdma_rw_msg { 196 struct smb_direct_transport *t; 197 struct ib_cqe cqe; 198 int status; 199 struct completion *completion; 200 struct list_head list; 201 struct rdma_rw_ctx rw_ctx; 202 struct sg_table sgt; 203 struct scatterlist sg_list[]; 204 }; 205 206 void init_smbd_max_io_size(unsigned int sz) 207 { 208 sz = clamp_val(sz, SMBD_MIN_IOSIZE, SMBD_MAX_IOSIZE); 209 smb_direct_max_read_write_size = sz; 210 } 211 212 unsigned int get_smbd_max_read_write_size(void) 213 { 214 return smb_direct_max_read_write_size; 215 } 216 217 static inline int get_buf_page_count(void *buf, int size) 218 { 219 return DIV_ROUND_UP((uintptr_t)buf + size, PAGE_SIZE) - 220 (uintptr_t)buf / PAGE_SIZE; 221 } 222 223 static void smb_direct_destroy_pools(struct smb_direct_transport *transport); 224 static void smb_direct_post_recv_credits(struct work_struct *work); 225 static int smb_direct_post_send_data(struct smb_direct_transport *t, 226 struct smb_direct_send_ctx *send_ctx, 227 struct kvec *iov, int niov, 228 int remaining_data_length); 229 230 static inline struct smb_direct_transport * 231 smb_trans_direct_transfort(struct ksmbd_transport *t) 232 { 233 return container_of(t, struct smb_direct_transport, transport); 234 } 235 236 static inline void 237 *smb_direct_recvmsg_payload(struct smb_direct_recvmsg *recvmsg) 238 { 239 return (void *)recvmsg->packet; 240 } 241 242 static inline bool is_receive_credit_post_required(int receive_credits, 243 int avail_recvmsg_count) 244 { 245 return receive_credits <= (smb_direct_receive_credit_max >> 3) && 246 avail_recvmsg_count >= (receive_credits >> 2); 247 } 248 
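/*
 * Worked example (illustrative, using the module defaults above): with
 * smb_direct_receive_credit_max = 255, a credit refill is triggered once the
 * peer is down to 31 credits or fewer (255 >> 3), and only if at least a
 * quarter of those remaining credits can immediately be backed by free
 * receive buffers (avail_recvmsg_count >= receive_credits >> 2).
 */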
249 static struct 250 smb_direct_recvmsg *get_free_recvmsg(struct smb_direct_transport *t) 251 { 252 struct smb_direct_recvmsg *recvmsg = NULL; 253 254 spin_lock(&t->recvmsg_queue_lock); 255 if (!list_empty(&t->recvmsg_queue)) { 256 recvmsg = list_first_entry(&t->recvmsg_queue, 257 struct smb_direct_recvmsg, 258 list); 259 list_del(&recvmsg->list); 260 } 261 spin_unlock(&t->recvmsg_queue_lock); 262 return recvmsg; 263 } 264 265 static void put_recvmsg(struct smb_direct_transport *t, 266 struct smb_direct_recvmsg *recvmsg) 267 { 268 if (likely(recvmsg->sge.length != 0)) { 269 ib_dma_unmap_single(t->cm_id->device, 270 recvmsg->sge.addr, 271 recvmsg->sge.length, 272 DMA_FROM_DEVICE); 273 recvmsg->sge.length = 0; 274 } 275 276 spin_lock(&t->recvmsg_queue_lock); 277 list_add(&recvmsg->list, &t->recvmsg_queue); 278 spin_unlock(&t->recvmsg_queue_lock); 279 } 280 281 static void enqueue_reassembly(struct smb_direct_transport *t, 282 struct smb_direct_recvmsg *recvmsg, 283 int data_length) 284 { 285 spin_lock(&t->reassembly_queue_lock); 286 list_add_tail(&recvmsg->list, &t->reassembly_queue); 287 t->reassembly_queue_length++; 288 /* 289 * Make sure reassembly_data_length is updated after list and 290 * reassembly_queue_length are updated. On the dequeue side 291 * reassembly_data_length is checked without a lock to determine 292 * if reassembly_queue_length and list is up to date 293 */ 294 virt_wmb(); 295 t->reassembly_data_length += data_length; 296 spin_unlock(&t->reassembly_queue_lock); 297 } 298 299 static struct smb_direct_recvmsg *get_first_reassembly(struct smb_direct_transport *t) 300 { 301 if (!list_empty(&t->reassembly_queue)) 302 return list_first_entry(&t->reassembly_queue, 303 struct smb_direct_recvmsg, list); 304 else 305 return NULL; 306 } 307 308 static void smb_direct_disconnect_rdma_work(struct work_struct *work) 309 { 310 struct smb_direct_transport *t = 311 container_of(work, struct smb_direct_transport, 312 disconnect_work); 313 314 if (t->status == SMB_DIRECT_CS_CONNECTED) { 315 t->status = SMB_DIRECT_CS_DISCONNECTING; 316 rdma_disconnect(t->cm_id); 317 } 318 } 319 320 static void 321 smb_direct_disconnect_rdma_connection(struct smb_direct_transport *t) 322 { 323 if (t->status == SMB_DIRECT_CS_CONNECTED) 324 queue_work(smb_direct_wq, &t->disconnect_work); 325 } 326 327 static void smb_direct_send_immediate_work(struct work_struct *work) 328 { 329 struct smb_direct_transport *t = container_of(work, 330 struct smb_direct_transport, send_immediate_work); 331 332 if (t->status != SMB_DIRECT_CS_CONNECTED) 333 return; 334 335 smb_direct_post_send_data(t, NULL, NULL, 0, 0); 336 } 337 338 static struct smb_direct_transport *alloc_transport(struct rdma_cm_id *cm_id) 339 { 340 struct smb_direct_transport *t; 341 struct ksmbd_conn *conn; 342 343 t = kzalloc(sizeof(*t), KSMBD_DEFAULT_GFP); 344 if (!t) 345 return NULL; 346 347 t->cm_id = cm_id; 348 cm_id->context = t; 349 350 t->status = SMB_DIRECT_CS_NEW; 351 init_waitqueue_head(&t->wait_status); 352 353 spin_lock_init(&t->reassembly_queue_lock); 354 INIT_LIST_HEAD(&t->reassembly_queue); 355 t->reassembly_data_length = 0; 356 t->reassembly_queue_length = 0; 357 init_waitqueue_head(&t->wait_reassembly_queue); 358 init_waitqueue_head(&t->wait_send_credits); 359 init_waitqueue_head(&t->wait_rw_credits); 360 361 spin_lock_init(&t->receive_credit_lock); 362 spin_lock_init(&t->recvmsg_queue_lock); 363 INIT_LIST_HEAD(&t->recvmsg_queue); 364 365 init_waitqueue_head(&t->wait_send_pending); 366 atomic_set(&t->send_pending, 0); 367 368 
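	/*
	 * new_recv_credits accumulates credits that have been made available
	 * locally but not yet advertised to the peer; it is protected by
	 * lock_new_recv_credits (initialized below) and drained by
	 * manage_credits_prior_sending() when the next packet header is built.
	 */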
spin_lock_init(&t->lock_new_recv_credits); 369 370 INIT_DELAYED_WORK(&t->post_recv_credits_work, 371 smb_direct_post_recv_credits); 372 INIT_WORK(&t->send_immediate_work, smb_direct_send_immediate_work); 373 INIT_WORK(&t->disconnect_work, smb_direct_disconnect_rdma_work); 374 375 conn = ksmbd_conn_alloc(); 376 if (!conn) 377 goto err; 378 conn->transport = KSMBD_TRANS(t); 379 KSMBD_TRANS(t)->conn = conn; 380 KSMBD_TRANS(t)->ops = &ksmbd_smb_direct_transport_ops; 381 return t; 382 err: 383 kfree(t); 384 return NULL; 385 } 386 387 static void smb_direct_free_transport(struct ksmbd_transport *kt) 388 { 389 kfree(SMBD_TRANS(kt)); 390 } 391 392 static void free_transport(struct smb_direct_transport *t) 393 { 394 struct smb_direct_recvmsg *recvmsg; 395 396 wake_up_interruptible(&t->wait_send_credits); 397 398 ksmbd_debug(RDMA, "wait for all send posted to IB to finish\n"); 399 wait_event(t->wait_send_pending, 400 atomic_read(&t->send_pending) == 0); 401 402 cancel_work_sync(&t->disconnect_work); 403 cancel_delayed_work_sync(&t->post_recv_credits_work); 404 cancel_work_sync(&t->send_immediate_work); 405 406 if (t->qp) { 407 ib_drain_qp(t->qp); 408 ib_mr_pool_destroy(t->qp, &t->qp->rdma_mrs); 409 t->qp = NULL; 410 rdma_destroy_qp(t->cm_id); 411 } 412 413 ksmbd_debug(RDMA, "drain the reassembly queue\n"); 414 do { 415 spin_lock(&t->reassembly_queue_lock); 416 recvmsg = get_first_reassembly(t); 417 if (recvmsg) { 418 list_del(&recvmsg->list); 419 spin_unlock(&t->reassembly_queue_lock); 420 put_recvmsg(t, recvmsg); 421 } else { 422 spin_unlock(&t->reassembly_queue_lock); 423 } 424 } while (recvmsg); 425 t->reassembly_data_length = 0; 426 427 if (t->send_cq) 428 ib_free_cq(t->send_cq); 429 if (t->recv_cq) 430 ib_free_cq(t->recv_cq); 431 if (t->pd) 432 ib_dealloc_pd(t->pd); 433 if (t->cm_id) 434 rdma_destroy_id(t->cm_id); 435 436 smb_direct_destroy_pools(t); 437 ksmbd_conn_free(KSMBD_TRANS(t)->conn); 438 } 439 440 static struct smb_direct_sendmsg 441 *smb_direct_alloc_sendmsg(struct smb_direct_transport *t) 442 { 443 struct smb_direct_sendmsg *msg; 444 445 msg = mempool_alloc(t->sendmsg_mempool, KSMBD_DEFAULT_GFP); 446 if (!msg) 447 return ERR_PTR(-ENOMEM); 448 msg->transport = t; 449 INIT_LIST_HEAD(&msg->list); 450 msg->num_sge = 0; 451 return msg; 452 } 453 454 static void smb_direct_free_sendmsg(struct smb_direct_transport *t, 455 struct smb_direct_sendmsg *msg) 456 { 457 int i; 458 459 if (msg->num_sge > 0) { 460 ib_dma_unmap_single(t->cm_id->device, 461 msg->sge[0].addr, msg->sge[0].length, 462 DMA_TO_DEVICE); 463 for (i = 1; i < msg->num_sge; i++) 464 ib_dma_unmap_page(t->cm_id->device, 465 msg->sge[i].addr, msg->sge[i].length, 466 DMA_TO_DEVICE); 467 } 468 mempool_free(msg, t->sendmsg_mempool); 469 } 470 471 static int smb_direct_check_recvmsg(struct smb_direct_recvmsg *recvmsg) 472 { 473 switch (recvmsg->type) { 474 case SMB_DIRECT_MSG_DATA_TRANSFER: { 475 struct smb_direct_data_transfer *req = 476 (struct smb_direct_data_transfer *)recvmsg->packet; 477 struct smb2_hdr *hdr = (struct smb2_hdr *)(recvmsg->packet 478 + le32_to_cpu(req->data_offset)); 479 ksmbd_debug(RDMA, 480 "CreditGranted: %u, CreditRequested: %u, DataLength: %u, RemainingDataLength: %u, SMB: %x, Command: %u\n", 481 le16_to_cpu(req->credits_granted), 482 le16_to_cpu(req->credits_requested), 483 req->data_length, req->remaining_data_length, 484 hdr->ProtocolId, hdr->Command); 485 break; 486 } 487 case SMB_DIRECT_MSG_NEGOTIATE_REQ: { 488 struct smb_direct_negotiate_req *req = 489 (struct smb_direct_negotiate_req 
*)recvmsg->packet; 490 ksmbd_debug(RDMA, 491 "MinVersion: %u, MaxVersion: %u, CreditRequested: %u, MaxSendSize: %u, MaxRecvSize: %u, MaxFragmentedSize: %u\n", 492 le16_to_cpu(req->min_version), 493 le16_to_cpu(req->max_version), 494 le16_to_cpu(req->credits_requested), 495 le32_to_cpu(req->preferred_send_size), 496 le32_to_cpu(req->max_receive_size), 497 le32_to_cpu(req->max_fragmented_size)); 498 if (le16_to_cpu(req->min_version) > 0x0100 || 499 le16_to_cpu(req->max_version) < 0x0100) 500 return -EOPNOTSUPP; 501 if (le16_to_cpu(req->credits_requested) <= 0 || 502 le32_to_cpu(req->max_receive_size) <= 128 || 503 le32_to_cpu(req->max_fragmented_size) <= 504 128 * 1024) 505 return -ECONNABORTED; 506 507 break; 508 } 509 default: 510 return -EINVAL; 511 } 512 return 0; 513 } 514 515 static void recv_done(struct ib_cq *cq, struct ib_wc *wc) 516 { 517 struct smb_direct_recvmsg *recvmsg; 518 struct smb_direct_transport *t; 519 520 recvmsg = container_of(wc->wr_cqe, struct smb_direct_recvmsg, cqe); 521 t = recvmsg->transport; 522 523 if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_RECV) { 524 put_recvmsg(t, recvmsg); 525 if (wc->status != IB_WC_WR_FLUSH_ERR) { 526 pr_err("Recv error. status='%s (%d)' opcode=%d\n", 527 ib_wc_status_msg(wc->status), wc->status, 528 wc->opcode); 529 smb_direct_disconnect_rdma_connection(t); 530 } 531 return; 532 } 533 534 ksmbd_debug(RDMA, "Recv completed. status='%s (%d)', opcode=%d\n", 535 ib_wc_status_msg(wc->status), wc->status, 536 wc->opcode); 537 538 ib_dma_sync_single_for_cpu(wc->qp->device, recvmsg->sge.addr, 539 recvmsg->sge.length, DMA_FROM_DEVICE); 540 541 switch (recvmsg->type) { 542 case SMB_DIRECT_MSG_NEGOTIATE_REQ: 543 if (wc->byte_len < sizeof(struct smb_direct_negotiate_req)) { 544 put_recvmsg(t, recvmsg); 545 smb_direct_disconnect_rdma_connection(t); 546 return; 547 } 548 t->negotiation_requested = true; 549 t->full_packet_received = true; 550 t->status = SMB_DIRECT_CS_CONNECTED; 551 enqueue_reassembly(t, recvmsg, 0); 552 wake_up_interruptible(&t->wait_status); 553 return; 554 case SMB_DIRECT_MSG_DATA_TRANSFER: { 555 struct smb_direct_data_transfer *data_transfer = 556 (struct smb_direct_data_transfer *)recvmsg->packet; 557 u32 remaining_data_length, data_offset, data_length; 558 int avail_recvmsg_count, receive_credits; 559 560 if (wc->byte_len < 561 offsetof(struct smb_direct_data_transfer, padding)) { 562 put_recvmsg(t, recvmsg); 563 smb_direct_disconnect_rdma_connection(t); 564 return; 565 } 566 567 remaining_data_length = le32_to_cpu(data_transfer->remaining_data_length); 568 data_length = le32_to_cpu(data_transfer->data_length); 569 data_offset = le32_to_cpu(data_transfer->data_offset); 570 if (wc->byte_len < data_offset || 571 wc->byte_len < (u64)data_offset + data_length) { 572 put_recvmsg(t, recvmsg); 573 smb_direct_disconnect_rdma_connection(t); 574 return; 575 } 576 if (remaining_data_length > t->max_fragmented_recv_size || 577 data_length > t->max_fragmented_recv_size || 578 (u64)remaining_data_length + (u64)data_length > 579 (u64)t->max_fragmented_recv_size) { 580 put_recvmsg(t, recvmsg); 581 smb_direct_disconnect_rdma_connection(t); 582 return; 583 } 584 585 if (data_length) { 586 if (t->full_packet_received) 587 recvmsg->first_segment = true; 588 589 if (le32_to_cpu(data_transfer->remaining_data_length)) 590 t->full_packet_received = false; 591 else 592 t->full_packet_received = true; 593 594 spin_lock(&t->receive_credit_lock); 595 receive_credits = --(t->recv_credits); 596 avail_recvmsg_count = t->count_avail_recvmsg; 597 
spin_unlock(&t->receive_credit_lock); 598 } else { 599 spin_lock(&t->receive_credit_lock); 600 receive_credits = --(t->recv_credits); 601 avail_recvmsg_count = ++(t->count_avail_recvmsg); 602 spin_unlock(&t->receive_credit_lock); 603 } 604 605 t->recv_credit_target = 606 le16_to_cpu(data_transfer->credits_requested); 607 atomic_add(le16_to_cpu(data_transfer->credits_granted), 608 &t->send_credits); 609 610 if (le16_to_cpu(data_transfer->flags) & 611 SMB_DIRECT_RESPONSE_REQUESTED) 612 queue_work(smb_direct_wq, &t->send_immediate_work); 613 614 if (atomic_read(&t->send_credits) > 0) 615 wake_up_interruptible(&t->wait_send_credits); 616 617 if (is_receive_credit_post_required(receive_credits, avail_recvmsg_count)) 618 mod_delayed_work(smb_direct_wq, 619 &t->post_recv_credits_work, 0); 620 621 if (data_length) { 622 enqueue_reassembly(t, recvmsg, (int)data_length); 623 wake_up_interruptible(&t->wait_reassembly_queue); 624 } else 625 put_recvmsg(t, recvmsg); 626 627 return; 628 } 629 } 630 631 /* 632 * This is an internal error! 633 */ 634 WARN_ON_ONCE(recvmsg->type != SMB_DIRECT_MSG_DATA_TRANSFER); 635 put_recvmsg(t, recvmsg); 636 smb_direct_disconnect_rdma_connection(t); 637 } 638 639 static int smb_direct_post_recv(struct smb_direct_transport *t, 640 struct smb_direct_recvmsg *recvmsg) 641 { 642 struct ib_recv_wr wr; 643 int ret; 644 645 recvmsg->sge.addr = ib_dma_map_single(t->cm_id->device, 646 recvmsg->packet, t->max_recv_size, 647 DMA_FROM_DEVICE); 648 ret = ib_dma_mapping_error(t->cm_id->device, recvmsg->sge.addr); 649 if (ret) 650 return ret; 651 recvmsg->sge.length = t->max_recv_size; 652 recvmsg->sge.lkey = t->pd->local_dma_lkey; 653 recvmsg->cqe.done = recv_done; 654 655 wr.wr_cqe = &recvmsg->cqe; 656 wr.next = NULL; 657 wr.sg_list = &recvmsg->sge; 658 wr.num_sge = 1; 659 660 ret = ib_post_recv(t->qp, &wr, NULL); 661 if (ret) { 662 pr_err("Can't post recv: %d\n", ret); 663 ib_dma_unmap_single(t->cm_id->device, 664 recvmsg->sge.addr, recvmsg->sge.length, 665 DMA_FROM_DEVICE); 666 recvmsg->sge.length = 0; 667 smb_direct_disconnect_rdma_connection(t); 668 return ret; 669 } 670 return ret; 671 } 672 673 static int smb_direct_read(struct ksmbd_transport *t, char *buf, 674 unsigned int size, int unused) 675 { 676 struct smb_direct_recvmsg *recvmsg; 677 struct smb_direct_data_transfer *data_transfer; 678 int to_copy, to_read, data_read, offset; 679 u32 data_length, remaining_data_length, data_offset; 680 int rc; 681 struct smb_direct_transport *st = smb_trans_direct_transfort(t); 682 683 again: 684 if (st->status != SMB_DIRECT_CS_CONNECTED) { 685 pr_err("disconnected\n"); 686 return -ENOTCONN; 687 } 688 689 /* 690 * No need to hold the reassembly queue lock all the time as we are 691 * the only one reading from the front of the queue. The transport 692 * may add more entries to the back of the queue at the same time 693 */ 694 if (st->reassembly_data_length >= size) { 695 int queue_length; 696 int queue_removed = 0; 697 698 /* 699 * Need to make sure reassembly_data_length is read before 700 * reading reassembly_queue_length and calling 701 * get_first_reassembly. 
This call is lock free 702 * as we never read at the end of the queue which are being 703 * updated in SOFTIRQ as more data is received 704 */ 705 virt_rmb(); 706 queue_length = st->reassembly_queue_length; 707 data_read = 0; 708 to_read = size; 709 offset = st->first_entry_offset; 710 while (data_read < size) { 711 recvmsg = get_first_reassembly(st); 712 data_transfer = smb_direct_recvmsg_payload(recvmsg); 713 data_length = le32_to_cpu(data_transfer->data_length); 714 remaining_data_length = 715 le32_to_cpu(data_transfer->remaining_data_length); 716 data_offset = le32_to_cpu(data_transfer->data_offset); 717 718 /* 719 * The upper layer expects RFC1002 length at the 720 * beginning of the payload. Return it to indicate 721 * the total length of the packet. This minimize the 722 * change to upper layer packet processing logic. This 723 * will be eventually remove when an intermediate 724 * transport layer is added 725 */ 726 if (recvmsg->first_segment && size == 4) { 727 unsigned int rfc1002_len = 728 data_length + remaining_data_length; 729 *((__be32 *)buf) = cpu_to_be32(rfc1002_len); 730 data_read = 4; 731 recvmsg->first_segment = false; 732 ksmbd_debug(RDMA, 733 "returning rfc1002 length %d\n", 734 rfc1002_len); 735 goto read_rfc1002_done; 736 } 737 738 to_copy = min_t(int, data_length - offset, to_read); 739 memcpy(buf + data_read, (char *)data_transfer + data_offset + offset, 740 to_copy); 741 742 /* move on to the next buffer? */ 743 if (to_copy == data_length - offset) { 744 queue_length--; 745 /* 746 * No need to lock if we are not at the 747 * end of the queue 748 */ 749 if (queue_length) { 750 list_del(&recvmsg->list); 751 } else { 752 spin_lock_irq(&st->reassembly_queue_lock); 753 list_del(&recvmsg->list); 754 spin_unlock_irq(&st->reassembly_queue_lock); 755 } 756 queue_removed++; 757 put_recvmsg(st, recvmsg); 758 offset = 0; 759 } else { 760 offset += to_copy; 761 } 762 763 to_read -= to_copy; 764 data_read += to_copy; 765 } 766 767 spin_lock_irq(&st->reassembly_queue_lock); 768 st->reassembly_data_length -= data_read; 769 st->reassembly_queue_length -= queue_removed; 770 spin_unlock_irq(&st->reassembly_queue_lock); 771 772 spin_lock(&st->receive_credit_lock); 773 st->count_avail_recvmsg += queue_removed; 774 if (is_receive_credit_post_required(st->recv_credits, st->count_avail_recvmsg)) { 775 spin_unlock(&st->receive_credit_lock); 776 mod_delayed_work(smb_direct_wq, 777 &st->post_recv_credits_work, 0); 778 } else { 779 spin_unlock(&st->receive_credit_lock); 780 } 781 782 st->first_entry_offset = offset; 783 ksmbd_debug(RDMA, 784 "returning to thread data_read=%d reassembly_data_length=%d first_entry_offset=%d\n", 785 data_read, st->reassembly_data_length, 786 st->first_entry_offset); 787 read_rfc1002_done: 788 return data_read; 789 } 790 791 ksmbd_debug(RDMA, "wait_event on more data\n"); 792 rc = wait_event_interruptible(st->wait_reassembly_queue, 793 st->reassembly_data_length >= size || 794 st->status != SMB_DIRECT_CS_CONNECTED); 795 if (rc) 796 return -EINTR; 797 798 goto again; 799 } 800 801 static void smb_direct_post_recv_credits(struct work_struct *work) 802 { 803 struct smb_direct_transport *t = container_of(work, 804 struct smb_direct_transport, post_recv_credits_work.work); 805 struct smb_direct_recvmsg *recvmsg; 806 int receive_credits, credits = 0; 807 int ret; 808 809 spin_lock(&t->receive_credit_lock); 810 receive_credits = t->recv_credits; 811 spin_unlock(&t->receive_credit_lock); 812 813 if (receive_credits < t->recv_credit_target) { 814 while (true) { 815 
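			/*
			 * Keep posting receives until the free recvmsg pool
			 * runs dry or a post fails; each receive successfully
			 * posted here becomes one more credit that will later
			 * be granted back to the peer via new_recv_credits.
			 */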
recvmsg = get_free_recvmsg(t); 816 if (!recvmsg) 817 break; 818 819 recvmsg->type = SMB_DIRECT_MSG_DATA_TRANSFER; 820 recvmsg->first_segment = false; 821 822 ret = smb_direct_post_recv(t, recvmsg); 823 if (ret) { 824 pr_err("Can't post recv: %d\n", ret); 825 put_recvmsg(t, recvmsg); 826 break; 827 } 828 credits++; 829 } 830 } 831 832 spin_lock(&t->receive_credit_lock); 833 t->recv_credits += credits; 834 t->count_avail_recvmsg -= credits; 835 spin_unlock(&t->receive_credit_lock); 836 837 spin_lock(&t->lock_new_recv_credits); 838 t->new_recv_credits += credits; 839 spin_unlock(&t->lock_new_recv_credits); 840 841 if (credits) 842 queue_work(smb_direct_wq, &t->send_immediate_work); 843 } 844 845 static void send_done(struct ib_cq *cq, struct ib_wc *wc) 846 { 847 struct smb_direct_sendmsg *sendmsg, *sibling; 848 struct smb_direct_transport *t; 849 struct list_head *pos, *prev, *end; 850 851 sendmsg = container_of(wc->wr_cqe, struct smb_direct_sendmsg, cqe); 852 t = sendmsg->transport; 853 854 ksmbd_debug(RDMA, "Send completed. status='%s (%d)', opcode=%d\n", 855 ib_wc_status_msg(wc->status), wc->status, 856 wc->opcode); 857 858 if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_SEND) { 859 pr_err("Send error. status='%s (%d)', opcode=%d\n", 860 ib_wc_status_msg(wc->status), wc->status, 861 wc->opcode); 862 smb_direct_disconnect_rdma_connection(t); 863 } 864 865 if (atomic_dec_and_test(&t->send_pending)) 866 wake_up(&t->wait_send_pending); 867 868 /* iterate and free the list of messages in reverse. the list's head 869 * is invalid. 870 */ 871 for (pos = &sendmsg->list, prev = pos->prev, end = sendmsg->list.next; 872 prev != end; pos = prev, prev = prev->prev) { 873 sibling = container_of(pos, struct smb_direct_sendmsg, list); 874 smb_direct_free_sendmsg(t, sibling); 875 } 876 877 sibling = container_of(pos, struct smb_direct_sendmsg, list); 878 smb_direct_free_sendmsg(t, sibling); 879 } 880 881 static int manage_credits_prior_sending(struct smb_direct_transport *t) 882 { 883 int new_credits; 884 885 spin_lock(&t->lock_new_recv_credits); 886 new_credits = t->new_recv_credits; 887 t->new_recv_credits = 0; 888 spin_unlock(&t->lock_new_recv_credits); 889 890 return new_credits; 891 } 892 893 static int smb_direct_post_send(struct smb_direct_transport *t, 894 struct ib_send_wr *wr) 895 { 896 int ret; 897 898 atomic_inc(&t->send_pending); 899 ret = ib_post_send(t->qp, wr, NULL); 900 if (ret) { 901 pr_err("failed to post send: %d\n", ret); 902 if (atomic_dec_and_test(&t->send_pending)) 903 wake_up(&t->wait_send_pending); 904 smb_direct_disconnect_rdma_connection(t); 905 } 906 return ret; 907 } 908 909 static void smb_direct_send_ctx_init(struct smb_direct_transport *t, 910 struct smb_direct_send_ctx *send_ctx, 911 bool need_invalidate_rkey, 912 unsigned int remote_key) 913 { 914 INIT_LIST_HEAD(&send_ctx->msg_list); 915 send_ctx->wr_cnt = 0; 916 send_ctx->need_invalidate_rkey = need_invalidate_rkey; 917 send_ctx->remote_key = remote_key; 918 } 919 920 static int smb_direct_flush_send_list(struct smb_direct_transport *t, 921 struct smb_direct_send_ctx *send_ctx, 922 bool is_last) 923 { 924 struct smb_direct_sendmsg *first, *last; 925 int ret; 926 927 if (list_empty(&send_ctx->msg_list)) 928 return 0; 929 930 first = list_first_entry(&send_ctx->msg_list, 931 struct smb_direct_sendmsg, 932 list); 933 last = list_last_entry(&send_ctx->msg_list, 934 struct smb_direct_sendmsg, 935 list); 936 937 last->wr.send_flags = IB_SEND_SIGNALED; 938 last->wr.wr_cqe = &last->cqe; 939 if (is_last && 
send_ctx->need_invalidate_rkey) { 940 last->wr.opcode = IB_WR_SEND_WITH_INV; 941 last->wr.ex.invalidate_rkey = send_ctx->remote_key; 942 } 943 944 ret = smb_direct_post_send(t, &first->wr); 945 if (!ret) { 946 smb_direct_send_ctx_init(t, send_ctx, 947 send_ctx->need_invalidate_rkey, 948 send_ctx->remote_key); 949 } else { 950 atomic_add(send_ctx->wr_cnt, &t->send_credits); 951 wake_up(&t->wait_send_credits); 952 list_for_each_entry_safe(first, last, &send_ctx->msg_list, 953 list) { 954 smb_direct_free_sendmsg(t, first); 955 } 956 } 957 return ret; 958 } 959 960 static int wait_for_credits(struct smb_direct_transport *t, 961 wait_queue_head_t *waitq, atomic_t *total_credits, 962 int needed) 963 { 964 int ret; 965 966 do { 967 if (atomic_sub_return(needed, total_credits) >= 0) 968 return 0; 969 970 atomic_add(needed, total_credits); 971 ret = wait_event_interruptible(*waitq, 972 atomic_read(total_credits) >= needed || 973 t->status != SMB_DIRECT_CS_CONNECTED); 974 975 if (t->status != SMB_DIRECT_CS_CONNECTED) 976 return -ENOTCONN; 977 else if (ret < 0) 978 return ret; 979 } while (true); 980 } 981 982 static int wait_for_send_credits(struct smb_direct_transport *t, 983 struct smb_direct_send_ctx *send_ctx) 984 { 985 int ret; 986 987 if (send_ctx && 988 (send_ctx->wr_cnt >= 16 || atomic_read(&t->send_credits) <= 1)) { 989 ret = smb_direct_flush_send_list(t, send_ctx, false); 990 if (ret) 991 return ret; 992 } 993 994 return wait_for_credits(t, &t->wait_send_credits, &t->send_credits, 1); 995 } 996 997 static int wait_for_rw_credits(struct smb_direct_transport *t, int credits) 998 { 999 return wait_for_credits(t, &t->wait_rw_credits, &t->rw_credits, credits); 1000 } 1001 1002 static int calc_rw_credits(struct smb_direct_transport *t, 1003 char *buf, unsigned int len) 1004 { 1005 return DIV_ROUND_UP(get_buf_page_count(buf, len), 1006 t->pages_per_rw_credit); 1007 } 1008 1009 static int smb_direct_create_header(struct smb_direct_transport *t, 1010 int size, int remaining_data_length, 1011 struct smb_direct_sendmsg **sendmsg_out) 1012 { 1013 struct smb_direct_sendmsg *sendmsg; 1014 struct smb_direct_data_transfer *packet; 1015 int header_length; 1016 int ret; 1017 1018 sendmsg = smb_direct_alloc_sendmsg(t); 1019 if (IS_ERR(sendmsg)) 1020 return PTR_ERR(sendmsg); 1021 1022 /* Fill in the packet header */ 1023 packet = (struct smb_direct_data_transfer *)sendmsg->packet; 1024 packet->credits_requested = cpu_to_le16(t->send_credit_target); 1025 packet->credits_granted = cpu_to_le16(manage_credits_prior_sending(t)); 1026 1027 packet->flags = 0; 1028 packet->reserved = 0; 1029 if (!size) 1030 packet->data_offset = 0; 1031 else 1032 packet->data_offset = cpu_to_le32(24); 1033 packet->data_length = cpu_to_le32(size); 1034 packet->remaining_data_length = cpu_to_le32(remaining_data_length); 1035 packet->padding = 0; 1036 1037 ksmbd_debug(RDMA, 1038 "credits_requested=%d credits_granted=%d data_offset=%d data_length=%d remaining_data_length=%d\n", 1039 le16_to_cpu(packet->credits_requested), 1040 le16_to_cpu(packet->credits_granted), 1041 le32_to_cpu(packet->data_offset), 1042 le32_to_cpu(packet->data_length), 1043 le32_to_cpu(packet->remaining_data_length)); 1044 1045 /* Map the packet to DMA */ 1046 header_length = sizeof(struct smb_direct_data_transfer); 1047 /* If this is a packet without payload, don't send padding */ 1048 if (!size) 1049 header_length = 1050 offsetof(struct smb_direct_data_transfer, padding); 1051 1052 sendmsg->sge[0].addr = ib_dma_map_single(t->cm_id->device, 1053 (void *)packet, 
1054 header_length, 1055 DMA_TO_DEVICE); 1056 ret = ib_dma_mapping_error(t->cm_id->device, sendmsg->sge[0].addr); 1057 if (ret) { 1058 smb_direct_free_sendmsg(t, sendmsg); 1059 return ret; 1060 } 1061 1062 sendmsg->num_sge = 1; 1063 sendmsg->sge[0].length = header_length; 1064 sendmsg->sge[0].lkey = t->pd->local_dma_lkey; 1065 1066 *sendmsg_out = sendmsg; 1067 return 0; 1068 } 1069 1070 static int get_sg_list(void *buf, int size, struct scatterlist *sg_list, int nentries) 1071 { 1072 bool high = is_vmalloc_addr(buf); 1073 struct page *page; 1074 int offset, len; 1075 int i = 0; 1076 1077 if (size <= 0 || nentries < get_buf_page_count(buf, size)) 1078 return -EINVAL; 1079 1080 offset = offset_in_page(buf); 1081 buf -= offset; 1082 while (size > 0) { 1083 len = min_t(int, PAGE_SIZE - offset, size); 1084 if (high) 1085 page = vmalloc_to_page(buf); 1086 else 1087 page = kmap_to_page(buf); 1088 1089 if (!sg_list) 1090 return -EINVAL; 1091 sg_set_page(sg_list, page, len, offset); 1092 sg_list = sg_next(sg_list); 1093 1094 buf += PAGE_SIZE; 1095 size -= len; 1096 offset = 0; 1097 i++; 1098 } 1099 return i; 1100 } 1101 1102 static int get_mapped_sg_list(struct ib_device *device, void *buf, int size, 1103 struct scatterlist *sg_list, int nentries, 1104 enum dma_data_direction dir) 1105 { 1106 int npages; 1107 1108 npages = get_sg_list(buf, size, sg_list, nentries); 1109 if (npages < 0) 1110 return -EINVAL; 1111 return ib_dma_map_sg(device, sg_list, npages, dir); 1112 } 1113 1114 static int post_sendmsg(struct smb_direct_transport *t, 1115 struct smb_direct_send_ctx *send_ctx, 1116 struct smb_direct_sendmsg *msg) 1117 { 1118 int i; 1119 1120 for (i = 0; i < msg->num_sge; i++) 1121 ib_dma_sync_single_for_device(t->cm_id->device, 1122 msg->sge[i].addr, msg->sge[i].length, 1123 DMA_TO_DEVICE); 1124 1125 msg->cqe.done = send_done; 1126 msg->wr.opcode = IB_WR_SEND; 1127 msg->wr.sg_list = &msg->sge[0]; 1128 msg->wr.num_sge = msg->num_sge; 1129 msg->wr.next = NULL; 1130 1131 if (send_ctx) { 1132 msg->wr.wr_cqe = NULL; 1133 msg->wr.send_flags = 0; 1134 if (!list_empty(&send_ctx->msg_list)) { 1135 struct smb_direct_sendmsg *last; 1136 1137 last = list_last_entry(&send_ctx->msg_list, 1138 struct smb_direct_sendmsg, 1139 list); 1140 last->wr.next = &msg->wr; 1141 } 1142 list_add_tail(&msg->list, &send_ctx->msg_list); 1143 send_ctx->wr_cnt++; 1144 return 0; 1145 } 1146 1147 msg->wr.wr_cqe = &msg->cqe; 1148 msg->wr.send_flags = IB_SEND_SIGNALED; 1149 return smb_direct_post_send(t, &msg->wr); 1150 } 1151 1152 static int smb_direct_post_send_data(struct smb_direct_transport *t, 1153 struct smb_direct_send_ctx *send_ctx, 1154 struct kvec *iov, int niov, 1155 int remaining_data_length) 1156 { 1157 int i, j, ret; 1158 struct smb_direct_sendmsg *msg; 1159 int data_length; 1160 struct scatterlist sg[SMB_DIRECT_MAX_SEND_SGES - 1]; 1161 1162 ret = wait_for_send_credits(t, send_ctx); 1163 if (ret) 1164 return ret; 1165 1166 data_length = 0; 1167 for (i = 0; i < niov; i++) 1168 data_length += iov[i].iov_len; 1169 1170 ret = smb_direct_create_header(t, data_length, remaining_data_length, 1171 &msg); 1172 if (ret) { 1173 atomic_inc(&t->send_credits); 1174 return ret; 1175 } 1176 1177 for (i = 0; i < niov; i++) { 1178 struct ib_sge *sge; 1179 int sg_cnt; 1180 1181 sg_init_table(sg, SMB_DIRECT_MAX_SEND_SGES - 1); 1182 sg_cnt = get_mapped_sg_list(t->cm_id->device, 1183 iov[i].iov_base, iov[i].iov_len, 1184 sg, SMB_DIRECT_MAX_SEND_SGES - 1, 1185 DMA_TO_DEVICE); 1186 if (sg_cnt <= 0) { 1187 pr_err("failed to map buffer\n"); 1188 
			ret = -ENOMEM;
			goto err;
		} else if (sg_cnt + msg->num_sge > SMB_DIRECT_MAX_SEND_SGES) {
			pr_err("buffer not fitted into sges\n");
			ret = -E2BIG;
			ib_dma_unmap_sg(t->cm_id->device, sg, sg_cnt,
					DMA_TO_DEVICE);
			goto err;
		}

		for (j = 0; j < sg_cnt; j++) {
			sge = &msg->sge[msg->num_sge];
			sge->addr = sg_dma_address(&sg[j]);
			sge->length = sg_dma_len(&sg[j]);
			sge->lkey = t->pd->local_dma_lkey;
			msg->num_sge++;
		}
	}

	ret = post_sendmsg(t, send_ctx, msg);
	if (ret)
		goto err;
	return 0;
err:
	smb_direct_free_sendmsg(t, msg);
	atomic_inc(&t->send_credits);
	return ret;
}

static int smb_direct_writev(struct ksmbd_transport *t,
			     struct kvec *iov, int niovs, int buflen,
			     bool need_invalidate, unsigned int remote_key)
{
	struct smb_direct_transport *st = smb_trans_direct_transfort(t);
	size_t remaining_data_length;
	size_t iov_idx;
	size_t iov_ofs;
	size_t max_iov_size = st->max_send_size -
			sizeof(struct smb_direct_data_transfer);
	int ret;
	struct smb_direct_send_ctx send_ctx;
	int error = 0;

	if (st->status != SMB_DIRECT_CS_CONNECTED)
		return -ENOTCONN;

	//FIXME: skip RFC1002 header..
	if (WARN_ON_ONCE(niovs <= 1 || iov[0].iov_len != 4))
		return -EINVAL;
	buflen -= 4;
	iov_idx = 1;
	iov_ofs = 0;

	remaining_data_length = buflen;
	ksmbd_debug(RDMA, "Sending smb (RDMA): smb_len=%u\n", buflen);

	smb_direct_send_ctx_init(st, &send_ctx, need_invalidate, remote_key);
	while (remaining_data_length) {
		struct kvec vecs[SMB_DIRECT_MAX_SEND_SGES - 1]; /* minus smbdirect hdr */
		size_t possible_bytes = max_iov_size;
		size_t possible_vecs;
		size_t bytes = 0;
		size_t nvecs = 0;

		/*
		 * For the last message, remaining_data_length should
		 * have been 0 already!
		 */
		if (WARN_ON_ONCE(iov_idx >= niovs)) {
			error = -EINVAL;
			goto done;
		}

		/*
		 * We have 2 factors which limit the arguments we pass
		 * to smb_direct_post_send_data():
		 *
		 * 1. The number of supported sges for the send,
		 *    while one is reserved for the smbdirect header.
		 *    And we currently need one SGE per page.
		 * 2. The number of negotiated payload bytes per send.
		 */
		possible_vecs = min_t(size_t, ARRAY_SIZE(vecs), niovs - iov_idx);

		while (iov_idx < niovs && possible_vecs && possible_bytes) {
			struct kvec *v = &vecs[nvecs];
			int page_count;

			v->iov_base = ((u8 *)iov[iov_idx].iov_base) + iov_ofs;
			v->iov_len = min_t(size_t,
					   iov[iov_idx].iov_len - iov_ofs,
					   possible_bytes);
			page_count = get_buf_page_count(v->iov_base, v->iov_len);
			if (page_count > possible_vecs) {
				/*
				 * If the number of pages in the buffer
				 * is too large (because we currently require
				 * one SGE per page), we need to limit the
				 * length.
				 *
				 * We know possible_vecs is at least 1,
				 * so we always keep the first page.
				 *
				 * We need to calculate the number of extra
				 * pages (epages) we can also keep.
				 *
				 * We calculate the number of bytes in the
				 * first page (fplen); this should never be
				 * larger than v->iov_len because page_count is
				 * at least 2, but adding a limitation feels
				 * better.
				 *
				 * Then we calculate the number of bytes (elen)
				 * we can keep for the extra pages.
1302 */ 1303 size_t epages = possible_vecs - 1; 1304 size_t fpofs = offset_in_page(v->iov_base); 1305 size_t fplen = min_t(size_t, PAGE_SIZE - fpofs, v->iov_len); 1306 size_t elen = min_t(size_t, v->iov_len - fplen, epages*PAGE_SIZE); 1307 1308 v->iov_len = fplen + elen; 1309 page_count = get_buf_page_count(v->iov_base, v->iov_len); 1310 if (WARN_ON_ONCE(page_count > possible_vecs)) { 1311 /* 1312 * Something went wrong in the above 1313 * logic... 1314 */ 1315 error = -EINVAL; 1316 goto done; 1317 } 1318 } 1319 possible_vecs -= page_count; 1320 nvecs += 1; 1321 possible_bytes -= v->iov_len; 1322 bytes += v->iov_len; 1323 1324 iov_ofs += v->iov_len; 1325 if (iov_ofs >= iov[iov_idx].iov_len) { 1326 iov_idx += 1; 1327 iov_ofs = 0; 1328 } 1329 } 1330 1331 remaining_data_length -= bytes; 1332 1333 ret = smb_direct_post_send_data(st, &send_ctx, 1334 vecs, nvecs, 1335 remaining_data_length); 1336 if (unlikely(ret)) { 1337 error = ret; 1338 goto done; 1339 } 1340 } 1341 1342 done: 1343 ret = smb_direct_flush_send_list(st, &send_ctx, true); 1344 if (unlikely(!ret && error)) 1345 ret = error; 1346 1347 /* 1348 * As an optimization, we don't wait for individual I/O to finish 1349 * before sending the next one. 1350 * Send them all and wait for pending send count to get to 0 1351 * that means all the I/Os have been out and we are good to return 1352 */ 1353 1354 wait_event(st->wait_send_pending, 1355 atomic_read(&st->send_pending) == 0); 1356 return ret; 1357 } 1358 1359 static void smb_direct_free_rdma_rw_msg(struct smb_direct_transport *t, 1360 struct smb_direct_rdma_rw_msg *msg, 1361 enum dma_data_direction dir) 1362 { 1363 rdma_rw_ctx_destroy(&msg->rw_ctx, t->qp, t->qp->port, 1364 msg->sgt.sgl, msg->sgt.nents, dir); 1365 sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE); 1366 kfree(msg); 1367 } 1368 1369 static void read_write_done(struct ib_cq *cq, struct ib_wc *wc, 1370 enum dma_data_direction dir) 1371 { 1372 struct smb_direct_rdma_rw_msg *msg = container_of(wc->wr_cqe, 1373 struct smb_direct_rdma_rw_msg, cqe); 1374 struct smb_direct_transport *t = msg->t; 1375 1376 if (wc->status != IB_WC_SUCCESS) { 1377 msg->status = -EIO; 1378 pr_err("read/write error. 
opcode = %d, status = %s(%d)\n", 1379 wc->opcode, ib_wc_status_msg(wc->status), wc->status); 1380 if (wc->status != IB_WC_WR_FLUSH_ERR) 1381 smb_direct_disconnect_rdma_connection(t); 1382 } 1383 1384 complete(msg->completion); 1385 } 1386 1387 static void read_done(struct ib_cq *cq, struct ib_wc *wc) 1388 { 1389 read_write_done(cq, wc, DMA_FROM_DEVICE); 1390 } 1391 1392 static void write_done(struct ib_cq *cq, struct ib_wc *wc) 1393 { 1394 read_write_done(cq, wc, DMA_TO_DEVICE); 1395 } 1396 1397 static int smb_direct_rdma_xmit(struct smb_direct_transport *t, 1398 void *buf, int buf_len, 1399 struct smb2_buffer_desc_v1 *desc, 1400 unsigned int desc_len, 1401 bool is_read) 1402 { 1403 struct smb_direct_rdma_rw_msg *msg, *next_msg; 1404 int i, ret; 1405 DECLARE_COMPLETION_ONSTACK(completion); 1406 struct ib_send_wr *first_wr; 1407 LIST_HEAD(msg_list); 1408 char *desc_buf; 1409 int credits_needed; 1410 unsigned int desc_buf_len, desc_num = 0; 1411 1412 if (t->status != SMB_DIRECT_CS_CONNECTED) 1413 return -ENOTCONN; 1414 1415 if (buf_len > t->max_rdma_rw_size) 1416 return -EINVAL; 1417 1418 /* calculate needed credits */ 1419 credits_needed = 0; 1420 desc_buf = buf; 1421 for (i = 0; i < desc_len / sizeof(*desc); i++) { 1422 if (!buf_len) 1423 break; 1424 1425 desc_buf_len = le32_to_cpu(desc[i].length); 1426 if (!desc_buf_len) 1427 return -EINVAL; 1428 1429 if (desc_buf_len > buf_len) { 1430 desc_buf_len = buf_len; 1431 desc[i].length = cpu_to_le32(desc_buf_len); 1432 buf_len = 0; 1433 } 1434 1435 credits_needed += calc_rw_credits(t, desc_buf, desc_buf_len); 1436 desc_buf += desc_buf_len; 1437 buf_len -= desc_buf_len; 1438 desc_num++; 1439 } 1440 1441 ksmbd_debug(RDMA, "RDMA %s, len %#x, needed credits %#x\n", 1442 str_read_write(is_read), buf_len, credits_needed); 1443 1444 ret = wait_for_rw_credits(t, credits_needed); 1445 if (ret < 0) 1446 return ret; 1447 1448 /* build rdma_rw_ctx for each descriptor */ 1449 desc_buf = buf; 1450 for (i = 0; i < desc_num; i++) { 1451 msg = kzalloc(struct_size(msg, sg_list, SG_CHUNK_SIZE), 1452 KSMBD_DEFAULT_GFP); 1453 if (!msg) { 1454 ret = -ENOMEM; 1455 goto out; 1456 } 1457 1458 desc_buf_len = le32_to_cpu(desc[i].length); 1459 1460 msg->t = t; 1461 msg->cqe.done = is_read ? read_done : write_done; 1462 msg->completion = &completion; 1463 1464 msg->sgt.sgl = &msg->sg_list[0]; 1465 ret = sg_alloc_table_chained(&msg->sgt, 1466 get_buf_page_count(desc_buf, desc_buf_len), 1467 msg->sg_list, SG_CHUNK_SIZE); 1468 if (ret) { 1469 kfree(msg); 1470 ret = -ENOMEM; 1471 goto out; 1472 } 1473 1474 ret = get_sg_list(desc_buf, desc_buf_len, 1475 msg->sgt.sgl, msg->sgt.orig_nents); 1476 if (ret < 0) { 1477 sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE); 1478 kfree(msg); 1479 goto out; 1480 } 1481 1482 ret = rdma_rw_ctx_init(&msg->rw_ctx, t->qp, t->qp->port, 1483 msg->sgt.sgl, 1484 get_buf_page_count(desc_buf, desc_buf_len), 1485 0, 1486 le64_to_cpu(desc[i].offset), 1487 le32_to_cpu(desc[i].token), 1488 is_read ? 
DMA_FROM_DEVICE : DMA_TO_DEVICE); 1489 if (ret < 0) { 1490 pr_err("failed to init rdma_rw_ctx: %d\n", ret); 1491 sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE); 1492 kfree(msg); 1493 goto out; 1494 } 1495 1496 list_add_tail(&msg->list, &msg_list); 1497 desc_buf += desc_buf_len; 1498 } 1499 1500 /* concatenate work requests of rdma_rw_ctxs */ 1501 first_wr = NULL; 1502 list_for_each_entry_reverse(msg, &msg_list, list) { 1503 first_wr = rdma_rw_ctx_wrs(&msg->rw_ctx, t->qp, t->qp->port, 1504 &msg->cqe, first_wr); 1505 } 1506 1507 ret = ib_post_send(t->qp, first_wr, NULL); 1508 if (ret) { 1509 pr_err("failed to post send wr for RDMA R/W: %d\n", ret); 1510 goto out; 1511 } 1512 1513 msg = list_last_entry(&msg_list, struct smb_direct_rdma_rw_msg, list); 1514 wait_for_completion(&completion); 1515 ret = msg->status; 1516 out: 1517 list_for_each_entry_safe(msg, next_msg, &msg_list, list) { 1518 list_del(&msg->list); 1519 smb_direct_free_rdma_rw_msg(t, msg, 1520 is_read ? DMA_FROM_DEVICE : DMA_TO_DEVICE); 1521 } 1522 atomic_add(credits_needed, &t->rw_credits); 1523 wake_up(&t->wait_rw_credits); 1524 return ret; 1525 } 1526 1527 static int smb_direct_rdma_write(struct ksmbd_transport *t, 1528 void *buf, unsigned int buflen, 1529 struct smb2_buffer_desc_v1 *desc, 1530 unsigned int desc_len) 1531 { 1532 return smb_direct_rdma_xmit(smb_trans_direct_transfort(t), buf, buflen, 1533 desc, desc_len, false); 1534 } 1535 1536 static int smb_direct_rdma_read(struct ksmbd_transport *t, 1537 void *buf, unsigned int buflen, 1538 struct smb2_buffer_desc_v1 *desc, 1539 unsigned int desc_len) 1540 { 1541 return smb_direct_rdma_xmit(smb_trans_direct_transfort(t), buf, buflen, 1542 desc, desc_len, true); 1543 } 1544 1545 static void smb_direct_disconnect(struct ksmbd_transport *t) 1546 { 1547 struct smb_direct_transport *st = smb_trans_direct_transfort(t); 1548 1549 ksmbd_debug(RDMA, "Disconnecting cm_id=%p\n", st->cm_id); 1550 1551 smb_direct_disconnect_rdma_work(&st->disconnect_work); 1552 wait_event_interruptible(st->wait_status, 1553 st->status == SMB_DIRECT_CS_DISCONNECTED); 1554 free_transport(st); 1555 } 1556 1557 static void smb_direct_shutdown(struct ksmbd_transport *t) 1558 { 1559 struct smb_direct_transport *st = smb_trans_direct_transfort(t); 1560 1561 ksmbd_debug(RDMA, "smb-direct shutdown cm_id=%p\n", st->cm_id); 1562 1563 smb_direct_disconnect_rdma_work(&st->disconnect_work); 1564 } 1565 1566 static int smb_direct_cm_handler(struct rdma_cm_id *cm_id, 1567 struct rdma_cm_event *event) 1568 { 1569 struct smb_direct_transport *t = cm_id->context; 1570 1571 ksmbd_debug(RDMA, "RDMA CM event. cm_id=%p event=%s (%d)\n", 1572 cm_id, rdma_event_msg(event->event), event->event); 1573 1574 switch (event->event) { 1575 case RDMA_CM_EVENT_ESTABLISHED: { 1576 t->status = SMB_DIRECT_CS_CONNECTED; 1577 wake_up_interruptible(&t->wait_status); 1578 break; 1579 } 1580 case RDMA_CM_EVENT_DEVICE_REMOVAL: 1581 case RDMA_CM_EVENT_DISCONNECTED: { 1582 ib_drain_qp(t->qp); 1583 1584 t->status = SMB_DIRECT_CS_DISCONNECTED; 1585 wake_up_interruptible(&t->wait_status); 1586 wake_up_interruptible(&t->wait_reassembly_queue); 1587 wake_up(&t->wait_send_credits); 1588 break; 1589 } 1590 case RDMA_CM_EVENT_CONNECT_ERROR: { 1591 t->status = SMB_DIRECT_CS_DISCONNECTED; 1592 wake_up_interruptible(&t->wait_status); 1593 break; 1594 } 1595 default: 1596 pr_err("Unexpected RDMA CM event. 
cm_id=%p, event=%s (%d)\n", 1597 cm_id, rdma_event_msg(event->event), 1598 event->event); 1599 break; 1600 } 1601 return 0; 1602 } 1603 1604 static void smb_direct_qpair_handler(struct ib_event *event, void *context) 1605 { 1606 struct smb_direct_transport *t = context; 1607 1608 ksmbd_debug(RDMA, "Received QP event. cm_id=%p, event=%s (%d)\n", 1609 t->cm_id, ib_event_msg(event->event), event->event); 1610 1611 switch (event->event) { 1612 case IB_EVENT_CQ_ERR: 1613 case IB_EVENT_QP_FATAL: 1614 smb_direct_disconnect_rdma_connection(t); 1615 break; 1616 default: 1617 break; 1618 } 1619 } 1620 1621 static int smb_direct_send_negotiate_response(struct smb_direct_transport *t, 1622 int failed) 1623 { 1624 struct smb_direct_sendmsg *sendmsg; 1625 struct smb_direct_negotiate_resp *resp; 1626 int ret; 1627 1628 sendmsg = smb_direct_alloc_sendmsg(t); 1629 if (IS_ERR(sendmsg)) 1630 return -ENOMEM; 1631 1632 resp = (struct smb_direct_negotiate_resp *)sendmsg->packet; 1633 if (failed) { 1634 memset(resp, 0, sizeof(*resp)); 1635 resp->min_version = cpu_to_le16(0x0100); 1636 resp->max_version = cpu_to_le16(0x0100); 1637 resp->status = STATUS_NOT_SUPPORTED; 1638 } else { 1639 resp->status = STATUS_SUCCESS; 1640 resp->min_version = SMB_DIRECT_VERSION_LE; 1641 resp->max_version = SMB_DIRECT_VERSION_LE; 1642 resp->negotiated_version = SMB_DIRECT_VERSION_LE; 1643 resp->reserved = 0; 1644 resp->credits_requested = 1645 cpu_to_le16(t->send_credit_target); 1646 resp->credits_granted = cpu_to_le16(manage_credits_prior_sending(t)); 1647 resp->max_readwrite_size = cpu_to_le32(t->max_rdma_rw_size); 1648 resp->preferred_send_size = cpu_to_le32(t->max_send_size); 1649 resp->max_receive_size = cpu_to_le32(t->max_recv_size); 1650 resp->max_fragmented_size = 1651 cpu_to_le32(t->max_fragmented_recv_size); 1652 } 1653 1654 sendmsg->sge[0].addr = ib_dma_map_single(t->cm_id->device, 1655 (void *)resp, sizeof(*resp), 1656 DMA_TO_DEVICE); 1657 ret = ib_dma_mapping_error(t->cm_id->device, sendmsg->sge[0].addr); 1658 if (ret) { 1659 smb_direct_free_sendmsg(t, sendmsg); 1660 return ret; 1661 } 1662 1663 sendmsg->num_sge = 1; 1664 sendmsg->sge[0].length = sizeof(*resp); 1665 sendmsg->sge[0].lkey = t->pd->local_dma_lkey; 1666 1667 ret = post_sendmsg(t, NULL, sendmsg); 1668 if (ret) { 1669 smb_direct_free_sendmsg(t, sendmsg); 1670 return ret; 1671 } 1672 1673 wait_event(t->wait_send_pending, 1674 atomic_read(&t->send_pending) == 0); 1675 return 0; 1676 } 1677 1678 static int smb_direct_accept_client(struct smb_direct_transport *t) 1679 { 1680 struct rdma_conn_param conn_param; 1681 struct ib_port_immutable port_immutable; 1682 u32 ird_ord_hdr[2]; 1683 int ret; 1684 1685 memset(&conn_param, 0, sizeof(conn_param)); 1686 conn_param.initiator_depth = min_t(u8, t->cm_id->device->attrs.max_qp_rd_atom, 1687 SMB_DIRECT_CM_INITIATOR_DEPTH); 1688 conn_param.responder_resources = 0; 1689 1690 t->cm_id->device->ops.get_port_immutable(t->cm_id->device, 1691 t->cm_id->port_num, 1692 &port_immutable); 1693 if (port_immutable.core_cap_flags & RDMA_CORE_PORT_IWARP) { 1694 ird_ord_hdr[0] = conn_param.responder_resources; 1695 ird_ord_hdr[1] = 1; 1696 conn_param.private_data = ird_ord_hdr; 1697 conn_param.private_data_len = sizeof(ird_ord_hdr); 1698 } else { 1699 conn_param.private_data = NULL; 1700 conn_param.private_data_len = 0; 1701 } 1702 conn_param.retry_count = SMB_DIRECT_CM_RETRY; 1703 conn_param.rnr_retry_count = SMB_DIRECT_CM_RNR_RETRY; 1704 conn_param.flow_control = 0; 1705 1706 ret = rdma_accept(t->cm_id, &conn_param); 1707 if (ret) { 
1708 pr_err("error at rdma_accept: %d\n", ret); 1709 return ret; 1710 } 1711 return 0; 1712 } 1713 1714 static int smb_direct_prepare_negotiation(struct smb_direct_transport *t) 1715 { 1716 int ret; 1717 struct smb_direct_recvmsg *recvmsg; 1718 1719 recvmsg = get_free_recvmsg(t); 1720 if (!recvmsg) 1721 return -ENOMEM; 1722 recvmsg->type = SMB_DIRECT_MSG_NEGOTIATE_REQ; 1723 1724 ret = smb_direct_post_recv(t, recvmsg); 1725 if (ret) { 1726 pr_err("Can't post recv: %d\n", ret); 1727 goto out_err; 1728 } 1729 1730 t->negotiation_requested = false; 1731 ret = smb_direct_accept_client(t); 1732 if (ret) { 1733 pr_err("Can't accept client\n"); 1734 goto out_err; 1735 } 1736 1737 smb_direct_post_recv_credits(&t->post_recv_credits_work.work); 1738 return 0; 1739 out_err: 1740 put_recvmsg(t, recvmsg); 1741 return ret; 1742 } 1743 1744 static unsigned int smb_direct_get_max_fr_pages(struct smb_direct_transport *t) 1745 { 1746 return min_t(unsigned int, 1747 t->cm_id->device->attrs.max_fast_reg_page_list_len, 1748 256); 1749 } 1750 1751 static int smb_direct_init_params(struct smb_direct_transport *t, 1752 struct ib_qp_cap *cap) 1753 { 1754 struct ib_device *device = t->cm_id->device; 1755 int max_send_sges, max_rw_wrs, max_send_wrs; 1756 unsigned int max_sge_per_wr, wrs_per_credit; 1757 1758 /* need 3 more sge. because a SMB_DIRECT header, SMB2 header, 1759 * SMB2 response could be mapped. 1760 */ 1761 t->max_send_size = smb_direct_max_send_size; 1762 max_send_sges = DIV_ROUND_UP(t->max_send_size, PAGE_SIZE) + 3; 1763 if (max_send_sges > SMB_DIRECT_MAX_SEND_SGES) { 1764 pr_err("max_send_size %d is too large\n", t->max_send_size); 1765 return -EINVAL; 1766 } 1767 1768 /* Calculate the number of work requests for RDMA R/W. 1769 * The maximum number of pages which can be registered 1770 * with one Memory region can be transferred with one 1771 * R/W credit. And at least 4 work requests for each credit 1772 * are needed for MR registration, RDMA R/W, local & remote 1773 * MR invalidation. 
	 */
	t->max_rdma_rw_size = smb_direct_max_read_write_size;
	t->pages_per_rw_credit = smb_direct_get_max_fr_pages(t);
	t->max_rw_credits = DIV_ROUND_UP(t->max_rdma_rw_size,
					 (t->pages_per_rw_credit - 1) *
					 PAGE_SIZE);

	max_sge_per_wr = min_t(unsigned int, device->attrs.max_send_sge,
			       device->attrs.max_sge_rd);
	max_sge_per_wr = max_t(unsigned int, max_sge_per_wr,
			       max_send_sges);
	wrs_per_credit = max_t(unsigned int, 4,
			       DIV_ROUND_UP(t->pages_per_rw_credit,
					    max_sge_per_wr) + 1);
	max_rw_wrs = t->max_rw_credits * wrs_per_credit;

	max_send_wrs = smb_direct_send_credit_target + max_rw_wrs;
	if (max_send_wrs > device->attrs.max_cqe ||
	    max_send_wrs > device->attrs.max_qp_wr) {
		pr_err("consider lowering send_credit_target = %d\n",
		       smb_direct_send_credit_target);
		pr_err("Possible CQE overrun, device reporting max_cqe %d max_qp_wr %d\n",
		       device->attrs.max_cqe, device->attrs.max_qp_wr);
		return -EINVAL;
	}

	if (smb_direct_receive_credit_max > device->attrs.max_cqe ||
	    smb_direct_receive_credit_max > device->attrs.max_qp_wr) {
		pr_err("consider lowering receive_credit_max = %d\n",
		       smb_direct_receive_credit_max);
		pr_err("Possible CQE overrun, device reporting max_cqe %d max_qp_wr %d\n",
		       device->attrs.max_cqe, device->attrs.max_qp_wr);
		return -EINVAL;
	}

	if (device->attrs.max_send_sge < SMB_DIRECT_MAX_SEND_SGES) {
		pr_err("warning: device max_send_sge = %d too small\n",
		       device->attrs.max_send_sge);
		return -EINVAL;
	}
	if (device->attrs.max_recv_sge < SMB_DIRECT_MAX_RECV_SGES) {
		pr_err("warning: device max_recv_sge = %d too small\n",
		       device->attrs.max_recv_sge);
		return -EINVAL;
	}

	t->recv_credits = 0;
	t->count_avail_recvmsg = 0;

	t->recv_credit_max = smb_direct_receive_credit_max;
	t->recv_credit_target = 10;
	t->new_recv_credits = 0;

	t->send_credit_target = smb_direct_send_credit_target;
	atomic_set(&t->send_credits, 0);
	atomic_set(&t->rw_credits, t->max_rw_credits);

	t->max_send_size = smb_direct_max_send_size;
	t->max_recv_size = smb_direct_max_receive_size;
	t->max_fragmented_recv_size = smb_direct_max_fragmented_recv_size;

	cap->max_send_wr = max_send_wrs;
	cap->max_recv_wr = t->recv_credit_max;
	cap->max_send_sge = SMB_DIRECT_MAX_SEND_SGES;
	cap->max_recv_sge = SMB_DIRECT_MAX_RECV_SGES;
	cap->max_inline_data = 0;
	cap->max_rdma_ctxs = t->max_rw_credits;
	return 0;
}

static void smb_direct_destroy_pools(struct smb_direct_transport *t)
{
	struct smb_direct_recvmsg *recvmsg;

	while ((recvmsg = get_free_recvmsg(t)))
		mempool_free(recvmsg, t->recvmsg_mempool);

	mempool_destroy(t->recvmsg_mempool);
	t->recvmsg_mempool = NULL;

	kmem_cache_destroy(t->recvmsg_cache);
	t->recvmsg_cache = NULL;

	mempool_destroy(t->sendmsg_mempool);
	t->sendmsg_mempool = NULL;

	kmem_cache_destroy(t->sendmsg_cache);
	t->sendmsg_cache = NULL;
}

static int smb_direct_create_pools(struct smb_direct_transport *t)
{
	char name[80];
	int i;
	struct smb_direct_recvmsg *recvmsg;

	snprintf(name, sizeof(name), "smb_direct_rqst_pool_%p", t);
	t->sendmsg_cache = kmem_cache_create(name,
					     sizeof(struct smb_direct_sendmsg) +
					     sizeof(struct smb_direct_negotiate_resp),
					     0, SLAB_HWCACHE_ALIGN, NULL);
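	/*
	 * Sizing note (descriptive): sendmsg->packet[] only ever holds a
	 * header built in place (the negotiate response is the largest of
	 * these); bulk payload is not copied into the sendmsg but mapped as
	 * additional SGEs in smb_direct_post_send_data().
	 */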
if (!t->sendmsg_cache) 1876 return -ENOMEM; 1877 1878 t->sendmsg_mempool = mempool_create(t->send_credit_target, 1879 mempool_alloc_slab, mempool_free_slab, 1880 t->sendmsg_cache); 1881 if (!t->sendmsg_mempool) 1882 goto err; 1883 1884 snprintf(name, sizeof(name), "smb_direct_resp_%p", t); 1885 t->recvmsg_cache = kmem_cache_create(name, 1886 sizeof(struct smb_direct_recvmsg) + 1887 t->max_recv_size, 1888 0, SLAB_HWCACHE_ALIGN, NULL); 1889 if (!t->recvmsg_cache) 1890 goto err; 1891 1892 t->recvmsg_mempool = 1893 mempool_create(t->recv_credit_max, mempool_alloc_slab, 1894 mempool_free_slab, t->recvmsg_cache); 1895 if (!t->recvmsg_mempool) 1896 goto err; 1897 1898 INIT_LIST_HEAD(&t->recvmsg_queue); 1899 1900 for (i = 0; i < t->recv_credit_max; i++) { 1901 recvmsg = mempool_alloc(t->recvmsg_mempool, KSMBD_DEFAULT_GFP); 1902 if (!recvmsg) 1903 goto err; 1904 recvmsg->transport = t; 1905 recvmsg->sge.length = 0; 1906 list_add(&recvmsg->list, &t->recvmsg_queue); 1907 } 1908 t->count_avail_recvmsg = t->recv_credit_max; 1909 1910 return 0; 1911 err: 1912 smb_direct_destroy_pools(t); 1913 return -ENOMEM; 1914 } 1915 1916 static int smb_direct_create_qpair(struct smb_direct_transport *t, 1917 struct ib_qp_cap *cap) 1918 { 1919 int ret; 1920 struct ib_qp_init_attr qp_attr; 1921 int pages_per_rw; 1922 1923 t->pd = ib_alloc_pd(t->cm_id->device, 0); 1924 if (IS_ERR(t->pd)) { 1925 pr_err("Can't create RDMA PD\n"); 1926 ret = PTR_ERR(t->pd); 1927 t->pd = NULL; 1928 return ret; 1929 } 1930 1931 t->send_cq = ib_alloc_cq(t->cm_id->device, t, 1932 smb_direct_send_credit_target + cap->max_rdma_ctxs, 1933 0, IB_POLL_WORKQUEUE); 1934 if (IS_ERR(t->send_cq)) { 1935 pr_err("Can't create RDMA send CQ\n"); 1936 ret = PTR_ERR(t->send_cq); 1937 t->send_cq = NULL; 1938 goto err; 1939 } 1940 1941 t->recv_cq = ib_alloc_cq(t->cm_id->device, t, 1942 t->recv_credit_max, 0, IB_POLL_WORKQUEUE); 1943 if (IS_ERR(t->recv_cq)) { 1944 pr_err("Can't create RDMA recv CQ\n"); 1945 ret = PTR_ERR(t->recv_cq); 1946 t->recv_cq = NULL; 1947 goto err; 1948 } 1949 1950 memset(&qp_attr, 0, sizeof(qp_attr)); 1951 qp_attr.event_handler = smb_direct_qpair_handler; 1952 qp_attr.qp_context = t; 1953 qp_attr.cap = *cap; 1954 qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR; 1955 qp_attr.qp_type = IB_QPT_RC; 1956 qp_attr.send_cq = t->send_cq; 1957 qp_attr.recv_cq = t->recv_cq; 1958 qp_attr.port_num = ~0; 1959 1960 ret = rdma_create_qp(t->cm_id, t->pd, &qp_attr); 1961 if (ret) { 1962 pr_err("Can't create RDMA QP: %d\n", ret); 1963 goto err; 1964 } 1965 1966 t->qp = t->cm_id->qp; 1967 t->cm_id->event_handler = smb_direct_cm_handler; 1968 1969 pages_per_rw = DIV_ROUND_UP(t->max_rdma_rw_size, PAGE_SIZE) + 1; 1970 if (pages_per_rw > t->cm_id->device->attrs.max_sgl_rd) { 1971 ret = ib_mr_pool_init(t->qp, &t->qp->rdma_mrs, 1972 t->max_rw_credits, IB_MR_TYPE_MEM_REG, 1973 t->pages_per_rw_credit, 0); 1974 if (ret) { 1975 pr_err("failed to init mr pool count %d pages %d\n", 1976 t->max_rw_credits, t->pages_per_rw_credit); 1977 goto err; 1978 } 1979 } 1980 1981 return 0; 1982 err: 1983 if (t->qp) { 1984 t->qp = NULL; 1985 rdma_destroy_qp(t->cm_id); 1986 } 1987 if (t->recv_cq) { 1988 ib_destroy_cq(t->recv_cq); 1989 t->recv_cq = NULL; 1990 } 1991 if (t->send_cq) { 1992 ib_destroy_cq(t->send_cq); 1993 t->send_cq = NULL; 1994 } 1995 if (t->pd) { 1996 ib_dealloc_pd(t->pd); 1997 t->pd = NULL; 1998 } 1999 return ret; 2000 } 2001 2002 static int smb_direct_prepare(struct ksmbd_transport *t) 2003 { 2004 struct smb_direct_transport *st = smb_trans_direct_transfort(t); 2005 struct 
static int smb_direct_prepare(struct ksmbd_transport *t)
{
	struct smb_direct_transport *st = smb_trans_direct_transfort(t);
	struct smb_direct_recvmsg *recvmsg;
	struct smb_direct_negotiate_req *req;
	int ret;

	ksmbd_debug(RDMA, "Waiting for SMB_DIRECT negotiate request\n");
	ret = wait_event_interruptible_timeout(st->wait_status,
					       st->negotiation_requested ||
					       st->status == SMB_DIRECT_CS_DISCONNECTED,
					       SMB_DIRECT_NEGOTIATE_TIMEOUT * HZ);
	if (ret <= 0 || st->status == SMB_DIRECT_CS_DISCONNECTED)
		return ret < 0 ? ret : -ETIMEDOUT;

	recvmsg = get_first_reassembly(st);
	if (!recvmsg)
		return -ECONNABORTED;

	ret = smb_direct_check_recvmsg(recvmsg);
	if (ret == -ECONNABORTED)
		goto out;

	req = (struct smb_direct_negotiate_req *)recvmsg->packet;
	st->max_recv_size = min_t(int, st->max_recv_size,
				  le32_to_cpu(req->preferred_send_size));
	st->max_send_size = min_t(int, st->max_send_size,
				  le32_to_cpu(req->max_receive_size));
	st->max_fragmented_send_size =
		le32_to_cpu(req->max_fragmented_size);
	st->max_fragmented_recv_size =
		(st->recv_credit_max * st->max_recv_size) / 2;

	ret = smb_direct_send_negotiate_response(st, ret);
out:
	spin_lock_irq(&st->reassembly_queue_lock);
	st->reassembly_queue_length--;
	list_del(&recvmsg->list);
	spin_unlock_irq(&st->reassembly_queue_lock);
	put_recvmsg(st, recvmsg);

	return ret;
}

static int smb_direct_connect(struct smb_direct_transport *st)
{
	int ret;
	struct ib_qp_cap qp_cap;

	ret = smb_direct_init_params(st, &qp_cap);
	if (ret) {
		pr_err("Can't configure RDMA parameters\n");
		return ret;
	}

	ret = smb_direct_create_pools(st);
	if (ret) {
		pr_err("Can't init RDMA pool: %d\n", ret);
		return ret;
	}

	ret = smb_direct_create_qpair(st, &qp_cap);
	if (ret) {
		pr_err("Can't accept RDMA client: %d\n", ret);
		return ret;
	}

	ret = smb_direct_prepare_negotiation(st);
	if (ret) {
		pr_err("Can't negotiate: %d\n", ret);
		return ret;
	}
	return 0;
}

static bool rdma_frwr_is_supported(struct ib_device_attr *attrs)
{
	if (!(attrs->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS))
		return false;
	if (attrs->max_fast_reg_page_list_len == 0)
		return false;
	return true;
}
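
/*
 * Note (added for clarity): connection establishment path. An
 * RDMA_CM_EVENT_CONNECT_REQUEST on the listener allocates a transport for
 * the new cm_id, runs the connect sequence (init_params -> create_pools ->
 * create_qpair -> prepare_negotiation), and then spawns a
 * ksmbd_conn_handler_loop thread for the connection. Devices without Fast
 * Registration Work Requests (FRWR) are rejected up front, since the RDMA
 * read/write path may register memory through the MR pool set up in
 * smb_direct_create_qpair().
 */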
static int smb_direct_handle_connect_request(struct rdma_cm_id *new_cm_id)
{
	struct smb_direct_transport *t;
	struct task_struct *handler;
	int ret;

	if (!rdma_frwr_is_supported(&new_cm_id->device->attrs)) {
		ksmbd_debug(RDMA,
			    "Fast Registration Work Requests are not supported. device capabilities=%llx\n",
			    new_cm_id->device->attrs.device_cap_flags);
		return -EPROTONOSUPPORT;
	}

	t = alloc_transport(new_cm_id);
	if (!t)
		return -ENOMEM;

	ret = smb_direct_connect(t);
	if (ret)
		goto out_err;

	handler = kthread_run(ksmbd_conn_handler_loop,
			      KSMBD_TRANS(t)->conn, "ksmbd:r%u",
			      smb_direct_port);
	if (IS_ERR(handler)) {
		ret = PTR_ERR(handler);
		pr_err("Can't start thread\n");
		goto out_err;
	}

	return 0;
out_err:
	free_transport(t);
	return ret;
}

static int smb_direct_listen_handler(struct rdma_cm_id *cm_id,
				     struct rdma_cm_event *event)
{
	switch (event->event) {
	case RDMA_CM_EVENT_CONNECT_REQUEST: {
		int ret = smb_direct_handle_connect_request(cm_id);

		if (ret) {
			pr_err("Can't create transport: %d\n", ret);
			return ret;
		}

		ksmbd_debug(RDMA, "Received connection request. cm_id=%p\n",
			    cm_id);
		break;
	}
	default:
		pr_err("Unexpected listen event. cm_id=%p, event=%s (%d)\n",
		       cm_id, rdma_event_msg(event->event), event->event);
		break;
	}
	return 0;
}

static int smb_direct_listen(int port)
{
	int ret;
	struct rdma_cm_id *cm_id;
	struct sockaddr_in sin = {
		.sin_family = AF_INET,
		.sin_addr.s_addr = htonl(INADDR_ANY),
		.sin_port = htons(port),
	};

	cm_id = rdma_create_id(&init_net, smb_direct_listen_handler,
			       &smb_direct_listener, RDMA_PS_TCP, IB_QPT_RC);
	if (IS_ERR(cm_id)) {
		pr_err("Can't create cm id: %ld\n", PTR_ERR(cm_id));
		return PTR_ERR(cm_id);
	}

	ret = rdma_bind_addr(cm_id, (struct sockaddr *)&sin);
	if (ret) {
		pr_err("Can't bind: %d\n", ret);
		goto err;
	}

	smb_direct_listener.cm_id = cm_id;

	ret = rdma_listen(cm_id, 10);
	if (ret) {
		pr_err("Can't listen: %d\n", ret);
		goto err;
	}
	return 0;
err:
	smb_direct_listener.cm_id = NULL;
	rdma_destroy_id(cm_id);
	return ret;
}

static int smb_direct_ib_client_add(struct ib_device *ib_dev)
{
	struct smb_direct_device *smb_dev;

	/* Use the iWARP port (5445) if the device is not an InfiniBand CA */
	if (ib_dev->node_type != RDMA_NODE_IB_CA)
		smb_direct_port = SMB_DIRECT_PORT_IWARP;

	if (!rdma_frwr_is_supported(&ib_dev->attrs))
		return 0;

	smb_dev = kzalloc(sizeof(*smb_dev), KSMBD_DEFAULT_GFP);
	if (!smb_dev)
		return -ENOMEM;
	smb_dev->ib_dev = ib_dev;

	write_lock(&smb_direct_device_lock);
	list_add(&smb_dev->list, &smb_direct_device_list);
	write_unlock(&smb_direct_device_lock);

	ksmbd_debug(RDMA, "ib device added: name %s\n", ib_dev->name);
	return 0;
}

static void smb_direct_ib_client_remove(struct ib_device *ib_dev,
					void *client_data)
{
	struct smb_direct_device *smb_dev, *tmp;

	write_lock(&smb_direct_device_lock);
	list_for_each_entry_safe(smb_dev, tmp, &smb_direct_device_list, list) {
		if (smb_dev->ib_dev == ib_dev) {
			list_del(&smb_dev->list);
			kfree(smb_dev);
			break;
		}
	}
	write_unlock(&smb_direct_device_lock);
}

static struct ib_client smb_direct_ib_client = {
	.name = "ksmbd_smb_direct_ib",
	.add = smb_direct_ib_client_add,
	.remove = smb_direct_ib_client_remove,
};
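
/*
 * Note (added for clarity): module-level setup. ksmbd_rdma_init()
 * registers the IB client above so RDMA devices are tracked as they
 * appear, allocates the high-priority smb_direct_wq used to grant credits
 * and post receives, and starts listening on smb_direct_port.
 * ksmbd_rdma_stop_listening() unregisters the IB client and destroys the
 * listening cm_id; ksmbd_rdma_destroy() releases the workqueue.
 */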
int ksmbd_rdma_init(void)
{
	int ret;

	smb_direct_listener.cm_id = NULL;

	ret = ib_register_client(&smb_direct_ib_client);
	if (ret) {
		pr_err("failed to ib_register_client\n");
		return ret;
	}

	/*
	 * When a client is running out of send credits, the server grants
	 * more by sending a packet from this workqueue. This avoids the
	 * situation where a client cannot send packets for lack of credits.
	 */
	smb_direct_wq = alloc_workqueue("ksmbd-smb_direct-wq",
					WQ_HIGHPRI | WQ_MEM_RECLAIM, 0);
	if (!smb_direct_wq)
		return -ENOMEM;

	ret = smb_direct_listen(smb_direct_port);
	if (ret) {
		destroy_workqueue(smb_direct_wq);
		smb_direct_wq = NULL;
		pr_err("Can't listen: %d\n", ret);
		return ret;
	}

	ksmbd_debug(RDMA, "init RDMA listener. cm_id=%p\n",
		    smb_direct_listener.cm_id);
	return 0;
}

void ksmbd_rdma_stop_listening(void)
{
	if (!smb_direct_listener.cm_id)
		return;

	ib_unregister_client(&smb_direct_ib_client);
	rdma_destroy_id(smb_direct_listener.cm_id);

	smb_direct_listener.cm_id = NULL;
}

void ksmbd_rdma_destroy(void)
{
	if (smb_direct_wq) {
		destroy_workqueue(smb_direct_wq);
		smb_direct_wq = NULL;
	}
}

bool ksmbd_rdma_capable_netdev(struct net_device *netdev)
{
	struct smb_direct_device *smb_dev;
	int i;
	bool rdma_capable = false;

	read_lock(&smb_direct_device_lock);
	list_for_each_entry(smb_dev, &smb_direct_device_list, list) {
		for (i = 0; i < smb_dev->ib_dev->phys_port_cnt; i++) {
			struct net_device *ndev;

			ndev = ib_device_get_netdev(smb_dev->ib_dev, i + 1);
			if (!ndev)
				continue;

			if (ndev == netdev) {
				dev_put(ndev);
				rdma_capable = true;
				goto out;
			}
			dev_put(ndev);
		}
	}
out:
	read_unlock(&smb_direct_device_lock);

	if (!rdma_capable) {
		struct ib_device *ibdev;

		ibdev = ib_device_get_by_netdev(netdev, RDMA_DRIVER_UNKNOWN);
		if (ibdev) {
			rdma_capable = rdma_frwr_is_supported(&ibdev->attrs);
			ib_device_put(ibdev);
		}
	}

	ksmbd_debug(RDMA, "netdev(%s) rdma capable : %s\n",
		    netdev->name, str_true_false(rdma_capable));

	return rdma_capable;
}

static const struct ksmbd_transport_ops ksmbd_smb_direct_transport_ops = {
	.prepare = smb_direct_prepare,
	.disconnect = smb_direct_disconnect,
	.shutdown = smb_direct_shutdown,
	.writev = smb_direct_writev,
	.read = smb_direct_read,
	.rdma_read = smb_direct_rdma_read,
	.rdma_write = smb_direct_rdma_write,
	.free_transport = smb_direct_free_transport,
};