1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Copyright (C) 2017, Microsoft Corporation. 4 * Copyright (C) 2018, LG Electronics. 5 * 6 * Author(s): Long Li <longli@microsoft.com>, 7 * Hyunchul Lee <hyc.lee@gmail.com> 8 */ 9 10 #define SUBMOD_NAME "smb_direct" 11 12 #include <linux/kthread.h> 13 #include <linux/list.h> 14 #include <linux/mempool.h> 15 #include <linux/highmem.h> 16 #include <linux/scatterlist.h> 17 #include <linux/string_choices.h> 18 #include <rdma/ib_verbs.h> 19 #include <rdma/rdma_cm.h> 20 #include <rdma/rw.h> 21 22 #include "glob.h" 23 #include "connection.h" 24 #include "smb_common.h" 25 #include "../common/smb2status.h" 26 #include "../common/smbdirect/smbdirect.h" 27 #include "../common/smbdirect/smbdirect_pdu.h" 28 #include "../common/smbdirect/smbdirect_socket.h" 29 #include "transport_rdma.h" 30 31 #define SMB_DIRECT_PORT_IWARP 5445 32 #define SMB_DIRECT_PORT_INFINIBAND 445 33 34 #define SMB_DIRECT_VERSION_LE cpu_to_le16(SMBDIRECT_V1) 35 36 /* SMB_DIRECT negotiation timeout (for the server) in seconds */ 37 #define SMB_DIRECT_NEGOTIATE_TIMEOUT 5 38 39 /* The timeout to wait for a keepalive message from peer in seconds */ 40 #define SMB_DIRECT_KEEPALIVE_SEND_INTERVAL 120 41 42 /* The timeout to wait for a keepalive message from peer in seconds */ 43 #define SMB_DIRECT_KEEPALIVE_RECV_TIMEOUT 5 44 45 /* 46 * Default maximum number of RDMA read/write outstanding on this connection 47 * This value is possibly decreased during QP creation on hardware limit 48 */ 49 #define SMB_DIRECT_CM_INITIATOR_DEPTH 8 50 51 /* Maximum number of retries on data transfer operations */ 52 #define SMB_DIRECT_CM_RETRY 6 53 /* No need to retry on Receiver Not Ready since SMB_DIRECT manages credits */ 54 #define SMB_DIRECT_CM_RNR_RETRY 0 55 56 /* 57 * User configurable initial values per SMB_DIRECT transport connection 58 * as defined in [MS-SMBD] 3.1.1.1 59 * Those may change after a SMB_DIRECT negotiation 60 */ 61 62 /* Set 445 port to SMB Direct port by default */ 63 static int smb_direct_port = SMB_DIRECT_PORT_INFINIBAND; 64 65 /* The local peer's maximum number of credits to grant to the peer */ 66 static int smb_direct_receive_credit_max = 255; 67 68 /* The remote peer's credit request of local peer */ 69 static int smb_direct_send_credit_target = 255; 70 71 /* The maximum single message size can be sent to remote peer */ 72 static int smb_direct_max_send_size = 1364; 73 74 /* The maximum fragmented upper-layer payload receive size supported */ 75 static int smb_direct_max_fragmented_recv_size = 1024 * 1024; 76 77 /* The maximum single-message size which can be received */ 78 static int smb_direct_max_receive_size = 1364; 79 80 static int smb_direct_max_read_write_size = SMBD_DEFAULT_IOSIZE; 81 82 static LIST_HEAD(smb_direct_device_list); 83 static DEFINE_RWLOCK(smb_direct_device_lock); 84 85 struct smb_direct_device { 86 struct ib_device *ib_dev; 87 struct list_head list; 88 }; 89 90 static struct smb_direct_listener { 91 struct rdma_cm_id *cm_id; 92 } smb_direct_listener; 93 94 static struct workqueue_struct *smb_direct_wq; 95 96 struct smb_direct_transport { 97 struct ksmbd_transport transport; 98 99 struct smbdirect_socket socket; 100 }; 101 102 #define KSMBD_TRANS(t) (&(t)->transport) 103 #define SMBD_TRANS(t) (container_of(t, \ 104 struct smb_direct_transport, transport)) 105 106 static const struct ksmbd_transport_ops ksmbd_smb_direct_transport_ops; 107 108 void init_smbd_max_io_size(unsigned int sz) 109 { 110 sz = clamp_val(sz, SMBD_MIN_IOSIZE, SMBD_MAX_IOSIZE); 111 smb_direct_max_read_write_size = sz; 112 } 113 114 unsigned int get_smbd_max_read_write_size(struct ksmbd_transport *kt) 115 { 116 struct smb_direct_transport *t; 117 struct smbdirect_socket *sc; 118 struct smbdirect_socket_parameters *sp; 119 120 if (kt->ops != &ksmbd_smb_direct_transport_ops) 121 return 0; 122 123 t = SMBD_TRANS(kt); 124 sc = &t->socket; 125 sp = &sc->parameters; 126 127 return sp->max_read_write_size; 128 } 129 130 static inline int get_buf_page_count(void *buf, int size) 131 { 132 return DIV_ROUND_UP((uintptr_t)buf + size, PAGE_SIZE) - 133 (uintptr_t)buf / PAGE_SIZE; 134 } 135 136 static void smb_direct_destroy_pools(struct smbdirect_socket *sc); 137 static void smb_direct_post_recv_credits(struct work_struct *work); 138 static int smb_direct_post_send_data(struct smbdirect_socket *sc, 139 struct smbdirect_send_batch *send_ctx, 140 struct kvec *iov, int niov, 141 int remaining_data_length); 142 143 static inline void 144 *smbdirect_recv_io_payload(struct smbdirect_recv_io *recvmsg) 145 { 146 return (void *)recvmsg->packet; 147 } 148 149 static struct 150 smbdirect_recv_io *get_free_recvmsg(struct smbdirect_socket *sc) 151 { 152 struct smbdirect_recv_io *recvmsg = NULL; 153 unsigned long flags; 154 155 spin_lock_irqsave(&sc->recv_io.free.lock, flags); 156 if (!list_empty(&sc->recv_io.free.list)) { 157 recvmsg = list_first_entry(&sc->recv_io.free.list, 158 struct smbdirect_recv_io, 159 list); 160 list_del(&recvmsg->list); 161 } 162 spin_unlock_irqrestore(&sc->recv_io.free.lock, flags); 163 return recvmsg; 164 } 165 166 static void put_recvmsg(struct smbdirect_socket *sc, 167 struct smbdirect_recv_io *recvmsg) 168 { 169 unsigned long flags; 170 171 if (likely(recvmsg->sge.length != 0)) { 172 ib_dma_unmap_single(sc->ib.dev, 173 recvmsg->sge.addr, 174 recvmsg->sge.length, 175 DMA_FROM_DEVICE); 176 recvmsg->sge.length = 0; 177 } 178 179 spin_lock_irqsave(&sc->recv_io.free.lock, flags); 180 list_add(&recvmsg->list, &sc->recv_io.free.list); 181 spin_unlock_irqrestore(&sc->recv_io.free.lock, flags); 182 183 queue_work(sc->workqueue, &sc->recv_io.posted.refill_work); 184 } 185 186 static void enqueue_reassembly(struct smbdirect_socket *sc, 187 struct smbdirect_recv_io *recvmsg, 188 int data_length) 189 { 190 unsigned long flags; 191 192 spin_lock_irqsave(&sc->recv_io.reassembly.lock, flags); 193 list_add_tail(&recvmsg->list, &sc->recv_io.reassembly.list); 194 sc->recv_io.reassembly.queue_length++; 195 /* 196 * Make sure reassembly_data_length is updated after list and 197 * reassembly_queue_length are updated. On the dequeue side 198 * reassembly_data_length is checked without a lock to determine 199 * if reassembly_queue_length and list is up to date 200 */ 201 virt_wmb(); 202 sc->recv_io.reassembly.data_length += data_length; 203 spin_unlock_irqrestore(&sc->recv_io.reassembly.lock, flags); 204 } 205 206 static struct smbdirect_recv_io *get_first_reassembly(struct smbdirect_socket *sc) 207 { 208 if (!list_empty(&sc->recv_io.reassembly.list)) 209 return list_first_entry(&sc->recv_io.reassembly.list, 210 struct smbdirect_recv_io, list); 211 else 212 return NULL; 213 } 214 215 static void smb_direct_disconnect_wake_up_all(struct smbdirect_socket *sc) 216 { 217 /* 218 * Wake up all waiters in all wait queues 219 * in order to notice the broken connection. 220 */ 221 wake_up_all(&sc->status_wait); 222 wake_up_all(&sc->send_io.credits.wait_queue); 223 wake_up_all(&sc->send_io.pending.zero_wait_queue); 224 wake_up_all(&sc->recv_io.reassembly.wait_queue); 225 wake_up_all(&sc->rw_io.credits.wait_queue); 226 } 227 228 static void smb_direct_disconnect_rdma_work(struct work_struct *work) 229 { 230 struct smbdirect_socket *sc = 231 container_of(work, struct smbdirect_socket, disconnect_work); 232 233 /* 234 * make sure this and other work is not queued again 235 * but here we don't block and avoid 236 * disable[_delayed]_work_sync() 237 */ 238 disable_work(&sc->disconnect_work); 239 disable_work(&sc->recv_io.posted.refill_work); 240 disable_delayed_work(&sc->idle.timer_work); 241 disable_work(&sc->idle.immediate_work); 242 243 if (sc->first_error == 0) 244 sc->first_error = -ECONNABORTED; 245 246 switch (sc->status) { 247 case SMBDIRECT_SOCKET_NEGOTIATE_NEEDED: 248 case SMBDIRECT_SOCKET_NEGOTIATE_RUNNING: 249 case SMBDIRECT_SOCKET_NEGOTIATE_FAILED: 250 case SMBDIRECT_SOCKET_CONNECTED: 251 case SMBDIRECT_SOCKET_ERROR: 252 sc->status = SMBDIRECT_SOCKET_DISCONNECTING; 253 rdma_disconnect(sc->rdma.cm_id); 254 break; 255 256 case SMBDIRECT_SOCKET_CREATED: 257 case SMBDIRECT_SOCKET_RESOLVE_ADDR_NEEDED: 258 case SMBDIRECT_SOCKET_RESOLVE_ADDR_RUNNING: 259 case SMBDIRECT_SOCKET_RESOLVE_ADDR_FAILED: 260 case SMBDIRECT_SOCKET_RESOLVE_ROUTE_NEEDED: 261 case SMBDIRECT_SOCKET_RESOLVE_ROUTE_RUNNING: 262 case SMBDIRECT_SOCKET_RESOLVE_ROUTE_FAILED: 263 case SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED: 264 case SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING: 265 case SMBDIRECT_SOCKET_RDMA_CONNECT_FAILED: 266 /* 267 * rdma_accept() never reached 268 * RDMA_CM_EVENT_ESTABLISHED 269 */ 270 sc->status = SMBDIRECT_SOCKET_DISCONNECTED; 271 break; 272 273 case SMBDIRECT_SOCKET_DISCONNECTING: 274 case SMBDIRECT_SOCKET_DISCONNECTED: 275 case SMBDIRECT_SOCKET_DESTROYED: 276 break; 277 } 278 279 /* 280 * Wake up all waiters in all wait queues 281 * in order to notice the broken connection. 282 */ 283 smb_direct_disconnect_wake_up_all(sc); 284 } 285 286 static void 287 smb_direct_disconnect_rdma_connection(struct smbdirect_socket *sc) 288 { 289 /* 290 * make sure other work (than disconnect_work) is 291 * not queued again but here we don't block and avoid 292 * disable[_delayed]_work_sync() 293 */ 294 disable_work(&sc->recv_io.posted.refill_work); 295 disable_work(&sc->idle.immediate_work); 296 disable_delayed_work(&sc->idle.timer_work); 297 298 if (sc->first_error == 0) 299 sc->first_error = -ECONNABORTED; 300 301 switch (sc->status) { 302 case SMBDIRECT_SOCKET_RESOLVE_ADDR_FAILED: 303 case SMBDIRECT_SOCKET_RESOLVE_ROUTE_FAILED: 304 case SMBDIRECT_SOCKET_RDMA_CONNECT_FAILED: 305 case SMBDIRECT_SOCKET_NEGOTIATE_FAILED: 306 case SMBDIRECT_SOCKET_ERROR: 307 case SMBDIRECT_SOCKET_DISCONNECTING: 308 case SMBDIRECT_SOCKET_DISCONNECTED: 309 case SMBDIRECT_SOCKET_DESTROYED: 310 /* 311 * Keep the current error status 312 */ 313 break; 314 315 case SMBDIRECT_SOCKET_RESOLVE_ADDR_NEEDED: 316 case SMBDIRECT_SOCKET_RESOLVE_ADDR_RUNNING: 317 sc->status = SMBDIRECT_SOCKET_RESOLVE_ADDR_FAILED; 318 break; 319 320 case SMBDIRECT_SOCKET_RESOLVE_ROUTE_NEEDED: 321 case SMBDIRECT_SOCKET_RESOLVE_ROUTE_RUNNING: 322 sc->status = SMBDIRECT_SOCKET_RESOLVE_ROUTE_FAILED; 323 break; 324 325 case SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED: 326 case SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING: 327 sc->status = SMBDIRECT_SOCKET_RDMA_CONNECT_FAILED; 328 break; 329 330 case SMBDIRECT_SOCKET_NEGOTIATE_NEEDED: 331 case SMBDIRECT_SOCKET_NEGOTIATE_RUNNING: 332 sc->status = SMBDIRECT_SOCKET_NEGOTIATE_FAILED; 333 break; 334 335 case SMBDIRECT_SOCKET_CREATED: 336 case SMBDIRECT_SOCKET_CONNECTED: 337 sc->status = SMBDIRECT_SOCKET_ERROR; 338 break; 339 } 340 341 /* 342 * Wake up all waiters in all wait queues 343 * in order to notice the broken connection. 344 */ 345 smb_direct_disconnect_wake_up_all(sc); 346 347 queue_work(sc->workqueue, &sc->disconnect_work); 348 } 349 350 static void smb_direct_send_immediate_work(struct work_struct *work) 351 { 352 struct smbdirect_socket *sc = 353 container_of(work, struct smbdirect_socket, idle.immediate_work); 354 355 if (sc->status != SMBDIRECT_SOCKET_CONNECTED) 356 return; 357 358 smb_direct_post_send_data(sc, NULL, NULL, 0, 0); 359 } 360 361 static void smb_direct_idle_connection_timer(struct work_struct *work) 362 { 363 struct smbdirect_socket *sc = 364 container_of(work, struct smbdirect_socket, idle.timer_work.work); 365 struct smbdirect_socket_parameters *sp = &sc->parameters; 366 367 if (sc->idle.keepalive != SMBDIRECT_KEEPALIVE_NONE) { 368 smb_direct_disconnect_rdma_connection(sc); 369 return; 370 } 371 372 if (sc->status != SMBDIRECT_SOCKET_CONNECTED) 373 return; 374 375 /* 376 * Now use the keepalive timeout (instead of keepalive interval) 377 * in order to wait for a response 378 */ 379 sc->idle.keepalive = SMBDIRECT_KEEPALIVE_PENDING; 380 mod_delayed_work(sc->workqueue, &sc->idle.timer_work, 381 msecs_to_jiffies(sp->keepalive_timeout_msec)); 382 queue_work(sc->workqueue, &sc->idle.immediate_work); 383 } 384 385 static struct smb_direct_transport *alloc_transport(struct rdma_cm_id *cm_id) 386 { 387 struct smb_direct_transport *t; 388 struct smbdirect_socket *sc; 389 struct smbdirect_socket_parameters *sp; 390 struct ksmbd_conn *conn; 391 392 t = kzalloc(sizeof(*t), KSMBD_DEFAULT_GFP); 393 if (!t) 394 return NULL; 395 sc = &t->socket; 396 smbdirect_socket_init(sc); 397 sp = &sc->parameters; 398 399 sc->workqueue = smb_direct_wq; 400 401 INIT_WORK(&sc->disconnect_work, smb_direct_disconnect_rdma_work); 402 403 sp->negotiate_timeout_msec = SMB_DIRECT_NEGOTIATE_TIMEOUT * 1000; 404 sp->initiator_depth = SMB_DIRECT_CM_INITIATOR_DEPTH; 405 sp->responder_resources = 1; 406 sp->recv_credit_max = smb_direct_receive_credit_max; 407 sp->send_credit_target = smb_direct_send_credit_target; 408 sp->max_send_size = smb_direct_max_send_size; 409 sp->max_fragmented_recv_size = smb_direct_max_fragmented_recv_size; 410 sp->max_recv_size = smb_direct_max_receive_size; 411 sp->max_read_write_size = smb_direct_max_read_write_size; 412 sp->keepalive_interval_msec = SMB_DIRECT_KEEPALIVE_SEND_INTERVAL * 1000; 413 sp->keepalive_timeout_msec = SMB_DIRECT_KEEPALIVE_RECV_TIMEOUT * 1000; 414 415 sc->rdma.cm_id = cm_id; 416 cm_id->context = sc; 417 418 sc->ib.dev = sc->rdma.cm_id->device; 419 420 INIT_WORK(&sc->recv_io.posted.refill_work, 421 smb_direct_post_recv_credits); 422 INIT_WORK(&sc->idle.immediate_work, smb_direct_send_immediate_work); 423 INIT_DELAYED_WORK(&sc->idle.timer_work, smb_direct_idle_connection_timer); 424 425 conn = ksmbd_conn_alloc(); 426 if (!conn) 427 goto err; 428 conn->transport = KSMBD_TRANS(t); 429 KSMBD_TRANS(t)->conn = conn; 430 KSMBD_TRANS(t)->ops = &ksmbd_smb_direct_transport_ops; 431 return t; 432 err: 433 kfree(t); 434 return NULL; 435 } 436 437 static void smb_direct_free_transport(struct ksmbd_transport *kt) 438 { 439 kfree(SMBD_TRANS(kt)); 440 } 441 442 static void free_transport(struct smb_direct_transport *t) 443 { 444 struct smbdirect_socket *sc = &t->socket; 445 struct smbdirect_recv_io *recvmsg; 446 447 disable_work_sync(&sc->disconnect_work); 448 if (sc->status < SMBDIRECT_SOCKET_DISCONNECTING) { 449 smb_direct_disconnect_rdma_work(&sc->disconnect_work); 450 wait_event_interruptible(sc->status_wait, 451 sc->status == SMBDIRECT_SOCKET_DISCONNECTED); 452 } 453 454 /* 455 * Wake up all waiters in all wait queues 456 * in order to notice the broken connection. 457 * 458 * Most likely this was already called via 459 * smb_direct_disconnect_rdma_work(), but call it again... 460 */ 461 smb_direct_disconnect_wake_up_all(sc); 462 463 disable_work_sync(&sc->recv_io.posted.refill_work); 464 disable_delayed_work_sync(&sc->idle.timer_work); 465 disable_work_sync(&sc->idle.immediate_work); 466 467 if (sc->ib.qp) { 468 ib_drain_qp(sc->ib.qp); 469 ib_mr_pool_destroy(sc->ib.qp, &sc->ib.qp->rdma_mrs); 470 sc->ib.qp = NULL; 471 rdma_destroy_qp(sc->rdma.cm_id); 472 } 473 474 ksmbd_debug(RDMA, "drain the reassembly queue\n"); 475 do { 476 unsigned long flags; 477 478 spin_lock_irqsave(&sc->recv_io.reassembly.lock, flags); 479 recvmsg = get_first_reassembly(sc); 480 if (recvmsg) { 481 list_del(&recvmsg->list); 482 spin_unlock_irqrestore(&sc->recv_io.reassembly.lock, flags); 483 put_recvmsg(sc, recvmsg); 484 } else { 485 spin_unlock_irqrestore(&sc->recv_io.reassembly.lock, flags); 486 } 487 } while (recvmsg); 488 sc->recv_io.reassembly.data_length = 0; 489 490 if (sc->ib.send_cq) 491 ib_free_cq(sc->ib.send_cq); 492 if (sc->ib.recv_cq) 493 ib_free_cq(sc->ib.recv_cq); 494 if (sc->ib.pd) 495 ib_dealloc_pd(sc->ib.pd); 496 if (sc->rdma.cm_id) 497 rdma_destroy_id(sc->rdma.cm_id); 498 499 smb_direct_destroy_pools(sc); 500 ksmbd_conn_free(KSMBD_TRANS(t)->conn); 501 } 502 503 static struct smbdirect_send_io 504 *smb_direct_alloc_sendmsg(struct smbdirect_socket *sc) 505 { 506 struct smbdirect_send_io *msg; 507 508 msg = mempool_alloc(sc->send_io.mem.pool, KSMBD_DEFAULT_GFP); 509 if (!msg) 510 return ERR_PTR(-ENOMEM); 511 msg->socket = sc; 512 INIT_LIST_HEAD(&msg->sibling_list); 513 msg->num_sge = 0; 514 return msg; 515 } 516 517 static void smb_direct_free_sendmsg(struct smbdirect_socket *sc, 518 struct smbdirect_send_io *msg) 519 { 520 int i; 521 522 if (msg->num_sge > 0) { 523 ib_dma_unmap_single(sc->ib.dev, 524 msg->sge[0].addr, msg->sge[0].length, 525 DMA_TO_DEVICE); 526 for (i = 1; i < msg->num_sge; i++) 527 ib_dma_unmap_page(sc->ib.dev, 528 msg->sge[i].addr, msg->sge[i].length, 529 DMA_TO_DEVICE); 530 } 531 mempool_free(msg, sc->send_io.mem.pool); 532 } 533 534 static int smb_direct_check_recvmsg(struct smbdirect_recv_io *recvmsg) 535 { 536 struct smbdirect_socket *sc = recvmsg->socket; 537 538 switch (sc->recv_io.expected) { 539 case SMBDIRECT_EXPECT_DATA_TRANSFER: { 540 struct smbdirect_data_transfer *req = 541 (struct smbdirect_data_transfer *)recvmsg->packet; 542 struct smb2_hdr *hdr = (struct smb2_hdr *)(recvmsg->packet 543 + le32_to_cpu(req->data_offset)); 544 ksmbd_debug(RDMA, 545 "CreditGranted: %u, CreditRequested: %u, DataLength: %u, RemainingDataLength: %u, SMB: %x, Command: %u\n", 546 le16_to_cpu(req->credits_granted), 547 le16_to_cpu(req->credits_requested), 548 req->data_length, req->remaining_data_length, 549 hdr->ProtocolId, hdr->Command); 550 return 0; 551 } 552 case SMBDIRECT_EXPECT_NEGOTIATE_REQ: { 553 struct smbdirect_negotiate_req *req = 554 (struct smbdirect_negotiate_req *)recvmsg->packet; 555 ksmbd_debug(RDMA, 556 "MinVersion: %u, MaxVersion: %u, CreditRequested: %u, MaxSendSize: %u, MaxRecvSize: %u, MaxFragmentedSize: %u\n", 557 le16_to_cpu(req->min_version), 558 le16_to_cpu(req->max_version), 559 le16_to_cpu(req->credits_requested), 560 le32_to_cpu(req->preferred_send_size), 561 le32_to_cpu(req->max_receive_size), 562 le32_to_cpu(req->max_fragmented_size)); 563 if (le16_to_cpu(req->min_version) > 0x0100 || 564 le16_to_cpu(req->max_version) < 0x0100) 565 return -EOPNOTSUPP; 566 if (le16_to_cpu(req->credits_requested) <= 0 || 567 le32_to_cpu(req->max_receive_size) <= 128 || 568 le32_to_cpu(req->max_fragmented_size) <= 569 128 * 1024) 570 return -ECONNABORTED; 571 572 return 0; 573 } 574 case SMBDIRECT_EXPECT_NEGOTIATE_REP: 575 /* client only */ 576 break; 577 } 578 579 /* This is an internal error */ 580 return -EINVAL; 581 } 582 583 static void recv_done(struct ib_cq *cq, struct ib_wc *wc) 584 { 585 struct smbdirect_recv_io *recvmsg; 586 struct smbdirect_socket *sc; 587 struct smbdirect_socket_parameters *sp; 588 589 recvmsg = container_of(wc->wr_cqe, struct smbdirect_recv_io, cqe); 590 sc = recvmsg->socket; 591 sp = &sc->parameters; 592 593 if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_RECV) { 594 put_recvmsg(sc, recvmsg); 595 if (wc->status != IB_WC_WR_FLUSH_ERR) { 596 pr_err("Recv error. status='%s (%d)' opcode=%d\n", 597 ib_wc_status_msg(wc->status), wc->status, 598 wc->opcode); 599 smb_direct_disconnect_rdma_connection(sc); 600 } 601 return; 602 } 603 604 ksmbd_debug(RDMA, "Recv completed. status='%s (%d)', opcode=%d\n", 605 ib_wc_status_msg(wc->status), wc->status, 606 wc->opcode); 607 608 ib_dma_sync_single_for_cpu(wc->qp->device, recvmsg->sge.addr, 609 recvmsg->sge.length, DMA_FROM_DEVICE); 610 611 /* 612 * Reset timer to the keepalive interval in 613 * order to trigger our next keepalive message. 614 */ 615 sc->idle.keepalive = SMBDIRECT_KEEPALIVE_NONE; 616 mod_delayed_work(sc->workqueue, &sc->idle.timer_work, 617 msecs_to_jiffies(sp->keepalive_interval_msec)); 618 619 switch (sc->recv_io.expected) { 620 case SMBDIRECT_EXPECT_NEGOTIATE_REQ: 621 if (wc->byte_len < sizeof(struct smbdirect_negotiate_req)) { 622 put_recvmsg(sc, recvmsg); 623 smb_direct_disconnect_rdma_connection(sc); 624 return; 625 } 626 sc->recv_io.reassembly.full_packet_received = true; 627 WARN_ON_ONCE(sc->status != SMBDIRECT_SOCKET_NEGOTIATE_NEEDED); 628 sc->status = SMBDIRECT_SOCKET_NEGOTIATE_RUNNING; 629 enqueue_reassembly(sc, recvmsg, 0); 630 wake_up(&sc->status_wait); 631 return; 632 case SMBDIRECT_EXPECT_DATA_TRANSFER: { 633 struct smbdirect_data_transfer *data_transfer = 634 (struct smbdirect_data_transfer *)recvmsg->packet; 635 u32 remaining_data_length, data_offset, data_length; 636 u16 old_recv_credit_target; 637 638 if (wc->byte_len < 639 offsetof(struct smbdirect_data_transfer, padding)) { 640 put_recvmsg(sc, recvmsg); 641 smb_direct_disconnect_rdma_connection(sc); 642 return; 643 } 644 645 remaining_data_length = le32_to_cpu(data_transfer->remaining_data_length); 646 data_length = le32_to_cpu(data_transfer->data_length); 647 data_offset = le32_to_cpu(data_transfer->data_offset); 648 if (wc->byte_len < data_offset || 649 wc->byte_len < (u64)data_offset + data_length) { 650 put_recvmsg(sc, recvmsg); 651 smb_direct_disconnect_rdma_connection(sc); 652 return; 653 } 654 if (remaining_data_length > sp->max_fragmented_recv_size || 655 data_length > sp->max_fragmented_recv_size || 656 (u64)remaining_data_length + (u64)data_length > 657 (u64)sp->max_fragmented_recv_size) { 658 put_recvmsg(sc, recvmsg); 659 smb_direct_disconnect_rdma_connection(sc); 660 return; 661 } 662 663 if (data_length) { 664 if (sc->recv_io.reassembly.full_packet_received) 665 recvmsg->first_segment = true; 666 667 if (le32_to_cpu(data_transfer->remaining_data_length)) 668 sc->recv_io.reassembly.full_packet_received = false; 669 else 670 sc->recv_io.reassembly.full_packet_received = true; 671 } 672 673 atomic_dec(&sc->recv_io.posted.count); 674 atomic_dec(&sc->recv_io.credits.count); 675 676 old_recv_credit_target = sc->recv_io.credits.target; 677 sc->recv_io.credits.target = 678 le16_to_cpu(data_transfer->credits_requested); 679 sc->recv_io.credits.target = 680 min_t(u16, sc->recv_io.credits.target, sp->recv_credit_max); 681 sc->recv_io.credits.target = 682 max_t(u16, sc->recv_io.credits.target, 1); 683 atomic_add(le16_to_cpu(data_transfer->credits_granted), 684 &sc->send_io.credits.count); 685 686 if (le16_to_cpu(data_transfer->flags) & 687 SMBDIRECT_FLAG_RESPONSE_REQUESTED) 688 queue_work(sc->workqueue, &sc->idle.immediate_work); 689 690 if (atomic_read(&sc->send_io.credits.count) > 0) 691 wake_up(&sc->send_io.credits.wait_queue); 692 693 if (data_length) { 694 if (sc->recv_io.credits.target > old_recv_credit_target) 695 queue_work(sc->workqueue, &sc->recv_io.posted.refill_work); 696 697 enqueue_reassembly(sc, recvmsg, (int)data_length); 698 wake_up(&sc->recv_io.reassembly.wait_queue); 699 } else 700 put_recvmsg(sc, recvmsg); 701 702 return; 703 } 704 case SMBDIRECT_EXPECT_NEGOTIATE_REP: 705 /* client only */ 706 break; 707 } 708 709 /* 710 * This is an internal error! 711 */ 712 WARN_ON_ONCE(sc->recv_io.expected != SMBDIRECT_EXPECT_DATA_TRANSFER); 713 put_recvmsg(sc, recvmsg); 714 smb_direct_disconnect_rdma_connection(sc); 715 } 716 717 static int smb_direct_post_recv(struct smbdirect_socket *sc, 718 struct smbdirect_recv_io *recvmsg) 719 { 720 struct smbdirect_socket_parameters *sp = &sc->parameters; 721 struct ib_recv_wr wr; 722 int ret; 723 724 recvmsg->sge.addr = ib_dma_map_single(sc->ib.dev, 725 recvmsg->packet, 726 sp->max_recv_size, 727 DMA_FROM_DEVICE); 728 ret = ib_dma_mapping_error(sc->ib.dev, recvmsg->sge.addr); 729 if (ret) 730 return ret; 731 recvmsg->sge.length = sp->max_recv_size; 732 recvmsg->sge.lkey = sc->ib.pd->local_dma_lkey; 733 recvmsg->cqe.done = recv_done; 734 735 wr.wr_cqe = &recvmsg->cqe; 736 wr.next = NULL; 737 wr.sg_list = &recvmsg->sge; 738 wr.num_sge = 1; 739 740 ret = ib_post_recv(sc->ib.qp, &wr, NULL); 741 if (ret) { 742 pr_err("Can't post recv: %d\n", ret); 743 ib_dma_unmap_single(sc->ib.dev, 744 recvmsg->sge.addr, recvmsg->sge.length, 745 DMA_FROM_DEVICE); 746 recvmsg->sge.length = 0; 747 smb_direct_disconnect_rdma_connection(sc); 748 return ret; 749 } 750 return ret; 751 } 752 753 static int smb_direct_read(struct ksmbd_transport *t, char *buf, 754 unsigned int size, int unused) 755 { 756 struct smbdirect_recv_io *recvmsg; 757 struct smbdirect_data_transfer *data_transfer; 758 int to_copy, to_read, data_read, offset; 759 u32 data_length, remaining_data_length, data_offset; 760 int rc; 761 struct smb_direct_transport *st = SMBD_TRANS(t); 762 struct smbdirect_socket *sc = &st->socket; 763 764 again: 765 if (sc->status != SMBDIRECT_SOCKET_CONNECTED) { 766 pr_err("disconnected\n"); 767 return -ENOTCONN; 768 } 769 770 /* 771 * No need to hold the reassembly queue lock all the time as we are 772 * the only one reading from the front of the queue. The transport 773 * may add more entries to the back of the queue at the same time 774 */ 775 if (sc->recv_io.reassembly.data_length >= size) { 776 int queue_length; 777 int queue_removed = 0; 778 unsigned long flags; 779 780 /* 781 * Need to make sure reassembly_data_length is read before 782 * reading reassembly_queue_length and calling 783 * get_first_reassembly. This call is lock free 784 * as we never read at the end of the queue which are being 785 * updated in SOFTIRQ as more data is received 786 */ 787 virt_rmb(); 788 queue_length = sc->recv_io.reassembly.queue_length; 789 data_read = 0; 790 to_read = size; 791 offset = sc->recv_io.reassembly.first_entry_offset; 792 while (data_read < size) { 793 recvmsg = get_first_reassembly(sc); 794 data_transfer = smbdirect_recv_io_payload(recvmsg); 795 data_length = le32_to_cpu(data_transfer->data_length); 796 remaining_data_length = 797 le32_to_cpu(data_transfer->remaining_data_length); 798 data_offset = le32_to_cpu(data_transfer->data_offset); 799 800 /* 801 * The upper layer expects RFC1002 length at the 802 * beginning of the payload. Return it to indicate 803 * the total length of the packet. This minimize the 804 * change to upper layer packet processing logic. This 805 * will be eventually remove when an intermediate 806 * transport layer is added 807 */ 808 if (recvmsg->first_segment && size == 4) { 809 unsigned int rfc1002_len = 810 data_length + remaining_data_length; 811 *((__be32 *)buf) = cpu_to_be32(rfc1002_len); 812 data_read = 4; 813 recvmsg->first_segment = false; 814 ksmbd_debug(RDMA, 815 "returning rfc1002 length %d\n", 816 rfc1002_len); 817 goto read_rfc1002_done; 818 } 819 820 to_copy = min_t(int, data_length - offset, to_read); 821 memcpy(buf + data_read, (char *)data_transfer + data_offset + offset, 822 to_copy); 823 824 /* move on to the next buffer? */ 825 if (to_copy == data_length - offset) { 826 queue_length--; 827 /* 828 * No need to lock if we are not at the 829 * end of the queue 830 */ 831 if (queue_length) { 832 list_del(&recvmsg->list); 833 } else { 834 spin_lock_irqsave(&sc->recv_io.reassembly.lock, flags); 835 list_del(&recvmsg->list); 836 spin_unlock_irqrestore(&sc->recv_io.reassembly.lock, flags); 837 } 838 queue_removed++; 839 put_recvmsg(sc, recvmsg); 840 offset = 0; 841 } else { 842 offset += to_copy; 843 } 844 845 to_read -= to_copy; 846 data_read += to_copy; 847 } 848 849 spin_lock_irqsave(&sc->recv_io.reassembly.lock, flags); 850 sc->recv_io.reassembly.data_length -= data_read; 851 sc->recv_io.reassembly.queue_length -= queue_removed; 852 spin_unlock_irqrestore(&sc->recv_io.reassembly.lock, flags); 853 854 sc->recv_io.reassembly.first_entry_offset = offset; 855 ksmbd_debug(RDMA, 856 "returning to thread data_read=%d reassembly_data_length=%d first_entry_offset=%d\n", 857 data_read, sc->recv_io.reassembly.data_length, 858 sc->recv_io.reassembly.first_entry_offset); 859 read_rfc1002_done: 860 return data_read; 861 } 862 863 ksmbd_debug(RDMA, "wait_event on more data\n"); 864 rc = wait_event_interruptible(sc->recv_io.reassembly.wait_queue, 865 sc->recv_io.reassembly.data_length >= size || 866 sc->status != SMBDIRECT_SOCKET_CONNECTED); 867 if (rc) 868 return -EINTR; 869 870 goto again; 871 } 872 873 static void smb_direct_post_recv_credits(struct work_struct *work) 874 { 875 struct smbdirect_socket *sc = 876 container_of(work, struct smbdirect_socket, recv_io.posted.refill_work); 877 struct smbdirect_recv_io *recvmsg; 878 int credits = 0; 879 int ret; 880 881 if (atomic_read(&sc->recv_io.credits.count) < sc->recv_io.credits.target) { 882 while (true) { 883 recvmsg = get_free_recvmsg(sc); 884 if (!recvmsg) 885 break; 886 887 recvmsg->first_segment = false; 888 889 ret = smb_direct_post_recv(sc, recvmsg); 890 if (ret) { 891 pr_err("Can't post recv: %d\n", ret); 892 put_recvmsg(sc, recvmsg); 893 break; 894 } 895 credits++; 896 897 atomic_inc(&sc->recv_io.posted.count); 898 } 899 } 900 901 if (credits) 902 queue_work(sc->workqueue, &sc->idle.immediate_work); 903 } 904 905 static void send_done(struct ib_cq *cq, struct ib_wc *wc) 906 { 907 struct smbdirect_send_io *sendmsg, *sibling; 908 struct smbdirect_socket *sc; 909 struct list_head *pos, *prev, *end; 910 911 sendmsg = container_of(wc->wr_cqe, struct smbdirect_send_io, cqe); 912 sc = sendmsg->socket; 913 914 ksmbd_debug(RDMA, "Send completed. status='%s (%d)', opcode=%d\n", 915 ib_wc_status_msg(wc->status), wc->status, 916 wc->opcode); 917 918 if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_SEND) { 919 pr_err("Send error. status='%s (%d)', opcode=%d\n", 920 ib_wc_status_msg(wc->status), wc->status, 921 wc->opcode); 922 smb_direct_disconnect_rdma_connection(sc); 923 } 924 925 if (atomic_dec_and_test(&sc->send_io.pending.count)) 926 wake_up(&sc->send_io.pending.zero_wait_queue); 927 928 /* iterate and free the list of messages in reverse. the list's head 929 * is invalid. 930 */ 931 for (pos = &sendmsg->sibling_list, prev = pos->prev, end = sendmsg->sibling_list.next; 932 prev != end; pos = prev, prev = prev->prev) { 933 sibling = container_of(pos, struct smbdirect_send_io, sibling_list); 934 smb_direct_free_sendmsg(sc, sibling); 935 } 936 937 sibling = container_of(pos, struct smbdirect_send_io, sibling_list); 938 smb_direct_free_sendmsg(sc, sibling); 939 } 940 941 static int manage_credits_prior_sending(struct smbdirect_socket *sc) 942 { 943 int new_credits; 944 945 if (atomic_read(&sc->recv_io.credits.count) >= sc->recv_io.credits.target) 946 return 0; 947 948 new_credits = atomic_read(&sc->recv_io.posted.count); 949 if (new_credits == 0) 950 return 0; 951 952 new_credits -= atomic_read(&sc->recv_io.credits.count); 953 if (new_credits <= 0) 954 return 0; 955 956 atomic_add(new_credits, &sc->recv_io.credits.count); 957 return new_credits; 958 } 959 960 static int manage_keep_alive_before_sending(struct smbdirect_socket *sc) 961 { 962 struct smbdirect_socket_parameters *sp = &sc->parameters; 963 964 if (sc->idle.keepalive == SMBDIRECT_KEEPALIVE_PENDING) { 965 sc->idle.keepalive = SMBDIRECT_KEEPALIVE_SENT; 966 /* 967 * Now use the keepalive timeout (instead of keepalive interval) 968 * in order to wait for a response 969 */ 970 mod_delayed_work(sc->workqueue, &sc->idle.timer_work, 971 msecs_to_jiffies(sp->keepalive_timeout_msec)); 972 return 1; 973 } 974 return 0; 975 } 976 977 static int smb_direct_post_send(struct smbdirect_socket *sc, 978 struct ib_send_wr *wr) 979 { 980 int ret; 981 982 atomic_inc(&sc->send_io.pending.count); 983 ret = ib_post_send(sc->ib.qp, wr, NULL); 984 if (ret) { 985 pr_err("failed to post send: %d\n", ret); 986 if (atomic_dec_and_test(&sc->send_io.pending.count)) 987 wake_up(&sc->send_io.pending.zero_wait_queue); 988 smb_direct_disconnect_rdma_connection(sc); 989 } 990 return ret; 991 } 992 993 static void smb_direct_send_ctx_init(struct smbdirect_send_batch *send_ctx, 994 bool need_invalidate_rkey, 995 unsigned int remote_key) 996 { 997 INIT_LIST_HEAD(&send_ctx->msg_list); 998 send_ctx->wr_cnt = 0; 999 send_ctx->need_invalidate_rkey = need_invalidate_rkey; 1000 send_ctx->remote_key = remote_key; 1001 } 1002 1003 static int smb_direct_flush_send_list(struct smbdirect_socket *sc, 1004 struct smbdirect_send_batch *send_ctx, 1005 bool is_last) 1006 { 1007 struct smbdirect_send_io *first, *last; 1008 int ret; 1009 1010 if (list_empty(&send_ctx->msg_list)) 1011 return 0; 1012 1013 first = list_first_entry(&send_ctx->msg_list, 1014 struct smbdirect_send_io, 1015 sibling_list); 1016 last = list_last_entry(&send_ctx->msg_list, 1017 struct smbdirect_send_io, 1018 sibling_list); 1019 1020 if (send_ctx->need_invalidate_rkey) { 1021 first->wr.opcode = IB_WR_SEND_WITH_INV; 1022 first->wr.ex.invalidate_rkey = send_ctx->remote_key; 1023 send_ctx->need_invalidate_rkey = false; 1024 send_ctx->remote_key = 0; 1025 } 1026 1027 last->wr.send_flags = IB_SEND_SIGNALED; 1028 last->wr.wr_cqe = &last->cqe; 1029 1030 ret = smb_direct_post_send(sc, &first->wr); 1031 if (!ret) { 1032 smb_direct_send_ctx_init(send_ctx, 1033 send_ctx->need_invalidate_rkey, 1034 send_ctx->remote_key); 1035 } else { 1036 atomic_add(send_ctx->wr_cnt, &sc->send_io.credits.count); 1037 wake_up(&sc->send_io.credits.wait_queue); 1038 list_for_each_entry_safe(first, last, &send_ctx->msg_list, 1039 sibling_list) { 1040 smb_direct_free_sendmsg(sc, first); 1041 } 1042 } 1043 return ret; 1044 } 1045 1046 static int wait_for_credits(struct smbdirect_socket *sc, 1047 wait_queue_head_t *waitq, atomic_t *total_credits, 1048 int needed) 1049 { 1050 int ret; 1051 1052 do { 1053 if (atomic_sub_return(needed, total_credits) >= 0) 1054 return 0; 1055 1056 atomic_add(needed, total_credits); 1057 ret = wait_event_interruptible(*waitq, 1058 atomic_read(total_credits) >= needed || 1059 sc->status != SMBDIRECT_SOCKET_CONNECTED); 1060 1061 if (sc->status != SMBDIRECT_SOCKET_CONNECTED) 1062 return -ENOTCONN; 1063 else if (ret < 0) 1064 return ret; 1065 } while (true); 1066 } 1067 1068 static int wait_for_send_credits(struct smbdirect_socket *sc, 1069 struct smbdirect_send_batch *send_ctx) 1070 { 1071 int ret; 1072 1073 if (send_ctx && 1074 (send_ctx->wr_cnt >= 16 || atomic_read(&sc->send_io.credits.count) <= 1)) { 1075 ret = smb_direct_flush_send_list(sc, send_ctx, false); 1076 if (ret) 1077 return ret; 1078 } 1079 1080 return wait_for_credits(sc, &sc->send_io.credits.wait_queue, &sc->send_io.credits.count, 1); 1081 } 1082 1083 static int wait_for_rw_credits(struct smbdirect_socket *sc, int credits) 1084 { 1085 return wait_for_credits(sc, 1086 &sc->rw_io.credits.wait_queue, 1087 &sc->rw_io.credits.count, 1088 credits); 1089 } 1090 1091 static int calc_rw_credits(struct smbdirect_socket *sc, 1092 char *buf, unsigned int len) 1093 { 1094 return DIV_ROUND_UP(get_buf_page_count(buf, len), 1095 sc->rw_io.credits.num_pages); 1096 } 1097 1098 static int smb_direct_create_header(struct smbdirect_socket *sc, 1099 int size, int remaining_data_length, 1100 struct smbdirect_send_io **sendmsg_out) 1101 { 1102 struct smbdirect_socket_parameters *sp = &sc->parameters; 1103 struct smbdirect_send_io *sendmsg; 1104 struct smbdirect_data_transfer *packet; 1105 int header_length; 1106 int ret; 1107 1108 sendmsg = smb_direct_alloc_sendmsg(sc); 1109 if (IS_ERR(sendmsg)) 1110 return PTR_ERR(sendmsg); 1111 1112 /* Fill in the packet header */ 1113 packet = (struct smbdirect_data_transfer *)sendmsg->packet; 1114 packet->credits_requested = cpu_to_le16(sp->send_credit_target); 1115 packet->credits_granted = cpu_to_le16(manage_credits_prior_sending(sc)); 1116 1117 packet->flags = 0; 1118 if (manage_keep_alive_before_sending(sc)) 1119 packet->flags |= cpu_to_le16(SMBDIRECT_FLAG_RESPONSE_REQUESTED); 1120 1121 packet->reserved = 0; 1122 if (!size) 1123 packet->data_offset = 0; 1124 else 1125 packet->data_offset = cpu_to_le32(24); 1126 packet->data_length = cpu_to_le32(size); 1127 packet->remaining_data_length = cpu_to_le32(remaining_data_length); 1128 packet->padding = 0; 1129 1130 ksmbd_debug(RDMA, 1131 "credits_requested=%d credits_granted=%d data_offset=%d data_length=%d remaining_data_length=%d\n", 1132 le16_to_cpu(packet->credits_requested), 1133 le16_to_cpu(packet->credits_granted), 1134 le32_to_cpu(packet->data_offset), 1135 le32_to_cpu(packet->data_length), 1136 le32_to_cpu(packet->remaining_data_length)); 1137 1138 /* Map the packet to DMA */ 1139 header_length = sizeof(struct smbdirect_data_transfer); 1140 /* If this is a packet without payload, don't send padding */ 1141 if (!size) 1142 header_length = 1143 offsetof(struct smbdirect_data_transfer, padding); 1144 1145 sendmsg->sge[0].addr = ib_dma_map_single(sc->ib.dev, 1146 (void *)packet, 1147 header_length, 1148 DMA_TO_DEVICE); 1149 ret = ib_dma_mapping_error(sc->ib.dev, sendmsg->sge[0].addr); 1150 if (ret) { 1151 smb_direct_free_sendmsg(sc, sendmsg); 1152 return ret; 1153 } 1154 1155 sendmsg->num_sge = 1; 1156 sendmsg->sge[0].length = header_length; 1157 sendmsg->sge[0].lkey = sc->ib.pd->local_dma_lkey; 1158 1159 *sendmsg_out = sendmsg; 1160 return 0; 1161 } 1162 1163 static int get_sg_list(void *buf, int size, struct scatterlist *sg_list, int nentries) 1164 { 1165 bool high = is_vmalloc_addr(buf); 1166 struct page *page; 1167 int offset, len; 1168 int i = 0; 1169 1170 if (size <= 0 || nentries < get_buf_page_count(buf, size)) 1171 return -EINVAL; 1172 1173 offset = offset_in_page(buf); 1174 buf -= offset; 1175 while (size > 0) { 1176 len = min_t(int, PAGE_SIZE - offset, size); 1177 if (high) 1178 page = vmalloc_to_page(buf); 1179 else 1180 page = kmap_to_page(buf); 1181 1182 if (!sg_list) 1183 return -EINVAL; 1184 sg_set_page(sg_list, page, len, offset); 1185 sg_list = sg_next(sg_list); 1186 1187 buf += PAGE_SIZE; 1188 size -= len; 1189 offset = 0; 1190 i++; 1191 } 1192 return i; 1193 } 1194 1195 static int get_mapped_sg_list(struct ib_device *device, void *buf, int size, 1196 struct scatterlist *sg_list, int nentries, 1197 enum dma_data_direction dir) 1198 { 1199 int npages; 1200 1201 npages = get_sg_list(buf, size, sg_list, nentries); 1202 if (npages < 0) 1203 return -EINVAL; 1204 return ib_dma_map_sg(device, sg_list, npages, dir); 1205 } 1206 1207 static int post_sendmsg(struct smbdirect_socket *sc, 1208 struct smbdirect_send_batch *send_ctx, 1209 struct smbdirect_send_io *msg) 1210 { 1211 int i; 1212 1213 for (i = 0; i < msg->num_sge; i++) 1214 ib_dma_sync_single_for_device(sc->ib.dev, 1215 msg->sge[i].addr, msg->sge[i].length, 1216 DMA_TO_DEVICE); 1217 1218 msg->cqe.done = send_done; 1219 msg->wr.opcode = IB_WR_SEND; 1220 msg->wr.sg_list = &msg->sge[0]; 1221 msg->wr.num_sge = msg->num_sge; 1222 msg->wr.next = NULL; 1223 1224 if (send_ctx) { 1225 msg->wr.wr_cqe = NULL; 1226 msg->wr.send_flags = 0; 1227 if (!list_empty(&send_ctx->msg_list)) { 1228 struct smbdirect_send_io *last; 1229 1230 last = list_last_entry(&send_ctx->msg_list, 1231 struct smbdirect_send_io, 1232 sibling_list); 1233 last->wr.next = &msg->wr; 1234 } 1235 list_add_tail(&msg->sibling_list, &send_ctx->msg_list); 1236 send_ctx->wr_cnt++; 1237 return 0; 1238 } 1239 1240 msg->wr.wr_cqe = &msg->cqe; 1241 msg->wr.send_flags = IB_SEND_SIGNALED; 1242 return smb_direct_post_send(sc, &msg->wr); 1243 } 1244 1245 static int smb_direct_post_send_data(struct smbdirect_socket *sc, 1246 struct smbdirect_send_batch *send_ctx, 1247 struct kvec *iov, int niov, 1248 int remaining_data_length) 1249 { 1250 int i, j, ret; 1251 struct smbdirect_send_io *msg; 1252 int data_length; 1253 struct scatterlist sg[SMBDIRECT_SEND_IO_MAX_SGE - 1]; 1254 1255 ret = wait_for_send_credits(sc, send_ctx); 1256 if (ret) 1257 return ret; 1258 1259 data_length = 0; 1260 for (i = 0; i < niov; i++) 1261 data_length += iov[i].iov_len; 1262 1263 ret = smb_direct_create_header(sc, data_length, remaining_data_length, 1264 &msg); 1265 if (ret) { 1266 atomic_inc(&sc->send_io.credits.count); 1267 return ret; 1268 } 1269 1270 for (i = 0; i < niov; i++) { 1271 struct ib_sge *sge; 1272 int sg_cnt; 1273 1274 sg_init_table(sg, SMBDIRECT_SEND_IO_MAX_SGE - 1); 1275 sg_cnt = get_mapped_sg_list(sc->ib.dev, 1276 iov[i].iov_base, iov[i].iov_len, 1277 sg, SMBDIRECT_SEND_IO_MAX_SGE - 1, 1278 DMA_TO_DEVICE); 1279 if (sg_cnt <= 0) { 1280 pr_err("failed to map buffer\n"); 1281 ret = -ENOMEM; 1282 goto err; 1283 } else if (sg_cnt + msg->num_sge > SMBDIRECT_SEND_IO_MAX_SGE) { 1284 pr_err("buffer not fitted into sges\n"); 1285 ret = -E2BIG; 1286 ib_dma_unmap_sg(sc->ib.dev, sg, sg_cnt, 1287 DMA_TO_DEVICE); 1288 goto err; 1289 } 1290 1291 for (j = 0; j < sg_cnt; j++) { 1292 sge = &msg->sge[msg->num_sge]; 1293 sge->addr = sg_dma_address(&sg[j]); 1294 sge->length = sg_dma_len(&sg[j]); 1295 sge->lkey = sc->ib.pd->local_dma_lkey; 1296 msg->num_sge++; 1297 } 1298 } 1299 1300 ret = post_sendmsg(sc, send_ctx, msg); 1301 if (ret) 1302 goto err; 1303 return 0; 1304 err: 1305 smb_direct_free_sendmsg(sc, msg); 1306 atomic_inc(&sc->send_io.credits.count); 1307 return ret; 1308 } 1309 1310 static int smb_direct_writev(struct ksmbd_transport *t, 1311 struct kvec *iov, int niovs, int buflen, 1312 bool need_invalidate, unsigned int remote_key) 1313 { 1314 struct smb_direct_transport *st = SMBD_TRANS(t); 1315 struct smbdirect_socket *sc = &st->socket; 1316 struct smbdirect_socket_parameters *sp = &sc->parameters; 1317 size_t remaining_data_length; 1318 size_t iov_idx; 1319 size_t iov_ofs; 1320 size_t max_iov_size = sp->max_send_size - 1321 sizeof(struct smbdirect_data_transfer); 1322 int ret; 1323 struct smbdirect_send_batch send_ctx; 1324 int error = 0; 1325 1326 if (sc->status != SMBDIRECT_SOCKET_CONNECTED) 1327 return -ENOTCONN; 1328 1329 //FIXME: skip RFC1002 header.. 1330 if (WARN_ON_ONCE(niovs <= 1 || iov[0].iov_len != 4)) 1331 return -EINVAL; 1332 buflen -= 4; 1333 iov_idx = 1; 1334 iov_ofs = 0; 1335 1336 remaining_data_length = buflen; 1337 ksmbd_debug(RDMA, "Sending smb (RDMA): smb_len=%u\n", buflen); 1338 1339 smb_direct_send_ctx_init(&send_ctx, need_invalidate, remote_key); 1340 while (remaining_data_length) { 1341 struct kvec vecs[SMBDIRECT_SEND_IO_MAX_SGE - 1]; /* minus smbdirect hdr */ 1342 size_t possible_bytes = max_iov_size; 1343 size_t possible_vecs; 1344 size_t bytes = 0; 1345 size_t nvecs = 0; 1346 1347 /* 1348 * For the last message remaining_data_length should be 1349 * have been 0 already! 1350 */ 1351 if (WARN_ON_ONCE(iov_idx >= niovs)) { 1352 error = -EINVAL; 1353 goto done; 1354 } 1355 1356 /* 1357 * We have 2 factors which limit the arguments we pass 1358 * to smb_direct_post_send_data(): 1359 * 1360 * 1. The number of supported sges for the send, 1361 * while one is reserved for the smbdirect header. 1362 * And we currently need one SGE per page. 1363 * 2. The number of negotiated payload bytes per send. 1364 */ 1365 possible_vecs = min_t(size_t, ARRAY_SIZE(vecs), niovs - iov_idx); 1366 1367 while (iov_idx < niovs && possible_vecs && possible_bytes) { 1368 struct kvec *v = &vecs[nvecs]; 1369 int page_count; 1370 1371 v->iov_base = ((u8 *)iov[iov_idx].iov_base) + iov_ofs; 1372 v->iov_len = min_t(size_t, 1373 iov[iov_idx].iov_len - iov_ofs, 1374 possible_bytes); 1375 page_count = get_buf_page_count(v->iov_base, v->iov_len); 1376 if (page_count > possible_vecs) { 1377 /* 1378 * If the number of pages in the buffer 1379 * is to much (because we currently require 1380 * one SGE per page), we need to limit the 1381 * length. 1382 * 1383 * We know possible_vecs is at least 1, 1384 * so we always keep the first page. 1385 * 1386 * We need to calculate the number extra 1387 * pages (epages) we can also keep. 1388 * 1389 * We calculate the number of bytes in the 1390 * first page (fplen), this should never be 1391 * larger than v->iov_len because page_count is 1392 * at least 2, but adding a limitation feels 1393 * better. 1394 * 1395 * Then we calculate the number of bytes (elen) 1396 * we can keep for the extra pages. 1397 */ 1398 size_t epages = possible_vecs - 1; 1399 size_t fpofs = offset_in_page(v->iov_base); 1400 size_t fplen = min_t(size_t, PAGE_SIZE - fpofs, v->iov_len); 1401 size_t elen = min_t(size_t, v->iov_len - fplen, epages*PAGE_SIZE); 1402 1403 v->iov_len = fplen + elen; 1404 page_count = get_buf_page_count(v->iov_base, v->iov_len); 1405 if (WARN_ON_ONCE(page_count > possible_vecs)) { 1406 /* 1407 * Something went wrong in the above 1408 * logic... 1409 */ 1410 error = -EINVAL; 1411 goto done; 1412 } 1413 } 1414 possible_vecs -= page_count; 1415 nvecs += 1; 1416 possible_bytes -= v->iov_len; 1417 bytes += v->iov_len; 1418 1419 iov_ofs += v->iov_len; 1420 if (iov_ofs >= iov[iov_idx].iov_len) { 1421 iov_idx += 1; 1422 iov_ofs = 0; 1423 } 1424 } 1425 1426 remaining_data_length -= bytes; 1427 1428 ret = smb_direct_post_send_data(sc, &send_ctx, 1429 vecs, nvecs, 1430 remaining_data_length); 1431 if (unlikely(ret)) { 1432 error = ret; 1433 goto done; 1434 } 1435 } 1436 1437 done: 1438 ret = smb_direct_flush_send_list(sc, &send_ctx, true); 1439 if (unlikely(!ret && error)) 1440 ret = error; 1441 1442 /* 1443 * As an optimization, we don't wait for individual I/O to finish 1444 * before sending the next one. 1445 * Send them all and wait for pending send count to get to 0 1446 * that means all the I/Os have been out and we are good to return 1447 */ 1448 1449 wait_event(sc->send_io.pending.zero_wait_queue, 1450 atomic_read(&sc->send_io.pending.count) == 0 || 1451 sc->status != SMBDIRECT_SOCKET_CONNECTED); 1452 if (sc->status != SMBDIRECT_SOCKET_CONNECTED && ret == 0) 1453 ret = -ENOTCONN; 1454 1455 return ret; 1456 } 1457 1458 static void smb_direct_free_rdma_rw_msg(struct smb_direct_transport *t, 1459 struct smbdirect_rw_io *msg, 1460 enum dma_data_direction dir) 1461 { 1462 struct smbdirect_socket *sc = &t->socket; 1463 1464 rdma_rw_ctx_destroy(&msg->rdma_ctx, sc->ib.qp, sc->ib.qp->port, 1465 msg->sgt.sgl, msg->sgt.nents, dir); 1466 sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE); 1467 kfree(msg); 1468 } 1469 1470 static void read_write_done(struct ib_cq *cq, struct ib_wc *wc, 1471 enum dma_data_direction dir) 1472 { 1473 struct smbdirect_rw_io *msg = 1474 container_of(wc->wr_cqe, struct smbdirect_rw_io, cqe); 1475 struct smbdirect_socket *sc = msg->socket; 1476 1477 if (wc->status != IB_WC_SUCCESS) { 1478 msg->error = -EIO; 1479 pr_err("read/write error. opcode = %d, status = %s(%d)\n", 1480 wc->opcode, ib_wc_status_msg(wc->status), wc->status); 1481 if (wc->status != IB_WC_WR_FLUSH_ERR) 1482 smb_direct_disconnect_rdma_connection(sc); 1483 } 1484 1485 complete(msg->completion); 1486 } 1487 1488 static void read_done(struct ib_cq *cq, struct ib_wc *wc) 1489 { 1490 read_write_done(cq, wc, DMA_FROM_DEVICE); 1491 } 1492 1493 static void write_done(struct ib_cq *cq, struct ib_wc *wc) 1494 { 1495 read_write_done(cq, wc, DMA_TO_DEVICE); 1496 } 1497 1498 static int smb_direct_rdma_xmit(struct smb_direct_transport *t, 1499 void *buf, int buf_len, 1500 struct smbdirect_buffer_descriptor_v1 *desc, 1501 unsigned int desc_len, 1502 bool is_read) 1503 { 1504 struct smbdirect_socket *sc = &t->socket; 1505 struct smbdirect_socket_parameters *sp = &sc->parameters; 1506 struct smbdirect_rw_io *msg, *next_msg; 1507 int i, ret; 1508 DECLARE_COMPLETION_ONSTACK(completion); 1509 struct ib_send_wr *first_wr; 1510 LIST_HEAD(msg_list); 1511 char *desc_buf; 1512 int credits_needed; 1513 unsigned int desc_buf_len, desc_num = 0; 1514 1515 if (sc->status != SMBDIRECT_SOCKET_CONNECTED) 1516 return -ENOTCONN; 1517 1518 if (buf_len > sp->max_read_write_size) 1519 return -EINVAL; 1520 1521 /* calculate needed credits */ 1522 credits_needed = 0; 1523 desc_buf = buf; 1524 for (i = 0; i < desc_len / sizeof(*desc); i++) { 1525 if (!buf_len) 1526 break; 1527 1528 desc_buf_len = le32_to_cpu(desc[i].length); 1529 if (!desc_buf_len) 1530 return -EINVAL; 1531 1532 if (desc_buf_len > buf_len) { 1533 desc_buf_len = buf_len; 1534 desc[i].length = cpu_to_le32(desc_buf_len); 1535 buf_len = 0; 1536 } 1537 1538 credits_needed += calc_rw_credits(sc, desc_buf, desc_buf_len); 1539 desc_buf += desc_buf_len; 1540 buf_len -= desc_buf_len; 1541 desc_num++; 1542 } 1543 1544 ksmbd_debug(RDMA, "RDMA %s, len %#x, needed credits %#x\n", 1545 str_read_write(is_read), buf_len, credits_needed); 1546 1547 ret = wait_for_rw_credits(sc, credits_needed); 1548 if (ret < 0) 1549 return ret; 1550 1551 /* build rdma_rw_ctx for each descriptor */ 1552 desc_buf = buf; 1553 for (i = 0; i < desc_num; i++) { 1554 msg = kzalloc(struct_size(msg, sg_list, SG_CHUNK_SIZE), 1555 KSMBD_DEFAULT_GFP); 1556 if (!msg) { 1557 ret = -ENOMEM; 1558 goto out; 1559 } 1560 1561 desc_buf_len = le32_to_cpu(desc[i].length); 1562 1563 msg->socket = sc; 1564 msg->cqe.done = is_read ? read_done : write_done; 1565 msg->completion = &completion; 1566 1567 msg->sgt.sgl = &msg->sg_list[0]; 1568 ret = sg_alloc_table_chained(&msg->sgt, 1569 get_buf_page_count(desc_buf, desc_buf_len), 1570 msg->sg_list, SG_CHUNK_SIZE); 1571 if (ret) { 1572 kfree(msg); 1573 ret = -ENOMEM; 1574 goto out; 1575 } 1576 1577 ret = get_sg_list(desc_buf, desc_buf_len, 1578 msg->sgt.sgl, msg->sgt.orig_nents); 1579 if (ret < 0) { 1580 sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE); 1581 kfree(msg); 1582 goto out; 1583 } 1584 1585 ret = rdma_rw_ctx_init(&msg->rdma_ctx, sc->ib.qp, sc->ib.qp->port, 1586 msg->sgt.sgl, 1587 get_buf_page_count(desc_buf, desc_buf_len), 1588 0, 1589 le64_to_cpu(desc[i].offset), 1590 le32_to_cpu(desc[i].token), 1591 is_read ? DMA_FROM_DEVICE : DMA_TO_DEVICE); 1592 if (ret < 0) { 1593 pr_err("failed to init rdma_rw_ctx: %d\n", ret); 1594 sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE); 1595 kfree(msg); 1596 goto out; 1597 } 1598 1599 list_add_tail(&msg->list, &msg_list); 1600 desc_buf += desc_buf_len; 1601 } 1602 1603 /* concatenate work requests of rdma_rw_ctxs */ 1604 first_wr = NULL; 1605 list_for_each_entry_reverse(msg, &msg_list, list) { 1606 first_wr = rdma_rw_ctx_wrs(&msg->rdma_ctx, sc->ib.qp, sc->ib.qp->port, 1607 &msg->cqe, first_wr); 1608 } 1609 1610 ret = ib_post_send(sc->ib.qp, first_wr, NULL); 1611 if (ret) { 1612 pr_err("failed to post send wr for RDMA R/W: %d\n", ret); 1613 goto out; 1614 } 1615 1616 msg = list_last_entry(&msg_list, struct smbdirect_rw_io, list); 1617 wait_for_completion(&completion); 1618 ret = msg->error; 1619 out: 1620 list_for_each_entry_safe(msg, next_msg, &msg_list, list) { 1621 list_del(&msg->list); 1622 smb_direct_free_rdma_rw_msg(t, msg, 1623 is_read ? DMA_FROM_DEVICE : DMA_TO_DEVICE); 1624 } 1625 atomic_add(credits_needed, &sc->rw_io.credits.count); 1626 wake_up(&sc->rw_io.credits.wait_queue); 1627 return ret; 1628 } 1629 1630 static int smb_direct_rdma_write(struct ksmbd_transport *t, 1631 void *buf, unsigned int buflen, 1632 struct smbdirect_buffer_descriptor_v1 *desc, 1633 unsigned int desc_len) 1634 { 1635 return smb_direct_rdma_xmit(SMBD_TRANS(t), buf, buflen, 1636 desc, desc_len, false); 1637 } 1638 1639 static int smb_direct_rdma_read(struct ksmbd_transport *t, 1640 void *buf, unsigned int buflen, 1641 struct smbdirect_buffer_descriptor_v1 *desc, 1642 unsigned int desc_len) 1643 { 1644 return smb_direct_rdma_xmit(SMBD_TRANS(t), buf, buflen, 1645 desc, desc_len, true); 1646 } 1647 1648 static void smb_direct_disconnect(struct ksmbd_transport *t) 1649 { 1650 struct smb_direct_transport *st = SMBD_TRANS(t); 1651 struct smbdirect_socket *sc = &st->socket; 1652 1653 ksmbd_debug(RDMA, "Disconnecting cm_id=%p\n", sc->rdma.cm_id); 1654 1655 free_transport(st); 1656 } 1657 1658 static void smb_direct_shutdown(struct ksmbd_transport *t) 1659 { 1660 struct smb_direct_transport *st = SMBD_TRANS(t); 1661 struct smbdirect_socket *sc = &st->socket; 1662 1663 ksmbd_debug(RDMA, "smb-direct shutdown cm_id=%p\n", sc->rdma.cm_id); 1664 1665 smb_direct_disconnect_rdma_work(&sc->disconnect_work); 1666 } 1667 1668 static int smb_direct_cm_handler(struct rdma_cm_id *cm_id, 1669 struct rdma_cm_event *event) 1670 { 1671 struct smbdirect_socket *sc = cm_id->context; 1672 1673 ksmbd_debug(RDMA, "RDMA CM event. cm_id=%p event=%s (%d)\n", 1674 cm_id, rdma_event_msg(event->event), event->event); 1675 1676 switch (event->event) { 1677 case RDMA_CM_EVENT_ESTABLISHED: { 1678 WARN_ON_ONCE(sc->status != SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING); 1679 sc->status = SMBDIRECT_SOCKET_NEGOTIATE_NEEDED; 1680 wake_up(&sc->status_wait); 1681 break; 1682 } 1683 case RDMA_CM_EVENT_DEVICE_REMOVAL: 1684 case RDMA_CM_EVENT_DISCONNECTED: { 1685 ib_drain_qp(sc->ib.qp); 1686 1687 sc->status = SMBDIRECT_SOCKET_DISCONNECTED; 1688 smb_direct_disconnect_rdma_work(&sc->disconnect_work); 1689 break; 1690 } 1691 case RDMA_CM_EVENT_CONNECT_ERROR: { 1692 sc->status = SMBDIRECT_SOCKET_DISCONNECTED; 1693 smb_direct_disconnect_rdma_work(&sc->disconnect_work); 1694 break; 1695 } 1696 default: 1697 pr_err("Unexpected RDMA CM event. cm_id=%p, event=%s (%d)\n", 1698 cm_id, rdma_event_msg(event->event), 1699 event->event); 1700 break; 1701 } 1702 return 0; 1703 } 1704 1705 static void smb_direct_qpair_handler(struct ib_event *event, void *context) 1706 { 1707 struct smbdirect_socket *sc = context; 1708 1709 ksmbd_debug(RDMA, "Received QP event. cm_id=%p, event=%s (%d)\n", 1710 sc->rdma.cm_id, ib_event_msg(event->event), event->event); 1711 1712 switch (event->event) { 1713 case IB_EVENT_CQ_ERR: 1714 case IB_EVENT_QP_FATAL: 1715 smb_direct_disconnect_rdma_connection(sc); 1716 break; 1717 default: 1718 break; 1719 } 1720 } 1721 1722 static int smb_direct_send_negotiate_response(struct smbdirect_socket *sc, 1723 int failed) 1724 { 1725 struct smbdirect_socket_parameters *sp = &sc->parameters; 1726 struct smbdirect_send_io *sendmsg; 1727 struct smbdirect_negotiate_resp *resp; 1728 int ret; 1729 1730 sendmsg = smb_direct_alloc_sendmsg(sc); 1731 if (IS_ERR(sendmsg)) 1732 return -ENOMEM; 1733 1734 resp = (struct smbdirect_negotiate_resp *)sendmsg->packet; 1735 if (failed) { 1736 memset(resp, 0, sizeof(*resp)); 1737 resp->min_version = SMB_DIRECT_VERSION_LE; 1738 resp->max_version = SMB_DIRECT_VERSION_LE; 1739 resp->status = STATUS_NOT_SUPPORTED; 1740 1741 sc->status = SMBDIRECT_SOCKET_NEGOTIATE_FAILED; 1742 } else { 1743 resp->status = STATUS_SUCCESS; 1744 resp->min_version = SMB_DIRECT_VERSION_LE; 1745 resp->max_version = SMB_DIRECT_VERSION_LE; 1746 resp->negotiated_version = SMB_DIRECT_VERSION_LE; 1747 resp->reserved = 0; 1748 resp->credits_requested = 1749 cpu_to_le16(sp->send_credit_target); 1750 resp->credits_granted = cpu_to_le16(manage_credits_prior_sending(sc)); 1751 resp->max_readwrite_size = cpu_to_le32(sp->max_read_write_size); 1752 resp->preferred_send_size = cpu_to_le32(sp->max_send_size); 1753 resp->max_receive_size = cpu_to_le32(sp->max_recv_size); 1754 resp->max_fragmented_size = 1755 cpu_to_le32(sp->max_fragmented_recv_size); 1756 1757 sc->recv_io.expected = SMBDIRECT_EXPECT_DATA_TRANSFER; 1758 sc->status = SMBDIRECT_SOCKET_CONNECTED; 1759 } 1760 1761 sendmsg->sge[0].addr = ib_dma_map_single(sc->ib.dev, 1762 (void *)resp, sizeof(*resp), 1763 DMA_TO_DEVICE); 1764 ret = ib_dma_mapping_error(sc->ib.dev, sendmsg->sge[0].addr); 1765 if (ret) { 1766 smb_direct_free_sendmsg(sc, sendmsg); 1767 return ret; 1768 } 1769 1770 sendmsg->num_sge = 1; 1771 sendmsg->sge[0].length = sizeof(*resp); 1772 sendmsg->sge[0].lkey = sc->ib.pd->local_dma_lkey; 1773 1774 ret = post_sendmsg(sc, NULL, sendmsg); 1775 if (ret) { 1776 smb_direct_free_sendmsg(sc, sendmsg); 1777 return ret; 1778 } 1779 1780 wait_event(sc->send_io.pending.zero_wait_queue, 1781 atomic_read(&sc->send_io.pending.count) == 0 || 1782 sc->status != SMBDIRECT_SOCKET_CONNECTED); 1783 if (sc->status != SMBDIRECT_SOCKET_CONNECTED) 1784 return -ENOTCONN; 1785 1786 return 0; 1787 } 1788 1789 static int smb_direct_accept_client(struct smbdirect_socket *sc) 1790 { 1791 struct smbdirect_socket_parameters *sp = &sc->parameters; 1792 struct rdma_conn_param conn_param; 1793 __be32 ird_ord_hdr[2]; 1794 int ret; 1795 1796 /* 1797 * smb_direct_handle_connect_request() 1798 * already negotiated sp->initiator_depth 1799 * and sp->responder_resources 1800 */ 1801 memset(&conn_param, 0, sizeof(conn_param)); 1802 conn_param.initiator_depth = sp->initiator_depth; 1803 conn_param.responder_resources = sp->responder_resources; 1804 1805 if (sc->rdma.legacy_iwarp) { 1806 ird_ord_hdr[0] = cpu_to_be32(conn_param.responder_resources); 1807 ird_ord_hdr[1] = cpu_to_be32(conn_param.initiator_depth); 1808 conn_param.private_data = ird_ord_hdr; 1809 conn_param.private_data_len = sizeof(ird_ord_hdr); 1810 } else { 1811 conn_param.private_data = NULL; 1812 conn_param.private_data_len = 0; 1813 } 1814 conn_param.retry_count = SMB_DIRECT_CM_RETRY; 1815 conn_param.rnr_retry_count = SMB_DIRECT_CM_RNR_RETRY; 1816 conn_param.flow_control = 0; 1817 1818 /* 1819 * start with the negotiate timeout and SMBDIRECT_KEEPALIVE_PENDING 1820 * so that the timer will cause a disconnect. 1821 */ 1822 sc->idle.keepalive = SMBDIRECT_KEEPALIVE_PENDING; 1823 mod_delayed_work(sc->workqueue, &sc->idle.timer_work, 1824 msecs_to_jiffies(sp->negotiate_timeout_msec)); 1825 1826 WARN_ON_ONCE(sc->status != SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED); 1827 sc->status = SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING; 1828 ret = rdma_accept(sc->rdma.cm_id, &conn_param); 1829 if (ret) { 1830 pr_err("error at rdma_accept: %d\n", ret); 1831 return ret; 1832 } 1833 return 0; 1834 } 1835 1836 static int smb_direct_prepare_negotiation(struct smbdirect_socket *sc) 1837 { 1838 struct smbdirect_recv_io *recvmsg; 1839 int ret; 1840 1841 WARN_ON_ONCE(sc->status != SMBDIRECT_SOCKET_CREATED); 1842 sc->status = SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED; 1843 1844 sc->recv_io.expected = SMBDIRECT_EXPECT_NEGOTIATE_REQ; 1845 1846 recvmsg = get_free_recvmsg(sc); 1847 if (!recvmsg) 1848 return -ENOMEM; 1849 1850 ret = smb_direct_post_recv(sc, recvmsg); 1851 if (ret) { 1852 pr_err("Can't post recv: %d\n", ret); 1853 goto out_err; 1854 } 1855 1856 ret = smb_direct_accept_client(sc); 1857 if (ret) { 1858 pr_err("Can't accept client\n"); 1859 goto out_err; 1860 } 1861 1862 smb_direct_post_recv_credits(&sc->recv_io.posted.refill_work); 1863 return 0; 1864 out_err: 1865 put_recvmsg(sc, recvmsg); 1866 return ret; 1867 } 1868 1869 static unsigned int smb_direct_get_max_fr_pages(struct smbdirect_socket *sc) 1870 { 1871 return min_t(unsigned int, 1872 sc->ib.dev->attrs.max_fast_reg_page_list_len, 1873 256); 1874 } 1875 1876 static int smb_direct_init_params(struct smbdirect_socket *sc, 1877 struct ib_qp_cap *cap) 1878 { 1879 struct smbdirect_socket_parameters *sp = &sc->parameters; 1880 struct ib_device *device = sc->ib.dev; 1881 int max_send_sges, max_rw_wrs, max_send_wrs; 1882 unsigned int max_sge_per_wr, wrs_per_credit; 1883 1884 /* need 3 more sge. because a SMB_DIRECT header, SMB2 header, 1885 * SMB2 response could be mapped. 1886 */ 1887 max_send_sges = DIV_ROUND_UP(sp->max_send_size, PAGE_SIZE) + 3; 1888 if (max_send_sges > SMBDIRECT_SEND_IO_MAX_SGE) { 1889 pr_err("max_send_size %d is too large\n", sp->max_send_size); 1890 return -EINVAL; 1891 } 1892 1893 /* Calculate the number of work requests for RDMA R/W. 1894 * The maximum number of pages which can be registered 1895 * with one Memory region can be transferred with one 1896 * R/W credit. And at least 4 work requests for each credit 1897 * are needed for MR registration, RDMA R/W, local & remote 1898 * MR invalidation. 1899 */ 1900 sc->rw_io.credits.num_pages = smb_direct_get_max_fr_pages(sc); 1901 sc->rw_io.credits.max = DIV_ROUND_UP(sp->max_read_write_size, 1902 (sc->rw_io.credits.num_pages - 1) * 1903 PAGE_SIZE); 1904 1905 max_sge_per_wr = min_t(unsigned int, device->attrs.max_send_sge, 1906 device->attrs.max_sge_rd); 1907 max_sge_per_wr = max_t(unsigned int, max_sge_per_wr, 1908 max_send_sges); 1909 wrs_per_credit = max_t(unsigned int, 4, 1910 DIV_ROUND_UP(sc->rw_io.credits.num_pages, 1911 max_sge_per_wr) + 1); 1912 max_rw_wrs = sc->rw_io.credits.max * wrs_per_credit; 1913 1914 max_send_wrs = sp->send_credit_target + max_rw_wrs; 1915 if (max_send_wrs > device->attrs.max_cqe || 1916 max_send_wrs > device->attrs.max_qp_wr) { 1917 pr_err("consider lowering send_credit_target = %d\n", 1918 sp->send_credit_target); 1919 pr_err("Possible CQE overrun, device reporting max_cqe %d max_qp_wr %d\n", 1920 device->attrs.max_cqe, device->attrs.max_qp_wr); 1921 return -EINVAL; 1922 } 1923 1924 if (sp->recv_credit_max > device->attrs.max_cqe || 1925 sp->recv_credit_max > device->attrs.max_qp_wr) { 1926 pr_err("consider lowering receive_credit_max = %d\n", 1927 sp->recv_credit_max); 1928 pr_err("Possible CQE overrun, device reporting max_cpe %d max_qp_wr %d\n", 1929 device->attrs.max_cqe, device->attrs.max_qp_wr); 1930 return -EINVAL; 1931 } 1932 1933 if (device->attrs.max_send_sge < SMBDIRECT_SEND_IO_MAX_SGE) { 1934 pr_err("warning: device max_send_sge = %d too small\n", 1935 device->attrs.max_send_sge); 1936 return -EINVAL; 1937 } 1938 if (device->attrs.max_recv_sge < SMBDIRECT_RECV_IO_MAX_SGE) { 1939 pr_err("warning: device max_recv_sge = %d too small\n", 1940 device->attrs.max_recv_sge); 1941 return -EINVAL; 1942 } 1943 1944 sc->recv_io.credits.target = 1; 1945 1946 atomic_set(&sc->rw_io.credits.count, sc->rw_io.credits.max); 1947 1948 cap->max_send_wr = max_send_wrs; 1949 cap->max_recv_wr = sp->recv_credit_max; 1950 cap->max_send_sge = SMBDIRECT_SEND_IO_MAX_SGE; 1951 cap->max_recv_sge = SMBDIRECT_RECV_IO_MAX_SGE; 1952 cap->max_inline_data = 0; 1953 cap->max_rdma_ctxs = sc->rw_io.credits.max; 1954 return 0; 1955 } 1956 1957 static void smb_direct_destroy_pools(struct smbdirect_socket *sc) 1958 { 1959 struct smbdirect_recv_io *recvmsg; 1960 1961 while ((recvmsg = get_free_recvmsg(sc))) 1962 mempool_free(recvmsg, sc->recv_io.mem.pool); 1963 1964 mempool_destroy(sc->recv_io.mem.pool); 1965 sc->recv_io.mem.pool = NULL; 1966 1967 kmem_cache_destroy(sc->recv_io.mem.cache); 1968 sc->recv_io.mem.cache = NULL; 1969 1970 mempool_destroy(sc->send_io.mem.pool); 1971 sc->send_io.mem.pool = NULL; 1972 1973 kmem_cache_destroy(sc->send_io.mem.cache); 1974 sc->send_io.mem.cache = NULL; 1975 } 1976 1977 static int smb_direct_create_pools(struct smbdirect_socket *sc) 1978 { 1979 struct smbdirect_socket_parameters *sp = &sc->parameters; 1980 char name[80]; 1981 int i; 1982 struct smbdirect_recv_io *recvmsg; 1983 1984 snprintf(name, sizeof(name), "smbdirect_send_io_pool_%p", sc); 1985 sc->send_io.mem.cache = kmem_cache_create(name, 1986 sizeof(struct smbdirect_send_io) + 1987 sizeof(struct smbdirect_negotiate_resp), 1988 0, SLAB_HWCACHE_ALIGN, NULL); 1989 if (!sc->send_io.mem.cache) 1990 return -ENOMEM; 1991 1992 sc->send_io.mem.pool = mempool_create(sp->send_credit_target, 1993 mempool_alloc_slab, mempool_free_slab, 1994 sc->send_io.mem.cache); 1995 if (!sc->send_io.mem.pool) 1996 goto err; 1997 1998 snprintf(name, sizeof(name), "smbdirect_recv_io_pool_%p", sc); 1999 sc->recv_io.mem.cache = kmem_cache_create(name, 2000 sizeof(struct smbdirect_recv_io) + 2001 sp->max_recv_size, 2002 0, SLAB_HWCACHE_ALIGN, NULL); 2003 if (!sc->recv_io.mem.cache) 2004 goto err; 2005 2006 sc->recv_io.mem.pool = 2007 mempool_create(sp->recv_credit_max, mempool_alloc_slab, 2008 mempool_free_slab, sc->recv_io.mem.cache); 2009 if (!sc->recv_io.mem.pool) 2010 goto err; 2011 2012 for (i = 0; i < sp->recv_credit_max; i++) { 2013 recvmsg = mempool_alloc(sc->recv_io.mem.pool, KSMBD_DEFAULT_GFP); 2014 if (!recvmsg) 2015 goto err; 2016 recvmsg->socket = sc; 2017 recvmsg->sge.length = 0; 2018 list_add(&recvmsg->list, &sc->recv_io.free.list); 2019 } 2020 2021 return 0; 2022 err: 2023 smb_direct_destroy_pools(sc); 2024 return -ENOMEM; 2025 } 2026 2027 static int smb_direct_create_qpair(struct smbdirect_socket *sc, 2028 struct ib_qp_cap *cap) 2029 { 2030 struct smbdirect_socket_parameters *sp = &sc->parameters; 2031 int ret; 2032 struct ib_qp_init_attr qp_attr; 2033 int pages_per_rw; 2034 2035 sc->ib.pd = ib_alloc_pd(sc->ib.dev, 0); 2036 if (IS_ERR(sc->ib.pd)) { 2037 pr_err("Can't create RDMA PD\n"); 2038 ret = PTR_ERR(sc->ib.pd); 2039 sc->ib.pd = NULL; 2040 return ret; 2041 } 2042 2043 sc->ib.send_cq = ib_alloc_cq_any(sc->ib.dev, sc, 2044 sp->send_credit_target + 2045 cap->max_rdma_ctxs, 2046 IB_POLL_WORKQUEUE); 2047 if (IS_ERR(sc->ib.send_cq)) { 2048 pr_err("Can't create RDMA send CQ\n"); 2049 ret = PTR_ERR(sc->ib.send_cq); 2050 sc->ib.send_cq = NULL; 2051 goto err; 2052 } 2053 2054 sc->ib.recv_cq = ib_alloc_cq_any(sc->ib.dev, sc, 2055 sp->recv_credit_max, 2056 IB_POLL_WORKQUEUE); 2057 if (IS_ERR(sc->ib.recv_cq)) { 2058 pr_err("Can't create RDMA recv CQ\n"); 2059 ret = PTR_ERR(sc->ib.recv_cq); 2060 sc->ib.recv_cq = NULL; 2061 goto err; 2062 } 2063 2064 memset(&qp_attr, 0, sizeof(qp_attr)); 2065 qp_attr.event_handler = smb_direct_qpair_handler; 2066 qp_attr.qp_context = sc; 2067 qp_attr.cap = *cap; 2068 qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR; 2069 qp_attr.qp_type = IB_QPT_RC; 2070 qp_attr.send_cq = sc->ib.send_cq; 2071 qp_attr.recv_cq = sc->ib.recv_cq; 2072 qp_attr.port_num = ~0; 2073 2074 ret = rdma_create_qp(sc->rdma.cm_id, sc->ib.pd, &qp_attr); 2075 if (ret) { 2076 pr_err("Can't create RDMA QP: %d\n", ret); 2077 goto err; 2078 } 2079 2080 sc->ib.qp = sc->rdma.cm_id->qp; 2081 sc->rdma.cm_id->event_handler = smb_direct_cm_handler; 2082 2083 pages_per_rw = DIV_ROUND_UP(sp->max_read_write_size, PAGE_SIZE) + 1; 2084 if (pages_per_rw > sc->ib.dev->attrs.max_sgl_rd) { 2085 ret = ib_mr_pool_init(sc->ib.qp, &sc->ib.qp->rdma_mrs, 2086 sc->rw_io.credits.max, IB_MR_TYPE_MEM_REG, 2087 sc->rw_io.credits.num_pages, 0); 2088 if (ret) { 2089 pr_err("failed to init mr pool count %zu pages %zu\n", 2090 sc->rw_io.credits.max, sc->rw_io.credits.num_pages); 2091 goto err; 2092 } 2093 } 2094 2095 return 0; 2096 err: 2097 if (sc->ib.qp) { 2098 sc->ib.qp = NULL; 2099 rdma_destroy_qp(sc->rdma.cm_id); 2100 } 2101 if (sc->ib.recv_cq) { 2102 ib_destroy_cq(sc->ib.recv_cq); 2103 sc->ib.recv_cq = NULL; 2104 } 2105 if (sc->ib.send_cq) { 2106 ib_destroy_cq(sc->ib.send_cq); 2107 sc->ib.send_cq = NULL; 2108 } 2109 if (sc->ib.pd) { 2110 ib_dealloc_pd(sc->ib.pd); 2111 sc->ib.pd = NULL; 2112 } 2113 return ret; 2114 } 2115 2116 static int smb_direct_prepare(struct ksmbd_transport *t) 2117 { 2118 struct smb_direct_transport *st = SMBD_TRANS(t); 2119 struct smbdirect_socket *sc = &st->socket; 2120 struct smbdirect_socket_parameters *sp = &sc->parameters; 2121 struct smbdirect_recv_io *recvmsg; 2122 struct smbdirect_negotiate_req *req; 2123 unsigned long flags; 2124 int ret; 2125 2126 /* 2127 * We are waiting to pass the following states: 2128 * 2129 * SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED 2130 * SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING 2131 * SMBDIRECT_SOCKET_NEGOTIATE_NEEDED 2132 * 2133 * To finally get to SMBDIRECT_SOCKET_NEGOTIATE_RUNNING 2134 * in order to continue below. 2135 * 2136 * Everything else is unexpected and an error. 2137 */ 2138 ksmbd_debug(RDMA, "Waiting for SMB_DIRECT negotiate request\n"); 2139 ret = wait_event_interruptible_timeout(sc->status_wait, 2140 sc->status != SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED && 2141 sc->status != SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING && 2142 sc->status != SMBDIRECT_SOCKET_NEGOTIATE_NEEDED, 2143 msecs_to_jiffies(sp->negotiate_timeout_msec)); 2144 if (ret <= 0 || sc->status != SMBDIRECT_SOCKET_NEGOTIATE_RUNNING) 2145 return ret < 0 ? ret : -ETIMEDOUT; 2146 2147 recvmsg = get_first_reassembly(sc); 2148 if (!recvmsg) 2149 return -ECONNABORTED; 2150 2151 ret = smb_direct_check_recvmsg(recvmsg); 2152 if (ret == -ECONNABORTED) 2153 goto out; 2154 2155 req = (struct smbdirect_negotiate_req *)recvmsg->packet; 2156 sp->max_recv_size = min_t(int, sp->max_recv_size, 2157 le32_to_cpu(req->preferred_send_size)); 2158 sp->max_send_size = min_t(int, sp->max_send_size, 2159 le32_to_cpu(req->max_receive_size)); 2160 sp->max_fragmented_send_size = 2161 le32_to_cpu(req->max_fragmented_size); 2162 sp->max_fragmented_recv_size = 2163 (sp->recv_credit_max * sp->max_recv_size) / 2; 2164 sc->recv_io.credits.target = le16_to_cpu(req->credits_requested); 2165 sc->recv_io.credits.target = min_t(u16, sc->recv_io.credits.target, sp->recv_credit_max); 2166 sc->recv_io.credits.target = max_t(u16, sc->recv_io.credits.target, 1); 2167 2168 ret = smb_direct_send_negotiate_response(sc, ret); 2169 out: 2170 spin_lock_irqsave(&sc->recv_io.reassembly.lock, flags); 2171 sc->recv_io.reassembly.queue_length--; 2172 list_del(&recvmsg->list); 2173 spin_unlock_irqrestore(&sc->recv_io.reassembly.lock, flags); 2174 put_recvmsg(sc, recvmsg); 2175 2176 return ret; 2177 } 2178 2179 static int smb_direct_connect(struct smbdirect_socket *sc) 2180 { 2181 struct ib_qp_cap qp_cap; 2182 int ret; 2183 2184 ret = smb_direct_init_params(sc, &qp_cap); 2185 if (ret) { 2186 pr_err("Can't configure RDMA parameters\n"); 2187 return ret; 2188 } 2189 2190 ret = smb_direct_create_pools(sc); 2191 if (ret) { 2192 pr_err("Can't init RDMA pool: %d\n", ret); 2193 return ret; 2194 } 2195 2196 ret = smb_direct_create_qpair(sc, &qp_cap); 2197 if (ret) { 2198 pr_err("Can't accept RDMA client: %d\n", ret); 2199 return ret; 2200 } 2201 2202 ret = smb_direct_prepare_negotiation(sc); 2203 if (ret) { 2204 pr_err("Can't negotiate: %d\n", ret); 2205 return ret; 2206 } 2207 return 0; 2208 } 2209 2210 static bool rdma_frwr_is_supported(struct ib_device_attr *attrs) 2211 { 2212 if (!(attrs->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS)) 2213 return false; 2214 if (attrs->max_fast_reg_page_list_len == 0) 2215 return false; 2216 return true; 2217 } 2218 2219 static int smb_direct_handle_connect_request(struct rdma_cm_id *new_cm_id, 2220 struct rdma_cm_event *event) 2221 { 2222 struct smb_direct_transport *t; 2223 struct smbdirect_socket *sc; 2224 struct smbdirect_socket_parameters *sp; 2225 struct task_struct *handler; 2226 u8 peer_initiator_depth; 2227 u8 peer_responder_resources; 2228 int ret; 2229 2230 if (!rdma_frwr_is_supported(&new_cm_id->device->attrs)) { 2231 ksmbd_debug(RDMA, 2232 "Fast Registration Work Requests is not supported. device capabilities=%llx\n", 2233 new_cm_id->device->attrs.device_cap_flags); 2234 return -EPROTONOSUPPORT; 2235 } 2236 2237 t = alloc_transport(new_cm_id); 2238 if (!t) 2239 return -ENOMEM; 2240 sc = &t->socket; 2241 sp = &sc->parameters; 2242 2243 peer_initiator_depth = event->param.conn.initiator_depth; 2244 peer_responder_resources = event->param.conn.responder_resources; 2245 if (rdma_protocol_iwarp(new_cm_id->device, new_cm_id->port_num) && 2246 event->param.conn.private_data_len == 8) { 2247 /* 2248 * Legacy clients with only iWarp MPA v1 support 2249 * need a private blob in order to negotiate 2250 * the IRD/ORD values. 2251 */ 2252 const __be32 *ird_ord_hdr = event->param.conn.private_data; 2253 u32 ird32 = be32_to_cpu(ird_ord_hdr[0]); 2254 u32 ord32 = be32_to_cpu(ird_ord_hdr[1]); 2255 2256 /* 2257 * cifs.ko sends the legacy IRD/ORD negotiation 2258 * event if iWarp MPA v2 was used. 2259 * 2260 * Here we check that the values match and only 2261 * mark the client as legacy if they don't match. 2262 */ 2263 if ((u32)event->param.conn.initiator_depth != ird32 || 2264 (u32)event->param.conn.responder_resources != ord32) { 2265 /* 2266 * There are broken clients (old cifs.ko) 2267 * using little endian and also 2268 * struct rdma_conn_param only uses u8 2269 * for initiator_depth and responder_resources, 2270 * so we truncate the value to U8_MAX. 2271 * 2272 * smb_direct_accept_client() will then 2273 * do the real negotiation in order to 2274 * select the minimum between client and 2275 * server. 2276 */ 2277 ird32 = min_t(u32, ird32, U8_MAX); 2278 ord32 = min_t(u32, ord32, U8_MAX); 2279 2280 sc->rdma.legacy_iwarp = true; 2281 peer_initiator_depth = (u8)ird32; 2282 peer_responder_resources = (u8)ord32; 2283 } 2284 } 2285 2286 /* 2287 * First set what the we as server are able to support 2288 */ 2289 sp->initiator_depth = min_t(u8, sp->initiator_depth, 2290 new_cm_id->device->attrs.max_qp_rd_atom); 2291 2292 /* 2293 * negotiate the value by using the minimum 2294 * between client and server if the client provided 2295 * non 0 values. 2296 */ 2297 if (peer_initiator_depth != 0) 2298 sp->initiator_depth = min_t(u8, sp->initiator_depth, 2299 peer_initiator_depth); 2300 if (peer_responder_resources != 0) 2301 sp->responder_resources = min_t(u8, sp->responder_resources, 2302 peer_responder_resources); 2303 2304 ret = smb_direct_connect(sc); 2305 if (ret) 2306 goto out_err; 2307 2308 handler = kthread_run(ksmbd_conn_handler_loop, 2309 KSMBD_TRANS(t)->conn, "ksmbd:r%u", 2310 smb_direct_port); 2311 if (IS_ERR(handler)) { 2312 ret = PTR_ERR(handler); 2313 pr_err("Can't start thread\n"); 2314 goto out_err; 2315 } 2316 2317 return 0; 2318 out_err: 2319 free_transport(t); 2320 return ret; 2321 } 2322 2323 static int smb_direct_listen_handler(struct rdma_cm_id *cm_id, 2324 struct rdma_cm_event *event) 2325 { 2326 switch (event->event) { 2327 case RDMA_CM_EVENT_CONNECT_REQUEST: { 2328 int ret = smb_direct_handle_connect_request(cm_id, event); 2329 2330 if (ret) { 2331 pr_err("Can't create transport: %d\n", ret); 2332 return ret; 2333 } 2334 2335 ksmbd_debug(RDMA, "Received connection request. cm_id=%p\n", 2336 cm_id); 2337 break; 2338 } 2339 default: 2340 pr_err("Unexpected listen event. cm_id=%p, event=%s (%d)\n", 2341 cm_id, rdma_event_msg(event->event), event->event); 2342 break; 2343 } 2344 return 0; 2345 } 2346 2347 static int smb_direct_listen(int port) 2348 { 2349 int ret; 2350 struct rdma_cm_id *cm_id; 2351 struct sockaddr_in sin = { 2352 .sin_family = AF_INET, 2353 .sin_addr.s_addr = htonl(INADDR_ANY), 2354 .sin_port = htons(port), 2355 }; 2356 2357 cm_id = rdma_create_id(&init_net, smb_direct_listen_handler, 2358 &smb_direct_listener, RDMA_PS_TCP, IB_QPT_RC); 2359 if (IS_ERR(cm_id)) { 2360 pr_err("Can't create cm id: %ld\n", PTR_ERR(cm_id)); 2361 return PTR_ERR(cm_id); 2362 } 2363 2364 ret = rdma_bind_addr(cm_id, (struct sockaddr *)&sin); 2365 if (ret) { 2366 pr_err("Can't bind: %d\n", ret); 2367 goto err; 2368 } 2369 2370 smb_direct_listener.cm_id = cm_id; 2371 2372 ret = rdma_listen(cm_id, 10); 2373 if (ret) { 2374 pr_err("Can't listen: %d\n", ret); 2375 goto err; 2376 } 2377 return 0; 2378 err: 2379 smb_direct_listener.cm_id = NULL; 2380 rdma_destroy_id(cm_id); 2381 return ret; 2382 } 2383 2384 static int smb_direct_ib_client_add(struct ib_device *ib_dev) 2385 { 2386 struct smb_direct_device *smb_dev; 2387 2388 /* Set 5445 port if device type is iWARP(No IB) */ 2389 if (ib_dev->node_type != RDMA_NODE_IB_CA) 2390 smb_direct_port = SMB_DIRECT_PORT_IWARP; 2391 2392 if (!rdma_frwr_is_supported(&ib_dev->attrs)) 2393 return 0; 2394 2395 smb_dev = kzalloc(sizeof(*smb_dev), KSMBD_DEFAULT_GFP); 2396 if (!smb_dev) 2397 return -ENOMEM; 2398 smb_dev->ib_dev = ib_dev; 2399 2400 write_lock(&smb_direct_device_lock); 2401 list_add(&smb_dev->list, &smb_direct_device_list); 2402 write_unlock(&smb_direct_device_lock); 2403 2404 ksmbd_debug(RDMA, "ib device added: name %s\n", ib_dev->name); 2405 return 0; 2406 } 2407 2408 static void smb_direct_ib_client_remove(struct ib_device *ib_dev, 2409 void *client_data) 2410 { 2411 struct smb_direct_device *smb_dev, *tmp; 2412 2413 write_lock(&smb_direct_device_lock); 2414 list_for_each_entry_safe(smb_dev, tmp, &smb_direct_device_list, list) { 2415 if (smb_dev->ib_dev == ib_dev) { 2416 list_del(&smb_dev->list); 2417 kfree(smb_dev); 2418 break; 2419 } 2420 } 2421 write_unlock(&smb_direct_device_lock); 2422 } 2423 2424 static struct ib_client smb_direct_ib_client = { 2425 .name = "ksmbd_smb_direct_ib", 2426 .add = smb_direct_ib_client_add, 2427 .remove = smb_direct_ib_client_remove, 2428 }; 2429 2430 int ksmbd_rdma_init(void) 2431 { 2432 int ret; 2433 2434 smb_direct_listener.cm_id = NULL; 2435 2436 ret = ib_register_client(&smb_direct_ib_client); 2437 if (ret) { 2438 pr_err("failed to ib_register_client\n"); 2439 return ret; 2440 } 2441 2442 /* When a client is running out of send credits, the credits are 2443 * granted by the server's sending a packet using this queue. 2444 * This avoids the situation that a clients cannot send packets 2445 * for lack of credits 2446 */ 2447 smb_direct_wq = alloc_workqueue("ksmbd-smb_direct-wq", 2448 WQ_HIGHPRI | WQ_MEM_RECLAIM | WQ_PERCPU, 2449 0); 2450 if (!smb_direct_wq) 2451 return -ENOMEM; 2452 2453 ret = smb_direct_listen(smb_direct_port); 2454 if (ret) { 2455 destroy_workqueue(smb_direct_wq); 2456 smb_direct_wq = NULL; 2457 pr_err("Can't listen: %d\n", ret); 2458 return ret; 2459 } 2460 2461 ksmbd_debug(RDMA, "init RDMA listener. cm_id=%p\n", 2462 smb_direct_listener.cm_id); 2463 return 0; 2464 } 2465 2466 void ksmbd_rdma_stop_listening(void) 2467 { 2468 if (!smb_direct_listener.cm_id) 2469 return; 2470 2471 ib_unregister_client(&smb_direct_ib_client); 2472 rdma_destroy_id(smb_direct_listener.cm_id); 2473 2474 smb_direct_listener.cm_id = NULL; 2475 } 2476 2477 void ksmbd_rdma_destroy(void) 2478 { 2479 if (smb_direct_wq) { 2480 destroy_workqueue(smb_direct_wq); 2481 smb_direct_wq = NULL; 2482 } 2483 } 2484 2485 bool ksmbd_rdma_capable_netdev(struct net_device *netdev) 2486 { 2487 struct smb_direct_device *smb_dev; 2488 int i; 2489 bool rdma_capable = false; 2490 2491 read_lock(&smb_direct_device_lock); 2492 list_for_each_entry(smb_dev, &smb_direct_device_list, list) { 2493 for (i = 0; i < smb_dev->ib_dev->phys_port_cnt; i++) { 2494 struct net_device *ndev; 2495 2496 ndev = ib_device_get_netdev(smb_dev->ib_dev, i + 1); 2497 if (!ndev) 2498 continue; 2499 2500 if (ndev == netdev) { 2501 dev_put(ndev); 2502 rdma_capable = true; 2503 goto out; 2504 } 2505 dev_put(ndev); 2506 } 2507 } 2508 out: 2509 read_unlock(&smb_direct_device_lock); 2510 2511 if (rdma_capable == false) { 2512 struct ib_device *ibdev; 2513 2514 ibdev = ib_device_get_by_netdev(netdev, RDMA_DRIVER_UNKNOWN); 2515 if (ibdev) { 2516 rdma_capable = rdma_frwr_is_supported(&ibdev->attrs); 2517 ib_device_put(ibdev); 2518 } 2519 } 2520 2521 ksmbd_debug(RDMA, "netdev(%s) rdma capable : %s\n", 2522 netdev->name, str_true_false(rdma_capable)); 2523 2524 return rdma_capable; 2525 } 2526 2527 static const struct ksmbd_transport_ops ksmbd_smb_direct_transport_ops = { 2528 .prepare = smb_direct_prepare, 2529 .disconnect = smb_direct_disconnect, 2530 .shutdown = smb_direct_shutdown, 2531 .writev = smb_direct_writev, 2532 .read = smb_direct_read, 2533 .rdma_read = smb_direct_rdma_read, 2534 .rdma_write = smb_direct_rdma_write, 2535 .free_transport = smb_direct_free_transport, 2536 }; 2537