// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2017, Microsoft Corporation.
 * Copyright (C) 2018, LG Electronics.
 *
 * Author(s): Long Li <longli@microsoft.com>,
 *	      Hyunchul Lee <hyc.lee@gmail.com>
 */

#define SUBMOD_NAME	"smb_direct"

#include <linux/kthread.h>
#include <linux/list.h>
#include <linux/mempool.h>
#include <linux/highmem.h>
#include <linux/scatterlist.h>
#include <linux/string_choices.h>
#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>
#include <rdma/rw.h>

#define __SMBDIRECT_SOCKET_DISCONNECT(__sc) smb_direct_disconnect_rdma_connection(__sc)

#include "glob.h"
#include "connection.h"
#include "smb_common.h"
#include "../common/smb2status.h"
#include "../common/smbdirect/smbdirect.h"
#include "../common/smbdirect/smbdirect_pdu.h"
#include "../common/smbdirect/smbdirect_socket.h"
#include "transport_rdma.h"

#define SMB_DIRECT_PORT_IWARP		5445
#define SMB_DIRECT_PORT_INFINIBAND	445

#define SMB_DIRECT_VERSION_LE		cpu_to_le16(SMBDIRECT_V1)

/* SMB_DIRECT negotiation timeout (for the server) in seconds */
#define SMB_DIRECT_NEGOTIATE_TIMEOUT		5

/* The interval for sending a keepalive message to the peer in seconds */
#define SMB_DIRECT_KEEPALIVE_SEND_INTERVAL	120

/* The timeout to wait for a keepalive response from the peer in seconds */
#define SMB_DIRECT_KEEPALIVE_RECV_TIMEOUT	5

/*
 * Default maximum number of RDMA read/write operations outstanding on this
 * connection. This value may be decreased during QP creation, based on
 * hardware limits.
 */
#define SMB_DIRECT_CM_INITIATOR_DEPTH		8

/* Maximum number of retries on data transfer operations */
#define SMB_DIRECT_CM_RETRY			6
/* No need to retry on Receiver Not Ready since SMB_DIRECT manages credits */
#define SMB_DIRECT_CM_RNR_RETRY		0

/*
 * User configurable initial values per SMB_DIRECT transport connection
 * as defined in [MS-SMBD] 3.1.1.1.
 * Those may change after a SMB_DIRECT negotiation.
 */

/* Use port 445 as the SMB Direct port by default */
static int smb_direct_port = SMB_DIRECT_PORT_INFINIBAND;

/* The local peer's maximum number of credits to grant to the peer */
static int smb_direct_receive_credit_max = 255;

/* The number of credits the local peer requests from the remote peer */
static int smb_direct_send_credit_target = 255;

/* The maximum single-message size that can be sent to the remote peer */
static int smb_direct_max_send_size = 1364;

/* The maximum fragmented upper-layer payload receive size supported */
static int smb_direct_max_fragmented_recv_size = 1024 * 1024;

/* The maximum single-message size that can be received */
static int smb_direct_max_receive_size = 1364;

static int smb_direct_max_read_write_size = SMBD_DEFAULT_IOSIZE;

static LIST_HEAD(smb_direct_device_list);
static DEFINE_RWLOCK(smb_direct_device_lock);

struct smb_direct_device {
	struct ib_device	*ib_dev;
	struct list_head	list;
};

static struct smb_direct_listener {
	struct rdma_cm_id	*cm_id;
} smb_direct_listener;

static struct workqueue_struct *smb_direct_wq;

struct smb_direct_transport {
	struct ksmbd_transport	transport;

	struct smbdirect_socket	socket;
};

#define KSMBD_TRANS(t)	(&(t)->transport)
#define SMBD_TRANS(t)	(container_of(t, \
			 struct smb_direct_transport, transport))

static const struct ksmbd_transport_ops ksmbd_smb_direct_transport_ops;
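/*
 * Layout note (a summary of the definitions above): each connection embeds
 * a struct smbdirect_socket in a struct smb_direct_transport, which in turn
 * embeds the generic struct ksmbd_transport; SMBD_TRANS() recovers the
 * outer transport from the generic one via container_of().
 */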
void init_smbd_max_io_size(unsigned int sz)
{
	sz = clamp_val(sz, SMBD_MIN_IOSIZE, SMBD_MAX_IOSIZE);
	smb_direct_max_read_write_size = sz;
}

unsigned int get_smbd_max_read_write_size(struct ksmbd_transport *kt)
{
	struct smb_direct_transport *t;
	struct smbdirect_socket *sc;
	struct smbdirect_socket_parameters *sp;

	if (kt->ops != &ksmbd_smb_direct_transport_ops)
		return 0;

	t = SMBD_TRANS(kt);
	sc = &t->socket;
	sp = &sc->parameters;

	return sp->max_read_write_size;
}

static inline int get_buf_page_count(void *buf, int size)
{
	return DIV_ROUND_UP((uintptr_t)buf + size, PAGE_SIZE) -
	       (uintptr_t)buf / PAGE_SIZE;
}

static void smb_direct_destroy_pools(struct smbdirect_socket *sc);
static void smb_direct_post_recv_credits(struct work_struct *work);
static int smb_direct_post_send_data(struct smbdirect_socket *sc,
				     struct smbdirect_send_batch *send_ctx,
				     struct kvec *iov, int niov,
				     int remaining_data_length);

static inline void *smbdirect_recv_io_payload(struct smbdirect_recv_io *recvmsg)
{
	return (void *)recvmsg->packet;
}

static struct smbdirect_recv_io *get_free_recvmsg(struct smbdirect_socket *sc)
{
	struct smbdirect_recv_io *recvmsg = NULL;
	unsigned long flags;

	spin_lock_irqsave(&sc->recv_io.free.lock, flags);
	if (!list_empty(&sc->recv_io.free.list)) {
		recvmsg = list_first_entry(&sc->recv_io.free.list,
					   struct smbdirect_recv_io,
					   list);
		list_del(&recvmsg->list);
	}
	spin_unlock_irqrestore(&sc->recv_io.free.lock, flags);
	return recvmsg;
}

static void put_recvmsg(struct smbdirect_socket *sc,
			struct smbdirect_recv_io *recvmsg)
{
	unsigned long flags;

	if (likely(recvmsg->sge.length != 0)) {
		ib_dma_unmap_single(sc->ib.dev,
				    recvmsg->sge.addr,
				    recvmsg->sge.length,
				    DMA_FROM_DEVICE);
		recvmsg->sge.length = 0;
	}

	spin_lock_irqsave(&sc->recv_io.free.lock, flags);
	list_add(&recvmsg->list, &sc->recv_io.free.list);
	spin_unlock_irqrestore(&sc->recv_io.free.lock, flags);

	queue_work(sc->workqueue, &sc->recv_io.posted.refill_work);
}

static void enqueue_reassembly(struct smbdirect_socket *sc,
			       struct smbdirect_recv_io *recvmsg,
			       int data_length)
{
	unsigned long flags;

	spin_lock_irqsave(&sc->recv_io.reassembly.lock, flags);
	list_add_tail(&recvmsg->list, &sc->recv_io.reassembly.list);
	sc->recv_io.reassembly.queue_length++;
	/*
	 * Make sure reassembly_data_length is updated after list and
	 * reassembly_queue_length are updated. On the dequeue side
	 * reassembly_data_length is checked without a lock to determine
	 * if reassembly_queue_length and the list are up to date.
	 */
	virt_wmb();
	sc->recv_io.reassembly.data_length += data_length;
	spin_unlock_irqrestore(&sc->recv_io.reassembly.lock, flags);
}

static struct smbdirect_recv_io *get_first_reassembly(struct smbdirect_socket *sc)
{
	if (!list_empty(&sc->recv_io.reassembly.list))
		return list_first_entry(&sc->recv_io.reassembly.list,
					struct smbdirect_recv_io, list);
	else
		return NULL;
}
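/*
 * Memory-ordering note: enqueue_reassembly() publishes data_length with
 * virt_wmb() after updating the list and queue_length, so the lock-free
 * reader in smb_direct_read() (which issues virt_rmb() after checking
 * data_length) observes a consistent snapshot of the queue.
 */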
static void smb_direct_disconnect_wake_up_all(struct smbdirect_socket *sc)
{
	/*
	 * Wake up all waiters in all wait queues
	 * in order to notice the broken connection.
	 */
	wake_up_all(&sc->status_wait);
	wake_up_all(&sc->send_io.lcredits.wait_queue);
	wake_up_all(&sc->send_io.credits.wait_queue);
	wake_up_all(&sc->send_io.pending.zero_wait_queue);
	wake_up_all(&sc->recv_io.reassembly.wait_queue);
	wake_up_all(&sc->rw_io.credits.wait_queue);
}

static void smb_direct_disconnect_rdma_work(struct work_struct *work)
{
	struct smbdirect_socket *sc =
		container_of(work, struct smbdirect_socket, disconnect_work);

	if (sc->first_error == 0)
		sc->first_error = -ECONNABORTED;

	/*
	 * make sure this and other work is not queued again
	 * but here we don't block and avoid
	 * disable[_delayed]_work_sync()
	 */
	disable_work(&sc->disconnect_work);
	disable_work(&sc->recv_io.posted.refill_work);
	disable_delayed_work(&sc->idle.timer_work);
	disable_work(&sc->idle.immediate_work);

	switch (sc->status) {
	case SMBDIRECT_SOCKET_NEGOTIATE_NEEDED:
	case SMBDIRECT_SOCKET_NEGOTIATE_RUNNING:
	case SMBDIRECT_SOCKET_NEGOTIATE_FAILED:
	case SMBDIRECT_SOCKET_CONNECTED:
	case SMBDIRECT_SOCKET_ERROR:
		sc->status = SMBDIRECT_SOCKET_DISCONNECTING;
		rdma_disconnect(sc->rdma.cm_id);
		break;

	case SMBDIRECT_SOCKET_CREATED:
	case SMBDIRECT_SOCKET_RESOLVE_ADDR_NEEDED:
	case SMBDIRECT_SOCKET_RESOLVE_ADDR_RUNNING:
	case SMBDIRECT_SOCKET_RESOLVE_ADDR_FAILED:
	case SMBDIRECT_SOCKET_RESOLVE_ROUTE_NEEDED:
	case SMBDIRECT_SOCKET_RESOLVE_ROUTE_RUNNING:
	case SMBDIRECT_SOCKET_RESOLVE_ROUTE_FAILED:
	case SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED:
	case SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING:
	case SMBDIRECT_SOCKET_RDMA_CONNECT_FAILED:
		/*
		 * rdma_accept() never reached
		 * RDMA_CM_EVENT_ESTABLISHED
		 */
		sc->status = SMBDIRECT_SOCKET_DISCONNECTED;
		break;

	case SMBDIRECT_SOCKET_DISCONNECTING:
	case SMBDIRECT_SOCKET_DISCONNECTED:
	case SMBDIRECT_SOCKET_DESTROYED:
		break;
	}

	/*
	 * Wake up all waiters in all wait queues
	 * in order to notice the broken connection.
	 */
	smb_direct_disconnect_wake_up_all(sc);
}
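/*
 * smb_direct_disconnect_rdma_work() above runs from the workqueue and
 * performs the actual rdma_disconnect();
 * smb_direct_disconnect_rdma_connection() below is the non-blocking
 * trigger: it records the first error, moves the status to the matching
 * *_FAILED/ERROR state and queues that work.
 */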
static void
smb_direct_disconnect_rdma_connection(struct smbdirect_socket *sc)
{
	if (sc->first_error == 0)
		sc->first_error = -ECONNABORTED;

	/*
	 * make sure other work (than disconnect_work) is
	 * not queued again but here we don't block and avoid
	 * disable[_delayed]_work_sync()
	 */
	disable_work(&sc->recv_io.posted.refill_work);
	disable_work(&sc->idle.immediate_work);
	disable_delayed_work(&sc->idle.timer_work);

	switch (sc->status) {
	case SMBDIRECT_SOCKET_RESOLVE_ADDR_FAILED:
	case SMBDIRECT_SOCKET_RESOLVE_ROUTE_FAILED:
	case SMBDIRECT_SOCKET_RDMA_CONNECT_FAILED:
	case SMBDIRECT_SOCKET_NEGOTIATE_FAILED:
	case SMBDIRECT_SOCKET_ERROR:
	case SMBDIRECT_SOCKET_DISCONNECTING:
	case SMBDIRECT_SOCKET_DISCONNECTED:
	case SMBDIRECT_SOCKET_DESTROYED:
		/*
		 * Keep the current error status
		 */
		break;

	case SMBDIRECT_SOCKET_RESOLVE_ADDR_NEEDED:
	case SMBDIRECT_SOCKET_RESOLVE_ADDR_RUNNING:
		sc->status = SMBDIRECT_SOCKET_RESOLVE_ADDR_FAILED;
		break;

	case SMBDIRECT_SOCKET_RESOLVE_ROUTE_NEEDED:
	case SMBDIRECT_SOCKET_RESOLVE_ROUTE_RUNNING:
		sc->status = SMBDIRECT_SOCKET_RESOLVE_ROUTE_FAILED;
		break;

	case SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED:
	case SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING:
		sc->status = SMBDIRECT_SOCKET_RDMA_CONNECT_FAILED;
		break;

	case SMBDIRECT_SOCKET_NEGOTIATE_NEEDED:
	case SMBDIRECT_SOCKET_NEGOTIATE_RUNNING:
		sc->status = SMBDIRECT_SOCKET_NEGOTIATE_FAILED;
		break;

	case SMBDIRECT_SOCKET_CREATED:
		sc->status = SMBDIRECT_SOCKET_DISCONNECTED;
		break;

	case SMBDIRECT_SOCKET_CONNECTED:
		sc->status = SMBDIRECT_SOCKET_ERROR;
		break;
	}

	/*
	 * Wake up all waiters in all wait queues
	 * in order to notice the broken connection.
	 */
	smb_direct_disconnect_wake_up_all(sc);

	queue_work(sc->workqueue, &sc->disconnect_work);
}
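/*
 * Keepalive flow: the idle timer normally fires after
 * keepalive_interval_msec with keepalive == SMBDIRECT_KEEPALIVE_NONE; it
 * then sets SMBDIRECT_KEEPALIVE_PENDING, re-arms itself with the shorter
 * keepalive_timeout_msec and queues immediate_work, which sends an empty
 * message carrying SMBDIRECT_FLAG_RESPONSE_REQUESTED. Any receive resets
 * the state to NONE; if the timer fires while the state is still not
 * NONE, the connection is torn down.
 */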
static void smb_direct_send_immediate_work(struct work_struct *work)
{
	struct smbdirect_socket *sc =
		container_of(work, struct smbdirect_socket, idle.immediate_work);

	if (sc->status != SMBDIRECT_SOCKET_CONNECTED)
		return;

	smb_direct_post_send_data(sc, NULL, NULL, 0, 0);
}

static void smb_direct_idle_connection_timer(struct work_struct *work)
{
	struct smbdirect_socket *sc =
		container_of(work, struct smbdirect_socket, idle.timer_work.work);
	struct smbdirect_socket_parameters *sp = &sc->parameters;

	if (sc->idle.keepalive != SMBDIRECT_KEEPALIVE_NONE) {
		smb_direct_disconnect_rdma_connection(sc);
		return;
	}

	if (sc->status != SMBDIRECT_SOCKET_CONNECTED)
		return;

	/*
	 * Now use the keepalive timeout (instead of keepalive interval)
	 * in order to wait for a response
	 */
	sc->idle.keepalive = SMBDIRECT_KEEPALIVE_PENDING;
	mod_delayed_work(sc->workqueue, &sc->idle.timer_work,
			 msecs_to_jiffies(sp->keepalive_timeout_msec));
	queue_work(sc->workqueue, &sc->idle.immediate_work);
}

static struct smb_direct_transport *alloc_transport(struct rdma_cm_id *cm_id)
{
	struct smb_direct_transport *t;
	struct smbdirect_socket *sc;
	struct smbdirect_socket_parameters *sp;
	struct ksmbd_conn *conn;

	t = kzalloc(sizeof(*t), KSMBD_DEFAULT_GFP);
	if (!t)
		return NULL;
	sc = &t->socket;
	smbdirect_socket_init(sc);
	sp = &sc->parameters;

	sc->workqueue = smb_direct_wq;

	INIT_WORK(&sc->disconnect_work, smb_direct_disconnect_rdma_work);

	sp->negotiate_timeout_msec = SMB_DIRECT_NEGOTIATE_TIMEOUT * 1000;
	sp->initiator_depth = SMB_DIRECT_CM_INITIATOR_DEPTH;
	sp->responder_resources = 1;
	sp->recv_credit_max = smb_direct_receive_credit_max;
	sp->send_credit_target = smb_direct_send_credit_target;
	sp->max_send_size = smb_direct_max_send_size;
	sp->max_fragmented_recv_size = smb_direct_max_fragmented_recv_size;
	sp->max_recv_size = smb_direct_max_receive_size;
	sp->max_read_write_size = smb_direct_max_read_write_size;
	sp->keepalive_interval_msec = SMB_DIRECT_KEEPALIVE_SEND_INTERVAL * 1000;
	sp->keepalive_timeout_msec = SMB_DIRECT_KEEPALIVE_RECV_TIMEOUT * 1000;

	sc->rdma.cm_id = cm_id;
	cm_id->context = sc;

	sc->ib.dev = sc->rdma.cm_id->device;

	INIT_DELAYED_WORK(&sc->idle.timer_work, smb_direct_idle_connection_timer);

	conn = ksmbd_conn_alloc();
	if (!conn)
		goto err;

	down_write(&conn_list_lock);
	hash_add(conn_list, &conn->hlist, 0);
	up_write(&conn_list_lock);

	conn->transport = KSMBD_TRANS(t);
	KSMBD_TRANS(t)->conn = conn;
	KSMBD_TRANS(t)->ops = &ksmbd_smb_direct_transport_ops;
	return t;
err:
	kfree(t);
	return NULL;
}

static void smb_direct_free_transport(struct ksmbd_transport *kt)
{
	kfree(SMBD_TRANS(kt));
}
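/*
 * Teardown order used by free_transport() below: force a disconnect and
 * wait for SMBDIRECT_SOCKET_DISCONNECTED, wake all waiters once more,
 * cancel all work items, then drain and destroy the QP before freeing
 * the CQs, the PD and finally the cm_id.
 */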
static void free_transport(struct smb_direct_transport *t)
{
	struct smbdirect_socket *sc = &t->socket;
	struct smbdirect_recv_io *recvmsg;

	disable_work_sync(&sc->disconnect_work);
	if (sc->status < SMBDIRECT_SOCKET_DISCONNECTING)
		smb_direct_disconnect_rdma_work(&sc->disconnect_work);
	if (sc->status < SMBDIRECT_SOCKET_DISCONNECTED)
		wait_event(sc->status_wait, sc->status == SMBDIRECT_SOCKET_DISCONNECTED);

	/*
	 * Wake up all waiters in all wait queues
	 * in order to notice the broken connection.
	 *
	 * Most likely this was already called via
	 * smb_direct_disconnect_rdma_work(), but call it again...
	 */
	smb_direct_disconnect_wake_up_all(sc);

	disable_work_sync(&sc->recv_io.posted.refill_work);
	disable_delayed_work_sync(&sc->idle.timer_work);
	disable_work_sync(&sc->idle.immediate_work);

	if (sc->rdma.cm_id)
		rdma_lock_handler(sc->rdma.cm_id);

	if (sc->ib.qp) {
		ib_drain_qp(sc->ib.qp);
		sc->ib.qp = NULL;
		rdma_destroy_qp(sc->rdma.cm_id);
	}

	ksmbd_debug(RDMA, "drain the reassembly queue\n");
	do {
		unsigned long flags;

		spin_lock_irqsave(&sc->recv_io.reassembly.lock, flags);
		recvmsg = get_first_reassembly(sc);
		if (recvmsg) {
			list_del(&recvmsg->list);
			spin_unlock_irqrestore(&sc->recv_io.reassembly.lock, flags);
			put_recvmsg(sc, recvmsg);
		} else {
			spin_unlock_irqrestore(&sc->recv_io.reassembly.lock, flags);
		}
	} while (recvmsg);
	sc->recv_io.reassembly.data_length = 0;

	if (sc->ib.send_cq)
		ib_free_cq(sc->ib.send_cq);
	if (sc->ib.recv_cq)
		ib_free_cq(sc->ib.recv_cq);
	if (sc->ib.pd)
		ib_dealloc_pd(sc->ib.pd);
	if (sc->rdma.cm_id) {
		rdma_unlock_handler(sc->rdma.cm_id);
		rdma_destroy_id(sc->rdma.cm_id);
	}

	smb_direct_destroy_pools(sc);
	ksmbd_conn_free(KSMBD_TRANS(t)->conn);
}

static struct smbdirect_send_io *smb_direct_alloc_sendmsg(struct smbdirect_socket *sc)
{
	struct smbdirect_send_io *msg;

	msg = mempool_alloc(sc->send_io.mem.pool, KSMBD_DEFAULT_GFP);
	if (!msg)
		return ERR_PTR(-ENOMEM);
	msg->socket = sc;
	INIT_LIST_HEAD(&msg->sibling_list);
	msg->num_sge = 0;
	return msg;
}
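/*
 * Note the asymmetry in smb_direct_free_sendmsg(): sge[0] always carries
 * the smbdirect header mapped with ib_dma_map_single(), while any further
 * SGEs point at payload pages mapped via ib_dma_map_sg() (see
 * get_mapped_sg_list()), hence the two unmap variants below.
 */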
static void smb_direct_free_sendmsg(struct smbdirect_socket *sc,
				    struct smbdirect_send_io *msg)
{
	int i;

	/*
	 * The list needs to be empty!
	 * The caller should take care of it.
	 */
	WARN_ON_ONCE(!list_empty(&msg->sibling_list));

	if (msg->num_sge > 0) {
		ib_dma_unmap_single(sc->ib.dev,
				    msg->sge[0].addr, msg->sge[0].length,
				    DMA_TO_DEVICE);
		for (i = 1; i < msg->num_sge; i++)
			ib_dma_unmap_page(sc->ib.dev,
					  msg->sge[i].addr, msg->sge[i].length,
					  DMA_TO_DEVICE);
	}
	mempool_free(msg, sc->send_io.mem.pool);
}

static int smb_direct_check_recvmsg(struct smbdirect_recv_io *recvmsg)
{
	struct smbdirect_socket *sc = recvmsg->socket;

	switch (sc->recv_io.expected) {
	case SMBDIRECT_EXPECT_DATA_TRANSFER: {
		struct smbdirect_data_transfer *req =
			(struct smbdirect_data_transfer *)recvmsg->packet;
		struct smb2_hdr *hdr = (struct smb2_hdr *)(recvmsg->packet
				+ le32_to_cpu(req->data_offset));
		ksmbd_debug(RDMA,
			    "CreditGranted: %u, CreditRequested: %u, DataLength: %u, RemainingDataLength: %u, SMB: %x, Command: %u\n",
			    le16_to_cpu(req->credits_granted),
			    le16_to_cpu(req->credits_requested),
			    req->data_length, req->remaining_data_length,
			    hdr->ProtocolId, hdr->Command);
		return 0;
	}
	case SMBDIRECT_EXPECT_NEGOTIATE_REQ: {
		struct smbdirect_negotiate_req *req =
			(struct smbdirect_negotiate_req *)recvmsg->packet;
		ksmbd_debug(RDMA,
			    "MinVersion: %u, MaxVersion: %u, CreditRequested: %u, MaxSendSize: %u, MaxRecvSize: %u, MaxFragmentedSize: %u\n",
			    le16_to_cpu(req->min_version),
			    le16_to_cpu(req->max_version),
			    le16_to_cpu(req->credits_requested),
			    le32_to_cpu(req->preferred_send_size),
			    le32_to_cpu(req->max_receive_size),
			    le32_to_cpu(req->max_fragmented_size));
		if (le16_to_cpu(req->min_version) > 0x0100 ||
		    le16_to_cpu(req->max_version) < 0x0100)
			return -EOPNOTSUPP;
		if (le16_to_cpu(req->credits_requested) <= 0 ||
		    le32_to_cpu(req->max_receive_size) <= 128 ||
		    le32_to_cpu(req->max_fragmented_size) <=
		    128 * 1024)
			return -ECONNABORTED;

		return 0;
	}
	case SMBDIRECT_EXPECT_NEGOTIATE_REP:
		/* client only */
		break;
	}

	/* This is an internal error */
	return -EINVAL;
}
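/*
 * recv_done() runs from the receive CQ completion handler and must not
 * sleep; it validates the received PDU, accounts credits and either
 * queues the buffer into the reassembly list or returns it to the free
 * list.
 */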
static void recv_done(struct ib_cq *cq, struct ib_wc *wc)
{
	struct smbdirect_recv_io *recvmsg;
	struct smbdirect_socket *sc;
	struct smbdirect_socket_parameters *sp;

	recvmsg = container_of(wc->wr_cqe, struct smbdirect_recv_io, cqe);
	sc = recvmsg->socket;
	sp = &sc->parameters;

	if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_RECV) {
		put_recvmsg(sc, recvmsg);
		if (wc->status != IB_WC_WR_FLUSH_ERR) {
			pr_err("Recv error. status='%s (%d)' opcode=%d\n",
			       ib_wc_status_msg(wc->status), wc->status,
			       wc->opcode);
			smb_direct_disconnect_rdma_connection(sc);
		}
		return;
	}

	ksmbd_debug(RDMA, "Recv completed. status='%s (%d)', opcode=%d\n",
		    ib_wc_status_msg(wc->status), wc->status,
		    wc->opcode);

	ib_dma_sync_single_for_cpu(wc->qp->device, recvmsg->sge.addr,
				   recvmsg->sge.length, DMA_FROM_DEVICE);

	/*
	 * Reset the timer to the keepalive interval in
	 * order to trigger our next keepalive message.
	 */
	sc->idle.keepalive = SMBDIRECT_KEEPALIVE_NONE;
	mod_delayed_work(sc->workqueue, &sc->idle.timer_work,
			 msecs_to_jiffies(sp->keepalive_interval_msec));

	switch (sc->recv_io.expected) {
	case SMBDIRECT_EXPECT_NEGOTIATE_REQ:
		if (wc->byte_len < sizeof(struct smbdirect_negotiate_req)) {
			put_recvmsg(sc, recvmsg);
			smb_direct_disconnect_rdma_connection(sc);
			return;
		}
		sc->recv_io.reassembly.full_packet_received = true;
		/*
		 * Some drivers (at least mlx5_ib) might post a
		 * recv completion before RDMA_CM_EVENT_ESTABLISHED,
		 * so we need to adjust our expectation in that case.
		 */
		if (!sc->first_error && sc->status == SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING)
			sc->status = SMBDIRECT_SOCKET_NEGOTIATE_NEEDED;
		if (SMBDIRECT_CHECK_STATUS_WARN(sc, SMBDIRECT_SOCKET_NEGOTIATE_NEEDED)) {
			put_recvmsg(sc, recvmsg);
			smb_direct_disconnect_rdma_connection(sc);
			return;
		}
		sc->status = SMBDIRECT_SOCKET_NEGOTIATE_RUNNING;
		enqueue_reassembly(sc, recvmsg, 0);
		wake_up(&sc->status_wait);
		return;
	case SMBDIRECT_EXPECT_DATA_TRANSFER: {
		struct smbdirect_data_transfer *data_transfer =
			(struct smbdirect_data_transfer *)recvmsg->packet;
		u32 remaining_data_length, data_offset, data_length;
		u16 old_recv_credit_target;

		if (wc->byte_len <
		    offsetof(struct smbdirect_data_transfer, padding)) {
			put_recvmsg(sc, recvmsg);
			smb_direct_disconnect_rdma_connection(sc);
			return;
		}

		remaining_data_length = le32_to_cpu(data_transfer->remaining_data_length);
		data_length = le32_to_cpu(data_transfer->data_length);
		data_offset = le32_to_cpu(data_transfer->data_offset);
		if (wc->byte_len < data_offset ||
		    wc->byte_len < (u64)data_offset + data_length) {
			put_recvmsg(sc, recvmsg);
			smb_direct_disconnect_rdma_connection(sc);
			return;
		}
		if (remaining_data_length > sp->max_fragmented_recv_size ||
		    data_length > sp->max_fragmented_recv_size ||
		    (u64)remaining_data_length + (u64)data_length >
		    (u64)sp->max_fragmented_recv_size) {
			put_recvmsg(sc, recvmsg);
			smb_direct_disconnect_rdma_connection(sc);
			return;
		}

		if (data_length) {
			if (sc->recv_io.reassembly.full_packet_received)
				recvmsg->first_segment = true;

			if (le32_to_cpu(data_transfer->remaining_data_length))
				sc->recv_io.reassembly.full_packet_received = false;
			else
				sc->recv_io.reassembly.full_packet_received = true;
		}

		atomic_dec(&sc->recv_io.posted.count);
		atomic_dec(&sc->recv_io.credits.count);

		old_recv_credit_target = sc->recv_io.credits.target;
		sc->recv_io.credits.target =
			le16_to_cpu(data_transfer->credits_requested);
		sc->recv_io.credits.target =
			min_t(u16, sc->recv_io.credits.target, sp->recv_credit_max);
		sc->recv_io.credits.target =
			max_t(u16, sc->recv_io.credits.target, 1);
		atomic_add(le16_to_cpu(data_transfer->credits_granted),
			   &sc->send_io.credits.count);

		if (le16_to_cpu(data_transfer->flags) &
		    SMBDIRECT_FLAG_RESPONSE_REQUESTED)
			queue_work(sc->workqueue, &sc->idle.immediate_work);

		if (atomic_read(&sc->send_io.credits.count) > 0)
			wake_up(&sc->send_io.credits.wait_queue);

		if (data_length) {
			if (sc->recv_io.credits.target > old_recv_credit_target)
				queue_work(sc->workqueue, &sc->recv_io.posted.refill_work);

			enqueue_reassembly(sc, recvmsg, (int)data_length);
			wake_up(&sc->recv_io.reassembly.wait_queue);
		} else {
			put_recvmsg(sc, recvmsg);
		}

		return;
	}
	case SMBDIRECT_EXPECT_NEGOTIATE_REP:
		/* client only */
		break;
	}

	/*
	 * This is an internal error!
	 */
	WARN_ON_ONCE(sc->recv_io.expected != SMBDIRECT_EXPECT_DATA_TRANSFER);
	put_recvmsg(sc, recvmsg);
	smb_direct_disconnect_rdma_connection(sc);
}
static int smb_direct_post_recv(struct smbdirect_socket *sc,
				struct smbdirect_recv_io *recvmsg)
{
	struct smbdirect_socket_parameters *sp = &sc->parameters;
	struct ib_recv_wr wr;
	int ret;

	recvmsg->sge.addr = ib_dma_map_single(sc->ib.dev,
					      recvmsg->packet,
					      sp->max_recv_size,
					      DMA_FROM_DEVICE);
	ret = ib_dma_mapping_error(sc->ib.dev, recvmsg->sge.addr);
	if (ret)
		return ret;
	recvmsg->sge.length = sp->max_recv_size;
	recvmsg->sge.lkey = sc->ib.pd->local_dma_lkey;
	recvmsg->cqe.done = recv_done;

	wr.wr_cqe = &recvmsg->cqe;
	wr.next = NULL;
	wr.sg_list = &recvmsg->sge;
	wr.num_sge = 1;

	ret = ib_post_recv(sc->ib.qp, &wr, NULL);
	if (ret) {
		pr_err("Can't post recv: %d\n", ret);
		ib_dma_unmap_single(sc->ib.dev,
				    recvmsg->sge.addr, recvmsg->sge.length,
				    DMA_FROM_DEVICE);
		recvmsg->sge.length = 0;
		smb_direct_disconnect_rdma_connection(sc);
		return ret;
	}
	return ret;
}
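/*
 * smb_direct_read() below is the single consumer of the reassembly
 * queue; it runs in the ksmbd connection thread and sleeps in
 * wait_event_interruptible() until enough bytes have been reassembled
 * or the socket leaves the CONNECTED state.
 */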
static int smb_direct_read(struct ksmbd_transport *t, char *buf,
			   unsigned int size, int unused)
{
	struct smbdirect_recv_io *recvmsg;
	struct smbdirect_data_transfer *data_transfer;
	int to_copy, to_read, data_read, offset;
	u32 data_length, remaining_data_length, data_offset;
	int rc;
	struct smb_direct_transport *st = SMBD_TRANS(t);
	struct smbdirect_socket *sc = &st->socket;

again:
	if (sc->status != SMBDIRECT_SOCKET_CONNECTED) {
		pr_err("disconnected\n");
		return -ENOTCONN;
	}

	/*
	 * No need to hold the reassembly queue lock all the time as we are
	 * the only one reading from the front of the queue. The transport
	 * may add more entries to the back of the queue at the same time.
	 */
	if (sc->recv_io.reassembly.data_length >= size) {
		int queue_length;
		int queue_removed = 0;
		unsigned long flags;

		/*
		 * Need to make sure reassembly_data_length is read before
		 * reading reassembly_queue_length and calling
		 * get_first_reassembly. This call is lock free
		 * as we never read at the end of the queue, which is being
		 * updated in SOFTIRQ as more data is received.
		 */
		virt_rmb();
		queue_length = sc->recv_io.reassembly.queue_length;
		data_read = 0;
		to_read = size;
		offset = sc->recv_io.reassembly.first_entry_offset;
		while (data_read < size) {
			recvmsg = get_first_reassembly(sc);
			data_transfer = smbdirect_recv_io_payload(recvmsg);
			data_length = le32_to_cpu(data_transfer->data_length);
			remaining_data_length =
				le32_to_cpu(data_transfer->remaining_data_length);
			data_offset = le32_to_cpu(data_transfer->data_offset);

			/*
			 * The upper layer expects an RFC1002 length at the
			 * beginning of the payload. Return it to indicate
			 * the total length of the packet. This minimizes the
			 * changes to the upper-layer packet processing logic.
			 * This will eventually be removed when an
			 * intermediate transport layer is added.
			 */
			if (recvmsg->first_segment && size == 4) {
				unsigned int rfc1002_len =
					data_length + remaining_data_length;
				*((__be32 *)buf) = cpu_to_be32(rfc1002_len);
				data_read = 4;
				recvmsg->first_segment = false;
				ksmbd_debug(RDMA,
					    "returning rfc1002 length %d\n",
					    rfc1002_len);
				goto read_rfc1002_done;
			}

			to_copy = min_t(int, data_length - offset, to_read);
			memcpy(buf + data_read, (char *)data_transfer + data_offset + offset,
			       to_copy);

			/* move on to the next buffer? */
			if (to_copy == data_length - offset) {
				queue_length--;
				/*
				 * No need to lock if we are not at the
				 * end of the queue
				 */
				if (queue_length) {
					list_del(&recvmsg->list);
				} else {
					spin_lock_irqsave(&sc->recv_io.reassembly.lock, flags);
					list_del(&recvmsg->list);
					spin_unlock_irqrestore(&sc->recv_io.reassembly.lock, flags);
				}
				queue_removed++;
				put_recvmsg(sc, recvmsg);
				offset = 0;
			} else {
				offset += to_copy;
			}

			to_read -= to_copy;
			data_read += to_copy;
		}

		spin_lock_irqsave(&sc->recv_io.reassembly.lock, flags);
		sc->recv_io.reassembly.data_length -= data_read;
		sc->recv_io.reassembly.queue_length -= queue_removed;
		spin_unlock_irqrestore(&sc->recv_io.reassembly.lock, flags);

		sc->recv_io.reassembly.first_entry_offset = offset;
		ksmbd_debug(RDMA,
			    "returning to thread data_read=%d reassembly_data_length=%d first_entry_offset=%d\n",
			    data_read, sc->recv_io.reassembly.data_length,
			    sc->recv_io.reassembly.first_entry_offset);
read_rfc1002_done:
		return data_read;
	}

	ksmbd_debug(RDMA, "wait_event on more data\n");
	rc = wait_event_interruptible(sc->recv_io.reassembly.wait_queue,
				      sc->recv_io.reassembly.data_length >= size ||
				      sc->status != SMBDIRECT_SOCKET_CONNECTED);
	if (rc)
		return -EINTR;

	goto again;
}

static void smb_direct_post_recv_credits(struct work_struct *work)
{
	struct smbdirect_socket *sc =
		container_of(work, struct smbdirect_socket, recv_io.posted.refill_work);
	struct smbdirect_recv_io *recvmsg;
	int credits = 0;
	int ret;

	if (atomic_read(&sc->recv_io.credits.count) < sc->recv_io.credits.target) {
		while (true) {
			recvmsg = get_free_recvmsg(sc);
			if (!recvmsg)
				break;

			recvmsg->first_segment = false;

			ret = smb_direct_post_recv(sc, recvmsg);
			if (ret) {
				pr_err("Can't post recv: %d\n", ret);
				put_recvmsg(sc, recvmsg);
				break;
			}
			credits++;

			atomic_inc(&sc->recv_io.posted.count);
		}
	}

	if (credits)
		queue_work(sc->workqueue, &sc->idle.immediate_work);
}
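/*
 * send_done() completes a signaled send. Only the last WR of a batch is
 * signaled (see smb_direct_flush_send_list()), so the handler also walks
 * sendmsg->sibling_list to free the unsignaled siblings, returning one
 * local send credit per freed message.
 */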
static void send_done(struct ib_cq *cq, struct ib_wc *wc)
{
	struct smbdirect_send_io *sendmsg, *sibling, *next;
	struct smbdirect_socket *sc;
	int lcredits = 0;

	sendmsg = container_of(wc->wr_cqe, struct smbdirect_send_io, cqe);
	sc = sendmsg->socket;

	ksmbd_debug(RDMA, "Send completed. status='%s (%d)', opcode=%d\n",
		    ib_wc_status_msg(wc->status), wc->status,
		    wc->opcode);

	/*
	 * Free possible siblings and then the main send_io
	 */
	list_for_each_entry_safe(sibling, next, &sendmsg->sibling_list, sibling_list) {
		list_del_init(&sibling->sibling_list);
		smb_direct_free_sendmsg(sc, sibling);
		lcredits += 1;
	}
	/* Note this frees wc->wr_cqe, but not wc */
	smb_direct_free_sendmsg(sc, sendmsg);
	lcredits += 1;

	if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_SEND) {
		pr_err("Send error. status='%s (%d)', opcode=%d\n",
		       ib_wc_status_msg(wc->status), wc->status,
		       wc->opcode);
		smb_direct_disconnect_rdma_connection(sc);
		return;
	}

	atomic_add(lcredits, &sc->send_io.lcredits.count);
	wake_up(&sc->send_io.lcredits.wait_queue);

	if (atomic_dec_and_test(&sc->send_io.pending.count))
		wake_up(&sc->send_io.pending.zero_wait_queue);
}

static int manage_credits_prior_sending(struct smbdirect_socket *sc)
{
	int new_credits;

	if (atomic_read(&sc->recv_io.credits.count) >= sc->recv_io.credits.target)
		return 0;

	new_credits = atomic_read(&sc->recv_io.posted.count);
	if (new_credits == 0)
		return 0;

	new_credits -= atomic_read(&sc->recv_io.credits.count);
	if (new_credits <= 0)
		return 0;

	atomic_add(new_credits, &sc->recv_io.credits.count);
	return new_credits;
}

static int manage_keep_alive_before_sending(struct smbdirect_socket *sc)
{
	struct smbdirect_socket_parameters *sp = &sc->parameters;

	if (sc->idle.keepalive == SMBDIRECT_KEEPALIVE_PENDING) {
		sc->idle.keepalive = SMBDIRECT_KEEPALIVE_SENT;
		/*
		 * Now use the keepalive timeout (instead of keepalive interval)
		 * in order to wait for a response
		 */
		mod_delayed_work(sc->workqueue, &sc->idle.timer_work,
				 msecs_to_jiffies(sp->keepalive_timeout_msec));
		return 1;
	}
	return 0;
}

static int smb_direct_post_send(struct smbdirect_socket *sc,
				struct ib_send_wr *wr)
{
	int ret;

	atomic_inc(&sc->send_io.pending.count);
	ret = ib_post_send(sc->ib.qp, wr, NULL);
	if (ret) {
		pr_err("failed to post send: %d\n", ret);
		smb_direct_disconnect_rdma_connection(sc);
	}
	return ret;
}

static void smb_direct_send_ctx_init(struct smbdirect_send_batch *send_ctx,
				     bool need_invalidate_rkey,
				     unsigned int remote_key)
{
	INIT_LIST_HEAD(&send_ctx->msg_list);
	send_ctx->wr_cnt = 0;
	send_ctx->need_invalidate_rkey = need_invalidate_rkey;
	send_ctx->remote_key = remote_key;
}
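/*
 * Batching: with a send_ctx, post_sendmsg() only links the new WR behind
 * the previous one on send_ctx->msg_list; smb_direct_flush_send_list()
 * below then marks the final WR as signaled and posts the whole chain
 * with a single ib_post_send() call.
 */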
static int smb_direct_flush_send_list(struct smbdirect_socket *sc,
				      struct smbdirect_send_batch *send_ctx,
				      bool is_last)
{
	struct smbdirect_send_io *first, *last;
	int ret;

	if (list_empty(&send_ctx->msg_list))
		return 0;

	first = list_first_entry(&send_ctx->msg_list,
				 struct smbdirect_send_io,
				 sibling_list);
	last = list_last_entry(&send_ctx->msg_list,
			       struct smbdirect_send_io,
			       sibling_list);

	if (send_ctx->need_invalidate_rkey) {
		first->wr.opcode = IB_WR_SEND_WITH_INV;
		first->wr.ex.invalidate_rkey = send_ctx->remote_key;
		send_ctx->need_invalidate_rkey = false;
		send_ctx->remote_key = 0;
	}

	last->wr.send_flags = IB_SEND_SIGNALED;
	last->wr.wr_cqe = &last->cqe;

	/*
	 * Remove last from send_ctx->msg_list
	 * and splice the rest of send_ctx->msg_list
	 * to last->sibling_list.
	 *
	 * send_ctx->msg_list is a valid empty list
	 * at the end.
	 */
	list_del_init(&last->sibling_list);
	list_splice_tail_init(&send_ctx->msg_list, &last->sibling_list);
	send_ctx->wr_cnt = 0;

	ret = smb_direct_post_send(sc, &first->wr);
	if (ret) {
		struct smbdirect_send_io *sibling, *next;

		list_for_each_entry_safe(sibling, next, &last->sibling_list, sibling_list) {
			list_del_init(&sibling->sibling_list);
			smb_direct_free_sendmsg(sc, sibling);
		}
		smb_direct_free_sendmsg(sc, last);
	}

	return ret;
}

static int wait_for_credits(struct smbdirect_socket *sc,
			    wait_queue_head_t *waitq, atomic_t *total_credits,
			    int needed)
{
	int ret;

	do {
		if (atomic_sub_return(needed, total_credits) >= 0)
			return 0;

		atomic_add(needed, total_credits);
		ret = wait_event_interruptible(*waitq,
					       atomic_read(total_credits) >= needed ||
					       sc->status != SMBDIRECT_SOCKET_CONNECTED);

		if (sc->status != SMBDIRECT_SOCKET_CONNECTED)
			return -ENOTCONN;
		else if (ret < 0)
			return ret;
	} while (true);
}

static int wait_for_send_lcredit(struct smbdirect_socket *sc,
				 struct smbdirect_send_batch *send_ctx)
{
	if (send_ctx && (atomic_read(&sc->send_io.lcredits.count) <= 1)) {
		int ret;

		ret = smb_direct_flush_send_list(sc, send_ctx, false);
		if (ret)
			return ret;
	}

	return wait_for_credits(sc,
				&sc->send_io.lcredits.wait_queue,
				&sc->send_io.lcredits.count,
				1);
}

static int wait_for_send_credits(struct smbdirect_socket *sc,
				 struct smbdirect_send_batch *send_ctx)
{
	int ret;

	if (send_ctx &&
	    (send_ctx->wr_cnt >= 16 || atomic_read(&sc->send_io.credits.count) <= 1)) {
		ret = smb_direct_flush_send_list(sc, send_ctx, false);
		if (ret)
			return ret;
	}

	return wait_for_credits(sc, &sc->send_io.credits.wait_queue, &sc->send_io.credits.count, 1);
}

static int wait_for_rw_credits(struct smbdirect_socket *sc, int credits)
{
	return wait_for_credits(sc,
				&sc->rw_io.credits.wait_queue,
				&sc->rw_io.credits.count,
				credits);
}

static int calc_rw_credits(struct smbdirect_socket *sc,
			   char *buf, unsigned int len)
{
	return DIV_ROUND_UP(get_buf_page_count(buf, len),
			    sc->rw_io.credits.num_pages);
}
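/*
 * smb_direct_create_header() fills a struct smbdirect_data_transfer.
 * The fixed data_offset of 24 used for payload-carrying messages matches
 * the full header size including the padding field, so the payload
 * starts 8-byte aligned; a zero-length message is sent without the
 * padding (see header_length below).
 */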
static int smb_direct_create_header(struct smbdirect_socket *sc,
				    int size, int remaining_data_length,
				    struct smbdirect_send_io **sendmsg_out)
{
	struct smbdirect_socket_parameters *sp = &sc->parameters;
	struct smbdirect_send_io *sendmsg;
	struct smbdirect_data_transfer *packet;
	int header_length;
	int ret;

	sendmsg = smb_direct_alloc_sendmsg(sc);
	if (IS_ERR(sendmsg))
		return PTR_ERR(sendmsg);

	/* Fill in the packet header */
	packet = (struct smbdirect_data_transfer *)sendmsg->packet;
	packet->credits_requested = cpu_to_le16(sp->send_credit_target);
	packet->credits_granted = cpu_to_le16(manage_credits_prior_sending(sc));

	packet->flags = 0;
	if (manage_keep_alive_before_sending(sc))
		packet->flags |= cpu_to_le16(SMBDIRECT_FLAG_RESPONSE_REQUESTED);

	packet->reserved = 0;
	if (!size)
		packet->data_offset = 0;
	else
		packet->data_offset = cpu_to_le32(24);
	packet->data_length = cpu_to_le32(size);
	packet->remaining_data_length = cpu_to_le32(remaining_data_length);
	packet->padding = 0;

	ksmbd_debug(RDMA,
		    "credits_requested=%d credits_granted=%d data_offset=%d data_length=%d remaining_data_length=%d\n",
		    le16_to_cpu(packet->credits_requested),
		    le16_to_cpu(packet->credits_granted),
		    le32_to_cpu(packet->data_offset),
		    le32_to_cpu(packet->data_length),
		    le32_to_cpu(packet->remaining_data_length));

	/* Map the packet to DMA */
	header_length = sizeof(struct smbdirect_data_transfer);
	/* If this is a packet without payload, don't send padding */
	if (!size)
		header_length =
			offsetof(struct smbdirect_data_transfer, padding);

	sendmsg->sge[0].addr = ib_dma_map_single(sc->ib.dev,
						 (void *)packet,
						 header_length,
						 DMA_TO_DEVICE);
	ret = ib_dma_mapping_error(sc->ib.dev, sendmsg->sge[0].addr);
	if (ret) {
		smb_direct_free_sendmsg(sc, sendmsg);
		return ret;
	}

	sendmsg->num_sge = 1;
	sendmsg->sge[0].length = header_length;
	sendmsg->sge[0].lkey = sc->ib.pd->local_dma_lkey;

	*sendmsg_out = sendmsg;
	return 0;
}

static int get_sg_list(void *buf, int size, struct scatterlist *sg_list, int nentries)
{
	bool high = is_vmalloc_addr(buf);
	struct page *page;
	int offset, len;
	int i = 0;

	if (size <= 0 || nentries < get_buf_page_count(buf, size))
		return -EINVAL;

	offset = offset_in_page(buf);
	buf -= offset;
	while (size > 0) {
		len = min_t(int, PAGE_SIZE - offset, size);
		if (high)
			page = vmalloc_to_page(buf);
		else
			page = kmap_to_page(buf);

		if (!sg_list)
			return -EINVAL;
		sg_set_page(sg_list, page, len, offset);
		sg_list = sg_next(sg_list);

		buf += PAGE_SIZE;
		size -= len;
		offset = 0;
		i++;
	}
	return i;
}

static int get_mapped_sg_list(struct ib_device *device, void *buf, int size,
			      struct scatterlist *sg_list, int nentries,
			      enum dma_data_direction dir)
{
	int npages;

	npages = get_sg_list(buf, size, sg_list, nentries);
	if (npages < 0)
		return -EINVAL;
	return ib_dma_map_sg(device, sg_list, npages, dir);
}

static int post_sendmsg(struct smbdirect_socket *sc,
			struct smbdirect_send_batch *send_ctx,
			struct smbdirect_send_io *msg)
{
	int i;

	for (i = 0; i < msg->num_sge; i++)
		ib_dma_sync_single_for_device(sc->ib.dev,
					      msg->sge[i].addr, msg->sge[i].length,
					      DMA_TO_DEVICE);

	msg->cqe.done = send_done;
	msg->wr.opcode = IB_WR_SEND;
	msg->wr.sg_list = &msg->sge[0];
	msg->wr.num_sge = msg->num_sge;
	msg->wr.next = NULL;

	if (send_ctx) {
		msg->wr.wr_cqe = NULL;
		msg->wr.send_flags = 0;
		if (!list_empty(&send_ctx->msg_list)) {
			struct smbdirect_send_io *last;

			last = list_last_entry(&send_ctx->msg_list,
					       struct smbdirect_send_io,
					       sibling_list);
			last->wr.next = &msg->wr;
		}
		list_add_tail(&msg->sibling_list, &send_ctx->msg_list);
		send_ctx->wr_cnt++;
		return 0;
	}

	msg->wr.wr_cqe = &msg->cqe;
	msg->wr.send_flags = IB_SEND_SIGNALED;
	return smb_direct_post_send(sc, &msg->wr);
}
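/*
 * smb_direct_post_send_data() acquires one local credit and one remote
 * credit before building the message; the error labels at its end
 * release them in reverse order, so each credit is returned exactly when
 * its acquisition step had already succeeded.
 */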
static int smb_direct_post_send_data(struct smbdirect_socket *sc,
				     struct smbdirect_send_batch *send_ctx,
				     struct kvec *iov, int niov,
				     int remaining_data_length)
{
	int i, j, ret;
	struct smbdirect_send_io *msg;
	int data_length;
	struct scatterlist sg[SMBDIRECT_SEND_IO_MAX_SGE - 1];

	ret = wait_for_send_lcredit(sc, send_ctx);
	if (ret)
		goto lcredit_failed;

	ret = wait_for_send_credits(sc, send_ctx);
	if (ret)
		goto credit_failed;

	data_length = 0;
	for (i = 0; i < niov; i++)
		data_length += iov[i].iov_len;

	ret = smb_direct_create_header(sc, data_length, remaining_data_length,
				       &msg);
	if (ret)
		goto header_failed;

	for (i = 0; i < niov; i++) {
		struct ib_sge *sge;
		int sg_cnt;

		sg_init_table(sg, SMBDIRECT_SEND_IO_MAX_SGE - 1);
		sg_cnt = get_mapped_sg_list(sc->ib.dev,
					    iov[i].iov_base, iov[i].iov_len,
					    sg, SMBDIRECT_SEND_IO_MAX_SGE - 1,
					    DMA_TO_DEVICE);
		if (sg_cnt <= 0) {
			pr_err("failed to map buffer\n");
			ret = -ENOMEM;
			goto err;
		} else if (sg_cnt + msg->num_sge > SMBDIRECT_SEND_IO_MAX_SGE) {
			pr_err("buffer not fitted into sges\n");
			ret = -E2BIG;
			ib_dma_unmap_sg(sc->ib.dev, sg, sg_cnt,
					DMA_TO_DEVICE);
			goto err;
		}

		for (j = 0; j < sg_cnt; j++) {
			sge = &msg->sge[msg->num_sge];
			sge->addr = sg_dma_address(&sg[j]);
			sge->length = sg_dma_len(&sg[j]);
			sge->lkey = sc->ib.pd->local_dma_lkey;
			msg->num_sge++;
		}
	}

	ret = post_sendmsg(sc, send_ctx, msg);
	if (ret)
		goto err;
	return 0;
err:
	smb_direct_free_sendmsg(sc, msg);
header_failed:
	atomic_inc(&sc->send_io.credits.count);
credit_failed:
	atomic_inc(&sc->send_io.lcredits.count);
lcredit_failed:
	return ret;
}

static int smb_direct_writev(struct ksmbd_transport *t,
			     struct kvec *iov, int niovs, int buflen,
			     bool need_invalidate, unsigned int remote_key)
{
	struct smb_direct_transport *st = SMBD_TRANS(t);
	struct smbdirect_socket *sc = &st->socket;
	struct smbdirect_socket_parameters *sp = &sc->parameters;
	size_t remaining_data_length;
	size_t iov_idx;
	size_t iov_ofs;
	size_t max_iov_size = sp->max_send_size -
			sizeof(struct smbdirect_data_transfer);
	int ret;
	struct smbdirect_send_batch send_ctx;
	int error = 0;

	if (sc->status != SMBDIRECT_SOCKET_CONNECTED)
		return -ENOTCONN;

	//FIXME: skip RFC1002 header..
	if (WARN_ON_ONCE(niovs <= 1 || iov[0].iov_len != 4))
		return -EINVAL;
	buflen -= 4;
	iov_idx = 1;
	iov_ofs = 0;

	remaining_data_length = buflen;
	ksmbd_debug(RDMA, "Sending smb (RDMA): smb_len=%u\n", buflen);

	smb_direct_send_ctx_init(&send_ctx, need_invalidate, remote_key);
	while (remaining_data_length) {
		struct kvec vecs[SMBDIRECT_SEND_IO_MAX_SGE - 1]; /* minus smbdirect hdr */
		size_t possible_bytes = max_iov_size;
		size_t possible_vecs;
		size_t bytes = 0;
		size_t nvecs = 0;

		/*
		 * For the last message remaining_data_length should
		 * have been 0 already!
		 */
		if (WARN_ON_ONCE(iov_idx >= niovs)) {
			error = -EINVAL;
			goto done;
		}

		/*
		 * We have 2 factors which limit the arguments we pass
		 * to smb_direct_post_send_data():
		 *
		 * 1. The number of supported sges for the send,
		 *    while one is reserved for the smbdirect header.
		 *    And we currently need one SGE per page.
		 * 2. The number of negotiated payload bytes per send.
		 */
		possible_vecs = min_t(size_t, ARRAY_SIZE(vecs), niovs - iov_idx);

		while (iov_idx < niovs && possible_vecs && possible_bytes) {
			struct kvec *v = &vecs[nvecs];
			int page_count;

			v->iov_base = ((u8 *)iov[iov_idx].iov_base) + iov_ofs;
			v->iov_len = min_t(size_t,
					   iov[iov_idx].iov_len - iov_ofs,
					   possible_bytes);
			page_count = get_buf_page_count(v->iov_base, v->iov_len);
			if (page_count > possible_vecs) {
				/*
				 * If the number of pages in the buffer
				 * is too much (because we currently require
				 * one SGE per page), we need to limit the
				 * length.
				 *
				 * We know possible_vecs is at least 1,
				 * so we always keep the first page.
				 *
				 * We need to calculate the number of extra
				 * pages (epages) we can also keep.
				 *
				 * We calculate the number of bytes in the
				 * first page (fplen); this should never be
				 * larger than v->iov_len because page_count is
				 * at least 2, but adding a limitation feels
				 * better.
				 *
				 * Then we calculate the number of bytes (elen)
				 * we can keep for the extra pages.
				 */
				size_t epages = possible_vecs - 1;
				size_t fpofs = offset_in_page(v->iov_base);
				size_t fplen = min_t(size_t, PAGE_SIZE - fpofs, v->iov_len);
				size_t elen = min_t(size_t, v->iov_len - fplen, epages * PAGE_SIZE);

				v->iov_len = fplen + elen;
				page_count = get_buf_page_count(v->iov_base, v->iov_len);
				if (WARN_ON_ONCE(page_count > possible_vecs)) {
					/*
					 * Something went wrong in the above
					 * logic...
					 */
					error = -EINVAL;
					goto done;
				}
			}
			possible_vecs -= page_count;
			nvecs += 1;
			possible_bytes -= v->iov_len;
			bytes += v->iov_len;

			iov_ofs += v->iov_len;
			if (iov_ofs >= iov[iov_idx].iov_len) {
				iov_idx += 1;
				iov_ofs = 0;
			}
		}

		remaining_data_length -= bytes;

		ret = smb_direct_post_send_data(sc, &send_ctx,
						vecs, nvecs,
						remaining_data_length);
		if (unlikely(ret)) {
			error = ret;
			goto done;
		}
	}

done:
	ret = smb_direct_flush_send_list(sc, &send_ctx, true);
	if (unlikely(!ret && error))
		ret = error;

	/*
	 * As an optimization, we don't wait for individual I/O to finish
	 * before sending the next one.
	 * Send them all and wait for the pending send count to get to 0,
	 * which means all the I/Os have gone out and we are good to return.
	 */

	wait_event(sc->send_io.pending.zero_wait_queue,
		   atomic_read(&sc->send_io.pending.count) == 0 ||
		   sc->status != SMBDIRECT_SOCKET_CONNECTED);
	if (sc->status != SMBDIRECT_SOCKET_CONNECTED && ret == 0)
		ret = -ENOTCONN;

	return ret;
}

static void smb_direct_free_rdma_rw_msg(struct smb_direct_transport *t,
					struct smbdirect_rw_io *msg,
					enum dma_data_direction dir)
{
	struct smbdirect_socket *sc = &t->socket;

	rdma_rw_ctx_destroy(&msg->rdma_ctx, sc->ib.qp, sc->ib.qp->port,
			    msg->sgt.sgl, msg->sgt.nents, dir);
	sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE);
	kfree(msg);
}
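/*
 * Completion handler shared by the RDMA READ and RDMA WRITE paths: each
 * rdma_rw_ctx signals its final work request, records -EIO on failure
 * and completes the on-stack completion that smb_direct_rdma_xmit()
 * waits on.
 */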
static void read_write_done(struct ib_cq *cq, struct ib_wc *wc,
			    enum dma_data_direction dir)
{
	struct smbdirect_rw_io *msg =
		container_of(wc->wr_cqe, struct smbdirect_rw_io, cqe);
	struct smbdirect_socket *sc = msg->socket;

	if (wc->status != IB_WC_SUCCESS) {
		msg->error = -EIO;
		pr_err("read/write error. opcode = %d, status = %s(%d)\n",
		       wc->opcode, ib_wc_status_msg(wc->status), wc->status);
		if (wc->status != IB_WC_WR_FLUSH_ERR)
			smb_direct_disconnect_rdma_connection(sc);
	}

	complete(msg->completion);
}

static void read_done(struct ib_cq *cq, struct ib_wc *wc)
{
	read_write_done(cq, wc, DMA_FROM_DEVICE);
}

static void write_done(struct ib_cq *cq, struct ib_wc *wc)
{
	read_write_done(cq, wc, DMA_TO_DEVICE);
}

static int smb_direct_rdma_xmit(struct smb_direct_transport *t,
				void *buf, int buf_len,
				struct smbdirect_buffer_descriptor_v1 *desc,
				unsigned int desc_len,
				bool is_read)
{
	struct smbdirect_socket *sc = &t->socket;
	struct smbdirect_socket_parameters *sp = &sc->parameters;
	struct smbdirect_rw_io *msg, *next_msg;
	int i, ret;
	DECLARE_COMPLETION_ONSTACK(completion);
	struct ib_send_wr *first_wr;
	LIST_HEAD(msg_list);
	char *desc_buf;
	int credits_needed;
	unsigned int desc_buf_len, desc_num = 0;

	if (sc->status != SMBDIRECT_SOCKET_CONNECTED)
		return -ENOTCONN;

	if (buf_len > sp->max_read_write_size)
		return -EINVAL;

	/* calculate needed credits */
	credits_needed = 0;
	desc_buf = buf;
	for (i = 0; i < desc_len / sizeof(*desc); i++) {
		if (!buf_len)
			break;

		desc_buf_len = le32_to_cpu(desc[i].length);
		if (!desc_buf_len)
			return -EINVAL;

		if (desc_buf_len > buf_len) {
			desc_buf_len = buf_len;
			desc[i].length = cpu_to_le32(desc_buf_len);
			buf_len = 0;
		}

		credits_needed += calc_rw_credits(sc, desc_buf, desc_buf_len);
		desc_buf += desc_buf_len;
		buf_len -= desc_buf_len;
		desc_num++;
	}

	ksmbd_debug(RDMA, "RDMA %s, len %#x, needed credits %#x\n",
		    str_read_write(is_read), buf_len, credits_needed);

	ret = wait_for_rw_credits(sc, credits_needed);
	if (ret < 0)
		return ret;

	/* build rdma_rw_ctx for each descriptor */
	desc_buf = buf;
	for (i = 0; i < desc_num; i++) {
		msg = kzalloc(struct_size(msg, sg_list, SG_CHUNK_SIZE),
			      KSMBD_DEFAULT_GFP);
		if (!msg) {
			ret = -ENOMEM;
			goto out;
		}

		desc_buf_len = le32_to_cpu(desc[i].length);

		msg->socket = sc;
		msg->cqe.done = is_read ? read_done : write_done;
		msg->completion = &completion;

		msg->sgt.sgl = &msg->sg_list[0];
		ret = sg_alloc_table_chained(&msg->sgt,
					     get_buf_page_count(desc_buf, desc_buf_len),
					     msg->sg_list, SG_CHUNK_SIZE);
		if (ret) {
			ret = -ENOMEM;
			goto free_msg;
		}

		ret = get_sg_list(desc_buf, desc_buf_len,
				  msg->sgt.sgl, msg->sgt.orig_nents);
		if (ret < 0)
			goto free_table;

		ret = rdma_rw_ctx_init(&msg->rdma_ctx, sc->ib.qp, sc->ib.qp->port,
				       msg->sgt.sgl,
				       get_buf_page_count(desc_buf, desc_buf_len),
				       0,
				       le64_to_cpu(desc[i].offset),
				       le32_to_cpu(desc[i].token),
				       is_read ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
		if (ret < 0) {
			pr_err("failed to init rdma_rw_ctx: %d\n", ret);
			goto free_table;
		}

		list_add_tail(&msg->list, &msg_list);
		desc_buf += desc_buf_len;
	}

	/* concatenate work requests of rdma_rw_ctxs */
	first_wr = NULL;
	list_for_each_entry_reverse(msg, &msg_list, list) {
		first_wr = rdma_rw_ctx_wrs(&msg->rdma_ctx, sc->ib.qp, sc->ib.qp->port,
					   &msg->cqe, first_wr);
	}

	ret = ib_post_send(sc->ib.qp, first_wr, NULL);
	if (ret) {
		pr_err("failed to post send wr for RDMA R/W: %d\n", ret);
		goto out;
	}

	msg = list_last_entry(&msg_list, struct smbdirect_rw_io, list);
	wait_for_completion(&completion);
	ret = msg->error;
out:
	list_for_each_entry_safe(msg, next_msg, &msg_list, list) {
		list_del(&msg->list);
		smb_direct_free_rdma_rw_msg(t, msg,
					    is_read ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
	}
	atomic_add(credits_needed, &sc->rw_io.credits.count);
	wake_up(&sc->rw_io.credits.wait_queue);
	return ret;

free_table:
	sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE);
free_msg:
	kfree(msg);
	goto out;
}

static int smb_direct_rdma_write(struct ksmbd_transport *t,
				 void *buf, unsigned int buflen,
				 struct smbdirect_buffer_descriptor_v1 *desc,
				 unsigned int desc_len)
{
	return smb_direct_rdma_xmit(SMBD_TRANS(t), buf, buflen,
				    desc, desc_len, false);
}

static int smb_direct_rdma_read(struct ksmbd_transport *t,
				void *buf, unsigned int buflen,
				struct smbdirect_buffer_descriptor_v1 *desc,
				unsigned int desc_len)
{
	return smb_direct_rdma_xmit(SMBD_TRANS(t), buf, buflen,
				    desc, desc_len, true);
}

static void smb_direct_disconnect(struct ksmbd_transport *t)
{
	struct smb_direct_transport *st = SMBD_TRANS(t);
	struct smbdirect_socket *sc = &st->socket;

	ksmbd_debug(RDMA, "Disconnecting cm_id=%p\n", sc->rdma.cm_id);

	free_transport(st);
}

static void smb_direct_shutdown(struct ksmbd_transport *t)
{
	struct smb_direct_transport *st = SMBD_TRANS(t);
	struct smbdirect_socket *sc = &st->socket;

	ksmbd_debug(RDMA, "smb-direct shutdown cm_id=%p\n", sc->rdma.cm_id);

	smb_direct_disconnect_rdma_work(&sc->disconnect_work);
}
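/*
 * smb_direct_cm_handler() is invoked from the RDMA CM event context with
 * the cm_id handler lock held (see the rdma_lock_handler() pairing in
 * free_transport()); it updates the socket status and triggers the
 * disconnect path directly where needed.
 */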
static int smb_direct_cm_handler(struct rdma_cm_id *cm_id,
				 struct rdma_cm_event *event)
{
	struct smbdirect_socket *sc = cm_id->context;

	ksmbd_debug(RDMA, "RDMA CM event. cm_id=%p event=%s (%d)\n",
		    cm_id, rdma_event_msg(event->event), event->event);

	switch (event->event) {
	case RDMA_CM_EVENT_ESTABLISHED: {
		/*
		 * Some drivers (at least mlx5_ib) might post a
		 * recv completion before RDMA_CM_EVENT_ESTABLISHED,
		 * so we need to adjust our expectation in that case.
		 *
		 * As we already started the negotiation, we just
		 * ignore RDMA_CM_EVENT_ESTABLISHED here.
		 */
		if (!sc->first_error && sc->status > SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING)
			break;
		if (SMBDIRECT_CHECK_STATUS_DISCONNECT(sc, SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING))
			break;
		sc->status = SMBDIRECT_SOCKET_NEGOTIATE_NEEDED;
		wake_up(&sc->status_wait);
		break;
	}
	case RDMA_CM_EVENT_DEVICE_REMOVAL:
	case RDMA_CM_EVENT_DISCONNECTED: {
		sc->status = SMBDIRECT_SOCKET_DISCONNECTED;
		smb_direct_disconnect_rdma_work(&sc->disconnect_work);
		if (sc->ib.qp)
			ib_drain_qp(sc->ib.qp);
		break;
	}
	case RDMA_CM_EVENT_CONNECT_ERROR: {
		sc->status = SMBDIRECT_SOCKET_DISCONNECTED;
		smb_direct_disconnect_rdma_work(&sc->disconnect_work);
		break;
	}
	default:
		pr_err("Unexpected RDMA CM event. cm_id=%p, event=%s (%d)\n",
		       cm_id, rdma_event_msg(event->event),
		       event->event);
		break;
	}
	return 0;
}

static void smb_direct_qpair_handler(struct ib_event *event, void *context)
{
	struct smbdirect_socket *sc = context;

	ksmbd_debug(RDMA, "Received QP event. cm_id=%p, event=%s (%d)\n",
		    sc->rdma.cm_id, ib_event_msg(event->event), event->event);

	switch (event->event) {
	case IB_EVENT_CQ_ERR:
	case IB_EVENT_QP_FATAL:
		smb_direct_disconnect_rdma_connection(sc);
		break;
	default:
		break;
	}
}

static int smb_direct_send_negotiate_response(struct smbdirect_socket *sc,
					      int failed)
{
	struct smbdirect_socket_parameters *sp = &sc->parameters;
	struct smbdirect_send_io *sendmsg;
	struct smbdirect_negotiate_resp *resp;
	int ret;

	sendmsg = smb_direct_alloc_sendmsg(sc);
	if (IS_ERR(sendmsg))
		return -ENOMEM;

	resp = (struct smbdirect_negotiate_resp *)sendmsg->packet;
	if (failed) {
		memset(resp, 0, sizeof(*resp));
		resp->min_version = SMB_DIRECT_VERSION_LE;
		resp->max_version = SMB_DIRECT_VERSION_LE;
		resp->status = STATUS_NOT_SUPPORTED;

		sc->status = SMBDIRECT_SOCKET_NEGOTIATE_FAILED;
	} else {
		resp->status = STATUS_SUCCESS;
		resp->min_version = SMB_DIRECT_VERSION_LE;
		resp->max_version = SMB_DIRECT_VERSION_LE;
		resp->negotiated_version = SMB_DIRECT_VERSION_LE;
		resp->reserved = 0;
		resp->credits_requested =
				cpu_to_le16(sp->send_credit_target);
		resp->credits_granted = cpu_to_le16(manage_credits_prior_sending(sc));
		resp->max_readwrite_size = cpu_to_le32(sp->max_read_write_size);
		resp->preferred_send_size = cpu_to_le32(sp->max_send_size);
		resp->max_receive_size = cpu_to_le32(sp->max_recv_size);
		resp->max_fragmented_size =
				cpu_to_le32(sp->max_fragmented_recv_size);

		sc->recv_io.expected = SMBDIRECT_EXPECT_DATA_TRANSFER;
		sc->status = SMBDIRECT_SOCKET_CONNECTED;
	}

	sendmsg->sge[0].addr = ib_dma_map_single(sc->ib.dev,
						 (void *)resp, sizeof(*resp),
						 DMA_TO_DEVICE);
	ret = ib_dma_mapping_error(sc->ib.dev, sendmsg->sge[0].addr);
	if (ret) {
		smb_direct_free_sendmsg(sc, sendmsg);
		return ret;
	}

	sendmsg->num_sge = 1;
	sendmsg->sge[0].length = sizeof(*resp);
	sendmsg->sge[0].lkey = sc->ib.pd->local_dma_lkey;

	ret = post_sendmsg(sc, NULL, sendmsg);
	if (ret) {
		smb_direct_free_sendmsg(sc, sendmsg);
		return ret;
	}

	wait_event(sc->send_io.pending.zero_wait_queue,
		   atomic_read(&sc->send_io.pending.count) == 0 ||
		   sc->status != SMBDIRECT_SOCKET_CONNECTED);
	if (sc->status != SMBDIRECT_SOCKET_CONNECTED)
		return -ENOTCONN;

	return 0;
}
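/*
 * smb_direct_accept_client() below advertises the negotiated
 * responder_resources and initiator_depth as two big-endian 32-bit words
 * in the private_data of the accept for legacy iWARP peers, which cannot
 * exchange IRD/ORD values through the CM itself.
 */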
static int smb_direct_accept_client(struct smbdirect_socket *sc)
{
	struct smbdirect_socket_parameters *sp = &sc->parameters;
	struct rdma_conn_param conn_param;
	__be32 ird_ord_hdr[2];
	int ret;

	/*
	 * smb_direct_handle_connect_request()
	 * already negotiated sp->initiator_depth
	 * and sp->responder_resources
	 */
	memset(&conn_param, 0, sizeof(conn_param));
	conn_param.initiator_depth = sp->initiator_depth;
	conn_param.responder_resources = sp->responder_resources;

	if (sc->rdma.legacy_iwarp) {
		ird_ord_hdr[0] = cpu_to_be32(conn_param.responder_resources);
		ird_ord_hdr[1] = cpu_to_be32(conn_param.initiator_depth);
		conn_param.private_data = ird_ord_hdr;
		conn_param.private_data_len = sizeof(ird_ord_hdr);
	} else {
		conn_param.private_data = NULL;
		conn_param.private_data_len = 0;
	}
	conn_param.retry_count = SMB_DIRECT_CM_RETRY;
	conn_param.rnr_retry_count = SMB_DIRECT_CM_RNR_RETRY;
	conn_param.flow_control = 0;

	/*
	 * start with the negotiate timeout and SMBDIRECT_KEEPALIVE_PENDING
	 * so that the timer will cause a disconnect.
	 */
	sc->idle.keepalive = SMBDIRECT_KEEPALIVE_PENDING;
	mod_delayed_work(sc->workqueue, &sc->idle.timer_work,
			 msecs_to_jiffies(sp->negotiate_timeout_msec));

	WARN_ON_ONCE(sc->status != SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED);
	sc->status = SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING;
	ret = rdma_accept(sc->rdma.cm_id, &conn_param);
	if (ret) {
		pr_err("error at rdma_accept: %d\n", ret);
		return ret;
	}
	return 0;
}

static int smb_direct_prepare_negotiation(struct smbdirect_socket *sc)
{
	struct smbdirect_recv_io *recvmsg;
	bool recv_posted = false;
	int ret;

	WARN_ON_ONCE(sc->status != SMBDIRECT_SOCKET_CREATED);
	sc->status = SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED;

	sc->recv_io.expected = SMBDIRECT_EXPECT_NEGOTIATE_REQ;

	recvmsg = get_free_recvmsg(sc);
	if (!recvmsg)
		return -ENOMEM;

	ret = smb_direct_post_recv(sc, recvmsg);
	if (ret) {
		pr_err("Can't post recv: %d\n", ret);
		goto out_err;
	}
	recv_posted = true;

	ret = smb_direct_accept_client(sc);
	if (ret) {
		pr_err("Can't accept client\n");
		goto out_err;
	}

	return 0;
out_err:
	/*
	 * If the recv was never posted, return it to the free list.
	 * If it was posted, leave it alone so disconnect teardown can
	 * drain the QP and complete it (flush) and the completion path
	 * will unmap it exactly once.
	 */
	if (!recv_posted)
		put_recvmsg(sc, recvmsg);
	return ret;
}
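/*
 * R/W credit model set up in smb_direct_init_params() below:
 * rw_io.credits.max bounds the rdma_rw contexts in flight and
 * rw_io.credits.num_pages is the number of pages accounted per credit;
 * calc_rw_credits() divides a buffer's page count by num_pages, and one
 * extra credit absorbs buffers that straddle a page boundary.
 */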
	max_send_sges = DIV_ROUND_UP(sp->max_send_size, PAGE_SIZE) + 3;
	if (max_send_sges > SMBDIRECT_SEND_IO_MAX_SGE) {
		pr_err("max_send_size %d is too large\n", sp->max_send_size);
		return -EINVAL;
	}

	atomic_set(&sc->send_io.lcredits.count, sp->send_credit_target);

	maxpages = DIV_ROUND_UP(sp->max_read_write_size, PAGE_SIZE);
	sc->rw_io.credits.max = rdma_rw_mr_factor(sc->ib.dev,
						  sc->rdma.cm_id->port_num,
						  maxpages);
	sc->rw_io.credits.num_pages = DIV_ROUND_UP(maxpages, sc->rw_io.credits.max);
	/* add one extra in order to handle unaligned pages */
	sc->rw_io.credits.max += 1;

	sc->recv_io.credits.target = 1;

	atomic_set(&sc->rw_io.credits.count, sc->rw_io.credits.max);

	return 0;
}

static void smb_direct_destroy_pools(struct smbdirect_socket *sc)
{
	struct smbdirect_recv_io *recvmsg;

	while ((recvmsg = get_free_recvmsg(sc)))
		mempool_free(recvmsg, sc->recv_io.mem.pool);

	mempool_destroy(sc->recv_io.mem.pool);
	sc->recv_io.mem.pool = NULL;

	kmem_cache_destroy(sc->recv_io.mem.cache);
	sc->recv_io.mem.cache = NULL;

	mempool_destroy(sc->send_io.mem.pool);
	sc->send_io.mem.pool = NULL;

	kmem_cache_destroy(sc->send_io.mem.cache);
	sc->send_io.mem.cache = NULL;
}

static int smb_direct_create_pools(struct smbdirect_socket *sc)
{
	struct smbdirect_socket_parameters *sp = &sc->parameters;
	char name[80];
	int i;
	struct smbdirect_recv_io *recvmsg;

	snprintf(name, sizeof(name), "smbdirect_send_io_pool_%p", sc);
	sc->send_io.mem.cache = kmem_cache_create(name,
						  sizeof(struct smbdirect_send_io) +
						  sizeof(struct smbdirect_negotiate_resp),
						  0, SLAB_HWCACHE_ALIGN, NULL);
	if (!sc->send_io.mem.cache)
		return -ENOMEM;

	sc->send_io.mem.pool = mempool_create(sp->send_credit_target,
					      mempool_alloc_slab,
					      mempool_free_slab,
					      sc->send_io.mem.cache);
	if (!sc->send_io.mem.pool)
		goto err;

	snprintf(name, sizeof(name), "smbdirect_recv_io_pool_%p", sc);
	sc->recv_io.mem.cache = kmem_cache_create(name,
						  sizeof(struct smbdirect_recv_io) +
						  sp->max_recv_size,
						  0, SLAB_HWCACHE_ALIGN, NULL);
	if (!sc->recv_io.mem.cache)
		goto err;

	sc->recv_io.mem.pool =
		mempool_create(sp->recv_credit_max, mempool_alloc_slab,
			       mempool_free_slab, sc->recv_io.mem.cache);
	if (!sc->recv_io.mem.pool)
		goto err;

	for (i = 0; i < sp->recv_credit_max; i++) {
		recvmsg = mempool_alloc(sc->recv_io.mem.pool, KSMBD_DEFAULT_GFP);
		if (!recvmsg)
			goto err;
		recvmsg->socket = sc;
		recvmsg->sge.length = 0;
		list_add(&recvmsg->list, &sc->recv_io.free.list);
	}

	return 0;
err:
	smb_direct_destroy_pools(sc);
	return -ENOMEM;
}
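/*
 * Worked example of the factor computed below: on a device that needs
 * MRs for RDMA READ/WRITE (iWARP, or attrs.max_sgl_rd set) every RDMA
 * R/W context costs one READ/WRITE WR plus a REG and an INV WR, i.e.
 * factor 3, so the send queue needs 3 * max_rdma_ctxs extra WRs; on
 * other fabrics it is just 1 * max_rdma_ctxs.
 */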
static u32 smb_direct_rdma_rw_send_wrs(struct ib_device *dev, const struct ib_qp_init_attr *attr)
{
	/*
	 * This could be split out of rdma_rw_init_qp()
	 * and be a helper function next to rdma_rw_mr_factor()
	 *
	 * We can't check unlikely(rdma_rw_force_mr) here,
	 * but that is most likely 0 anyway.
	 */
	u32 factor;

	WARN_ON_ONCE(attr->port_num == 0);

	/*
	 * Each context needs at least one RDMA READ or WRITE WR.
	 *
	 * For some hardware we might need more, eventually we should ask the
	 * HCA driver for a multiplier here.
	 */
	factor = 1;

	/*
	 * If the device needs MRs to perform RDMA READ or WRITE operations,
	 * we'll need two additional MRs for the registrations and the
	 * invalidation.
	 */
	if (rdma_protocol_iwarp(dev, attr->port_num) || dev->attrs.max_sgl_rd)
		factor += 2;	/* inv + reg */

	return factor * attr->cap.max_rdma_ctxs;
}

static int smb_direct_create_qpair(struct smbdirect_socket *sc)
{
	struct smbdirect_socket_parameters *sp = &sc->parameters;
	int ret;
	struct ib_qp_cap qp_cap;
	struct ib_qp_init_attr qp_attr;
	u32 max_send_wr;
	u32 rdma_send_wr;

	/*
	 * Note that {rdma,ib}_create_qp() will call
	 * rdma_rw_init_qp() if cap->max_rdma_ctxs is not 0.
	 * It will adjust cap->max_send_wr to the required
	 * number of additional WRs for the RDMA RW operations.
	 * It will cap cap->max_send_wr to the device limit.
	 *
	 * +1 for ib_drain_qp
	 */
	qp_cap.max_send_wr = sp->send_credit_target + 1;
	qp_cap.max_recv_wr = sp->recv_credit_max + 1;
	qp_cap.max_send_sge = SMBDIRECT_SEND_IO_MAX_SGE;
	qp_cap.max_recv_sge = SMBDIRECT_RECV_IO_MAX_SGE;
	qp_cap.max_inline_data = 0;
	qp_cap.max_rdma_ctxs = sc->rw_io.credits.max;

	/*
	 * Find out the number of max_send_wr
	 * after rdma_rw_init_qp() adjusted it.
	 *
	 * We only do it on a temporary variable,
	 * as rdma_create_qp() will trigger
	 * rdma_rw_init_qp() again.
	 */
	memset(&qp_attr, 0, sizeof(qp_attr));
	qp_attr.cap = qp_cap;
	qp_attr.port_num = sc->rdma.cm_id->port_num;
	rdma_send_wr = smb_direct_rdma_rw_send_wrs(sc->ib.dev, &qp_attr);
	max_send_wr = qp_cap.max_send_wr + rdma_send_wr;
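	/*
	 * Sanity-check the WR budgets against the device limits before
	 * allocating anything. With the default configuration that is,
	 * for example, 255 + 1 = 256 send WRs (plus rdma_send_wr for
	 * the RDMA R/W contexts) and 255 + 1 = 256 recv WRs, each of
	 * which has to fit within the device's max_cqe and max_qp_wr.
	 */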
	if (qp_cap.max_send_wr > sc->ib.dev->attrs.max_cqe ||
	    qp_cap.max_send_wr > sc->ib.dev->attrs.max_qp_wr) {
		pr_err("Possible CQE overrun: max_send_wr %d\n",
		       qp_cap.max_send_wr);
		pr_err("device %.*s reporting max_cqe %d max_qp_wr %d\n",
		       IB_DEVICE_NAME_MAX,
		       sc->ib.dev->name,
		       sc->ib.dev->attrs.max_cqe,
		       sc->ib.dev->attrs.max_qp_wr);
		pr_err("consider lowering send_credit_target = %d\n",
		       sp->send_credit_target);
		return -EINVAL;
	}

	if (qp_cap.max_rdma_ctxs &&
	    (max_send_wr >= sc->ib.dev->attrs.max_cqe ||
	     max_send_wr >= sc->ib.dev->attrs.max_qp_wr)) {
		pr_err("Possible CQE overrun: rdma_send_wr %d + max_send_wr %d = %d\n",
		       rdma_send_wr, qp_cap.max_send_wr, max_send_wr);
		pr_err("device %.*s reporting max_cqe %d max_qp_wr %d\n",
		       IB_DEVICE_NAME_MAX,
		       sc->ib.dev->name,
		       sc->ib.dev->attrs.max_cqe,
		       sc->ib.dev->attrs.max_qp_wr);
		pr_err("consider lowering send_credit_target = %d, max_rdma_ctxs = %d\n",
		       sp->send_credit_target, qp_cap.max_rdma_ctxs);
		return -EINVAL;
	}

	if (qp_cap.max_recv_wr > sc->ib.dev->attrs.max_cqe ||
	    qp_cap.max_recv_wr > sc->ib.dev->attrs.max_qp_wr) {
		pr_err("Possible CQE overrun: max_recv_wr %d\n",
		       qp_cap.max_recv_wr);
		pr_err("device %.*s reporting max_cqe %d max_qp_wr %d\n",
		       IB_DEVICE_NAME_MAX,
		       sc->ib.dev->name,
		       sc->ib.dev->attrs.max_cqe,
		       sc->ib.dev->attrs.max_qp_wr);
		pr_err("consider lowering receive_credit_max = %d\n",
		       sp->recv_credit_max);
		return -EINVAL;
	}

	if (qp_cap.max_send_sge > sc->ib.dev->attrs.max_send_sge ||
	    qp_cap.max_recv_sge > sc->ib.dev->attrs.max_recv_sge) {
		pr_err("device %.*s max_send_sge/max_recv_sge = %d/%d too small\n",
		       IB_DEVICE_NAME_MAX,
		       sc->ib.dev->name,
		       sc->ib.dev->attrs.max_send_sge,
		       sc->ib.dev->attrs.max_recv_sge);
		return -EINVAL;
	}

	sc->ib.pd = ib_alloc_pd(sc->ib.dev, 0);
	if (IS_ERR(sc->ib.pd)) {
		pr_err("Can't create RDMA PD\n");
		ret = PTR_ERR(sc->ib.pd);
		sc->ib.pd = NULL;
		return ret;
	}

	sc->ib.send_cq = ib_alloc_cq_any(sc->ib.dev, sc,
					 max_send_wr,
					 IB_POLL_WORKQUEUE);
	if (IS_ERR(sc->ib.send_cq)) {
		pr_err("Can't create RDMA send CQ\n");
		ret = PTR_ERR(sc->ib.send_cq);
		sc->ib.send_cq = NULL;
		goto err;
	}

	sc->ib.recv_cq = ib_alloc_cq_any(sc->ib.dev, sc,
					 qp_cap.max_recv_wr,
					 IB_POLL_WORKQUEUE);
	if (IS_ERR(sc->ib.recv_cq)) {
		pr_err("Can't create RDMA recv CQ\n");
		ret = PTR_ERR(sc->ib.recv_cq);
		sc->ib.recv_cq = NULL;
		goto err;
	}

	/*
	 * We reset completely here!
	 * The use above was only temporary, to calculate max_send_wr
	 * and rdma_send_wr.
	 *
	 * rdma_create_qp() will trigger rdma_rw_init_qp()
	 * again if max_rdma_ctxs is not 0.
	 */
	memset(&qp_attr, 0, sizeof(qp_attr));
	qp_attr.event_handler = smb_direct_qpair_handler;
	qp_attr.qp_context = sc;
	qp_attr.cap = qp_cap;
	qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
	qp_attr.qp_type = IB_QPT_RC;
	qp_attr.send_cq = sc->ib.send_cq;
	qp_attr.recv_cq = sc->ib.recv_cq;
	qp_attr.port_num = ~0;

	ret = rdma_create_qp(sc->rdma.cm_id, sc->ib.pd, &qp_attr);
	if (ret) {
		pr_err("Can't create RDMA QP: %d\n", ret);
		goto err;
	}

	sc->ib.qp = sc->rdma.cm_id->qp;
	sc->rdma.cm_id->event_handler = smb_direct_cm_handler;

	return 0;
err:
	if (sc->ib.qp) {
		sc->ib.qp = NULL;
		rdma_destroy_qp(sc->rdma.cm_id);
	}
	if (sc->ib.recv_cq) {
		ib_destroy_cq(sc->ib.recv_cq);
		sc->ib.recv_cq = NULL;
	}
	if (sc->ib.send_cq) {
		ib_destroy_cq(sc->ib.send_cq);
		sc->ib.send_cq = NULL;
	}
	if (sc->ib.pd) {
		ib_dealloc_pd(sc->ib.pd);
		sc->ib.pd = NULL;
	}
	return ret;
}
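/*
 * The negotiate request itself arrives through the normal receive path
 * and is parked on the reassembly queue by the receive completion
 * handling; smb_direct_prepare() below waits for the socket to reach
 * SMBDIRECT_SOCKET_NEGOTIATE_RUNNING, takes the request from the
 * reassembly queue, validates it and derives the final connection
 * parameters from it before sending the response.
 */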
static int smb_direct_prepare(struct ksmbd_transport *t)
{
	struct smb_direct_transport *st = SMBD_TRANS(t);
	struct smbdirect_socket *sc = &st->socket;
	struct smbdirect_socket_parameters *sp = &sc->parameters;
	struct smbdirect_recv_io *recvmsg;
	struct smbdirect_negotiate_req *req;
	unsigned long flags;
	int ret;

	/*
	 * We are waiting to pass the following states:
	 *
	 * SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED
	 * SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING
	 * SMBDIRECT_SOCKET_NEGOTIATE_NEEDED
	 *
	 * To finally get to SMBDIRECT_SOCKET_NEGOTIATE_RUNNING
	 * in order to continue below.
	 *
	 * Everything else is unexpected and an error.
	 */
	ksmbd_debug(RDMA, "Waiting for SMB_DIRECT negotiate request\n");
	ret = wait_event_interruptible_timeout(sc->status_wait,
					       sc->status != SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED &&
					       sc->status != SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING &&
					       sc->status != SMBDIRECT_SOCKET_NEGOTIATE_NEEDED,
					       msecs_to_jiffies(sp->negotiate_timeout_msec));
	if (ret <= 0 || sc->status != SMBDIRECT_SOCKET_NEGOTIATE_RUNNING)
		return ret < 0 ? ret : -ETIMEDOUT;

	recvmsg = get_first_reassembly(sc);
	if (!recvmsg)
		return -ECONNABORTED;

	ret = smb_direct_check_recvmsg(recvmsg);
	if (ret)
		goto put;

	req = (struct smbdirect_negotiate_req *)recvmsg->packet;
	sp->max_recv_size = min_t(int, sp->max_recv_size,
				  le32_to_cpu(req->preferred_send_size));
	sp->max_send_size = min_t(int, sp->max_send_size,
				  le32_to_cpu(req->max_receive_size));
	sp->max_fragmented_send_size =
		le32_to_cpu(req->max_fragmented_size);
	sp->max_fragmented_recv_size =
		(sp->recv_credit_max * sp->max_recv_size) / 2;
	sc->recv_io.credits.target = le16_to_cpu(req->credits_requested);
	sc->recv_io.credits.target = min_t(u16, sc->recv_io.credits.target, sp->recv_credit_max);
	sc->recv_io.credits.target = max_t(u16, sc->recv_io.credits.target, 1);

put:
	spin_lock_irqsave(&sc->recv_io.reassembly.lock, flags);
	sc->recv_io.reassembly.queue_length--;
	list_del(&recvmsg->list);
	spin_unlock_irqrestore(&sc->recv_io.reassembly.lock, flags);
	put_recvmsg(sc, recvmsg);

	if (ret == -ECONNABORTED)
		return ret;

	if (ret)
		goto respond;

	/*
	 * Negotiation succeeded, so we need to refill the recv queue.
	 * We do that with sc->idle.immediate_work still being disabled
	 * via smbdirect_socket_init(), so that queue_work(sc->workqueue,
	 * &sc->idle.immediate_work) in smb_direct_post_recv_credits()
	 * is a no-op.
	 *
	 * The message that grants the credits to the client is
	 * the negotiate response.
	 */
	INIT_WORK(&sc->recv_io.posted.refill_work, smb_direct_post_recv_credits);
	smb_direct_post_recv_credits(&sc->recv_io.posted.refill_work);
	if (unlikely(sc->first_error))
		return sc->first_error;
	INIT_WORK(&sc->idle.immediate_work, smb_direct_send_immediate_work);

respond:
	ret = smb_direct_send_negotiate_response(sc, ret);

	return ret;
}

static int smb_direct_connect(struct smbdirect_socket *sc)
{
	int ret;

	ret = smb_direct_init_params(sc);
	if (ret) {
		pr_err("Can't configure RDMA parameters\n");
		return ret;
	}

	ret = smb_direct_create_pools(sc);
	if (ret) {
		pr_err("Can't init RDMA pool: %d\n", ret);
		return ret;
	}

	ret = smb_direct_create_qpair(sc);
	if (ret) {
		pr_err("Can't accept RDMA client: %d\n", ret);
		return ret;
	}

	ret = smb_direct_prepare_negotiation(sc);
	if (ret) {
		pr_err("Can't negotiate: %d\n", ret);
		return ret;
	}
	return 0;
}

static bool rdma_frwr_is_supported(struct ib_device_attr *attrs)
{
	if (!(attrs->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS))
		return false;
	if (attrs->max_fast_reg_page_list_len == 0)
		return false;
	return true;
}
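/*
 * A new connection ends up here from the listener callback. The flow
 * is: allocate the transport, negotiate IRD/ORD with the peer
 * (including the legacy iWARP private-data fallback), then
 * smb_direct_connect() sets up the parameters, pools and QP and posts
 * the first receive before accepting, and finally a
 * ksmbd_conn_handler_loop thread takes over the connection.
 */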
static int smb_direct_handle_connect_request(struct rdma_cm_id *new_cm_id,
					     struct rdma_cm_event *event)
{
	struct smb_direct_transport *t;
	struct smbdirect_socket *sc;
	struct smbdirect_socket_parameters *sp;
	struct task_struct *handler;
	u8 peer_initiator_depth;
	u8 peer_responder_resources;
	int ret;

	if (!rdma_frwr_is_supported(&new_cm_id->device->attrs)) {
		ksmbd_debug(RDMA,
			    "Fast Registration Work Requests are not supported. device capabilities=%llx\n",
			    new_cm_id->device->attrs.device_cap_flags);
		return -EPROTONOSUPPORT;
	}

	t = alloc_transport(new_cm_id);
	if (!t)
		return -ENOMEM;
	sc = &t->socket;
	sp = &sc->parameters;

	peer_initiator_depth = event->param.conn.initiator_depth;
	peer_responder_resources = event->param.conn.responder_resources;
	if (rdma_protocol_iwarp(new_cm_id->device, new_cm_id->port_num) &&
	    event->param.conn.private_data_len == 8) {
		/*
		 * Legacy clients with only iWARP MPA v1 support
		 * need a private blob in order to negotiate
		 * the IRD/ORD values.
		 */
		const __be32 *ird_ord_hdr = event->param.conn.private_data;
		u32 ird32 = be32_to_cpu(ird_ord_hdr[0]);
		u32 ord32 = be32_to_cpu(ird_ord_hdr[1]);

		/*
		 * cifs.ko sends the legacy IRD/ORD negotiation
		 * blob even if iWARP MPA v2 was used.
		 *
		 * Here we check that the values match and only
		 * mark the client as legacy if they don't match.
		 */
		if ((u32)event->param.conn.initiator_depth != ird32 ||
		    (u32)event->param.conn.responder_resources != ord32) {
			/*
			 * There are broken clients (old cifs.ko)
			 * using little endian and also
			 * struct rdma_conn_param only uses u8
			 * for initiator_depth and responder_resources,
			 * so we truncate the value to U8_MAX.
			 *
			 * smb_direct_accept_client() will then
			 * do the real negotiation in order to
			 * select the minimum between client and
			 * server.
			 */
			ird32 = min_t(u32, ird32, U8_MAX);
			ord32 = min_t(u32, ord32, U8_MAX);

			sc->rdma.legacy_iwarp = true;
			peer_initiator_depth = (u8)ird32;
			peer_responder_resources = (u8)ord32;
		}
	}

	/*
	 * First set what we as the server are able to support
	 */
	sp->initiator_depth = min_t(u8, sp->initiator_depth,
				    new_cm_id->device->attrs.max_qp_rd_atom);

	/*
	 * Negotiate the value by using the minimum
	 * between client and server if the client provided
	 * non-zero values.
	 */
	if (peer_initiator_depth != 0)
		sp->initiator_depth = min_t(u8, sp->initiator_depth,
					    peer_initiator_depth);
	if (peer_responder_resources != 0)
		sp->responder_resources = min_t(u8, sp->responder_resources,
						peer_responder_resources);

	ret = smb_direct_connect(sc);
	if (ret)
		goto out_err;

	handler = kthread_run(ksmbd_conn_handler_loop,
			      KSMBD_TRANS(t)->conn, "ksmbd:r%u",
			      smb_direct_port);
	if (IS_ERR(handler)) {
		ret = PTR_ERR(handler);
		pr_err("Can't start thread\n");
		goto out_err;
	}

	return 0;
out_err:
	free_transport(t);
	return ret;
}
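/*
 * For example, in smb_direct_handle_connect_request() above, assuming
 * the default SMB_DIRECT_CM_INITIATOR_DEPTH of 8, a device reporting
 * max_qp_rd_atom 4 and a peer requesting 16, the clamping settles on
 * min(8, 4, 16) = 4; peers that report 0 simply keep the server-side
 * value.
 */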
static int smb_direct_listen_handler(struct rdma_cm_id *cm_id,
				     struct rdma_cm_event *event)
{
	switch (event->event) {
	case RDMA_CM_EVENT_CONNECT_REQUEST: {
		int ret = smb_direct_handle_connect_request(cm_id, event);

		if (ret) {
			pr_err("Can't create transport: %d\n", ret);
			return ret;
		}

		ksmbd_debug(RDMA, "Received connection request. cm_id=%p\n",
			    cm_id);
		break;
	}
	default:
		pr_err("Unexpected listen event. cm_id=%p, event=%s (%d)\n",
		       cm_id, rdma_event_msg(event->event), event->event);
		break;
	}
	return 0;
}

static int smb_direct_listen(int port)
{
	int ret;
	struct rdma_cm_id *cm_id;
	struct sockaddr_in sin = {
		.sin_family = AF_INET,
		.sin_addr.s_addr = htonl(INADDR_ANY),
		.sin_port = htons(port),
	};

	cm_id = rdma_create_id(&init_net, smb_direct_listen_handler,
			       &smb_direct_listener, RDMA_PS_TCP, IB_QPT_RC);
	if (IS_ERR(cm_id)) {
		pr_err("Can't create cm id: %ld\n", PTR_ERR(cm_id));
		return PTR_ERR(cm_id);
	}

	ret = rdma_bind_addr(cm_id, (struct sockaddr *)&sin);
	if (ret) {
		pr_err("Can't bind: %d\n", ret);
		goto err;
	}

	smb_direct_listener.cm_id = cm_id;

	ret = rdma_listen(cm_id, 10);
	if (ret) {
		pr_err("Can't listen: %d\n", ret);
		goto err;
	}
	return 0;
err:
	smb_direct_listener.cm_id = NULL;
	rdma_destroy_id(cm_id);
	return ret;
}

static int smb_direct_ib_client_add(struct ib_device *ib_dev)
{
	struct smb_direct_device *smb_dev;

	/* Use port 5445 if the device type is iWARP (not IB) */
	if (ib_dev->node_type != RDMA_NODE_IB_CA)
		smb_direct_port = SMB_DIRECT_PORT_IWARP;

	if (!rdma_frwr_is_supported(&ib_dev->attrs))
		return 0;

	smb_dev = kzalloc(sizeof(*smb_dev), KSMBD_DEFAULT_GFP);
	if (!smb_dev)
		return -ENOMEM;
	smb_dev->ib_dev = ib_dev;

	write_lock(&smb_direct_device_lock);
	list_add(&smb_dev->list, &smb_direct_device_list);
	write_unlock(&smb_direct_device_lock);

	ksmbd_debug(RDMA, "ib device added: name %s\n", ib_dev->name);
	return 0;
}

static void smb_direct_ib_client_remove(struct ib_device *ib_dev,
					void *client_data)
{
	struct smb_direct_device *smb_dev, *tmp;

	write_lock(&smb_direct_device_lock);
	list_for_each_entry_safe(smb_dev, tmp, &smb_direct_device_list, list) {
		if (smb_dev->ib_dev == ib_dev) {
			list_del(&smb_dev->list);
			kfree(smb_dev);
			break;
		}
	}
	write_unlock(&smb_direct_device_lock);
}

static struct ib_client smb_direct_ib_client = {
	.name = "ksmbd_smb_direct_ib",
	.add = smb_direct_ib_client_add,
	.remove = smb_direct_ib_client_remove,
};
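/*
 * Note the ordering in ksmbd_rdma_init() below: ib_register_client()
 * runs smb_direct_ib_client_add() for RDMA devices that are already
 * present, which may switch smb_direct_port from 445 to 5445 (iWARP)
 * before smb_direct_listen() binds the port.
 */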
int ksmbd_rdma_init(void)
{
	int ret;

	smb_direct_listener.cm_id = NULL;

	ret = ib_register_client(&smb_direct_ib_client);
	if (ret) {
		pr_err("failed to ib_register_client\n");
		return ret;
	}

	/*
	 * When a client is running out of send credits, the server
	 * grants more credits by sending a packet via this queue.
	 * This avoids a situation where clients cannot send packets
	 * for lack of credits.
	 */
	smb_direct_wq = alloc_workqueue("ksmbd-smb_direct-wq",
					WQ_HIGHPRI | WQ_MEM_RECLAIM | WQ_PERCPU,
					0);
	if (!smb_direct_wq) {
		ib_unregister_client(&smb_direct_ib_client);
		return -ENOMEM;
	}

	ret = smb_direct_listen(smb_direct_port);
	if (ret) {
		destroy_workqueue(smb_direct_wq);
		smb_direct_wq = NULL;
		ib_unregister_client(&smb_direct_ib_client);
		pr_err("Can't listen: %d\n", ret);
		return ret;
	}

	ksmbd_debug(RDMA, "init RDMA listener. cm_id=%p\n",
		    smb_direct_listener.cm_id);
	return 0;
}

void ksmbd_rdma_stop_listening(void)
{
	if (!smb_direct_listener.cm_id)
		return;

	ib_unregister_client(&smb_direct_ib_client);
	rdma_destroy_id(smb_direct_listener.cm_id);

	smb_direct_listener.cm_id = NULL;
}

void ksmbd_rdma_destroy(void)
{
	if (smb_direct_wq) {
		destroy_workqueue(smb_direct_wq);
		smb_direct_wq = NULL;
	}
}

static bool ksmbd_find_rdma_capable_netdev(struct net_device *netdev)
{
	struct smb_direct_device *smb_dev;
	int i;
	bool rdma_capable = false;

	read_lock(&smb_direct_device_lock);
	list_for_each_entry(smb_dev, &smb_direct_device_list, list) {
		for (i = 0; i < smb_dev->ib_dev->phys_port_cnt; i++) {
			struct net_device *ndev;

			ndev = ib_device_get_netdev(smb_dev->ib_dev, i + 1);
			if (!ndev)
				continue;

			if (ndev == netdev) {
				dev_put(ndev);
				rdma_capable = true;
				goto out;
			}
			dev_put(ndev);
		}
	}
out:
	read_unlock(&smb_direct_device_lock);

	if (!rdma_capable) {
		struct ib_device *ibdev;

		ibdev = ib_device_get_by_netdev(netdev, RDMA_DRIVER_UNKNOWN);
		if (ibdev) {
			rdma_capable = rdma_frwr_is_supported(&ibdev->attrs);
			ib_device_put(ibdev);
		}
	}

	ksmbd_debug(RDMA, "netdev(%s) rdma capable : %s\n",
		    netdev->name, str_true_false(rdma_capable));

	return rdma_capable;
}

bool ksmbd_rdma_capable_netdev(struct net_device *netdev)
{
	struct net_device *lower_dev;
	struct list_head *iter;

	if (ksmbd_find_rdma_capable_netdev(netdev))
		return true;

	/* check if netdev is a bridge or VLAN */
	if (netif_is_bridge_master(netdev) ||
	    netdev->priv_flags & IFF_802_1Q_VLAN)
		netdev_for_each_lower_dev(netdev, lower_dev, iter)
			if (ksmbd_find_rdma_capable_netdev(lower_dev))
				return true;

	/* check if netdev is IPoIB without a layer violation */
	if (netdev->type == ARPHRD_INFINIBAND)
		return true;

	return false;
}

static const struct ksmbd_transport_ops ksmbd_smb_direct_transport_ops = {
	.prepare = smb_direct_prepare,
	.disconnect = smb_direct_disconnect,
	.shutdown = smb_direct_shutdown,
	.writev = smb_direct_writev,
	.read = smb_direct_read,
	.rdma_read = smb_direct_rdma_read,
	.rdma_write = smb_direct_rdma_write,
	.free_transport = smb_direct_free_transport,
};
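/*
 * A minimal sketch (hypothetical caller, not part of this file) of how
 * the ksmbd core is expected to dispatch through the ops table above,
 * with the argument lists elided:
 *
 *	struct ksmbd_transport *t = conn->transport;
 *
 *	t->ops->prepare(t);		// SMB_DIRECT negotiation
 *	t->ops->writev(t, ...);		// send path
 *	t->ops->rdma_read(t, ...);	// SMB2 read/write via RDMA channel
 *
 * The transfer size on the RDMA paths is bounded by the negotiated
 * sp->max_read_write_size exposed via get_smbd_max_read_write_size().
 */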