1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Copyright (C) 2017, Microsoft Corporation. 4 * 5 * Author(s): Long Li <longli@microsoft.com> 6 */ 7 #include <linux/module.h> 8 #include <linux/highmem.h> 9 #include <linux/folio_queue.h> 10 #include "../common/smbdirect/smbdirect_pdu.h" 11 #include "smbdirect.h" 12 #include "cifs_debug.h" 13 #include "cifsproto.h" 14 #include "smb2proto.h" 15 16 static struct smbdirect_recv_io *get_receive_buffer( 17 struct smbd_connection *info); 18 static void put_receive_buffer( 19 struct smbd_connection *info, 20 struct smbdirect_recv_io *response); 21 static int allocate_receive_buffers(struct smbd_connection *info, int num_buf); 22 static void destroy_receive_buffers(struct smbd_connection *info); 23 24 static void enqueue_reassembly( 25 struct smbd_connection *info, 26 struct smbdirect_recv_io *response, int data_length); 27 static struct smbdirect_recv_io *_get_first_reassembly( 28 struct smbd_connection *info); 29 30 static int smbd_post_recv( 31 struct smbd_connection *info, 32 struct smbdirect_recv_io *response); 33 34 static int smbd_post_send_empty(struct smbd_connection *info); 35 36 static void destroy_mr_list(struct smbd_connection *info); 37 static int allocate_mr_list(struct smbd_connection *info); 38 39 struct smb_extract_to_rdma { 40 struct ib_sge *sge; 41 unsigned int nr_sge; 42 unsigned int max_sge; 43 struct ib_device *device; 44 u32 local_dma_lkey; 45 enum dma_data_direction direction; 46 }; 47 static ssize_t smb_extract_iter_to_rdma(struct iov_iter *iter, size_t len, 48 struct smb_extract_to_rdma *rdma); 49 50 /* Port numbers for SMBD transport */ 51 #define SMB_PORT 445 52 #define SMBD_PORT 5445 53 54 /* Address lookup and resolve timeout in ms */ 55 #define RDMA_RESOLVE_TIMEOUT 5000 56 57 /* SMBD negotiation timeout in seconds */ 58 #define SMBD_NEGOTIATE_TIMEOUT 120 59 60 /* SMBD minimum receive size and fragmented sized defined in [MS-SMBD] */ 61 #define SMBD_MIN_RECEIVE_SIZE 128 62 #define SMBD_MIN_FRAGMENTED_SIZE 131072 63 64 /* 65 * Default maximum number of RDMA read/write outstanding on this connection 66 * This value is possibly decreased during QP creation on hardware limit 67 */ 68 #define SMBD_CM_RESPONDER_RESOURCES 32 69 70 /* Maximum number of retries on data transfer operations */ 71 #define SMBD_CM_RETRY 6 72 /* No need to retry on Receiver Not Ready since SMBD manages credits */ 73 #define SMBD_CM_RNR_RETRY 0 74 75 /* 76 * User configurable initial values per SMBD transport connection 77 * as defined in [MS-SMBD] 3.1.1.1 78 * Those may change after a SMBD negotiation 79 */ 80 /* The local peer's maximum number of credits to grant to the peer */ 81 int smbd_receive_credit_max = 255; 82 83 /* The remote peer's credit request of local peer */ 84 int smbd_send_credit_target = 255; 85 86 /* The maximum single message size can be sent to remote peer */ 87 int smbd_max_send_size = 1364; 88 89 /* The maximum fragmented upper-layer payload receive size supported */ 90 int smbd_max_fragmented_recv_size = 1024 * 1024; 91 92 /* The maximum single-message size which can be received */ 93 int smbd_max_receive_size = 1364; 94 95 /* The timeout to initiate send of a keepalive message on idle */ 96 int smbd_keep_alive_interval = 120; 97 98 /* 99 * User configurable initial values for RDMA transport 100 * The actual values used may be lower and are limited to hardware capabilities 101 */ 102 /* Default maximum number of pages in a single RDMA write/read */ 103 int smbd_max_frmr_depth = 2048; 104 105 /* If payload is less than 
this byte, use RDMA send/recv not read/write */ 106 int rdma_readwrite_threshold = 4096; 107 108 /* Transport logging functions 109 * Logging are defined as classes. They can be OR'ed to define the actual 110 * logging level via module parameter smbd_logging_class 111 * e.g. cifs.smbd_logging_class=0xa0 will log all log_rdma_recv() and 112 * log_rdma_event() 113 */ 114 #define LOG_OUTGOING 0x1 115 #define LOG_INCOMING 0x2 116 #define LOG_READ 0x4 117 #define LOG_WRITE 0x8 118 #define LOG_RDMA_SEND 0x10 119 #define LOG_RDMA_RECV 0x20 120 #define LOG_KEEP_ALIVE 0x40 121 #define LOG_RDMA_EVENT 0x80 122 #define LOG_RDMA_MR 0x100 123 static unsigned int smbd_logging_class; 124 module_param(smbd_logging_class, uint, 0644); 125 MODULE_PARM_DESC(smbd_logging_class, 126 "Logging class for SMBD transport 0x0 to 0x100"); 127 128 #define ERR 0x0 129 #define INFO 0x1 130 static unsigned int smbd_logging_level = ERR; 131 module_param(smbd_logging_level, uint, 0644); 132 MODULE_PARM_DESC(smbd_logging_level, 133 "Logging level for SMBD transport, 0 (default): error, 1: info"); 134 135 #define log_rdma(level, class, fmt, args...) \ 136 do { \ 137 if (level <= smbd_logging_level || class & smbd_logging_class) \ 138 cifs_dbg(VFS, "%s:%d " fmt, __func__, __LINE__, ##args);\ 139 } while (0) 140 141 #define log_outgoing(level, fmt, args...) \ 142 log_rdma(level, LOG_OUTGOING, fmt, ##args) 143 #define log_incoming(level, fmt, args...) \ 144 log_rdma(level, LOG_INCOMING, fmt, ##args) 145 #define log_read(level, fmt, args...) log_rdma(level, LOG_READ, fmt, ##args) 146 #define log_write(level, fmt, args...) log_rdma(level, LOG_WRITE, fmt, ##args) 147 #define log_rdma_send(level, fmt, args...) \ 148 log_rdma(level, LOG_RDMA_SEND, fmt, ##args) 149 #define log_rdma_recv(level, fmt, args...) \ 150 log_rdma(level, LOG_RDMA_RECV, fmt, ##args) 151 #define log_keep_alive(level, fmt, args...) \ 152 log_rdma(level, LOG_KEEP_ALIVE, fmt, ##args) 153 #define log_rdma_event(level, fmt, args...) \ 154 log_rdma(level, LOG_RDMA_EVENT, fmt, ##args) 155 #define log_rdma_mr(level, fmt, args...) 
\ 156 log_rdma(level, LOG_RDMA_MR, fmt, ##args) 157 158 static void smbd_disconnect_rdma_work(struct work_struct *work) 159 { 160 struct smbd_connection *info = 161 container_of(work, struct smbd_connection, disconnect_work); 162 struct smbdirect_socket *sc = &info->socket; 163 164 if (sc->status == SMBDIRECT_SOCKET_CONNECTED) { 165 sc->status = SMBDIRECT_SOCKET_DISCONNECTING; 166 rdma_disconnect(sc->rdma.cm_id); 167 } 168 } 169 170 static void smbd_disconnect_rdma_connection(struct smbd_connection *info) 171 { 172 queue_work(info->workqueue, &info->disconnect_work); 173 } 174 175 /* Upcall from RDMA CM */ 176 static int smbd_conn_upcall( 177 struct rdma_cm_id *id, struct rdma_cm_event *event) 178 { 179 struct smbd_connection *info = id->context; 180 struct smbdirect_socket *sc = &info->socket; 181 const char *event_name = rdma_event_msg(event->event); 182 183 log_rdma_event(INFO, "event=%s status=%d\n", 184 event_name, event->status); 185 186 switch (event->event) { 187 case RDMA_CM_EVENT_ADDR_RESOLVED: 188 case RDMA_CM_EVENT_ROUTE_RESOLVED: 189 info->ri_rc = 0; 190 complete(&info->ri_done); 191 break; 192 193 case RDMA_CM_EVENT_ADDR_ERROR: 194 log_rdma_event(ERR, "connecting failed event=%s\n", event_name); 195 info->ri_rc = -EHOSTUNREACH; 196 complete(&info->ri_done); 197 break; 198 199 case RDMA_CM_EVENT_ROUTE_ERROR: 200 log_rdma_event(ERR, "connecting failed event=%s\n", event_name); 201 info->ri_rc = -ENETUNREACH; 202 complete(&info->ri_done); 203 break; 204 205 case RDMA_CM_EVENT_ESTABLISHED: 206 log_rdma_event(INFO, "connected event=%s\n", event_name); 207 sc->status = SMBDIRECT_SOCKET_CONNECTED; 208 wake_up_interruptible(&info->status_wait); 209 break; 210 211 case RDMA_CM_EVENT_CONNECT_ERROR: 212 case RDMA_CM_EVENT_UNREACHABLE: 213 case RDMA_CM_EVENT_REJECTED: 214 log_rdma_event(ERR, "connecting failed event=%s\n", event_name); 215 sc->status = SMBDIRECT_SOCKET_DISCONNECTED; 216 wake_up_interruptible(&info->status_wait); 217 break; 218 219 case RDMA_CM_EVENT_DEVICE_REMOVAL: 220 case RDMA_CM_EVENT_DISCONNECTED: 221 /* This happens when we fail the negotiation */ 222 if (sc->status == SMBDIRECT_SOCKET_NEGOTIATE_FAILED) { 223 log_rdma_event(ERR, "event=%s during negotiation\n", event_name); 224 sc->status = SMBDIRECT_SOCKET_DISCONNECTED; 225 wake_up(&info->status_wait); 226 break; 227 } 228 229 sc->status = SMBDIRECT_SOCKET_DISCONNECTED; 230 wake_up_interruptible(&info->status_wait); 231 wake_up_interruptible(&sc->recv_io.reassembly.wait_queue); 232 wake_up_interruptible_all(&info->wait_send_queue); 233 break; 234 235 default: 236 log_rdma_event(ERR, "unexpected event=%s status=%d\n", 237 event_name, event->status); 238 break; 239 } 240 241 return 0; 242 } 243 244 /* Upcall from RDMA QP */ 245 static void 246 smbd_qp_async_error_upcall(struct ib_event *event, void *context) 247 { 248 struct smbd_connection *info = context; 249 250 log_rdma_event(ERR, "%s on device %s info %p\n", 251 ib_event_msg(event->event), event->device->name, info); 252 253 switch (event->event) { 254 case IB_EVENT_CQ_ERR: 255 case IB_EVENT_QP_FATAL: 256 smbd_disconnect_rdma_connection(info); 257 break; 258 259 default: 260 break; 261 } 262 } 263 264 static inline void *smbdirect_send_io_payload(struct smbdirect_send_io *request) 265 { 266 return (void *)request->packet; 267 } 268 269 static inline void *smbdirect_recv_io_payload(struct smbdirect_recv_io *response) 270 { 271 return (void *)response->packet; 272 } 273 274 /* Called when a RDMA send is done */ 275 static void send_done(struct ib_cq *cq, struct 
ib_wc *wc) 276 { 277 int i; 278 struct smbdirect_send_io *request = 279 container_of(wc->wr_cqe, struct smbdirect_send_io, cqe); 280 struct smbdirect_socket *sc = request->socket; 281 struct smbd_connection *info = 282 container_of(sc, struct smbd_connection, socket); 283 284 log_rdma_send(INFO, "smbdirect_send_io 0x%p completed wc->status=%d\n", 285 request, wc->status); 286 287 for (i = 0; i < request->num_sge; i++) 288 ib_dma_unmap_single(sc->ib.dev, 289 request->sge[i].addr, 290 request->sge[i].length, 291 DMA_TO_DEVICE); 292 293 if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_SEND) { 294 log_rdma_send(ERR, "wc->status=%d wc->opcode=%d\n", 295 wc->status, wc->opcode); 296 mempool_free(request, sc->send_io.mem.pool); 297 smbd_disconnect_rdma_connection(info); 298 return; 299 } 300 301 if (atomic_dec_and_test(&info->send_pending)) 302 wake_up(&info->wait_send_pending); 303 304 wake_up(&info->wait_post_send); 305 306 mempool_free(request, sc->send_io.mem.pool); 307 } 308 309 static void dump_smbdirect_negotiate_resp(struct smbdirect_negotiate_resp *resp) 310 { 311 log_rdma_event(INFO, "resp message min_version %u max_version %u negotiated_version %u credits_requested %u credits_granted %u status %u max_readwrite_size %u preferred_send_size %u max_receive_size %u max_fragmented_size %u\n", 312 resp->min_version, resp->max_version, 313 resp->negotiated_version, resp->credits_requested, 314 resp->credits_granted, resp->status, 315 resp->max_readwrite_size, resp->preferred_send_size, 316 resp->max_receive_size, resp->max_fragmented_size); 317 } 318 319 /* 320 * Process a negotiation response message, according to [MS-SMBD]3.1.5.7 321 * response, packet_length: the negotiation response message 322 * return value: true if negotiation is a success, false if failed 323 */ 324 static bool process_negotiation_response( 325 struct smbdirect_recv_io *response, int packet_length) 326 { 327 struct smbdirect_socket *sc = response->socket; 328 struct smbd_connection *info = 329 container_of(sc, struct smbd_connection, socket); 330 struct smbdirect_socket_parameters *sp = &sc->parameters; 331 struct smbdirect_negotiate_resp *packet = smbdirect_recv_io_payload(response); 332 333 if (packet_length < sizeof(struct smbdirect_negotiate_resp)) { 334 log_rdma_event(ERR, 335 "error: packet_length=%d\n", packet_length); 336 return false; 337 } 338 339 if (le16_to_cpu(packet->negotiated_version) != SMBDIRECT_V1) { 340 log_rdma_event(ERR, "error: negotiated_version=%x\n", 341 le16_to_cpu(packet->negotiated_version)); 342 return false; 343 } 344 info->protocol = le16_to_cpu(packet->negotiated_version); 345 346 if (packet->credits_requested == 0) { 347 log_rdma_event(ERR, "error: credits_requested==0\n"); 348 return false; 349 } 350 info->receive_credit_target = le16_to_cpu(packet->credits_requested); 351 352 if (packet->credits_granted == 0) { 353 log_rdma_event(ERR, "error: credits_granted==0\n"); 354 return false; 355 } 356 atomic_set(&info->send_credits, le16_to_cpu(packet->credits_granted)); 357 358 atomic_set(&info->receive_credits, 0); 359 360 if (le32_to_cpu(packet->preferred_send_size) > sp->max_recv_size) { 361 log_rdma_event(ERR, "error: preferred_send_size=%d\n", 362 le32_to_cpu(packet->preferred_send_size)); 363 return false; 364 } 365 sp->max_recv_size = le32_to_cpu(packet->preferred_send_size); 366 367 if (le32_to_cpu(packet->max_receive_size) < SMBD_MIN_RECEIVE_SIZE) { 368 log_rdma_event(ERR, "error: max_receive_size=%d\n", 369 le32_to_cpu(packet->max_receive_size)); 370 return false; 371 } 372 
sp->max_send_size = min_t(u32, sp->max_send_size, 373 le32_to_cpu(packet->max_receive_size)); 374 375 if (le32_to_cpu(packet->max_fragmented_size) < 376 SMBD_MIN_FRAGMENTED_SIZE) { 377 log_rdma_event(ERR, "error: max_fragmented_size=%d\n", 378 le32_to_cpu(packet->max_fragmented_size)); 379 return false; 380 } 381 sp->max_fragmented_send_size = 382 le32_to_cpu(packet->max_fragmented_size); 383 info->rdma_readwrite_threshold = 384 rdma_readwrite_threshold > sp->max_fragmented_send_size ? 385 sp->max_fragmented_send_size : 386 rdma_readwrite_threshold; 387 388 389 sp->max_read_write_size = min_t(u32, 390 le32_to_cpu(packet->max_readwrite_size), 391 info->max_frmr_depth * PAGE_SIZE); 392 info->max_frmr_depth = sp->max_read_write_size / PAGE_SIZE; 393 394 sc->recv_io.expected = SMBDIRECT_EXPECT_DATA_TRANSFER; 395 return true; 396 } 397 398 static void smbd_post_send_credits(struct work_struct *work) 399 { 400 int ret = 0; 401 int rc; 402 struct smbdirect_recv_io *response; 403 struct smbd_connection *info = 404 container_of(work, struct smbd_connection, 405 post_send_credits_work); 406 struct smbdirect_socket *sc = &info->socket; 407 408 if (sc->status != SMBDIRECT_SOCKET_CONNECTED) { 409 wake_up(&info->wait_receive_queues); 410 return; 411 } 412 413 if (info->receive_credit_target > 414 atomic_read(&info->receive_credits)) { 415 while (true) { 416 response = get_receive_buffer(info); 417 if (!response) 418 break; 419 420 response->first_segment = false; 421 rc = smbd_post_recv(info, response); 422 if (rc) { 423 log_rdma_recv(ERR, 424 "post_recv failed rc=%d\n", rc); 425 put_receive_buffer(info, response); 426 break; 427 } 428 429 ret++; 430 } 431 } 432 433 spin_lock(&info->lock_new_credits_offered); 434 info->new_credits_offered += ret; 435 spin_unlock(&info->lock_new_credits_offered); 436 437 /* Promptly send an immediate packet as defined in [MS-SMBD] 3.1.1.1 */ 438 info->send_immediate = true; 439 if (atomic_read(&info->receive_credits) < 440 info->receive_credit_target - 1) { 441 if (info->keep_alive_requested == KEEP_ALIVE_PENDING || 442 info->send_immediate) { 443 log_keep_alive(INFO, "send an empty message\n"); 444 smbd_post_send_empty(info); 445 } 446 } 447 } 448 449 /* Called from softirq, when recv is done */ 450 static void recv_done(struct ib_cq *cq, struct ib_wc *wc) 451 { 452 struct smbdirect_data_transfer *data_transfer; 453 struct smbdirect_recv_io *response = 454 container_of(wc->wr_cqe, struct smbdirect_recv_io, cqe); 455 struct smbdirect_socket *sc = response->socket; 456 struct smbdirect_socket_parameters *sp = &sc->parameters; 457 struct smbd_connection *info = 458 container_of(sc, struct smbd_connection, socket); 459 u32 data_offset = 0; 460 u32 data_length = 0; 461 u32 remaining_data_length = 0; 462 463 log_rdma_recv(INFO, "response=0x%p type=%d wc status=%d wc opcode %d byte_len=%d pkey_index=%u\n", 464 response, sc->recv_io.expected, wc->status, wc->opcode, 465 wc->byte_len, wc->pkey_index); 466 467 if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_RECV) { 468 log_rdma_recv(INFO, "wc->status=%d opcode=%d\n", 469 wc->status, wc->opcode); 470 goto error; 471 } 472 473 ib_dma_sync_single_for_cpu( 474 wc->qp->device, 475 response->sge.addr, 476 response->sge.length, 477 DMA_FROM_DEVICE); 478 479 switch (sc->recv_io.expected) { 480 /* SMBD negotiation response */ 481 case SMBDIRECT_EXPECT_NEGOTIATE_REP: 482 dump_smbdirect_negotiate_resp(smbdirect_recv_io_payload(response)); 483 sc->recv_io.reassembly.full_packet_received = true; 484 info->negotiate_done = 485 
process_negotiation_response(response, wc->byte_len); 486 put_receive_buffer(info, response); 487 complete(&info->negotiate_completion); 488 return; 489 490 /* SMBD data transfer packet */ 491 case SMBDIRECT_EXPECT_DATA_TRANSFER: 492 data_transfer = smbdirect_recv_io_payload(response); 493 494 if (wc->byte_len < 495 offsetof(struct smbdirect_data_transfer, padding)) 496 goto error; 497 498 remaining_data_length = le32_to_cpu(data_transfer->remaining_data_length); 499 data_offset = le32_to_cpu(data_transfer->data_offset); 500 data_length = le32_to_cpu(data_transfer->data_length); 501 if (wc->byte_len < data_offset || 502 (u64)wc->byte_len < (u64)data_offset + data_length) 503 goto error; 504 505 if (remaining_data_length > sp->max_fragmented_recv_size || 506 data_length > sp->max_fragmented_recv_size || 507 (u64)remaining_data_length + (u64)data_length > (u64)sp->max_fragmented_recv_size) 508 goto error; 509 510 if (data_length) { 511 if (sc->recv_io.reassembly.full_packet_received) 512 response->first_segment = true; 513 514 if (le32_to_cpu(data_transfer->remaining_data_length)) 515 sc->recv_io.reassembly.full_packet_received = false; 516 else 517 sc->recv_io.reassembly.full_packet_received = true; 518 } 519 520 atomic_dec(&info->receive_credits); 521 info->receive_credit_target = 522 le16_to_cpu(data_transfer->credits_requested); 523 if (le16_to_cpu(data_transfer->credits_granted)) { 524 atomic_add(le16_to_cpu(data_transfer->credits_granted), 525 &info->send_credits); 526 /* 527 * We have new send credits granted from remote peer 528 * If any sender is waiting for credits, unblock it 529 */ 530 wake_up_interruptible(&info->wait_send_queue); 531 } 532 533 log_incoming(INFO, "data flags %d data_offset %d data_length %d remaining_data_length %d\n", 534 le16_to_cpu(data_transfer->flags), 535 le32_to_cpu(data_transfer->data_offset), 536 le32_to_cpu(data_transfer->data_length), 537 le32_to_cpu(data_transfer->remaining_data_length)); 538 539 /* Send a KEEP_ALIVE response right away if requested */ 540 info->keep_alive_requested = KEEP_ALIVE_NONE; 541 if (le16_to_cpu(data_transfer->flags) & 542 SMBDIRECT_FLAG_RESPONSE_REQUESTED) { 543 info->keep_alive_requested = KEEP_ALIVE_PENDING; 544 } 545 546 /* 547 * If this is a packet with data playload place the data in 548 * reassembly queue and wake up the reading thread 549 */ 550 if (data_length) { 551 enqueue_reassembly(info, response, data_length); 552 wake_up_interruptible(&sc->recv_io.reassembly.wait_queue); 553 } else 554 put_receive_buffer(info, response); 555 556 return; 557 558 case SMBDIRECT_EXPECT_NEGOTIATE_REQ: 559 /* Only server... */ 560 break; 561 } 562 563 /* 564 * This is an internal error! 
565 */ 566 log_rdma_recv(ERR, "unexpected response type=%d\n", sc->recv_io.expected); 567 WARN_ON_ONCE(sc->recv_io.expected != SMBDIRECT_EXPECT_DATA_TRANSFER); 568 error: 569 put_receive_buffer(info, response); 570 smbd_disconnect_rdma_connection(info); 571 } 572 573 static struct rdma_cm_id *smbd_create_id( 574 struct smbd_connection *info, 575 struct sockaddr *dstaddr, int port) 576 { 577 struct rdma_cm_id *id; 578 int rc; 579 __be16 *sport; 580 581 id = rdma_create_id(&init_net, smbd_conn_upcall, info, 582 RDMA_PS_TCP, IB_QPT_RC); 583 if (IS_ERR(id)) { 584 rc = PTR_ERR(id); 585 log_rdma_event(ERR, "rdma_create_id() failed %i\n", rc); 586 return id; 587 } 588 589 if (dstaddr->sa_family == AF_INET6) 590 sport = &((struct sockaddr_in6 *)dstaddr)->sin6_port; 591 else 592 sport = &((struct sockaddr_in *)dstaddr)->sin_port; 593 594 *sport = htons(port); 595 596 init_completion(&info->ri_done); 597 info->ri_rc = -ETIMEDOUT; 598 599 rc = rdma_resolve_addr(id, NULL, (struct sockaddr *)dstaddr, 600 RDMA_RESOLVE_TIMEOUT); 601 if (rc) { 602 log_rdma_event(ERR, "rdma_resolve_addr() failed %i\n", rc); 603 goto out; 604 } 605 rc = wait_for_completion_interruptible_timeout( 606 &info->ri_done, msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT)); 607 /* e.g. if interrupted returns -ERESTARTSYS */ 608 if (rc < 0) { 609 log_rdma_event(ERR, "rdma_resolve_addr timeout rc: %i\n", rc); 610 goto out; 611 } 612 rc = info->ri_rc; 613 if (rc) { 614 log_rdma_event(ERR, "rdma_resolve_addr() completed %i\n", rc); 615 goto out; 616 } 617 618 info->ri_rc = -ETIMEDOUT; 619 rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT); 620 if (rc) { 621 log_rdma_event(ERR, "rdma_resolve_route() failed %i\n", rc); 622 goto out; 623 } 624 rc = wait_for_completion_interruptible_timeout( 625 &info->ri_done, msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT)); 626 /* e.g. 
if interrupted returns -ERESTARTSYS */ 627 if (rc < 0) { 628 log_rdma_event(ERR, "rdma_resolve_addr timeout rc: %i\n", rc); 629 goto out; 630 } 631 rc = info->ri_rc; 632 if (rc) { 633 log_rdma_event(ERR, "rdma_resolve_route() completed %i\n", rc); 634 goto out; 635 } 636 637 return id; 638 639 out: 640 rdma_destroy_id(id); 641 return ERR_PTR(rc); 642 } 643 644 /* 645 * Test if FRWR (Fast Registration Work Requests) is supported on the device 646 * This implementation requires FRWR on RDMA read/write 647 * return value: true if it is supported 648 */ 649 static bool frwr_is_supported(struct ib_device_attr *attrs) 650 { 651 if (!(attrs->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS)) 652 return false; 653 if (attrs->max_fast_reg_page_list_len == 0) 654 return false; 655 return true; 656 } 657 658 static int smbd_ia_open( 659 struct smbd_connection *info, 660 struct sockaddr *dstaddr, int port) 661 { 662 struct smbdirect_socket *sc = &info->socket; 663 int rc; 664 665 sc->rdma.cm_id = smbd_create_id(info, dstaddr, port); 666 if (IS_ERR(sc->rdma.cm_id)) { 667 rc = PTR_ERR(sc->rdma.cm_id); 668 goto out1; 669 } 670 sc->ib.dev = sc->rdma.cm_id->device; 671 672 if (!frwr_is_supported(&sc->ib.dev->attrs)) { 673 log_rdma_event(ERR, "Fast Registration Work Requests (FRWR) is not supported\n"); 674 log_rdma_event(ERR, "Device capability flags = %llx max_fast_reg_page_list_len = %u\n", 675 sc->ib.dev->attrs.device_cap_flags, 676 sc->ib.dev->attrs.max_fast_reg_page_list_len); 677 rc = -EPROTONOSUPPORT; 678 goto out2; 679 } 680 info->max_frmr_depth = min_t(int, 681 smbd_max_frmr_depth, 682 sc->ib.dev->attrs.max_fast_reg_page_list_len); 683 info->mr_type = IB_MR_TYPE_MEM_REG; 684 if (sc->ib.dev->attrs.kernel_cap_flags & IBK_SG_GAPS_REG) 685 info->mr_type = IB_MR_TYPE_SG_GAPS; 686 687 sc->ib.pd = ib_alloc_pd(sc->ib.dev, 0); 688 if (IS_ERR(sc->ib.pd)) { 689 rc = PTR_ERR(sc->ib.pd); 690 log_rdma_event(ERR, "ib_alloc_pd() returned %d\n", rc); 691 goto out2; 692 } 693 694 return 0; 695 696 out2: 697 rdma_destroy_id(sc->rdma.cm_id); 698 sc->rdma.cm_id = NULL; 699 700 out1: 701 return rc; 702 } 703 704 /* 705 * Send a negotiation request message to the peer 706 * The negotiation procedure is in [MS-SMBD] 3.1.5.2 and 3.1.5.3 707 * After negotiation, the transport is connected and ready for 708 * carrying upper layer SMB payload 709 */ 710 static int smbd_post_send_negotiate_req(struct smbd_connection *info) 711 { 712 struct smbdirect_socket *sc = &info->socket; 713 struct smbdirect_socket_parameters *sp = &sc->parameters; 714 struct ib_send_wr send_wr; 715 int rc = -ENOMEM; 716 struct smbdirect_send_io *request; 717 struct smbdirect_negotiate_req *packet; 718 719 request = mempool_alloc(sc->send_io.mem.pool, GFP_KERNEL); 720 if (!request) 721 return rc; 722 723 request->socket = sc; 724 725 packet = smbdirect_send_io_payload(request); 726 packet->min_version = cpu_to_le16(SMBDIRECT_V1); 727 packet->max_version = cpu_to_le16(SMBDIRECT_V1); 728 packet->reserved = 0; 729 packet->credits_requested = cpu_to_le16(sp->send_credit_target); 730 packet->preferred_send_size = cpu_to_le32(sp->max_send_size); 731 packet->max_receive_size = cpu_to_le32(sp->max_recv_size); 732 packet->max_fragmented_size = 733 cpu_to_le32(sp->max_fragmented_recv_size); 734 735 request->num_sge = 1; 736 request->sge[0].addr = ib_dma_map_single( 737 sc->ib.dev, (void *)packet, 738 sizeof(*packet), DMA_TO_DEVICE); 739 if (ib_dma_mapping_error(sc->ib.dev, request->sge[0].addr)) { 740 rc = -EIO; 741 goto dma_mapping_failed; 742 } 743 744 
request->sge[0].length = sizeof(*packet); 745 request->sge[0].lkey = sc->ib.pd->local_dma_lkey; 746 747 ib_dma_sync_single_for_device( 748 sc->ib.dev, request->sge[0].addr, 749 request->sge[0].length, DMA_TO_DEVICE); 750 751 request->cqe.done = send_done; 752 753 send_wr.next = NULL; 754 send_wr.wr_cqe = &request->cqe; 755 send_wr.sg_list = request->sge; 756 send_wr.num_sge = request->num_sge; 757 send_wr.opcode = IB_WR_SEND; 758 send_wr.send_flags = IB_SEND_SIGNALED; 759 760 log_rdma_send(INFO, "sge addr=0x%llx length=%u lkey=0x%x\n", 761 request->sge[0].addr, 762 request->sge[0].length, request->sge[0].lkey); 763 764 atomic_inc(&info->send_pending); 765 rc = ib_post_send(sc->ib.qp, &send_wr, NULL); 766 if (!rc) 767 return 0; 768 769 /* if we reach here, post send failed */ 770 log_rdma_send(ERR, "ib_post_send failed rc=%d\n", rc); 771 atomic_dec(&info->send_pending); 772 ib_dma_unmap_single(sc->ib.dev, request->sge[0].addr, 773 request->sge[0].length, DMA_TO_DEVICE); 774 775 smbd_disconnect_rdma_connection(info); 776 777 dma_mapping_failed: 778 mempool_free(request, sc->send_io.mem.pool); 779 return rc; 780 } 781 782 /* 783 * Extend the credits to remote peer 784 * This implements [MS-SMBD] 3.1.5.9 785 * The idea is that we should extend credits to remote peer as quickly as 786 * it's allowed, to maintain data flow. We allocate as much receive 787 * buffer as possible, and extend the receive credits to remote peer 788 * return value: the new credtis being granted. 789 */ 790 static int manage_credits_prior_sending(struct smbd_connection *info) 791 { 792 int new_credits; 793 794 spin_lock(&info->lock_new_credits_offered); 795 new_credits = info->new_credits_offered; 796 info->new_credits_offered = 0; 797 spin_unlock(&info->lock_new_credits_offered); 798 799 return new_credits; 800 } 801 802 /* 803 * Check if we need to send a KEEP_ALIVE message 804 * The idle connection timer triggers a KEEP_ALIVE message when expires 805 * SMBDIRECT_FLAG_RESPONSE_REQUESTED is set in the message flag to have peer send 806 * back a response. 
807 * return value: 808 * 1 if SMBDIRECT_FLAG_RESPONSE_REQUESTED needs to be set 809 * 0: otherwise 810 */ 811 static int manage_keep_alive_before_sending(struct smbd_connection *info) 812 { 813 if (info->keep_alive_requested == KEEP_ALIVE_PENDING) { 814 info->keep_alive_requested = KEEP_ALIVE_SENT; 815 return 1; 816 } 817 return 0; 818 } 819 820 /* Post the send request */ 821 static int smbd_post_send(struct smbd_connection *info, 822 struct smbdirect_send_io *request) 823 { 824 struct smbdirect_socket *sc = &info->socket; 825 struct smbdirect_socket_parameters *sp = &sc->parameters; 826 struct ib_send_wr send_wr; 827 int rc, i; 828 829 for (i = 0; i < request->num_sge; i++) { 830 log_rdma_send(INFO, 831 "rdma_request sge[%d] addr=0x%llx length=%u\n", 832 i, request->sge[i].addr, request->sge[i].length); 833 ib_dma_sync_single_for_device( 834 sc->ib.dev, 835 request->sge[i].addr, 836 request->sge[i].length, 837 DMA_TO_DEVICE); 838 } 839 840 request->cqe.done = send_done; 841 842 send_wr.next = NULL; 843 send_wr.wr_cqe = &request->cqe; 844 send_wr.sg_list = request->sge; 845 send_wr.num_sge = request->num_sge; 846 send_wr.opcode = IB_WR_SEND; 847 send_wr.send_flags = IB_SEND_SIGNALED; 848 849 rc = ib_post_send(sc->ib.qp, &send_wr, NULL); 850 if (rc) { 851 log_rdma_send(ERR, "ib_post_send failed rc=%d\n", rc); 852 smbd_disconnect_rdma_connection(info); 853 rc = -EAGAIN; 854 } else 855 /* Reset timer for idle connection after packet is sent */ 856 mod_delayed_work(info->workqueue, &info->idle_timer_work, 857 msecs_to_jiffies(sp->keepalive_interval_msec)); 858 859 return rc; 860 } 861 862 static int smbd_post_send_iter(struct smbd_connection *info, 863 struct iov_iter *iter, 864 int *_remaining_data_length) 865 { 866 struct smbdirect_socket *sc = &info->socket; 867 struct smbdirect_socket_parameters *sp = &sc->parameters; 868 int i, rc; 869 int header_length; 870 int data_length; 871 struct smbdirect_send_io *request; 872 struct smbdirect_data_transfer *packet; 873 int new_credits = 0; 874 875 wait_credit: 876 /* Wait for send credits. 
A SMBD packet needs one credit */ 877 rc = wait_event_interruptible(info->wait_send_queue, 878 atomic_read(&info->send_credits) > 0 || 879 sc->status != SMBDIRECT_SOCKET_CONNECTED); 880 if (rc) 881 goto err_wait_credit; 882 883 if (sc->status != SMBDIRECT_SOCKET_CONNECTED) { 884 log_outgoing(ERR, "disconnected not sending on wait_credit\n"); 885 rc = -EAGAIN; 886 goto err_wait_credit; 887 } 888 if (unlikely(atomic_dec_return(&info->send_credits) < 0)) { 889 atomic_inc(&info->send_credits); 890 goto wait_credit; 891 } 892 893 wait_send_queue: 894 wait_event(info->wait_post_send, 895 atomic_read(&info->send_pending) < sp->send_credit_target || 896 sc->status != SMBDIRECT_SOCKET_CONNECTED); 897 898 if (sc->status != SMBDIRECT_SOCKET_CONNECTED) { 899 log_outgoing(ERR, "disconnected not sending on wait_send_queue\n"); 900 rc = -EAGAIN; 901 goto err_wait_send_queue; 902 } 903 904 if (unlikely(atomic_inc_return(&info->send_pending) > 905 sp->send_credit_target)) { 906 atomic_dec(&info->send_pending); 907 goto wait_send_queue; 908 } 909 910 request = mempool_alloc(sc->send_io.mem.pool, GFP_KERNEL); 911 if (!request) { 912 rc = -ENOMEM; 913 goto err_alloc; 914 } 915 916 request->socket = sc; 917 memset(request->sge, 0, sizeof(request->sge)); 918 919 /* Fill in the data payload to find out how much data we can add */ 920 if (iter) { 921 struct smb_extract_to_rdma extract = { 922 .nr_sge = 1, 923 .max_sge = SMBDIRECT_SEND_IO_MAX_SGE, 924 .sge = request->sge, 925 .device = sc->ib.dev, 926 .local_dma_lkey = sc->ib.pd->local_dma_lkey, 927 .direction = DMA_TO_DEVICE, 928 }; 929 size_t payload_len = umin(*_remaining_data_length, 930 sp->max_send_size - sizeof(*packet)); 931 932 rc = smb_extract_iter_to_rdma(iter, payload_len, 933 &extract); 934 if (rc < 0) 935 goto err_dma; 936 data_length = rc; 937 request->num_sge = extract.nr_sge; 938 *_remaining_data_length -= data_length; 939 } else { 940 data_length = 0; 941 request->num_sge = 1; 942 } 943 944 /* Fill in the packet header */ 945 packet = smbdirect_send_io_payload(request); 946 packet->credits_requested = cpu_to_le16(sp->send_credit_target); 947 948 new_credits = manage_credits_prior_sending(info); 949 atomic_add(new_credits, &info->receive_credits); 950 packet->credits_granted = cpu_to_le16(new_credits); 951 952 info->send_immediate = false; 953 954 packet->flags = 0; 955 if (manage_keep_alive_before_sending(info)) 956 packet->flags |= cpu_to_le16(SMBDIRECT_FLAG_RESPONSE_REQUESTED); 957 958 packet->reserved = 0; 959 if (!data_length) 960 packet->data_offset = 0; 961 else 962 packet->data_offset = cpu_to_le32(24); 963 packet->data_length = cpu_to_le32(data_length); 964 packet->remaining_data_length = cpu_to_le32(*_remaining_data_length); 965 packet->padding = 0; 966 967 log_outgoing(INFO, "credits_requested=%d credits_granted=%d data_offset=%d data_length=%d remaining_data_length=%d\n", 968 le16_to_cpu(packet->credits_requested), 969 le16_to_cpu(packet->credits_granted), 970 le32_to_cpu(packet->data_offset), 971 le32_to_cpu(packet->data_length), 972 le32_to_cpu(packet->remaining_data_length)); 973 974 /* Map the packet to DMA */ 975 header_length = sizeof(struct smbdirect_data_transfer); 976 /* If this is a packet without payload, don't send padding */ 977 if (!data_length) 978 header_length = offsetof(struct smbdirect_data_transfer, padding); 979 980 request->sge[0].addr = ib_dma_map_single(sc->ib.dev, 981 (void *)packet, 982 header_length, 983 DMA_TO_DEVICE); 984 if (ib_dma_mapping_error(sc->ib.dev, request->sge[0].addr)) { 985 rc = -EIO; 986 
request->sge[0].addr = 0; 987 goto err_dma; 988 } 989 990 request->sge[0].length = header_length; 991 request->sge[0].lkey = sc->ib.pd->local_dma_lkey; 992 993 rc = smbd_post_send(info, request); 994 if (!rc) 995 return 0; 996 997 err_dma: 998 for (i = 0; i < request->num_sge; i++) 999 if (request->sge[i].addr) 1000 ib_dma_unmap_single(sc->ib.dev, 1001 request->sge[i].addr, 1002 request->sge[i].length, 1003 DMA_TO_DEVICE); 1004 mempool_free(request, sc->send_io.mem.pool); 1005 1006 /* roll back receive credits and credits to be offered */ 1007 spin_lock(&info->lock_new_credits_offered); 1008 info->new_credits_offered += new_credits; 1009 spin_unlock(&info->lock_new_credits_offered); 1010 atomic_sub(new_credits, &info->receive_credits); 1011 1012 err_alloc: 1013 if (atomic_dec_and_test(&info->send_pending)) 1014 wake_up(&info->wait_send_pending); 1015 1016 err_wait_send_queue: 1017 /* roll back send credits and pending */ 1018 atomic_inc(&info->send_credits); 1019 1020 err_wait_credit: 1021 return rc; 1022 } 1023 1024 /* 1025 * Send an empty message 1026 * Empty message is used to extend credits to peer to for keep live 1027 * while there is no upper layer payload to send at the time 1028 */ 1029 static int smbd_post_send_empty(struct smbd_connection *info) 1030 { 1031 int remaining_data_length = 0; 1032 1033 info->count_send_empty++; 1034 return smbd_post_send_iter(info, NULL, &remaining_data_length); 1035 } 1036 1037 static int smbd_post_send_full_iter(struct smbd_connection *info, 1038 struct iov_iter *iter, 1039 int *_remaining_data_length) 1040 { 1041 int rc = 0; 1042 1043 /* 1044 * smbd_post_send_iter() respects the 1045 * negotiated max_send_size, so we need to 1046 * loop until the full iter is posted 1047 */ 1048 1049 while (iov_iter_count(iter) > 0) { 1050 rc = smbd_post_send_iter(info, iter, _remaining_data_length); 1051 if (rc < 0) 1052 break; 1053 } 1054 1055 return rc; 1056 } 1057 1058 /* 1059 * Post a receive request to the transport 1060 * The remote peer can only send data when a receive request is posted 1061 * The interaction is controlled by send/receive credit system 1062 */ 1063 static int smbd_post_recv( 1064 struct smbd_connection *info, struct smbdirect_recv_io *response) 1065 { 1066 struct smbdirect_socket *sc = &info->socket; 1067 struct smbdirect_socket_parameters *sp = &sc->parameters; 1068 struct ib_recv_wr recv_wr; 1069 int rc = -EIO; 1070 1071 response->sge.addr = ib_dma_map_single( 1072 sc->ib.dev, response->packet, 1073 sp->max_recv_size, DMA_FROM_DEVICE); 1074 if (ib_dma_mapping_error(sc->ib.dev, response->sge.addr)) 1075 return rc; 1076 1077 response->sge.length = sp->max_recv_size; 1078 response->sge.lkey = sc->ib.pd->local_dma_lkey; 1079 1080 response->cqe.done = recv_done; 1081 1082 recv_wr.wr_cqe = &response->cqe; 1083 recv_wr.next = NULL; 1084 recv_wr.sg_list = &response->sge; 1085 recv_wr.num_sge = 1; 1086 1087 rc = ib_post_recv(sc->ib.qp, &recv_wr, NULL); 1088 if (rc) { 1089 ib_dma_unmap_single(sc->ib.dev, response->sge.addr, 1090 response->sge.length, DMA_FROM_DEVICE); 1091 response->sge.length = 0; 1092 smbd_disconnect_rdma_connection(info); 1093 log_rdma_recv(ERR, "ib_post_recv failed rc=%d\n", rc); 1094 } 1095 1096 return rc; 1097 } 1098 1099 /* Perform SMBD negotiate according to [MS-SMBD] 3.1.5.2 */ 1100 static int smbd_negotiate(struct smbd_connection *info) 1101 { 1102 struct smbdirect_socket *sc = &info->socket; 1103 int rc; 1104 struct smbdirect_recv_io *response = get_receive_buffer(info); 1105 1106 sc->recv_io.expected = 
SMBDIRECT_EXPECT_NEGOTIATE_REP; 1107 rc = smbd_post_recv(info, response); 1108 log_rdma_event(INFO, "smbd_post_recv rc=%d iov.addr=0x%llx iov.length=%u iov.lkey=0x%x\n", 1109 rc, response->sge.addr, 1110 response->sge.length, response->sge.lkey); 1111 if (rc) { 1112 put_receive_buffer(info, response); 1113 return rc; 1114 } 1115 1116 init_completion(&info->negotiate_completion); 1117 info->negotiate_done = false; 1118 rc = smbd_post_send_negotiate_req(info); 1119 if (rc) 1120 return rc; 1121 1122 rc = wait_for_completion_interruptible_timeout( 1123 &info->negotiate_completion, SMBD_NEGOTIATE_TIMEOUT * HZ); 1124 log_rdma_event(INFO, "wait_for_completion_timeout rc=%d\n", rc); 1125 1126 if (info->negotiate_done) 1127 return 0; 1128 1129 if (rc == 0) 1130 rc = -ETIMEDOUT; 1131 else if (rc == -ERESTARTSYS) 1132 rc = -EINTR; 1133 else 1134 rc = -ENOTCONN; 1135 1136 return rc; 1137 } 1138 1139 /* 1140 * Implement Connection.FragmentReassemblyBuffer defined in [MS-SMBD] 3.1.1.1 1141 * This is a queue for reassembling upper layer payload and present to upper 1142 * layer. All the inncoming payload go to the reassembly queue, regardless of 1143 * if reassembly is required. The uuper layer code reads from the queue for all 1144 * incoming payloads. 1145 * Put a received packet to the reassembly queue 1146 * response: the packet received 1147 * data_length: the size of payload in this packet 1148 */ 1149 static void enqueue_reassembly( 1150 struct smbd_connection *info, 1151 struct smbdirect_recv_io *response, 1152 int data_length) 1153 { 1154 struct smbdirect_socket *sc = &info->socket; 1155 1156 spin_lock(&sc->recv_io.reassembly.lock); 1157 list_add_tail(&response->list, &sc->recv_io.reassembly.list); 1158 sc->recv_io.reassembly.queue_length++; 1159 /* 1160 * Make sure reassembly_data_length is updated after list and 1161 * reassembly_queue_length are updated. On the dequeue side 1162 * reassembly_data_length is checked without a lock to determine 1163 * if reassembly_queue_length and list is up to date 1164 */ 1165 virt_wmb(); 1166 sc->recv_io.reassembly.data_length += data_length; 1167 spin_unlock(&sc->recv_io.reassembly.lock); 1168 info->count_reassembly_queue++; 1169 info->count_enqueue_reassembly_queue++; 1170 } 1171 1172 /* 1173 * Get the first entry at the front of reassembly queue 1174 * Caller is responsible for locking 1175 * return value: the first entry if any, NULL if queue is empty 1176 */ 1177 static struct smbdirect_recv_io *_get_first_reassembly(struct smbd_connection *info) 1178 { 1179 struct smbdirect_socket *sc = &info->socket; 1180 struct smbdirect_recv_io *ret = NULL; 1181 1182 if (!list_empty(&sc->recv_io.reassembly.list)) { 1183 ret = list_first_entry( 1184 &sc->recv_io.reassembly.list, 1185 struct smbdirect_recv_io, list); 1186 } 1187 return ret; 1188 } 1189 1190 /* 1191 * Get a receive buffer 1192 * For each remote send, we need to post a receive. The receive buffers are 1193 * pre-allocated in advance. 
1194 * return value: the receive buffer, NULL if none is available 1195 */ 1196 static struct smbdirect_recv_io *get_receive_buffer(struct smbd_connection *info) 1197 { 1198 struct smbdirect_socket *sc = &info->socket; 1199 struct smbdirect_recv_io *ret = NULL; 1200 unsigned long flags; 1201 1202 spin_lock_irqsave(&sc->recv_io.free.lock, flags); 1203 if (!list_empty(&sc->recv_io.free.list)) { 1204 ret = list_first_entry( 1205 &sc->recv_io.free.list, 1206 struct smbdirect_recv_io, list); 1207 list_del(&ret->list); 1208 info->count_receive_queue--; 1209 info->count_get_receive_buffer++; 1210 } 1211 spin_unlock_irqrestore(&sc->recv_io.free.lock, flags); 1212 1213 return ret; 1214 } 1215 1216 /* 1217 * Return a receive buffer 1218 * Upon returning of a receive buffer, we can post new receive and extend 1219 * more receive credits to remote peer. This is done immediately after a 1220 * receive buffer is returned. 1221 */ 1222 static void put_receive_buffer( 1223 struct smbd_connection *info, struct smbdirect_recv_io *response) 1224 { 1225 struct smbdirect_socket *sc = &info->socket; 1226 unsigned long flags; 1227 1228 if (likely(response->sge.length != 0)) { 1229 ib_dma_unmap_single(sc->ib.dev, 1230 response->sge.addr, 1231 response->sge.length, 1232 DMA_FROM_DEVICE); 1233 response->sge.length = 0; 1234 } 1235 1236 spin_lock_irqsave(&sc->recv_io.free.lock, flags); 1237 list_add_tail(&response->list, &sc->recv_io.free.list); 1238 info->count_receive_queue++; 1239 info->count_put_receive_buffer++; 1240 spin_unlock_irqrestore(&sc->recv_io.free.lock, flags); 1241 1242 queue_work(info->workqueue, &info->post_send_credits_work); 1243 } 1244 1245 /* Preallocate all receive buffer on transport establishment */ 1246 static int allocate_receive_buffers(struct smbd_connection *info, int num_buf) 1247 { 1248 struct smbdirect_socket *sc = &info->socket; 1249 struct smbdirect_recv_io *response; 1250 int i; 1251 1252 INIT_LIST_HEAD(&sc->recv_io.reassembly.list); 1253 spin_lock_init(&sc->recv_io.reassembly.lock); 1254 sc->recv_io.reassembly.data_length = 0; 1255 sc->recv_io.reassembly.queue_length = 0; 1256 1257 INIT_LIST_HEAD(&sc->recv_io.free.list); 1258 spin_lock_init(&sc->recv_io.free.lock); 1259 info->count_receive_queue = 0; 1260 1261 init_waitqueue_head(&info->wait_receive_queues); 1262 1263 for (i = 0; i < num_buf; i++) { 1264 response = mempool_alloc(sc->recv_io.mem.pool, GFP_KERNEL); 1265 if (!response) 1266 goto allocate_failed; 1267 1268 response->socket = sc; 1269 response->sge.length = 0; 1270 list_add_tail(&response->list, &sc->recv_io.free.list); 1271 info->count_receive_queue++; 1272 } 1273 1274 return 0; 1275 1276 allocate_failed: 1277 while (!list_empty(&sc->recv_io.free.list)) { 1278 response = list_first_entry( 1279 &sc->recv_io.free.list, 1280 struct smbdirect_recv_io, list); 1281 list_del(&response->list); 1282 info->count_receive_queue--; 1283 1284 mempool_free(response, sc->recv_io.mem.pool); 1285 } 1286 return -ENOMEM; 1287 } 1288 1289 static void destroy_receive_buffers(struct smbd_connection *info) 1290 { 1291 struct smbdirect_socket *sc = &info->socket; 1292 struct smbdirect_recv_io *response; 1293 1294 while ((response = get_receive_buffer(info))) 1295 mempool_free(response, sc->recv_io.mem.pool); 1296 } 1297 1298 /* Implement idle connection timer [MS-SMBD] 3.1.6.2 */ 1299 static void idle_connection_timer(struct work_struct *work) 1300 { 1301 struct smbd_connection *info = container_of( 1302 work, struct smbd_connection, 1303 idle_timer_work.work); 1304 struct smbdirect_socket 
*sc = &info->socket; 1305 struct smbdirect_socket_parameters *sp = &sc->parameters; 1306 1307 if (info->keep_alive_requested != KEEP_ALIVE_NONE) { 1308 log_keep_alive(ERR, 1309 "error status info->keep_alive_requested=%d\n", 1310 info->keep_alive_requested); 1311 smbd_disconnect_rdma_connection(info); 1312 return; 1313 } 1314 1315 log_keep_alive(INFO, "about to send an empty idle message\n"); 1316 smbd_post_send_empty(info); 1317 1318 /* Setup the next idle timeout work */ 1319 queue_delayed_work(info->workqueue, &info->idle_timer_work, 1320 msecs_to_jiffies(sp->keepalive_interval_msec)); 1321 } 1322 1323 /* 1324 * Destroy the transport and related RDMA and memory resources 1325 * Need to go through all the pending counters and make sure on one is using 1326 * the transport while it is destroyed 1327 */ 1328 void smbd_destroy(struct TCP_Server_Info *server) 1329 { 1330 struct smbd_connection *info = server->smbd_conn; 1331 struct smbdirect_socket *sc; 1332 struct smbdirect_socket_parameters *sp; 1333 struct smbdirect_recv_io *response; 1334 unsigned long flags; 1335 1336 if (!info) { 1337 log_rdma_event(INFO, "rdma session already destroyed\n"); 1338 return; 1339 } 1340 sc = &info->socket; 1341 sp = &sc->parameters; 1342 1343 log_rdma_event(INFO, "destroying rdma session\n"); 1344 if (sc->status != SMBDIRECT_SOCKET_DISCONNECTED) { 1345 rdma_disconnect(sc->rdma.cm_id); 1346 log_rdma_event(INFO, "wait for transport being disconnected\n"); 1347 wait_event_interruptible( 1348 info->status_wait, 1349 sc->status == SMBDIRECT_SOCKET_DISCONNECTED); 1350 } 1351 1352 log_rdma_event(INFO, "cancelling post_send_credits_work\n"); 1353 disable_work_sync(&info->post_send_credits_work); 1354 1355 log_rdma_event(INFO, "destroying qp\n"); 1356 ib_drain_qp(sc->ib.qp); 1357 rdma_destroy_qp(sc->rdma.cm_id); 1358 sc->ib.qp = NULL; 1359 1360 log_rdma_event(INFO, "cancelling idle timer\n"); 1361 disable_delayed_work_sync(&info->idle_timer_work); 1362 1363 /* It's not possible for upper layer to get to reassembly */ 1364 log_rdma_event(INFO, "drain the reassembly queue\n"); 1365 do { 1366 spin_lock_irqsave(&sc->recv_io.reassembly.lock, flags); 1367 response = _get_first_reassembly(info); 1368 if (response) { 1369 list_del(&response->list); 1370 spin_unlock_irqrestore( 1371 &sc->recv_io.reassembly.lock, flags); 1372 put_receive_buffer(info, response); 1373 } else 1374 spin_unlock_irqrestore( 1375 &sc->recv_io.reassembly.lock, flags); 1376 } while (response); 1377 sc->recv_io.reassembly.data_length = 0; 1378 1379 log_rdma_event(INFO, "free receive buffers\n"); 1380 wait_event(info->wait_receive_queues, 1381 info->count_receive_queue == sp->recv_credit_max); 1382 destroy_receive_buffers(info); 1383 1384 /* 1385 * For performance reasons, memory registration and deregistration 1386 * are not locked by srv_mutex. It is possible some processes are 1387 * blocked on transport srv_mutex while holding memory registration. 1388 * Release the transport srv_mutex to allow them to hit the failure 1389 * path when sending data, and then release memory registrations. 
1390 */ 1391 log_rdma_event(INFO, "freeing mr list\n"); 1392 wake_up_interruptible_all(&info->wait_mr); 1393 while (atomic_read(&info->mr_used_count)) { 1394 cifs_server_unlock(server); 1395 msleep(1000); 1396 cifs_server_lock(server); 1397 } 1398 destroy_mr_list(info); 1399 1400 ib_free_cq(sc->ib.send_cq); 1401 ib_free_cq(sc->ib.recv_cq); 1402 ib_dealloc_pd(sc->ib.pd); 1403 rdma_destroy_id(sc->rdma.cm_id); 1404 1405 /* free mempools */ 1406 mempool_destroy(sc->send_io.mem.pool); 1407 kmem_cache_destroy(sc->send_io.mem.cache); 1408 1409 mempool_destroy(sc->recv_io.mem.pool); 1410 kmem_cache_destroy(sc->recv_io.mem.cache); 1411 1412 sc->status = SMBDIRECT_SOCKET_DESTROYED; 1413 1414 destroy_workqueue(info->workqueue); 1415 log_rdma_event(INFO, "rdma session destroyed\n"); 1416 kfree(info); 1417 server->smbd_conn = NULL; 1418 } 1419 1420 /* 1421 * Reconnect this SMBD connection, called from upper layer 1422 * return value: 0 on success, or actual error code 1423 */ 1424 int smbd_reconnect(struct TCP_Server_Info *server) 1425 { 1426 log_rdma_event(INFO, "reconnecting rdma session\n"); 1427 1428 if (!server->smbd_conn) { 1429 log_rdma_event(INFO, "rdma session already destroyed\n"); 1430 goto create_conn; 1431 } 1432 1433 /* 1434 * This is possible if transport is disconnected and we haven't received 1435 * notification from RDMA, but upper layer has detected timeout 1436 */ 1437 if (server->smbd_conn->socket.status == SMBDIRECT_SOCKET_CONNECTED) { 1438 log_rdma_event(INFO, "disconnecting transport\n"); 1439 smbd_destroy(server); 1440 } 1441 1442 create_conn: 1443 log_rdma_event(INFO, "creating rdma session\n"); 1444 server->smbd_conn = smbd_get_connection( 1445 server, (struct sockaddr *) &server->dstaddr); 1446 1447 if (server->smbd_conn) { 1448 cifs_dbg(VFS, "RDMA transport re-established\n"); 1449 trace_smb3_smbd_connect_done(server->hostname, server->conn_id, &server->dstaddr); 1450 return 0; 1451 } 1452 trace_smb3_smbd_connect_err(server->hostname, server->conn_id, &server->dstaddr); 1453 return -ENOENT; 1454 } 1455 1456 static void destroy_caches_and_workqueue(struct smbd_connection *info) 1457 { 1458 struct smbdirect_socket *sc = &info->socket; 1459 1460 destroy_receive_buffers(info); 1461 destroy_workqueue(info->workqueue); 1462 mempool_destroy(sc->recv_io.mem.pool); 1463 kmem_cache_destroy(sc->recv_io.mem.cache); 1464 mempool_destroy(sc->send_io.mem.pool); 1465 kmem_cache_destroy(sc->send_io.mem.cache); 1466 } 1467 1468 #define MAX_NAME_LEN 80 1469 static int allocate_caches_and_workqueue(struct smbd_connection *info) 1470 { 1471 struct smbdirect_socket *sc = &info->socket; 1472 struct smbdirect_socket_parameters *sp = &sc->parameters; 1473 char name[MAX_NAME_LEN]; 1474 int rc; 1475 1476 if (WARN_ON_ONCE(sp->max_recv_size < sizeof(struct smbdirect_data_transfer))) 1477 return -ENOMEM; 1478 1479 scnprintf(name, MAX_NAME_LEN, "smbdirect_send_io_%p", info); 1480 sc->send_io.mem.cache = 1481 kmem_cache_create( 1482 name, 1483 sizeof(struct smbdirect_send_io) + 1484 sizeof(struct smbdirect_data_transfer), 1485 0, SLAB_HWCACHE_ALIGN, NULL); 1486 if (!sc->send_io.mem.cache) 1487 return -ENOMEM; 1488 1489 sc->send_io.mem.pool = 1490 mempool_create(sp->send_credit_target, mempool_alloc_slab, 1491 mempool_free_slab, sc->send_io.mem.cache); 1492 if (!sc->send_io.mem.pool) 1493 goto out1; 1494 1495 scnprintf(name, MAX_NAME_LEN, "smbdirect_recv_io_%p", info); 1496 1497 struct kmem_cache_args response_args = { 1498 .align = __alignof__(struct smbdirect_recv_io), 1499 .useroffset = (offsetof(struct 
smbdirect_recv_io, packet) + 1500 sizeof(struct smbdirect_data_transfer)), 1501 .usersize = sp->max_recv_size - sizeof(struct smbdirect_data_transfer), 1502 }; 1503 sc->recv_io.mem.cache = 1504 kmem_cache_create(name, 1505 sizeof(struct smbdirect_recv_io) + sp->max_recv_size, 1506 &response_args, SLAB_HWCACHE_ALIGN); 1507 if (!sc->recv_io.mem.cache) 1508 goto out2; 1509 1510 sc->recv_io.mem.pool = 1511 mempool_create(sp->recv_credit_max, mempool_alloc_slab, 1512 mempool_free_slab, sc->recv_io.mem.cache); 1513 if (!sc->recv_io.mem.pool) 1514 goto out3; 1515 1516 scnprintf(name, MAX_NAME_LEN, "smbd_%p", info); 1517 info->workqueue = create_workqueue(name); 1518 if (!info->workqueue) 1519 goto out4; 1520 1521 rc = allocate_receive_buffers(info, sp->recv_credit_max); 1522 if (rc) { 1523 log_rdma_event(ERR, "failed to allocate receive buffers\n"); 1524 goto out5; 1525 } 1526 1527 return 0; 1528 1529 out5: 1530 destroy_workqueue(info->workqueue); 1531 out4: 1532 mempool_destroy(sc->recv_io.mem.pool); 1533 out3: 1534 kmem_cache_destroy(sc->recv_io.mem.cache); 1535 out2: 1536 mempool_destroy(sc->send_io.mem.pool); 1537 out1: 1538 kmem_cache_destroy(sc->send_io.mem.cache); 1539 return -ENOMEM; 1540 } 1541 1542 /* Create a SMBD connection, called by upper layer */ 1543 static struct smbd_connection *_smbd_get_connection( 1544 struct TCP_Server_Info *server, struct sockaddr *dstaddr, int port) 1545 { 1546 int rc; 1547 struct smbd_connection *info; 1548 struct smbdirect_socket *sc; 1549 struct smbdirect_socket_parameters *sp; 1550 struct rdma_conn_param conn_param; 1551 struct ib_qp_init_attr qp_attr; 1552 struct sockaddr_in *addr_in = (struct sockaddr_in *) dstaddr; 1553 struct ib_port_immutable port_immutable; 1554 u32 ird_ord_hdr[2]; 1555 1556 info = kzalloc(sizeof(struct smbd_connection), GFP_KERNEL); 1557 if (!info) 1558 return NULL; 1559 sc = &info->socket; 1560 sp = &sc->parameters; 1561 1562 sc->status = SMBDIRECT_SOCKET_CONNECTING; 1563 rc = smbd_ia_open(info, dstaddr, port); 1564 if (rc) { 1565 log_rdma_event(INFO, "smbd_ia_open rc=%d\n", rc); 1566 goto create_id_failed; 1567 } 1568 1569 if (smbd_send_credit_target > sc->ib.dev->attrs.max_cqe || 1570 smbd_send_credit_target > sc->ib.dev->attrs.max_qp_wr) { 1571 log_rdma_event(ERR, "consider lowering send_credit_target = %d. Possible CQE overrun, device reporting max_cqe %d max_qp_wr %d\n", 1572 smbd_send_credit_target, 1573 sc->ib.dev->attrs.max_cqe, 1574 sc->ib.dev->attrs.max_qp_wr); 1575 goto config_failed; 1576 } 1577 1578 if (smbd_receive_credit_max > sc->ib.dev->attrs.max_cqe || 1579 smbd_receive_credit_max > sc->ib.dev->attrs.max_qp_wr) { 1580 log_rdma_event(ERR, "consider lowering receive_credit_max = %d. 
Possible CQE overrun, device reporting max_cqe %d max_qp_wr %d\n", 1581 smbd_receive_credit_max, 1582 sc->ib.dev->attrs.max_cqe, 1583 sc->ib.dev->attrs.max_qp_wr); 1584 goto config_failed; 1585 } 1586 1587 sp->recv_credit_max = smbd_receive_credit_max; 1588 sp->send_credit_target = smbd_send_credit_target; 1589 sp->max_send_size = smbd_max_send_size; 1590 sp->max_fragmented_recv_size = smbd_max_fragmented_recv_size; 1591 sp->max_recv_size = smbd_max_receive_size; 1592 sp->keepalive_interval_msec = smbd_keep_alive_interval * 1000; 1593 1594 if (sc->ib.dev->attrs.max_send_sge < SMBDIRECT_SEND_IO_MAX_SGE || 1595 sc->ib.dev->attrs.max_recv_sge < SMBDIRECT_RECV_IO_MAX_SGE) { 1596 log_rdma_event(ERR, 1597 "device %.*s max_send_sge/max_recv_sge = %d/%d too small\n", 1598 IB_DEVICE_NAME_MAX, 1599 sc->ib.dev->name, 1600 sc->ib.dev->attrs.max_send_sge, 1601 sc->ib.dev->attrs.max_recv_sge); 1602 goto config_failed; 1603 } 1604 1605 sc->ib.send_cq = 1606 ib_alloc_cq_any(sc->ib.dev, info, 1607 sp->send_credit_target, IB_POLL_SOFTIRQ); 1608 if (IS_ERR(sc->ib.send_cq)) { 1609 sc->ib.send_cq = NULL; 1610 goto alloc_cq_failed; 1611 } 1612 1613 sc->ib.recv_cq = 1614 ib_alloc_cq_any(sc->ib.dev, info, 1615 sp->recv_credit_max, IB_POLL_SOFTIRQ); 1616 if (IS_ERR(sc->ib.recv_cq)) { 1617 sc->ib.recv_cq = NULL; 1618 goto alloc_cq_failed; 1619 } 1620 1621 memset(&qp_attr, 0, sizeof(qp_attr)); 1622 qp_attr.event_handler = smbd_qp_async_error_upcall; 1623 qp_attr.qp_context = info; 1624 qp_attr.cap.max_send_wr = sp->send_credit_target; 1625 qp_attr.cap.max_recv_wr = sp->recv_credit_max; 1626 qp_attr.cap.max_send_sge = SMBDIRECT_SEND_IO_MAX_SGE; 1627 qp_attr.cap.max_recv_sge = SMBDIRECT_RECV_IO_MAX_SGE; 1628 qp_attr.cap.max_inline_data = 0; 1629 qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR; 1630 qp_attr.qp_type = IB_QPT_RC; 1631 qp_attr.send_cq = sc->ib.send_cq; 1632 qp_attr.recv_cq = sc->ib.recv_cq; 1633 qp_attr.port_num = ~0; 1634 1635 rc = rdma_create_qp(sc->rdma.cm_id, sc->ib.pd, &qp_attr); 1636 if (rc) { 1637 log_rdma_event(ERR, "rdma_create_qp failed %i\n", rc); 1638 goto create_qp_failed; 1639 } 1640 sc->ib.qp = sc->rdma.cm_id->qp; 1641 1642 memset(&conn_param, 0, sizeof(conn_param)); 1643 conn_param.initiator_depth = 0; 1644 1645 conn_param.responder_resources = 1646 min(sc->ib.dev->attrs.max_qp_rd_atom, 1647 SMBD_CM_RESPONDER_RESOURCES); 1648 info->responder_resources = conn_param.responder_resources; 1649 log_rdma_mr(INFO, "responder_resources=%d\n", 1650 info->responder_resources); 1651 1652 /* Need to send IRD/ORD in private data for iWARP */ 1653 sc->ib.dev->ops.get_port_immutable( 1654 sc->ib.dev, sc->rdma.cm_id->port_num, &port_immutable); 1655 if (port_immutable.core_cap_flags & RDMA_CORE_PORT_IWARP) { 1656 ird_ord_hdr[0] = info->responder_resources; 1657 ird_ord_hdr[1] = 1; 1658 conn_param.private_data = ird_ord_hdr; 1659 conn_param.private_data_len = sizeof(ird_ord_hdr); 1660 } else { 1661 conn_param.private_data = NULL; 1662 conn_param.private_data_len = 0; 1663 } 1664 1665 conn_param.retry_count = SMBD_CM_RETRY; 1666 conn_param.rnr_retry_count = SMBD_CM_RNR_RETRY; 1667 conn_param.flow_control = 0; 1668 1669 log_rdma_event(INFO, "connecting to IP %pI4 port %d\n", 1670 &addr_in->sin_addr, port); 1671 1672 init_waitqueue_head(&info->status_wait); 1673 init_waitqueue_head(&sc->recv_io.reassembly.wait_queue); 1674 rc = rdma_connect(sc->rdma.cm_id, &conn_param); 1675 if (rc) { 1676 log_rdma_event(ERR, "rdma_connect() failed with %i\n", rc); 1677 goto rdma_connect_failed; 1678 } 1679 1680 
wait_event_interruptible_timeout( 1681 info->status_wait, 1682 sc->status != SMBDIRECT_SOCKET_CONNECTING, 1683 msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT)); 1684 1685 if (sc->status != SMBDIRECT_SOCKET_CONNECTED) { 1686 log_rdma_event(ERR, "rdma_connect failed port=%d\n", port); 1687 goto rdma_connect_failed; 1688 } 1689 1690 log_rdma_event(INFO, "rdma_connect connected\n"); 1691 1692 rc = allocate_caches_and_workqueue(info); 1693 if (rc) { 1694 log_rdma_event(ERR, "cache allocation failed\n"); 1695 goto allocate_cache_failed; 1696 } 1697 1698 init_waitqueue_head(&info->wait_send_queue); 1699 INIT_DELAYED_WORK(&info->idle_timer_work, idle_connection_timer); 1700 queue_delayed_work(info->workqueue, &info->idle_timer_work, 1701 msecs_to_jiffies(sp->keepalive_interval_msec)); 1702 1703 init_waitqueue_head(&info->wait_send_pending); 1704 atomic_set(&info->send_pending, 0); 1705 1706 init_waitqueue_head(&info->wait_post_send); 1707 1708 INIT_WORK(&info->disconnect_work, smbd_disconnect_rdma_work); 1709 INIT_WORK(&info->post_send_credits_work, smbd_post_send_credits); 1710 info->new_credits_offered = 0; 1711 spin_lock_init(&info->lock_new_credits_offered); 1712 1713 rc = smbd_negotiate(info); 1714 if (rc) { 1715 log_rdma_event(ERR, "smbd_negotiate rc=%d\n", rc); 1716 goto negotiation_failed; 1717 } 1718 1719 rc = allocate_mr_list(info); 1720 if (rc) { 1721 log_rdma_mr(ERR, "memory registration allocation failed\n"); 1722 goto allocate_mr_failed; 1723 } 1724 1725 return info; 1726 1727 allocate_mr_failed: 1728 /* At this point, need to a full transport shutdown */ 1729 server->smbd_conn = info; 1730 smbd_destroy(server); 1731 return NULL; 1732 1733 negotiation_failed: 1734 disable_delayed_work_sync(&info->idle_timer_work); 1735 destroy_caches_and_workqueue(info); 1736 sc->status = SMBDIRECT_SOCKET_NEGOTIATE_FAILED; 1737 rdma_disconnect(sc->rdma.cm_id); 1738 wait_event(info->status_wait, 1739 sc->status == SMBDIRECT_SOCKET_DISCONNECTED); 1740 1741 allocate_cache_failed: 1742 rdma_connect_failed: 1743 rdma_destroy_qp(sc->rdma.cm_id); 1744 1745 create_qp_failed: 1746 alloc_cq_failed: 1747 if (sc->ib.send_cq) 1748 ib_free_cq(sc->ib.send_cq); 1749 if (sc->ib.recv_cq) 1750 ib_free_cq(sc->ib.recv_cq); 1751 1752 config_failed: 1753 ib_dealloc_pd(sc->ib.pd); 1754 rdma_destroy_id(sc->rdma.cm_id); 1755 1756 create_id_failed: 1757 kfree(info); 1758 return NULL; 1759 } 1760 1761 struct smbd_connection *smbd_get_connection( 1762 struct TCP_Server_Info *server, struct sockaddr *dstaddr) 1763 { 1764 struct smbd_connection *ret; 1765 int port = SMBD_PORT; 1766 1767 try_again: 1768 ret = _smbd_get_connection(server, dstaddr, port); 1769 1770 /* Try SMB_PORT if SMBD_PORT doesn't work */ 1771 if (!ret && port == SMBD_PORT) { 1772 port = SMB_PORT; 1773 goto try_again; 1774 } 1775 return ret; 1776 } 1777 1778 /* 1779 * Receive data from the transport's receive reassembly queue 1780 * All the incoming data packets are placed in reassembly queue 1781 * iter: the buffer to read data into 1782 * size: the length of data to read 1783 * return value: actual data read 1784 * 1785 * Note: this implementation copies the data from reassembly queue to receive 1786 * buffers used by upper layer. This is not the optimal code path. A better way 1787 * to do it is to not have upper layer allocate its receive buffers but rather 1788 * borrow the buffer from reassembly queue, and return it after data is 1789 * consumed. 
/*
 * Receive data from the transport's receive reassembly queue
 * All the incoming data packets are placed in the reassembly queue
 * msg: the buffer to read the data into
 * size: the length of the data to read
 * return value: actual data read
 *
 * Note: this implementation copies the data from the reassembly queue to the
 * receive buffers used by the upper layer. This is not the optimal code path.
 * A better way to do it is to not have the upper layer allocate its receive
 * buffers but rather borrow the buffer from the reassembly queue, and return
 * it after the data is consumed. But this would require more changes to the
 * upper layer code, and it would also need to consider packet boundaries
 * while messages are still being reassembled.
 */
int smbd_recv(struct smbd_connection *info, struct msghdr *msg)
{
	struct smbdirect_socket *sc = &info->socket;
	struct smbdirect_recv_io *response;
	struct smbdirect_data_transfer *data_transfer;
	size_t size = iov_iter_count(&msg->msg_iter);
	int to_copy, to_read, data_read, offset;
	u32 data_length, remaining_data_length, data_offset;
	int rc;

	if (WARN_ON_ONCE(iov_iter_rw(&msg->msg_iter) == WRITE))
		return -EINVAL; /* It's a bug in the upper layer to get here */

again:
	/*
	 * No need to hold the reassembly queue lock all the time as we are
	 * the only one reading from the front of the queue. The transport
	 * may add more entries to the back of the queue at the same time
	 */
	log_read(INFO, "size=%zd sc->recv_io.reassembly.data_length=%d\n", size,
		 sc->recv_io.reassembly.data_length);
	if (sc->recv_io.reassembly.data_length >= size) {
		int queue_length;
		int queue_removed = 0;

		/*
		 * Need to make sure reassembly_data_length is read before
		 * reading reassembly_queue_length and calling
		 * _get_first_reassembly. This call is lock-free
		 * as we never read the end of the queue, which is being
		 * updated in SOFTIRQ context as more data is received
		 */
		virt_rmb();
		queue_length = sc->recv_io.reassembly.queue_length;
		data_read = 0;
		to_read = size;
		offset = sc->recv_io.reassembly.first_entry_offset;
		while (data_read < size) {
			response = _get_first_reassembly(info);
			data_transfer = smbdirect_recv_io_payload(response);
			data_length = le32_to_cpu(data_transfer->data_length);
			remaining_data_length =
				le32_to_cpu(
					data_transfer->remaining_data_length);
			data_offset = le32_to_cpu(data_transfer->data_offset);

			/*
			 * The upper layer expects the RFC1002 length at the
			 * beginning of the payload. Return it to indicate
			 * the total length of the packet. This minimizes the
			 * changes to the upper layer packet processing logic.
			 * This will eventually be removed when an intermediate
			 * transport layer is added
			 */
			if (response->first_segment && size == 4) {
				unsigned int rfc1002_len =
					data_length + remaining_data_length;
				__be32 rfc1002_hdr = cpu_to_be32(rfc1002_len);
				if (copy_to_iter(&rfc1002_hdr, sizeof(rfc1002_hdr),
						 &msg->msg_iter) != sizeof(rfc1002_hdr))
					return -EFAULT;
				data_read = 4;
				response->first_segment = false;
				log_read(INFO, "returning rfc1002 length %d\n",
					 rfc1002_len);
				goto read_rfc1002_done;
			}

			to_copy = min_t(int, data_length - offset, to_read);
			if (copy_to_iter((char *)data_transfer + data_offset + offset,
					 to_copy, &msg->msg_iter) != to_copy)
				return -EFAULT;

			/* move on to the next buffer?
*/ 1866 if (to_copy == data_length - offset) { 1867 queue_length--; 1868 /* 1869 * No need to lock if we are not at the 1870 * end of the queue 1871 */ 1872 if (queue_length) 1873 list_del(&response->list); 1874 else { 1875 spin_lock_irq( 1876 &sc->recv_io.reassembly.lock); 1877 list_del(&response->list); 1878 spin_unlock_irq( 1879 &sc->recv_io.reassembly.lock); 1880 } 1881 queue_removed++; 1882 info->count_reassembly_queue--; 1883 info->count_dequeue_reassembly_queue++; 1884 put_receive_buffer(info, response); 1885 offset = 0; 1886 log_read(INFO, "put_receive_buffer offset=0\n"); 1887 } else 1888 offset += to_copy; 1889 1890 to_read -= to_copy; 1891 data_read += to_copy; 1892 1893 log_read(INFO, "_get_first_reassembly memcpy %d bytes data_transfer_length-offset=%d after that to_read=%d data_read=%d offset=%d\n", 1894 to_copy, data_length - offset, 1895 to_read, data_read, offset); 1896 } 1897 1898 spin_lock_irq(&sc->recv_io.reassembly.lock); 1899 sc->recv_io.reassembly.data_length -= data_read; 1900 sc->recv_io.reassembly.queue_length -= queue_removed; 1901 spin_unlock_irq(&sc->recv_io.reassembly.lock); 1902 1903 sc->recv_io.reassembly.first_entry_offset = offset; 1904 log_read(INFO, "returning to thread data_read=%d reassembly_data_length=%d first_entry_offset=%d\n", 1905 data_read, sc->recv_io.reassembly.data_length, 1906 sc->recv_io.reassembly.first_entry_offset); 1907 read_rfc1002_done: 1908 return data_read; 1909 } 1910 1911 log_read(INFO, "wait_event on more data\n"); 1912 rc = wait_event_interruptible( 1913 sc->recv_io.reassembly.wait_queue, 1914 sc->recv_io.reassembly.data_length >= size || 1915 sc->status != SMBDIRECT_SOCKET_CONNECTED); 1916 /* Don't return any data if interrupted */ 1917 if (rc) 1918 return rc; 1919 1920 if (sc->status != SMBDIRECT_SOCKET_CONNECTED) { 1921 log_read(ERR, "disconnected\n"); 1922 return -ECONNABORTED; 1923 } 1924 1925 goto again; 1926 } 1927 1928 /* 1929 * Send data to transport 1930 * Each rqst is transported as a SMBDirect payload 1931 * rqst: the data to write 1932 * return value: 0 if successfully write, otherwise error code 1933 */ 1934 int smbd_send(struct TCP_Server_Info *server, 1935 int num_rqst, struct smb_rqst *rqst_array) 1936 { 1937 struct smbd_connection *info = server->smbd_conn; 1938 struct smbdirect_socket *sc = &info->socket; 1939 struct smbdirect_socket_parameters *sp = &sc->parameters; 1940 struct smb_rqst *rqst; 1941 struct iov_iter iter; 1942 unsigned int remaining_data_length, klen; 1943 int rc, i, rqst_idx; 1944 1945 if (sc->status != SMBDIRECT_SOCKET_CONNECTED) 1946 return -EAGAIN; 1947 1948 /* 1949 * Add in the page array if there is one. 
The caller needs to set 1950 * rq_tailsz to PAGE_SIZE when the buffer has multiple pages and 1951 * ends at page boundary 1952 */ 1953 remaining_data_length = 0; 1954 for (i = 0; i < num_rqst; i++) 1955 remaining_data_length += smb_rqst_len(server, &rqst_array[i]); 1956 1957 if (unlikely(remaining_data_length > sp->max_fragmented_send_size)) { 1958 /* assertion: payload never exceeds negotiated maximum */ 1959 log_write(ERR, "payload size %d > max size %d\n", 1960 remaining_data_length, sp->max_fragmented_send_size); 1961 return -EINVAL; 1962 } 1963 1964 log_write(INFO, "num_rqst=%d total length=%u\n", 1965 num_rqst, remaining_data_length); 1966 1967 rqst_idx = 0; 1968 do { 1969 rqst = &rqst_array[rqst_idx]; 1970 1971 cifs_dbg(FYI, "Sending smb (RDMA): idx=%d smb_len=%lu\n", 1972 rqst_idx, smb_rqst_len(server, rqst)); 1973 for (i = 0; i < rqst->rq_nvec; i++) 1974 dump_smb(rqst->rq_iov[i].iov_base, rqst->rq_iov[i].iov_len); 1975 1976 log_write(INFO, "RDMA-WR[%u] nvec=%d len=%u iter=%zu rqlen=%lu\n", 1977 rqst_idx, rqst->rq_nvec, remaining_data_length, 1978 iov_iter_count(&rqst->rq_iter), smb_rqst_len(server, rqst)); 1979 1980 /* Send the metadata pages. */ 1981 klen = 0; 1982 for (i = 0; i < rqst->rq_nvec; i++) 1983 klen += rqst->rq_iov[i].iov_len; 1984 iov_iter_kvec(&iter, ITER_SOURCE, rqst->rq_iov, rqst->rq_nvec, klen); 1985 1986 rc = smbd_post_send_full_iter(info, &iter, &remaining_data_length); 1987 if (rc < 0) 1988 break; 1989 1990 if (iov_iter_count(&rqst->rq_iter) > 0) { 1991 /* And then the data pages if there are any */ 1992 rc = smbd_post_send_full_iter(info, &rqst->rq_iter, 1993 &remaining_data_length); 1994 if (rc < 0) 1995 break; 1996 } 1997 1998 } while (++rqst_idx < num_rqst); 1999 2000 /* 2001 * As an optimization, we don't wait for individual I/O to finish 2002 * before sending the next one. 2003 * Send them all and wait for pending send count to get to 0 2004 * that means all the I/Os have been out and we are good to return 2005 */ 2006 2007 wait_event(info->wait_send_pending, 2008 atomic_read(&info->send_pending) == 0 || 2009 sc->status != SMBDIRECT_SOCKET_CONNECTED); 2010 2011 if (sc->status != SMBDIRECT_SOCKET_CONNECTED && rc == 0) 2012 rc = -EAGAIN; 2013 2014 return rc; 2015 } 2016 2017 static void register_mr_done(struct ib_cq *cq, struct ib_wc *wc) 2018 { 2019 struct smbd_mr *mr; 2020 struct ib_cqe *cqe; 2021 2022 if (wc->status) { 2023 log_rdma_mr(ERR, "status=%d\n", wc->status); 2024 cqe = wc->wr_cqe; 2025 mr = container_of(cqe, struct smbd_mr, cqe); 2026 smbd_disconnect_rdma_connection(mr->conn); 2027 } 2028 } 2029 2030 /* 2031 * The work queue function that recovers MRs 2032 * We need to call ib_dereg_mr() and ib_alloc_mr() before this MR can be used 2033 * again. Both calls are slow, so finish them in a workqueue. This will not 2034 * block I/O path. 2035 * There is one workqueue that recovers MRs, there is no need to lock as the 2036 * I/O requests calling smbd_register_mr will never update the links in the 2037 * mr_list. 
 */
static void smbd_mr_recovery_work(struct work_struct *work)
{
	struct smbd_connection *info =
		container_of(work, struct smbd_connection, mr_recovery_work);
	struct smbdirect_socket *sc = &info->socket;
	struct smbd_mr *smbdirect_mr;
	int rc;

	list_for_each_entry(smbdirect_mr, &info->mr_list, list) {
		if (smbdirect_mr->state == MR_ERROR) {

			/* recover this MR entry */
			rc = ib_dereg_mr(smbdirect_mr->mr);
			if (rc) {
				log_rdma_mr(ERR,
					"ib_dereg_mr failed rc=%x\n",
					rc);
				smbd_disconnect_rdma_connection(info);
				continue;
			}

			smbdirect_mr->mr = ib_alloc_mr(
				sc->ib.pd, info->mr_type,
				info->max_frmr_depth);
			if (IS_ERR(smbdirect_mr->mr)) {
				log_rdma_mr(ERR, "ib_alloc_mr failed mr_type=%x max_frmr_depth=%x\n",
					    info->mr_type,
					    info->max_frmr_depth);
				smbd_disconnect_rdma_connection(info);
				continue;
			}
		} else
			/* This MR is being used, don't recover it */
			continue;

		smbdirect_mr->state = MR_READY;

		/* smbdirect_mr->state is updated by this function
		 * and is read and updated by the I/O issuing CPUs trying
		 * to get an MR. The call to atomic_inc_return()
		 * implies a memory barrier and guarantees this
		 * value is updated before waking up any calls to
		 * get_mr() from the I/O issuing CPUs
		 */
		if (atomic_inc_return(&info->mr_ready_count) == 1)
			wake_up_interruptible(&info->wait_mr);
	}
}

static void destroy_mr_list(struct smbd_connection *info)
{
	struct smbdirect_socket *sc = &info->socket;
	struct smbd_mr *mr, *tmp;

	disable_work_sync(&info->mr_recovery_work);
	list_for_each_entry_safe(mr, tmp, &info->mr_list, list) {
		if (mr->state == MR_INVALIDATED)
			ib_dma_unmap_sg(sc->ib.dev, mr->sgt.sgl,
				mr->sgt.nents, mr->dir);
		ib_dereg_mr(mr->mr);
		kfree(mr->sgt.sgl);
		kfree(mr);
	}
}

/*
 * Allocate MRs used for RDMA read/write
 * The number of MRs will not exceed hardware capability in responder_resources
 * All MRs are kept in mr_list. An MR can be recovered after it's used
 * Recovery is done in smbd_mr_recovery_work.
The content of list entry changes 2109 * as MRs are used and recovered for I/O, but the list links will not change 2110 */ 2111 static int allocate_mr_list(struct smbd_connection *info) 2112 { 2113 struct smbdirect_socket *sc = &info->socket; 2114 int i; 2115 struct smbd_mr *smbdirect_mr, *tmp; 2116 2117 INIT_LIST_HEAD(&info->mr_list); 2118 init_waitqueue_head(&info->wait_mr); 2119 spin_lock_init(&info->mr_list_lock); 2120 atomic_set(&info->mr_ready_count, 0); 2121 atomic_set(&info->mr_used_count, 0); 2122 init_waitqueue_head(&info->wait_for_mr_cleanup); 2123 INIT_WORK(&info->mr_recovery_work, smbd_mr_recovery_work); 2124 /* Allocate more MRs (2x) than hardware responder_resources */ 2125 for (i = 0; i < info->responder_resources * 2; i++) { 2126 smbdirect_mr = kzalloc(sizeof(*smbdirect_mr), GFP_KERNEL); 2127 if (!smbdirect_mr) 2128 goto cleanup_entries; 2129 smbdirect_mr->mr = ib_alloc_mr(sc->ib.pd, info->mr_type, 2130 info->max_frmr_depth); 2131 if (IS_ERR(smbdirect_mr->mr)) { 2132 log_rdma_mr(ERR, "ib_alloc_mr failed mr_type=%x max_frmr_depth=%x\n", 2133 info->mr_type, info->max_frmr_depth); 2134 goto out; 2135 } 2136 smbdirect_mr->sgt.sgl = kcalloc(info->max_frmr_depth, 2137 sizeof(struct scatterlist), 2138 GFP_KERNEL); 2139 if (!smbdirect_mr->sgt.sgl) { 2140 log_rdma_mr(ERR, "failed to allocate sgl\n"); 2141 ib_dereg_mr(smbdirect_mr->mr); 2142 goto out; 2143 } 2144 smbdirect_mr->state = MR_READY; 2145 smbdirect_mr->conn = info; 2146 2147 list_add_tail(&smbdirect_mr->list, &info->mr_list); 2148 atomic_inc(&info->mr_ready_count); 2149 } 2150 return 0; 2151 2152 out: 2153 kfree(smbdirect_mr); 2154 cleanup_entries: 2155 list_for_each_entry_safe(smbdirect_mr, tmp, &info->mr_list, list) { 2156 list_del(&smbdirect_mr->list); 2157 ib_dereg_mr(smbdirect_mr->mr); 2158 kfree(smbdirect_mr->sgt.sgl); 2159 kfree(smbdirect_mr); 2160 } 2161 return -ENOMEM; 2162 } 2163 2164 /* 2165 * Get a MR from mr_list. This function waits until there is at least one 2166 * MR available in the list. It may access the list while the 2167 * smbd_mr_recovery_work is recovering the MR list. This doesn't need a lock 2168 * as they never modify the same places. However, there may be several CPUs 2169 * issuing I/O trying to get MR at the same time, mr_list_lock is used to 2170 * protect this situation. 2171 */ 2172 static struct smbd_mr *get_mr(struct smbd_connection *info) 2173 { 2174 struct smbdirect_socket *sc = &info->socket; 2175 struct smbd_mr *ret; 2176 int rc; 2177 again: 2178 rc = wait_event_interruptible(info->wait_mr, 2179 atomic_read(&info->mr_ready_count) || 2180 sc->status != SMBDIRECT_SOCKET_CONNECTED); 2181 if (rc) { 2182 log_rdma_mr(ERR, "wait_event_interruptible rc=%x\n", rc); 2183 return NULL; 2184 } 2185 2186 if (sc->status != SMBDIRECT_SOCKET_CONNECTED) { 2187 log_rdma_mr(ERR, "sc->status=%x\n", sc->status); 2188 return NULL; 2189 } 2190 2191 spin_lock(&info->mr_list_lock); 2192 list_for_each_entry(ret, &info->mr_list, list) { 2193 if (ret->state == MR_READY) { 2194 ret->state = MR_REGISTERED; 2195 spin_unlock(&info->mr_list_lock); 2196 atomic_dec(&info->mr_ready_count); 2197 atomic_inc(&info->mr_used_count); 2198 return ret; 2199 } 2200 } 2201 2202 spin_unlock(&info->mr_list_lock); 2203 /* 2204 * It is possible that we could fail to get MR because other processes may 2205 * try to acquire a MR at the same time. If this is the case, retry it. 2206 */ 2207 goto again; 2208 } 2209 2210 /* 2211 * Transcribe the pages from an iterator into an MR scatterlist. 
2212 */ 2213 static int smbd_iter_to_mr(struct smbd_connection *info, 2214 struct iov_iter *iter, 2215 struct sg_table *sgt, 2216 unsigned int max_sg) 2217 { 2218 int ret; 2219 2220 memset(sgt->sgl, 0, max_sg * sizeof(struct scatterlist)); 2221 2222 ret = extract_iter_to_sg(iter, iov_iter_count(iter), sgt, max_sg, 0); 2223 WARN_ON(ret < 0); 2224 if (sgt->nents > 0) 2225 sg_mark_end(&sgt->sgl[sgt->nents - 1]); 2226 return ret; 2227 } 2228 2229 /* 2230 * Register memory for RDMA read/write 2231 * iter: the buffer to register memory with 2232 * writing: true if this is a RDMA write (SMB read), false for RDMA read 2233 * need_invalidate: true if this MR needs to be locally invalidated after I/O 2234 * return value: the MR registered, NULL if failed. 2235 */ 2236 struct smbd_mr *smbd_register_mr(struct smbd_connection *info, 2237 struct iov_iter *iter, 2238 bool writing, bool need_invalidate) 2239 { 2240 struct smbdirect_socket *sc = &info->socket; 2241 struct smbd_mr *smbdirect_mr; 2242 int rc, num_pages; 2243 enum dma_data_direction dir; 2244 struct ib_reg_wr *reg_wr; 2245 2246 num_pages = iov_iter_npages(iter, info->max_frmr_depth + 1); 2247 if (num_pages > info->max_frmr_depth) { 2248 log_rdma_mr(ERR, "num_pages=%d max_frmr_depth=%d\n", 2249 num_pages, info->max_frmr_depth); 2250 WARN_ON_ONCE(1); 2251 return NULL; 2252 } 2253 2254 smbdirect_mr = get_mr(info); 2255 if (!smbdirect_mr) { 2256 log_rdma_mr(ERR, "get_mr returning NULL\n"); 2257 return NULL; 2258 } 2259 2260 dir = writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE; 2261 smbdirect_mr->dir = dir; 2262 smbdirect_mr->need_invalidate = need_invalidate; 2263 smbdirect_mr->sgt.nents = 0; 2264 smbdirect_mr->sgt.orig_nents = 0; 2265 2266 log_rdma_mr(INFO, "num_pages=0x%x count=0x%zx depth=%u\n", 2267 num_pages, iov_iter_count(iter), info->max_frmr_depth); 2268 smbd_iter_to_mr(info, iter, &smbdirect_mr->sgt, info->max_frmr_depth); 2269 2270 rc = ib_dma_map_sg(sc->ib.dev, smbdirect_mr->sgt.sgl, 2271 smbdirect_mr->sgt.nents, dir); 2272 if (!rc) { 2273 log_rdma_mr(ERR, "ib_dma_map_sg num_pages=%x dir=%x rc=%x\n", 2274 num_pages, dir, rc); 2275 goto dma_map_error; 2276 } 2277 2278 rc = ib_map_mr_sg(smbdirect_mr->mr, smbdirect_mr->sgt.sgl, 2279 smbdirect_mr->sgt.nents, NULL, PAGE_SIZE); 2280 if (rc != smbdirect_mr->sgt.nents) { 2281 log_rdma_mr(ERR, 2282 "ib_map_mr_sg failed rc = %d nents = %x\n", 2283 rc, smbdirect_mr->sgt.nents); 2284 goto map_mr_error; 2285 } 2286 2287 ib_update_fast_reg_key(smbdirect_mr->mr, 2288 ib_inc_rkey(smbdirect_mr->mr->rkey)); 2289 reg_wr = &smbdirect_mr->wr; 2290 reg_wr->wr.opcode = IB_WR_REG_MR; 2291 smbdirect_mr->cqe.done = register_mr_done; 2292 reg_wr->wr.wr_cqe = &smbdirect_mr->cqe; 2293 reg_wr->wr.num_sge = 0; 2294 reg_wr->wr.send_flags = IB_SEND_SIGNALED; 2295 reg_wr->mr = smbdirect_mr->mr; 2296 reg_wr->key = smbdirect_mr->mr->rkey; 2297 reg_wr->access = writing ? 2298 IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : 2299 IB_ACCESS_REMOTE_READ; 2300 2301 /* 2302 * There is no need for waiting for complemtion on ib_post_send 2303 * on IB_WR_REG_MR. 
Hardware enforces a barrier and ordering
 * on the next ib_post_send when we actually send I/O to the remote peer
 */
	rc = ib_post_send(sc->ib.qp, &reg_wr->wr, NULL);
	if (!rc)
		return smbdirect_mr;

	log_rdma_mr(ERR, "ib_post_send failed rc=%x reg_wr->key=%x\n",
		rc, reg_wr->key);

	/* If anything failed, attempt to recover this MR by setting it to MR_ERROR */
map_mr_error:
	ib_dma_unmap_sg(sc->ib.dev, smbdirect_mr->sgt.sgl,
		smbdirect_mr->sgt.nents, smbdirect_mr->dir);

dma_map_error:
	smbdirect_mr->state = MR_ERROR;
	if (atomic_dec_and_test(&info->mr_used_count))
		wake_up(&info->wait_for_mr_cleanup);

	smbd_disconnect_rdma_connection(info);

	return NULL;
}

static void local_inv_done(struct ib_cq *cq, struct ib_wc *wc)
{
	struct smbd_mr *smbdirect_mr;
	struct ib_cqe *cqe;

	cqe = wc->wr_cqe;
	smbdirect_mr = container_of(cqe, struct smbd_mr, cqe);
	smbdirect_mr->state = MR_INVALIDATED;
	if (wc->status != IB_WC_SUCCESS) {
		log_rdma_mr(ERR, "invalidate failed status=%x\n", wc->status);
		smbdirect_mr->state = MR_ERROR;
	}
	complete(&smbdirect_mr->invalidate_done);
}

/*
 * Deregister an MR after I/O is done
 * This function may wait if remote invalidation is not used
 * and we have to locally invalidate the buffer to prevent the data from being
 * modified by the remote peer after the upper layer consumes it
 */
int smbd_deregister_mr(struct smbd_mr *smbdirect_mr)
{
	struct ib_send_wr *wr;
	struct smbd_connection *info = smbdirect_mr->conn;
	struct smbdirect_socket *sc = &info->socket;
	int rc = 0;

	if (smbdirect_mr->need_invalidate) {
		/* Need to finish local invalidation before returning */
		wr = &smbdirect_mr->inv_wr;
		wr->opcode = IB_WR_LOCAL_INV;
		smbdirect_mr->cqe.done = local_inv_done;
		wr->wr_cqe = &smbdirect_mr->cqe;
		wr->num_sge = 0;
		wr->ex.invalidate_rkey = smbdirect_mr->mr->rkey;
		wr->send_flags = IB_SEND_SIGNALED;

		init_completion(&smbdirect_mr->invalidate_done);
		rc = ib_post_send(sc->ib.qp, wr, NULL);
		if (rc) {
			log_rdma_mr(ERR, "ib_post_send failed rc=%x\n", rc);
			smbd_disconnect_rdma_connection(info);
			goto done;
		}
		wait_for_completion(&smbdirect_mr->invalidate_done);
		smbdirect_mr->need_invalidate = false;
	} else
		/*
		 * For remote invalidation, just set it to MR_INVALIDATED
		 * and defer to mr_recovery_work to recover the MR for next use
		 */
		smbdirect_mr->state = MR_INVALIDATED;

	if (smbdirect_mr->state == MR_INVALIDATED) {
		ib_dma_unmap_sg(
			sc->ib.dev, smbdirect_mr->sgt.sgl,
			smbdirect_mr->sgt.nents,
			smbdirect_mr->dir);
		smbdirect_mr->state = MR_READY;
		if (atomic_inc_return(&info->mr_ready_count) == 1)
			wake_up_interruptible(&info->wait_mr);
	} else
		/*
		 * Schedule the work to do MR recovery for future I/Os. MR
		 * recovery is slow and we don't want it to block the current I/O
		 */
		queue_work(info->workqueue, &info->mr_recovery_work);

done:
	if (atomic_dec_and_test(&info->mr_used_count))
		wake_up(&info->wait_for_mr_cleanup);

	return rc;
}

static bool smb_set_sge(struct smb_extract_to_rdma *rdma,
			struct page *lowest_page, size_t off, size_t len)
{
	struct ib_sge *sge = &rdma->sge[rdma->nr_sge];
	u64 addr;

	addr = ib_dma_map_page(rdma->device,
lowest_page, 2411 off, len, rdma->direction); 2412 if (ib_dma_mapping_error(rdma->device, addr)) 2413 return false; 2414 2415 sge->addr = addr; 2416 sge->length = len; 2417 sge->lkey = rdma->local_dma_lkey; 2418 rdma->nr_sge++; 2419 return true; 2420 } 2421 2422 /* 2423 * Extract page fragments from a BVEC-class iterator and add them to an RDMA 2424 * element list. The pages are not pinned. 2425 */ 2426 static ssize_t smb_extract_bvec_to_rdma(struct iov_iter *iter, 2427 struct smb_extract_to_rdma *rdma, 2428 ssize_t maxsize) 2429 { 2430 const struct bio_vec *bv = iter->bvec; 2431 unsigned long start = iter->iov_offset; 2432 unsigned int i; 2433 ssize_t ret = 0; 2434 2435 for (i = 0; i < iter->nr_segs; i++) { 2436 size_t off, len; 2437 2438 len = bv[i].bv_len; 2439 if (start >= len) { 2440 start -= len; 2441 continue; 2442 } 2443 2444 len = min_t(size_t, maxsize, len - start); 2445 off = bv[i].bv_offset + start; 2446 2447 if (!smb_set_sge(rdma, bv[i].bv_page, off, len)) 2448 return -EIO; 2449 2450 ret += len; 2451 maxsize -= len; 2452 if (rdma->nr_sge >= rdma->max_sge || maxsize <= 0) 2453 break; 2454 start = 0; 2455 } 2456 2457 if (ret > 0) 2458 iov_iter_advance(iter, ret); 2459 return ret; 2460 } 2461 2462 /* 2463 * Extract fragments from a KVEC-class iterator and add them to an RDMA list. 2464 * This can deal with vmalloc'd buffers as well as kmalloc'd or static buffers. 2465 * The pages are not pinned. 2466 */ 2467 static ssize_t smb_extract_kvec_to_rdma(struct iov_iter *iter, 2468 struct smb_extract_to_rdma *rdma, 2469 ssize_t maxsize) 2470 { 2471 const struct kvec *kv = iter->kvec; 2472 unsigned long start = iter->iov_offset; 2473 unsigned int i; 2474 ssize_t ret = 0; 2475 2476 for (i = 0; i < iter->nr_segs; i++) { 2477 struct page *page; 2478 unsigned long kaddr; 2479 size_t off, len, seg; 2480 2481 len = kv[i].iov_len; 2482 if (start >= len) { 2483 start -= len; 2484 continue; 2485 } 2486 2487 kaddr = (unsigned long)kv[i].iov_base + start; 2488 off = kaddr & ~PAGE_MASK; 2489 len = min_t(size_t, maxsize, len - start); 2490 kaddr &= PAGE_MASK; 2491 2492 maxsize -= len; 2493 do { 2494 seg = min_t(size_t, len, PAGE_SIZE - off); 2495 2496 if (is_vmalloc_or_module_addr((void *)kaddr)) 2497 page = vmalloc_to_page((void *)kaddr); 2498 else 2499 page = virt_to_page((void *)kaddr); 2500 2501 if (!smb_set_sge(rdma, page, off, seg)) 2502 return -EIO; 2503 2504 ret += seg; 2505 len -= seg; 2506 kaddr += PAGE_SIZE; 2507 off = 0; 2508 } while (len > 0 && rdma->nr_sge < rdma->max_sge); 2509 2510 if (rdma->nr_sge >= rdma->max_sge || maxsize <= 0) 2511 break; 2512 start = 0; 2513 } 2514 2515 if (ret > 0) 2516 iov_iter_advance(iter, ret); 2517 return ret; 2518 } 2519 2520 /* 2521 * Extract folio fragments from a FOLIOQ-class iterator and add them to an RDMA 2522 * list. The folios are not pinned. 
2523 */ 2524 static ssize_t smb_extract_folioq_to_rdma(struct iov_iter *iter, 2525 struct smb_extract_to_rdma *rdma, 2526 ssize_t maxsize) 2527 { 2528 const struct folio_queue *folioq = iter->folioq; 2529 unsigned int slot = iter->folioq_slot; 2530 ssize_t ret = 0; 2531 size_t offset = iter->iov_offset; 2532 2533 BUG_ON(!folioq); 2534 2535 if (slot >= folioq_nr_slots(folioq)) { 2536 folioq = folioq->next; 2537 if (WARN_ON_ONCE(!folioq)) 2538 return -EIO; 2539 slot = 0; 2540 } 2541 2542 do { 2543 struct folio *folio = folioq_folio(folioq, slot); 2544 size_t fsize = folioq_folio_size(folioq, slot); 2545 2546 if (offset < fsize) { 2547 size_t part = umin(maxsize, fsize - offset); 2548 2549 if (!smb_set_sge(rdma, folio_page(folio, 0), offset, part)) 2550 return -EIO; 2551 2552 offset += part; 2553 ret += part; 2554 maxsize -= part; 2555 } 2556 2557 if (offset >= fsize) { 2558 offset = 0; 2559 slot++; 2560 if (slot >= folioq_nr_slots(folioq)) { 2561 if (!folioq->next) { 2562 WARN_ON_ONCE(ret < iter->count); 2563 break; 2564 } 2565 folioq = folioq->next; 2566 slot = 0; 2567 } 2568 } 2569 } while (rdma->nr_sge < rdma->max_sge && maxsize > 0); 2570 2571 iter->folioq = folioq; 2572 iter->folioq_slot = slot; 2573 iter->iov_offset = offset; 2574 iter->count -= ret; 2575 return ret; 2576 } 2577 2578 /* 2579 * Extract page fragments from up to the given amount of the source iterator 2580 * and build up an RDMA list that refers to all of those bits. The RDMA list 2581 * is appended to, up to the maximum number of elements set in the parameter 2582 * block. 2583 * 2584 * The extracted page fragments are not pinned or ref'd in any way; if an 2585 * IOVEC/UBUF-type iterator is to be used, it should be converted to a 2586 * BVEC-type iterator and the pages pinned, ref'd or otherwise held in some 2587 * way. 2588 */ 2589 static ssize_t smb_extract_iter_to_rdma(struct iov_iter *iter, size_t len, 2590 struct smb_extract_to_rdma *rdma) 2591 { 2592 ssize_t ret; 2593 int before = rdma->nr_sge; 2594 2595 switch (iov_iter_type(iter)) { 2596 case ITER_BVEC: 2597 ret = smb_extract_bvec_to_rdma(iter, rdma, len); 2598 break; 2599 case ITER_KVEC: 2600 ret = smb_extract_kvec_to_rdma(iter, rdma, len); 2601 break; 2602 case ITER_FOLIOQ: 2603 ret = smb_extract_folioq_to_rdma(iter, rdma, len); 2604 break; 2605 default: 2606 WARN_ON_ONCE(1); 2607 return -EIO; 2608 } 2609 2610 if (ret < 0) { 2611 while (rdma->nr_sge > before) { 2612 struct ib_sge *sge = &rdma->sge[rdma->nr_sge--]; 2613 2614 ib_dma_unmap_single(rdma->device, sge->addr, sge->length, 2615 rdma->direction); 2616 sge->addr = 0; 2617 } 2618 } 2619 2620 return ret; 2621 } 2622
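
/*
 * Illustrative sketch (hypothetical locals, not code from this file): a
 * caller that wants to describe the contents of an iov_iter as a DMA-mapped
 * SGE list for an RDMA operation would fill in a struct smb_extract_to_rdma
 * and call smb_extract_iter_to_rdma(), roughly like this:
 *
 *	struct ib_sge sges[16];
 *	struct smb_extract_to_rdma rdma = {
 *		.sge		= sges,
 *		.nr_sge		= 0,
 *		.max_sge	= ARRAY_SIZE(sges),
 *		.device		= sc->ib.dev,
 *		.local_dma_lkey	= sc->ib.pd->local_dma_lkey,
 *		.direction	= DMA_TO_DEVICE,
 *	};
 *	ssize_t n;
 *
 *	n = smb_extract_iter_to_rdma(iter, iov_iter_count(iter), &rdma);
 *
 * On success, the first rdma.nr_sge entries of sges[] are DMA-mapped and can
 * be attached to a send work request using rdma.local_dma_lkey; on failure,
 * the entries added by this call are unmapped before the error is returned.
 */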