// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *   Copyright (C) 2017, Microsoft Corporation.
 *
 *   Author(s): Long Li <longli@microsoft.com>
 */
#include <linux/module.h>
#include <linux/highmem.h>
#include <linux/folio_queue.h>
#define __SMBDIRECT_SOCKET_DISCONNECT(__sc) smbd_disconnect_rdma_connection(__sc)
#include "../common/smbdirect/smbdirect_pdu.h"
#include "smbdirect.h"
#include "cifs_debug.h"
#include "cifsproto.h"
#include "smb2proto.h"

const struct smbdirect_socket_parameters *smbd_get_parameters(struct smbd_connection *conn)
{
	struct smbdirect_socket *sc = &conn->socket;

	return &sc->parameters;
}

static struct smbdirect_recv_io *get_receive_buffer(
		struct smbdirect_socket *sc);
static void put_receive_buffer(
		struct smbdirect_socket *sc,
		struct smbdirect_recv_io *response);
static int allocate_receive_buffers(struct smbdirect_socket *sc, int num_buf);
static void destroy_receive_buffers(struct smbdirect_socket *sc);

static void enqueue_reassembly(
		struct smbdirect_socket *sc,
		struct smbdirect_recv_io *response, int data_length);
static struct smbdirect_recv_io *_get_first_reassembly(
		struct smbdirect_socket *sc);

static int smbd_post_recv(
		struct smbdirect_socket *sc,
		struct smbdirect_recv_io *response);

static int smbd_post_send_empty(struct smbdirect_socket *sc);

static void destroy_mr_list(struct smbdirect_socket *sc);
static int allocate_mr_list(struct smbdirect_socket *sc);

struct smb_extract_to_rdma {
	struct ib_sge		*sge;
	unsigned int		nr_sge;
	unsigned int		max_sge;
	struct ib_device	*device;
	u32			local_dma_lkey;
	enum dma_data_direction direction;
};
static ssize_t smb_extract_iter_to_rdma(struct iov_iter *iter, size_t len,
					struct smb_extract_to_rdma *rdma);

/* Port numbers for SMBD transport */
#define SMB_PORT	445
#define SMBD_PORT	5445

/* Address lookup and resolve timeout in ms */
#define RDMA_RESOLVE_TIMEOUT	5000

/* SMBD negotiation timeout in seconds */
#define SMBD_NEGOTIATE_TIMEOUT	120

/* The timeout to wait for a keepalive message from peer in seconds */
#define KEEPALIVE_RECV_TIMEOUT	5

/* SMBD minimum receive size and fragmented size defined in [MS-SMBD] */
#define SMBD_MIN_RECEIVE_SIZE		128
#define SMBD_MIN_FRAGMENTED_SIZE	131072

/*
 * Default maximum number of RDMA read/write outstanding on this connection
 * This value may be decreased during QP creation, based on hardware limits
 */
#define SMBD_CM_RESPONDER_RESOURCES	32

/* Maximum number of retries on data transfer operations */
#define SMBD_CM_RETRY			6
/* No need to retry on Receiver Not Ready since SMBD manages credits */
#define SMBD_CM_RNR_RETRY		0

/*
 * User configurable initial values per SMBD transport connection
 * as defined in [MS-SMBD] 3.1.1.1
 * These may change after SMBD negotiation
 */
/* The local peer's maximum number of credits to grant to the peer */
int smbd_receive_credit_max = 255;

/* The remote peer's credit request of local peer */
int smbd_send_credit_target = 255;

/* The maximum single message size that can be sent to the remote peer */
int smbd_max_send_size = 1364;

/* The maximum fragmented upper-layer payload receive size supported */
int smbd_max_fragmented_recv_size = 1024 * 1024;

/* The maximum single-message size which can be received */
int smbd_max_receive_size = 1364;
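/*
 * A rough sketch (illustration only; see process_negotiation_response()
 * below for the authoritative code) of how the defaults above are clamped
 * once the peer's SMBD negotiate response arrives:
 *
 *	recv credit target = min(peer credits_requested, smbd_receive_credit_max)
 *	max_recv_size      = peer preferred_send_size
 *	                     (negotiation fails if it exceeds our configured value)
 *	max_send_size      = min(smbd_max_send_size, peer max_receive_size)
 */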
/* The timeout to initiate send of a keepalive message on idle */
int smbd_keep_alive_interval = 120;

/*
 * User configurable initial values for RDMA transport
 * The actual values used may be lower and are limited to hardware capabilities
 */
/* Default maximum number of pages in a single RDMA write/read */
int smbd_max_frmr_depth = 2048;

/* If the payload is smaller than this many bytes, use RDMA send/recv instead of read/write */
int rdma_readwrite_threshold = 4096;

/* Transport logging functions
 * Logging is organized as classes. They can be OR'ed to define the actual
 * logging level via the module parameter smbd_logging_class,
 * e.g. cifs.smbd_logging_class=0xa0 will log all log_rdma_recv() and
 * log_rdma_event()
 */
#define LOG_OUTGOING			0x1
#define LOG_INCOMING			0x2
#define LOG_READ			0x4
#define LOG_WRITE			0x8
#define LOG_RDMA_SEND			0x10
#define LOG_RDMA_RECV			0x20
#define LOG_KEEP_ALIVE			0x40
#define LOG_RDMA_EVENT			0x80
#define LOG_RDMA_MR			0x100
static unsigned int smbd_logging_class;
module_param(smbd_logging_class, uint, 0644);
MODULE_PARM_DESC(smbd_logging_class,
	"Logging class for SMBD transport 0x0 to 0x100");

#define ERR		0x0
#define INFO		0x1
static unsigned int smbd_logging_level = ERR;
module_param(smbd_logging_level, uint, 0644);
MODULE_PARM_DESC(smbd_logging_level,
	"Logging level for SMBD transport, 0 (default): error, 1: info");

#define log_rdma(level, class, fmt, args...)				\
do {									\
	if (level <= smbd_logging_level || class & smbd_logging_class) \
		cifs_dbg(VFS, "%s:%d " fmt, __func__, __LINE__, ##args);\
} while (0)

#define log_outgoing(level, fmt, args...) \
		log_rdma(level, LOG_OUTGOING, fmt, ##args)
#define log_incoming(level, fmt, args...) \
		log_rdma(level, LOG_INCOMING, fmt, ##args)
#define log_read(level, fmt, args...)	log_rdma(level, LOG_READ, fmt, ##args)
#define log_write(level, fmt, args...)	log_rdma(level, LOG_WRITE, fmt, ##args)
#define log_rdma_send(level, fmt, args...) \
		log_rdma(level, LOG_RDMA_SEND, fmt, ##args)
#define log_rdma_recv(level, fmt, args...) \
		log_rdma(level, LOG_RDMA_RECV, fmt, ##args)
#define log_keep_alive(level, fmt, args...) \
		log_rdma(level, LOG_KEEP_ALIVE, fmt, ##args)
#define log_rdma_event(level, fmt, args...) \
		log_rdma(level, LOG_RDMA_EVENT, fmt, ##args)
#define log_rdma_mr(level, fmt, args...) \
		log_rdma(level, LOG_RDMA_MR, fmt, ##args)

static void smbd_disconnect_wake_up_all(struct smbdirect_socket *sc)
{
	/*
	 * Wake up all waiters in all wait queues
	 * in order to notice the broken connection.
174 */ 175 wake_up_all(&sc->status_wait); 176 wake_up_all(&sc->send_io.lcredits.wait_queue); 177 wake_up_all(&sc->send_io.credits.wait_queue); 178 wake_up_all(&sc->send_io.pending.dec_wait_queue); 179 wake_up_all(&sc->send_io.pending.zero_wait_queue); 180 wake_up_all(&sc->recv_io.reassembly.wait_queue); 181 wake_up_all(&sc->mr_io.ready.wait_queue); 182 wake_up_all(&sc->mr_io.cleanup.wait_queue); 183 } 184 185 static void smbd_disconnect_rdma_work(struct work_struct *work) 186 { 187 struct smbdirect_socket *sc = 188 container_of(work, struct smbdirect_socket, disconnect_work); 189 190 if (sc->first_error == 0) 191 sc->first_error = -ECONNABORTED; 192 193 /* 194 * make sure this and other work is not queued again 195 * but here we don't block and avoid 196 * disable[_delayed]_work_sync() 197 */ 198 disable_work(&sc->disconnect_work); 199 disable_work(&sc->recv_io.posted.refill_work); 200 disable_work(&sc->mr_io.recovery_work); 201 disable_work(&sc->idle.immediate_work); 202 disable_delayed_work(&sc->idle.timer_work); 203 204 switch (sc->status) { 205 case SMBDIRECT_SOCKET_NEGOTIATE_NEEDED: 206 case SMBDIRECT_SOCKET_NEGOTIATE_RUNNING: 207 case SMBDIRECT_SOCKET_NEGOTIATE_FAILED: 208 case SMBDIRECT_SOCKET_CONNECTED: 209 case SMBDIRECT_SOCKET_ERROR: 210 sc->status = SMBDIRECT_SOCKET_DISCONNECTING; 211 rdma_disconnect(sc->rdma.cm_id); 212 break; 213 214 case SMBDIRECT_SOCKET_CREATED: 215 case SMBDIRECT_SOCKET_RESOLVE_ADDR_NEEDED: 216 case SMBDIRECT_SOCKET_RESOLVE_ADDR_RUNNING: 217 case SMBDIRECT_SOCKET_RESOLVE_ADDR_FAILED: 218 case SMBDIRECT_SOCKET_RESOLVE_ROUTE_NEEDED: 219 case SMBDIRECT_SOCKET_RESOLVE_ROUTE_RUNNING: 220 case SMBDIRECT_SOCKET_RESOLVE_ROUTE_FAILED: 221 case SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED: 222 case SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING: 223 case SMBDIRECT_SOCKET_RDMA_CONNECT_FAILED: 224 /* 225 * rdma_connect() never reached 226 * RDMA_CM_EVENT_ESTABLISHED 227 */ 228 sc->status = SMBDIRECT_SOCKET_DISCONNECTED; 229 break; 230 231 case SMBDIRECT_SOCKET_DISCONNECTING: 232 case SMBDIRECT_SOCKET_DISCONNECTED: 233 case SMBDIRECT_SOCKET_DESTROYED: 234 break; 235 } 236 237 /* 238 * Wake up all waiters in all wait queues 239 * in order to notice the broken connection. 
240 */ 241 smbd_disconnect_wake_up_all(sc); 242 } 243 244 static void smbd_disconnect_rdma_connection(struct smbdirect_socket *sc) 245 { 246 if (sc->first_error == 0) 247 sc->first_error = -ECONNABORTED; 248 249 /* 250 * make sure other work (than disconnect_work) is 251 * not queued again but here we don't block and avoid 252 * disable[_delayed]_work_sync() 253 */ 254 disable_work(&sc->recv_io.posted.refill_work); 255 disable_work(&sc->mr_io.recovery_work); 256 disable_work(&sc->idle.immediate_work); 257 disable_delayed_work(&sc->idle.timer_work); 258 259 switch (sc->status) { 260 case SMBDIRECT_SOCKET_RESOLVE_ADDR_FAILED: 261 case SMBDIRECT_SOCKET_RESOLVE_ROUTE_FAILED: 262 case SMBDIRECT_SOCKET_RDMA_CONNECT_FAILED: 263 case SMBDIRECT_SOCKET_NEGOTIATE_FAILED: 264 case SMBDIRECT_SOCKET_ERROR: 265 case SMBDIRECT_SOCKET_DISCONNECTING: 266 case SMBDIRECT_SOCKET_DISCONNECTED: 267 case SMBDIRECT_SOCKET_DESTROYED: 268 /* 269 * Keep the current error status 270 */ 271 break; 272 273 case SMBDIRECT_SOCKET_RESOLVE_ADDR_NEEDED: 274 case SMBDIRECT_SOCKET_RESOLVE_ADDR_RUNNING: 275 sc->status = SMBDIRECT_SOCKET_RESOLVE_ADDR_FAILED; 276 break; 277 278 case SMBDIRECT_SOCKET_RESOLVE_ROUTE_NEEDED: 279 case SMBDIRECT_SOCKET_RESOLVE_ROUTE_RUNNING: 280 sc->status = SMBDIRECT_SOCKET_RESOLVE_ROUTE_FAILED; 281 break; 282 283 case SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED: 284 case SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING: 285 sc->status = SMBDIRECT_SOCKET_RDMA_CONNECT_FAILED; 286 break; 287 288 case SMBDIRECT_SOCKET_NEGOTIATE_NEEDED: 289 case SMBDIRECT_SOCKET_NEGOTIATE_RUNNING: 290 sc->status = SMBDIRECT_SOCKET_NEGOTIATE_FAILED; 291 break; 292 293 case SMBDIRECT_SOCKET_CREATED: 294 sc->status = SMBDIRECT_SOCKET_DISCONNECTED; 295 break; 296 297 case SMBDIRECT_SOCKET_CONNECTED: 298 sc->status = SMBDIRECT_SOCKET_ERROR; 299 break; 300 } 301 302 /* 303 * Wake up all waiters in all wait queues 304 * in order to notice the broken connection. 
305 */ 306 smbd_disconnect_wake_up_all(sc); 307 308 queue_work(sc->workqueue, &sc->disconnect_work); 309 } 310 311 /* Upcall from RDMA CM */ 312 static int smbd_conn_upcall( 313 struct rdma_cm_id *id, struct rdma_cm_event *event) 314 { 315 struct smbdirect_socket *sc = id->context; 316 struct smbdirect_socket_parameters *sp = &sc->parameters; 317 const char *event_name = rdma_event_msg(event->event); 318 u8 peer_initiator_depth; 319 u8 peer_responder_resources; 320 321 log_rdma_event(INFO, "event=%s status=%d\n", 322 event_name, event->status); 323 324 switch (event->event) { 325 case RDMA_CM_EVENT_ADDR_RESOLVED: 326 if (SMBDIRECT_CHECK_STATUS_DISCONNECT(sc, SMBDIRECT_SOCKET_RESOLVE_ADDR_RUNNING)) 327 break; 328 sc->status = SMBDIRECT_SOCKET_RESOLVE_ROUTE_NEEDED; 329 wake_up(&sc->status_wait); 330 break; 331 332 case RDMA_CM_EVENT_ROUTE_RESOLVED: 333 if (SMBDIRECT_CHECK_STATUS_DISCONNECT(sc, SMBDIRECT_SOCKET_RESOLVE_ROUTE_RUNNING)) 334 break; 335 sc->status = SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED; 336 wake_up(&sc->status_wait); 337 break; 338 339 case RDMA_CM_EVENT_ADDR_ERROR: 340 log_rdma_event(ERR, "connecting failed event=%s\n", event_name); 341 sc->status = SMBDIRECT_SOCKET_RESOLVE_ADDR_FAILED; 342 smbd_disconnect_rdma_work(&sc->disconnect_work); 343 break; 344 345 case RDMA_CM_EVENT_ROUTE_ERROR: 346 log_rdma_event(ERR, "connecting failed event=%s\n", event_name); 347 sc->status = SMBDIRECT_SOCKET_RESOLVE_ROUTE_FAILED; 348 smbd_disconnect_rdma_work(&sc->disconnect_work); 349 break; 350 351 case RDMA_CM_EVENT_ESTABLISHED: 352 log_rdma_event(INFO, "connected event=%s\n", event_name); 353 354 /* 355 * Here we work around an inconsistency between 356 * iWarp and other devices (at least rxe and irdma using RoCEv2) 357 */ 358 if (rdma_protocol_iwarp(id->device, id->port_num)) { 359 /* 360 * iWarp devices report the peer's values 361 * with the perspective of the peer here. 362 * Tested with siw and irdma (in iwarp mode) 363 * We need to change to our perspective here, 364 * so we need to switch the values. 365 */ 366 peer_initiator_depth = event->param.conn.responder_resources; 367 peer_responder_resources = event->param.conn.initiator_depth; 368 } else { 369 /* 370 * Non iWarp devices report the peer's values 371 * already changed to our perspective here. 372 * Tested with rxe and irdma (in roce mode). 373 */ 374 peer_initiator_depth = event->param.conn.initiator_depth; 375 peer_responder_resources = event->param.conn.responder_resources; 376 } 377 if (rdma_protocol_iwarp(id->device, id->port_num) && 378 event->param.conn.private_data_len == 8) { 379 /* 380 * Legacy clients with only iWarp MPA v1 support 381 * need a private blob in order to negotiate 382 * the IRD/ORD values. 383 */ 384 const __be32 *ird_ord_hdr = event->param.conn.private_data; 385 u32 ird32 = be32_to_cpu(ird_ord_hdr[0]); 386 u32 ord32 = be32_to_cpu(ird_ord_hdr[1]); 387 388 /* 389 * cifs.ko sends the legacy IRD/ORD negotiation 390 * event if iWarp MPA v2 was used. 391 * 392 * Here we check that the values match and only 393 * mark the client as legacy if they don't match. 394 */ 395 if ((u32)event->param.conn.initiator_depth != ird32 || 396 (u32)event->param.conn.responder_resources != ord32) { 397 /* 398 * There are broken clients (old cifs.ko) 399 * using little endian and also 400 * struct rdma_conn_param only uses u8 401 * for initiator_depth and responder_resources, 402 * so we truncate the value to U8_MAX. 
403 * 404 * smb_direct_accept_client() will then 405 * do the real negotiation in order to 406 * select the minimum between client and 407 * server. 408 */ 409 ird32 = min_t(u32, ird32, U8_MAX); 410 ord32 = min_t(u32, ord32, U8_MAX); 411 412 sc->rdma.legacy_iwarp = true; 413 peer_initiator_depth = (u8)ird32; 414 peer_responder_resources = (u8)ord32; 415 } 416 } 417 418 /* 419 * negotiate the value by using the minimum 420 * between client and server if the client provided 421 * non 0 values. 422 */ 423 if (peer_initiator_depth != 0) 424 sp->initiator_depth = 425 min_t(u8, sp->initiator_depth, 426 peer_initiator_depth); 427 if (peer_responder_resources != 0) 428 sp->responder_resources = 429 min_t(u8, sp->responder_resources, 430 peer_responder_resources); 431 432 if (SMBDIRECT_CHECK_STATUS_DISCONNECT(sc, SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING)) 433 break; 434 sc->status = SMBDIRECT_SOCKET_NEGOTIATE_NEEDED; 435 wake_up(&sc->status_wait); 436 break; 437 438 case RDMA_CM_EVENT_CONNECT_ERROR: 439 case RDMA_CM_EVENT_UNREACHABLE: 440 case RDMA_CM_EVENT_REJECTED: 441 log_rdma_event(ERR, "connecting failed event=%s\n", event_name); 442 sc->status = SMBDIRECT_SOCKET_RDMA_CONNECT_FAILED; 443 smbd_disconnect_rdma_work(&sc->disconnect_work); 444 break; 445 446 case RDMA_CM_EVENT_DEVICE_REMOVAL: 447 case RDMA_CM_EVENT_DISCONNECTED: 448 /* This happens when we fail the negotiation */ 449 if (sc->status == SMBDIRECT_SOCKET_NEGOTIATE_FAILED) { 450 log_rdma_event(ERR, "event=%s during negotiation\n", event_name); 451 } 452 453 sc->status = SMBDIRECT_SOCKET_DISCONNECTED; 454 smbd_disconnect_rdma_work(&sc->disconnect_work); 455 break; 456 457 default: 458 log_rdma_event(ERR, "unexpected event=%s status=%d\n", 459 event_name, event->status); 460 break; 461 } 462 463 return 0; 464 } 465 466 /* Upcall from RDMA QP */ 467 static void 468 smbd_qp_async_error_upcall(struct ib_event *event, void *context) 469 { 470 struct smbdirect_socket *sc = context; 471 472 log_rdma_event(ERR, "%s on device %s socket %p\n", 473 ib_event_msg(event->event), event->device->name, sc); 474 475 switch (event->event) { 476 case IB_EVENT_CQ_ERR: 477 case IB_EVENT_QP_FATAL: 478 smbd_disconnect_rdma_connection(sc); 479 break; 480 481 default: 482 break; 483 } 484 } 485 486 static inline void *smbdirect_send_io_payload(struct smbdirect_send_io *request) 487 { 488 return (void *)request->packet; 489 } 490 491 static inline void *smbdirect_recv_io_payload(struct smbdirect_recv_io *response) 492 { 493 return (void *)response->packet; 494 } 495 496 /* Called when a RDMA send is done */ 497 static void send_done(struct ib_cq *cq, struct ib_wc *wc) 498 { 499 int i; 500 struct smbdirect_send_io *request = 501 container_of(wc->wr_cqe, struct smbdirect_send_io, cqe); 502 struct smbdirect_socket *sc = request->socket; 503 int lcredits = 0; 504 505 log_rdma_send(INFO, "smbdirect_send_io 0x%p completed wc->status=%s\n", 506 request, ib_wc_status_msg(wc->status)); 507 508 for (i = 0; i < request->num_sge; i++) 509 ib_dma_unmap_single(sc->ib.dev, 510 request->sge[i].addr, 511 request->sge[i].length, 512 DMA_TO_DEVICE); 513 mempool_free(request, sc->send_io.mem.pool); 514 lcredits += 1; 515 516 if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_SEND) { 517 if (wc->status != IB_WC_WR_FLUSH_ERR) 518 log_rdma_send(ERR, "wc->status=%s wc->opcode=%d\n", 519 ib_wc_status_msg(wc->status), wc->opcode); 520 smbd_disconnect_rdma_connection(sc); 521 return; 522 } 523 524 atomic_add(lcredits, &sc->send_io.lcredits.count); 525 
wake_up(&sc->send_io.lcredits.wait_queue); 526 527 if (atomic_dec_and_test(&sc->send_io.pending.count)) 528 wake_up(&sc->send_io.pending.zero_wait_queue); 529 530 wake_up(&sc->send_io.pending.dec_wait_queue); 531 } 532 533 static void dump_smbdirect_negotiate_resp(struct smbdirect_negotiate_resp *resp) 534 { 535 log_rdma_event(INFO, "resp message min_version %u max_version %u negotiated_version %u credits_requested %u credits_granted %u status %u max_readwrite_size %u preferred_send_size %u max_receive_size %u max_fragmented_size %u\n", 536 resp->min_version, resp->max_version, 537 resp->negotiated_version, resp->credits_requested, 538 resp->credits_granted, resp->status, 539 resp->max_readwrite_size, resp->preferred_send_size, 540 resp->max_receive_size, resp->max_fragmented_size); 541 } 542 543 /* 544 * Process a negotiation response message, according to [MS-SMBD]3.1.5.7 545 * response, packet_length: the negotiation response message 546 * return value: true if negotiation is a success, false if failed 547 */ 548 static bool process_negotiation_response( 549 struct smbdirect_recv_io *response, int packet_length) 550 { 551 struct smbdirect_socket *sc = response->socket; 552 struct smbdirect_socket_parameters *sp = &sc->parameters; 553 struct smbdirect_negotiate_resp *packet = smbdirect_recv_io_payload(response); 554 555 if (packet_length < sizeof(struct smbdirect_negotiate_resp)) { 556 log_rdma_event(ERR, 557 "error: packet_length=%d\n", packet_length); 558 return false; 559 } 560 561 if (le16_to_cpu(packet->negotiated_version) != SMBDIRECT_V1) { 562 log_rdma_event(ERR, "error: negotiated_version=%x\n", 563 le16_to_cpu(packet->negotiated_version)); 564 return false; 565 } 566 567 if (packet->credits_requested == 0) { 568 log_rdma_event(ERR, "error: credits_requested==0\n"); 569 return false; 570 } 571 sc->recv_io.credits.target = le16_to_cpu(packet->credits_requested); 572 sc->recv_io.credits.target = min_t(u16, sc->recv_io.credits.target, sp->recv_credit_max); 573 574 if (packet->credits_granted == 0) { 575 log_rdma_event(ERR, "error: credits_granted==0\n"); 576 return false; 577 } 578 atomic_set(&sc->send_io.lcredits.count, sp->send_credit_target); 579 atomic_set(&sc->send_io.credits.count, le16_to_cpu(packet->credits_granted)); 580 581 if (le32_to_cpu(packet->preferred_send_size) > sp->max_recv_size) { 582 log_rdma_event(ERR, "error: preferred_send_size=%d\n", 583 le32_to_cpu(packet->preferred_send_size)); 584 return false; 585 } 586 sp->max_recv_size = le32_to_cpu(packet->preferred_send_size); 587 588 if (le32_to_cpu(packet->max_receive_size) < SMBD_MIN_RECEIVE_SIZE) { 589 log_rdma_event(ERR, "error: max_receive_size=%d\n", 590 le32_to_cpu(packet->max_receive_size)); 591 return false; 592 } 593 sp->max_send_size = min_t(u32, sp->max_send_size, 594 le32_to_cpu(packet->max_receive_size)); 595 596 if (le32_to_cpu(packet->max_fragmented_size) < 597 SMBD_MIN_FRAGMENTED_SIZE) { 598 log_rdma_event(ERR, "error: max_fragmented_size=%d\n", 599 le32_to_cpu(packet->max_fragmented_size)); 600 return false; 601 } 602 sp->max_fragmented_send_size = 603 le32_to_cpu(packet->max_fragmented_size); 604 605 606 sp->max_read_write_size = min_t(u32, 607 le32_to_cpu(packet->max_readwrite_size), 608 sp->max_frmr_depth * PAGE_SIZE); 609 sp->max_frmr_depth = sp->max_read_write_size / PAGE_SIZE; 610 611 sc->recv_io.expected = SMBDIRECT_EXPECT_DATA_TRANSFER; 612 return true; 613 } 614 615 static void smbd_post_send_credits(struct work_struct *work) 616 { 617 int rc; 618 struct smbdirect_recv_io *response; 619 
struct smbdirect_socket *sc = 620 container_of(work, struct smbdirect_socket, recv_io.posted.refill_work); 621 622 if (sc->status != SMBDIRECT_SOCKET_CONNECTED) { 623 return; 624 } 625 626 if (sc->recv_io.credits.target > 627 atomic_read(&sc->recv_io.credits.count)) { 628 while (true) { 629 response = get_receive_buffer(sc); 630 if (!response) 631 break; 632 633 response->first_segment = false; 634 rc = smbd_post_recv(sc, response); 635 if (rc) { 636 log_rdma_recv(ERR, 637 "post_recv failed rc=%d\n", rc); 638 put_receive_buffer(sc, response); 639 break; 640 } 641 642 atomic_inc(&sc->recv_io.posted.count); 643 } 644 } 645 646 /* Promptly send an immediate packet as defined in [MS-SMBD] 3.1.1.1 */ 647 if (atomic_read(&sc->recv_io.credits.count) < 648 sc->recv_io.credits.target - 1) { 649 log_keep_alive(INFO, "schedule send of an empty message\n"); 650 queue_work(sc->workqueue, &sc->idle.immediate_work); 651 } 652 } 653 654 /* Called from softirq, when recv is done */ 655 static void recv_done(struct ib_cq *cq, struct ib_wc *wc) 656 { 657 struct smbdirect_data_transfer *data_transfer; 658 struct smbdirect_recv_io *response = 659 container_of(wc->wr_cqe, struct smbdirect_recv_io, cqe); 660 struct smbdirect_socket *sc = response->socket; 661 struct smbdirect_socket_parameters *sp = &sc->parameters; 662 u16 old_recv_credit_target; 663 u32 data_offset = 0; 664 u32 data_length = 0; 665 u32 remaining_data_length = 0; 666 bool negotiate_done = false; 667 668 log_rdma_recv(INFO, 669 "response=0x%p type=%d wc status=%s wc opcode %d byte_len=%d pkey_index=%u\n", 670 response, sc->recv_io.expected, 671 ib_wc_status_msg(wc->status), wc->opcode, 672 wc->byte_len, wc->pkey_index); 673 674 if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_RECV) { 675 if (wc->status != IB_WC_WR_FLUSH_ERR) 676 log_rdma_recv(ERR, "wc->status=%s opcode=%d\n", 677 ib_wc_status_msg(wc->status), wc->opcode); 678 goto error; 679 } 680 681 ib_dma_sync_single_for_cpu( 682 wc->qp->device, 683 response->sge.addr, 684 response->sge.length, 685 DMA_FROM_DEVICE); 686 687 /* 688 * Reset timer to the keepalive interval in 689 * order to trigger our next keepalive message. 
690 */ 691 sc->idle.keepalive = SMBDIRECT_KEEPALIVE_NONE; 692 mod_delayed_work(sc->workqueue, &sc->idle.timer_work, 693 msecs_to_jiffies(sp->keepalive_interval_msec)); 694 695 switch (sc->recv_io.expected) { 696 /* SMBD negotiation response */ 697 case SMBDIRECT_EXPECT_NEGOTIATE_REP: 698 dump_smbdirect_negotiate_resp(smbdirect_recv_io_payload(response)); 699 sc->recv_io.reassembly.full_packet_received = true; 700 negotiate_done = 701 process_negotiation_response(response, wc->byte_len); 702 put_receive_buffer(sc, response); 703 if (SMBDIRECT_CHECK_STATUS_WARN(sc, SMBDIRECT_SOCKET_NEGOTIATE_RUNNING)) 704 negotiate_done = false; 705 if (!negotiate_done) { 706 sc->status = SMBDIRECT_SOCKET_NEGOTIATE_FAILED; 707 smbd_disconnect_rdma_connection(sc); 708 } else { 709 sc->status = SMBDIRECT_SOCKET_CONNECTED; 710 wake_up(&sc->status_wait); 711 } 712 713 return; 714 715 /* SMBD data transfer packet */ 716 case SMBDIRECT_EXPECT_DATA_TRANSFER: 717 data_transfer = smbdirect_recv_io_payload(response); 718 719 if (wc->byte_len < 720 offsetof(struct smbdirect_data_transfer, padding)) 721 goto error; 722 723 remaining_data_length = le32_to_cpu(data_transfer->remaining_data_length); 724 data_offset = le32_to_cpu(data_transfer->data_offset); 725 data_length = le32_to_cpu(data_transfer->data_length); 726 if (wc->byte_len < data_offset || 727 (u64)wc->byte_len < (u64)data_offset + data_length) 728 goto error; 729 730 if (remaining_data_length > sp->max_fragmented_recv_size || 731 data_length > sp->max_fragmented_recv_size || 732 (u64)remaining_data_length + (u64)data_length > (u64)sp->max_fragmented_recv_size) 733 goto error; 734 735 if (data_length) { 736 if (sc->recv_io.reassembly.full_packet_received) 737 response->first_segment = true; 738 739 if (le32_to_cpu(data_transfer->remaining_data_length)) 740 sc->recv_io.reassembly.full_packet_received = false; 741 else 742 sc->recv_io.reassembly.full_packet_received = true; 743 } 744 745 atomic_dec(&sc->recv_io.posted.count); 746 atomic_dec(&sc->recv_io.credits.count); 747 old_recv_credit_target = sc->recv_io.credits.target; 748 sc->recv_io.credits.target = 749 le16_to_cpu(data_transfer->credits_requested); 750 sc->recv_io.credits.target = 751 min_t(u16, sc->recv_io.credits.target, sp->recv_credit_max); 752 sc->recv_io.credits.target = 753 max_t(u16, sc->recv_io.credits.target, 1); 754 if (le16_to_cpu(data_transfer->credits_granted)) { 755 atomic_add(le16_to_cpu(data_transfer->credits_granted), 756 &sc->send_io.credits.count); 757 /* 758 * We have new send credits granted from remote peer 759 * If any sender is waiting for credits, unblock it 760 */ 761 wake_up(&sc->send_io.credits.wait_queue); 762 } 763 764 log_incoming(INFO, "data flags %d data_offset %d data_length %d remaining_data_length %d\n", 765 le16_to_cpu(data_transfer->flags), 766 le32_to_cpu(data_transfer->data_offset), 767 le32_to_cpu(data_transfer->data_length), 768 le32_to_cpu(data_transfer->remaining_data_length)); 769 770 /* Send an immediate response right away if requested */ 771 if (le16_to_cpu(data_transfer->flags) & 772 SMBDIRECT_FLAG_RESPONSE_REQUESTED) { 773 log_keep_alive(INFO, "schedule send of immediate response\n"); 774 queue_work(sc->workqueue, &sc->idle.immediate_work); 775 } 776 777 /* 778 * If this is a packet with data playload place the data in 779 * reassembly queue and wake up the reading thread 780 */ 781 if (data_length) { 782 if (sc->recv_io.credits.target > old_recv_credit_target) 783 queue_work(sc->workqueue, &sc->recv_io.posted.refill_work); 784 785 
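			/*
			 * Hand the payload to the upper layer:
			 * enqueue_reassembly() publishes data_length with a
			 * write barrier so the reader can check
			 * reassembly.data_length without taking the
			 * reassembly lock, then the reassembly wait queue
			 * is woken below.
			 */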
enqueue_reassembly(sc, response, data_length); 786 wake_up(&sc->recv_io.reassembly.wait_queue); 787 } else 788 put_receive_buffer(sc, response); 789 790 return; 791 792 case SMBDIRECT_EXPECT_NEGOTIATE_REQ: 793 /* Only server... */ 794 break; 795 } 796 797 /* 798 * This is an internal error! 799 */ 800 log_rdma_recv(ERR, "unexpected response type=%d\n", sc->recv_io.expected); 801 WARN_ON_ONCE(sc->recv_io.expected != SMBDIRECT_EXPECT_DATA_TRANSFER); 802 error: 803 put_receive_buffer(sc, response); 804 smbd_disconnect_rdma_connection(sc); 805 } 806 807 static struct rdma_cm_id *smbd_create_id( 808 struct smbdirect_socket *sc, 809 struct sockaddr *dstaddr, int port) 810 { 811 struct smbdirect_socket_parameters *sp = &sc->parameters; 812 struct rdma_cm_id *id; 813 int rc; 814 __be16 *sport; 815 816 id = rdma_create_id(&init_net, smbd_conn_upcall, sc, 817 RDMA_PS_TCP, IB_QPT_RC); 818 if (IS_ERR(id)) { 819 rc = PTR_ERR(id); 820 log_rdma_event(ERR, "rdma_create_id() failed %i\n", rc); 821 return id; 822 } 823 824 if (dstaddr->sa_family == AF_INET6) 825 sport = &((struct sockaddr_in6 *)dstaddr)->sin6_port; 826 else 827 sport = &((struct sockaddr_in *)dstaddr)->sin_port; 828 829 *sport = htons(port); 830 831 WARN_ON_ONCE(sc->status != SMBDIRECT_SOCKET_RESOLVE_ADDR_NEEDED); 832 sc->status = SMBDIRECT_SOCKET_RESOLVE_ADDR_RUNNING; 833 rc = rdma_resolve_addr(id, NULL, (struct sockaddr *)dstaddr, 834 sp->resolve_addr_timeout_msec); 835 if (rc) { 836 log_rdma_event(ERR, "rdma_resolve_addr() failed %i\n", rc); 837 goto out; 838 } 839 rc = wait_event_interruptible_timeout( 840 sc->status_wait, 841 sc->status != SMBDIRECT_SOCKET_RESOLVE_ADDR_RUNNING, 842 msecs_to_jiffies(sp->resolve_addr_timeout_msec)); 843 /* e.g. if interrupted returns -ERESTARTSYS */ 844 if (rc < 0) { 845 log_rdma_event(ERR, "rdma_resolve_addr timeout rc: %i\n", rc); 846 goto out; 847 } 848 if (sc->status == SMBDIRECT_SOCKET_RESOLVE_ADDR_RUNNING) { 849 rc = -ETIMEDOUT; 850 log_rdma_event(ERR, "rdma_resolve_addr() completed %i\n", rc); 851 goto out; 852 } 853 if (sc->status != SMBDIRECT_SOCKET_RESOLVE_ROUTE_NEEDED) { 854 rc = -EHOSTUNREACH; 855 log_rdma_event(ERR, "rdma_resolve_addr() completed %i\n", rc); 856 goto out; 857 } 858 859 WARN_ON_ONCE(sc->status != SMBDIRECT_SOCKET_RESOLVE_ROUTE_NEEDED); 860 sc->status = SMBDIRECT_SOCKET_RESOLVE_ROUTE_RUNNING; 861 rc = rdma_resolve_route(id, sp->resolve_route_timeout_msec); 862 if (rc) { 863 log_rdma_event(ERR, "rdma_resolve_route() failed %i\n", rc); 864 goto out; 865 } 866 rc = wait_event_interruptible_timeout( 867 sc->status_wait, 868 sc->status != SMBDIRECT_SOCKET_RESOLVE_ROUTE_RUNNING, 869 msecs_to_jiffies(sp->resolve_route_timeout_msec)); 870 /* e.g. 
if interrupted returns -ERESTARTSYS */ 871 if (rc < 0) { 872 log_rdma_event(ERR, "rdma_resolve_addr timeout rc: %i\n", rc); 873 goto out; 874 } 875 if (sc->status == SMBDIRECT_SOCKET_RESOLVE_ROUTE_RUNNING) { 876 rc = -ETIMEDOUT; 877 log_rdma_event(ERR, "rdma_resolve_route() completed %i\n", rc); 878 goto out; 879 } 880 if (sc->status != SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED) { 881 rc = -ENETUNREACH; 882 log_rdma_event(ERR, "rdma_resolve_route() completed %i\n", rc); 883 goto out; 884 } 885 886 return id; 887 888 out: 889 rdma_destroy_id(id); 890 return ERR_PTR(rc); 891 } 892 893 /* 894 * Test if FRWR (Fast Registration Work Requests) is supported on the device 895 * This implementation requires FRWR on RDMA read/write 896 * return value: true if it is supported 897 */ 898 static bool frwr_is_supported(struct ib_device_attr *attrs) 899 { 900 if (!(attrs->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS)) 901 return false; 902 if (attrs->max_fast_reg_page_list_len == 0) 903 return false; 904 return true; 905 } 906 907 static int smbd_ia_open( 908 struct smbdirect_socket *sc, 909 struct sockaddr *dstaddr, int port) 910 { 911 struct smbdirect_socket_parameters *sp = &sc->parameters; 912 int rc; 913 914 WARN_ON_ONCE(sc->status != SMBDIRECT_SOCKET_CREATED); 915 sc->status = SMBDIRECT_SOCKET_RESOLVE_ADDR_NEEDED; 916 917 sc->rdma.cm_id = smbd_create_id(sc, dstaddr, port); 918 if (IS_ERR(sc->rdma.cm_id)) { 919 rc = PTR_ERR(sc->rdma.cm_id); 920 goto out1; 921 } 922 sc->ib.dev = sc->rdma.cm_id->device; 923 924 if (!frwr_is_supported(&sc->ib.dev->attrs)) { 925 log_rdma_event(ERR, "Fast Registration Work Requests (FRWR) is not supported\n"); 926 log_rdma_event(ERR, "Device capability flags = %llx max_fast_reg_page_list_len = %u\n", 927 sc->ib.dev->attrs.device_cap_flags, 928 sc->ib.dev->attrs.max_fast_reg_page_list_len); 929 rc = -EPROTONOSUPPORT; 930 goto out2; 931 } 932 sp->max_frmr_depth = min_t(u32, 933 sp->max_frmr_depth, 934 sc->ib.dev->attrs.max_fast_reg_page_list_len); 935 sc->mr_io.type = IB_MR_TYPE_MEM_REG; 936 if (sc->ib.dev->attrs.kernel_cap_flags & IBK_SG_GAPS_REG) 937 sc->mr_io.type = IB_MR_TYPE_SG_GAPS; 938 939 return 0; 940 941 out2: 942 rdma_destroy_id(sc->rdma.cm_id); 943 sc->rdma.cm_id = NULL; 944 945 out1: 946 return rc; 947 } 948 949 /* 950 * Send a negotiation request message to the peer 951 * The negotiation procedure is in [MS-SMBD] 3.1.5.2 and 3.1.5.3 952 * After negotiation, the transport is connected and ready for 953 * carrying upper layer SMB payload 954 */ 955 static int smbd_post_send_negotiate_req(struct smbdirect_socket *sc) 956 { 957 struct smbdirect_socket_parameters *sp = &sc->parameters; 958 struct ib_send_wr send_wr; 959 int rc = -ENOMEM; 960 struct smbdirect_send_io *request; 961 struct smbdirect_negotiate_req *packet; 962 963 request = mempool_alloc(sc->send_io.mem.pool, GFP_KERNEL); 964 if (!request) 965 return rc; 966 967 request->socket = sc; 968 969 packet = smbdirect_send_io_payload(request); 970 packet->min_version = cpu_to_le16(SMBDIRECT_V1); 971 packet->max_version = cpu_to_le16(SMBDIRECT_V1); 972 packet->reserved = 0; 973 packet->credits_requested = cpu_to_le16(sp->send_credit_target); 974 packet->preferred_send_size = cpu_to_le32(sp->max_send_size); 975 packet->max_receive_size = cpu_to_le32(sp->max_recv_size); 976 packet->max_fragmented_size = 977 cpu_to_le32(sp->max_fragmented_recv_size); 978 979 request->num_sge = 1; 980 request->sge[0].addr = ib_dma_map_single( 981 sc->ib.dev, (void *)packet, 982 sizeof(*packet), DMA_TO_DEVICE); 983 if 
(ib_dma_mapping_error(sc->ib.dev, request->sge[0].addr)) {
		rc = -EIO;
		goto dma_mapping_failed;
	}

	request->sge[0].length = sizeof(*packet);
	request->sge[0].lkey = sc->ib.pd->local_dma_lkey;

	ib_dma_sync_single_for_device(
		sc->ib.dev, request->sge[0].addr,
		request->sge[0].length, DMA_TO_DEVICE);

	request->cqe.done = send_done;

	send_wr.next = NULL;
	send_wr.wr_cqe = &request->cqe;
	send_wr.sg_list = request->sge;
	send_wr.num_sge = request->num_sge;
	send_wr.opcode = IB_WR_SEND;
	send_wr.send_flags = IB_SEND_SIGNALED;

	log_rdma_send(INFO, "sge addr=0x%llx length=%u lkey=0x%x\n",
		request->sge[0].addr,
		request->sge[0].length, request->sge[0].lkey);

	atomic_inc(&sc->send_io.pending.count);
	rc = ib_post_send(sc->ib.qp, &send_wr, NULL);
	if (!rc)
		return 0;

	/* if we reach here, post send failed */
	log_rdma_send(ERR, "ib_post_send failed rc=%d\n", rc);
	atomic_dec(&sc->send_io.pending.count);
	ib_dma_unmap_single(sc->ib.dev, request->sge[0].addr,
		request->sge[0].length, DMA_TO_DEVICE);

	smbd_disconnect_rdma_connection(sc);

dma_mapping_failed:
	mempool_free(request, sc->send_io.mem.pool);
	return rc;
}

/*
 * Extend the credits to remote peer
 * This implements [MS-SMBD] 3.1.5.9
 * The idea is that we should extend credits to the remote peer as quickly
 * as allowed, to maintain data flow. We allocate as many receive buffers
 * as possible, and extend the receive credits to the remote peer.
 * return value: the new credits being granted.
 */
static int manage_credits_prior_sending(struct smbdirect_socket *sc)
{
	int new_credits;

	if (atomic_read(&sc->recv_io.credits.count) >= sc->recv_io.credits.target)
		return 0;

	new_credits = atomic_read(&sc->recv_io.posted.count);
	if (new_credits == 0)
		return 0;

	new_credits -= atomic_read(&sc->recv_io.credits.count);
	if (new_credits <= 0)
		return 0;

	return new_credits;
}

/*
 * Check if we need to send a KEEP_ALIVE message
 * The idle connection timer triggers a KEEP_ALIVE message when it expires.
 * SMBDIRECT_FLAG_RESPONSE_REQUESTED is set in the message flag to have the peer send
 * back a response.
1057 * return value: 1058 * 1 if SMBDIRECT_FLAG_RESPONSE_REQUESTED needs to be set 1059 * 0: otherwise 1060 */ 1061 static int manage_keep_alive_before_sending(struct smbdirect_socket *sc) 1062 { 1063 struct smbdirect_socket_parameters *sp = &sc->parameters; 1064 1065 if (sc->idle.keepalive == SMBDIRECT_KEEPALIVE_PENDING) { 1066 sc->idle.keepalive = SMBDIRECT_KEEPALIVE_SENT; 1067 /* 1068 * Now use the keepalive timeout (instead of keepalive interval) 1069 * in order to wait for a response 1070 */ 1071 mod_delayed_work(sc->workqueue, &sc->idle.timer_work, 1072 msecs_to_jiffies(sp->keepalive_timeout_msec)); 1073 return 1; 1074 } 1075 return 0; 1076 } 1077 1078 /* Post the send request */ 1079 static int smbd_post_send(struct smbdirect_socket *sc, 1080 struct smbdirect_send_io *request) 1081 { 1082 struct ib_send_wr send_wr; 1083 int rc, i; 1084 1085 for (i = 0; i < request->num_sge; i++) { 1086 log_rdma_send(INFO, 1087 "rdma_request sge[%d] addr=0x%llx length=%u\n", 1088 i, request->sge[i].addr, request->sge[i].length); 1089 ib_dma_sync_single_for_device( 1090 sc->ib.dev, 1091 request->sge[i].addr, 1092 request->sge[i].length, 1093 DMA_TO_DEVICE); 1094 } 1095 1096 request->cqe.done = send_done; 1097 1098 send_wr.next = NULL; 1099 send_wr.wr_cqe = &request->cqe; 1100 send_wr.sg_list = request->sge; 1101 send_wr.num_sge = request->num_sge; 1102 send_wr.opcode = IB_WR_SEND; 1103 send_wr.send_flags = IB_SEND_SIGNALED; 1104 1105 rc = ib_post_send(sc->ib.qp, &send_wr, NULL); 1106 if (rc) { 1107 log_rdma_send(ERR, "ib_post_send failed rc=%d\n", rc); 1108 smbd_disconnect_rdma_connection(sc); 1109 rc = -EAGAIN; 1110 } 1111 1112 return rc; 1113 } 1114 1115 static int smbd_post_send_iter(struct smbdirect_socket *sc, 1116 struct iov_iter *iter, 1117 int *_remaining_data_length) 1118 { 1119 struct smbdirect_socket_parameters *sp = &sc->parameters; 1120 int i, rc; 1121 int header_length; 1122 int data_length; 1123 struct smbdirect_send_io *request; 1124 struct smbdirect_data_transfer *packet; 1125 int new_credits = 0; 1126 1127 wait_lcredit: 1128 /* Wait for local send credits */ 1129 rc = wait_event_interruptible(sc->send_io.lcredits.wait_queue, 1130 atomic_read(&sc->send_io.lcredits.count) > 0 || 1131 sc->status != SMBDIRECT_SOCKET_CONNECTED); 1132 if (rc) 1133 goto err_wait_lcredit; 1134 1135 if (sc->status != SMBDIRECT_SOCKET_CONNECTED) { 1136 log_outgoing(ERR, "disconnected not sending on wait_credit\n"); 1137 rc = -EAGAIN; 1138 goto err_wait_lcredit; 1139 } 1140 if (unlikely(atomic_dec_return(&sc->send_io.lcredits.count) < 0)) { 1141 atomic_inc(&sc->send_io.lcredits.count); 1142 goto wait_lcredit; 1143 } 1144 1145 wait_credit: 1146 /* Wait for send credits. 
A SMBD packet needs one credit */ 1147 rc = wait_event_interruptible(sc->send_io.credits.wait_queue, 1148 atomic_read(&sc->send_io.credits.count) > 0 || 1149 sc->status != SMBDIRECT_SOCKET_CONNECTED); 1150 if (rc) 1151 goto err_wait_credit; 1152 1153 if (sc->status != SMBDIRECT_SOCKET_CONNECTED) { 1154 log_outgoing(ERR, "disconnected not sending on wait_credit\n"); 1155 rc = -EAGAIN; 1156 goto err_wait_credit; 1157 } 1158 if (unlikely(atomic_dec_return(&sc->send_io.credits.count) < 0)) { 1159 atomic_inc(&sc->send_io.credits.count); 1160 goto wait_credit; 1161 } 1162 1163 request = mempool_alloc(sc->send_io.mem.pool, GFP_KERNEL); 1164 if (!request) { 1165 rc = -ENOMEM; 1166 goto err_alloc; 1167 } 1168 1169 request->socket = sc; 1170 memset(request->sge, 0, sizeof(request->sge)); 1171 1172 /* Map the packet to DMA */ 1173 header_length = sizeof(struct smbdirect_data_transfer); 1174 /* If this is a packet without payload, don't send padding */ 1175 if (!iter) 1176 header_length = offsetof(struct smbdirect_data_transfer, padding); 1177 1178 packet = smbdirect_send_io_payload(request); 1179 request->sge[0].addr = ib_dma_map_single(sc->ib.dev, 1180 (void *)packet, 1181 header_length, 1182 DMA_TO_DEVICE); 1183 if (ib_dma_mapping_error(sc->ib.dev, request->sge[0].addr)) { 1184 rc = -EIO; 1185 goto err_dma; 1186 } 1187 1188 request->sge[0].length = header_length; 1189 request->sge[0].lkey = sc->ib.pd->local_dma_lkey; 1190 request->num_sge = 1; 1191 1192 /* Fill in the data payload to find out how much data we can add */ 1193 if (iter) { 1194 struct smb_extract_to_rdma extract = { 1195 .nr_sge = request->num_sge, 1196 .max_sge = SMBDIRECT_SEND_IO_MAX_SGE, 1197 .sge = request->sge, 1198 .device = sc->ib.dev, 1199 .local_dma_lkey = sc->ib.pd->local_dma_lkey, 1200 .direction = DMA_TO_DEVICE, 1201 }; 1202 size_t payload_len = umin(*_remaining_data_length, 1203 sp->max_send_size - sizeof(*packet)); 1204 1205 rc = smb_extract_iter_to_rdma(iter, payload_len, 1206 &extract); 1207 if (rc < 0) 1208 goto err_dma; 1209 data_length = rc; 1210 request->num_sge = extract.nr_sge; 1211 *_remaining_data_length -= data_length; 1212 } else { 1213 data_length = 0; 1214 } 1215 1216 /* Fill in the packet header */ 1217 packet->credits_requested = cpu_to_le16(sp->send_credit_target); 1218 1219 new_credits = manage_credits_prior_sending(sc); 1220 atomic_add(new_credits, &sc->recv_io.credits.count); 1221 packet->credits_granted = cpu_to_le16(new_credits); 1222 1223 packet->flags = 0; 1224 if (manage_keep_alive_before_sending(sc)) 1225 packet->flags |= cpu_to_le16(SMBDIRECT_FLAG_RESPONSE_REQUESTED); 1226 1227 packet->reserved = 0; 1228 if (!data_length) 1229 packet->data_offset = 0; 1230 else 1231 packet->data_offset = cpu_to_le32(24); 1232 packet->data_length = cpu_to_le32(data_length); 1233 packet->remaining_data_length = cpu_to_le32(*_remaining_data_length); 1234 packet->padding = 0; 1235 1236 log_outgoing(INFO, "credits_requested=%d credits_granted=%d data_offset=%d data_length=%d remaining_data_length=%d\n", 1237 le16_to_cpu(packet->credits_requested), 1238 le16_to_cpu(packet->credits_granted), 1239 le32_to_cpu(packet->data_offset), 1240 le32_to_cpu(packet->data_length), 1241 le32_to_cpu(packet->remaining_data_length)); 1242 1243 /* 1244 * Now that we got a local and a remote credit 1245 * we add us as pending 1246 */ 1247 atomic_inc(&sc->send_io.pending.count); 1248 1249 rc = smbd_post_send(sc, request); 1250 if (!rc) 1251 return 0; 1252 1253 if (atomic_dec_and_test(&sc->send_io.pending.count)) 1254 
wake_up(&sc->send_io.pending.zero_wait_queue); 1255 1256 wake_up(&sc->send_io.pending.dec_wait_queue); 1257 1258 err_dma: 1259 for (i = 0; i < request->num_sge; i++) 1260 if (request->sge[i].addr) 1261 ib_dma_unmap_single(sc->ib.dev, 1262 request->sge[i].addr, 1263 request->sge[i].length, 1264 DMA_TO_DEVICE); 1265 mempool_free(request, sc->send_io.mem.pool); 1266 1267 /* roll back the granted receive credits */ 1268 atomic_sub(new_credits, &sc->recv_io.credits.count); 1269 1270 err_alloc: 1271 atomic_inc(&sc->send_io.credits.count); 1272 wake_up(&sc->send_io.credits.wait_queue); 1273 1274 err_wait_credit: 1275 atomic_inc(&sc->send_io.lcredits.count); 1276 wake_up(&sc->send_io.lcredits.wait_queue); 1277 1278 err_wait_lcredit: 1279 return rc; 1280 } 1281 1282 /* 1283 * Send an empty message 1284 * Empty message is used to extend credits to peer to for keep live 1285 * while there is no upper layer payload to send at the time 1286 */ 1287 static int smbd_post_send_empty(struct smbdirect_socket *sc) 1288 { 1289 int remaining_data_length = 0; 1290 1291 sc->statistics.send_empty++; 1292 return smbd_post_send_iter(sc, NULL, &remaining_data_length); 1293 } 1294 1295 static int smbd_post_send_full_iter(struct smbdirect_socket *sc, 1296 struct iov_iter *iter, 1297 int *_remaining_data_length) 1298 { 1299 int rc = 0; 1300 1301 /* 1302 * smbd_post_send_iter() respects the 1303 * negotiated max_send_size, so we need to 1304 * loop until the full iter is posted 1305 */ 1306 1307 while (iov_iter_count(iter) > 0) { 1308 rc = smbd_post_send_iter(sc, iter, _remaining_data_length); 1309 if (rc < 0) 1310 break; 1311 } 1312 1313 return rc; 1314 } 1315 1316 /* 1317 * Post a receive request to the transport 1318 * The remote peer can only send data when a receive request is posted 1319 * The interaction is controlled by send/receive credit system 1320 */ 1321 static int smbd_post_recv( 1322 struct smbdirect_socket *sc, struct smbdirect_recv_io *response) 1323 { 1324 struct smbdirect_socket_parameters *sp = &sc->parameters; 1325 struct ib_recv_wr recv_wr; 1326 int rc = -EIO; 1327 1328 response->sge.addr = ib_dma_map_single( 1329 sc->ib.dev, response->packet, 1330 sp->max_recv_size, DMA_FROM_DEVICE); 1331 if (ib_dma_mapping_error(sc->ib.dev, response->sge.addr)) 1332 return rc; 1333 1334 response->sge.length = sp->max_recv_size; 1335 response->sge.lkey = sc->ib.pd->local_dma_lkey; 1336 1337 response->cqe.done = recv_done; 1338 1339 recv_wr.wr_cqe = &response->cqe; 1340 recv_wr.next = NULL; 1341 recv_wr.sg_list = &response->sge; 1342 recv_wr.num_sge = 1; 1343 1344 rc = ib_post_recv(sc->ib.qp, &recv_wr, NULL); 1345 if (rc) { 1346 ib_dma_unmap_single(sc->ib.dev, response->sge.addr, 1347 response->sge.length, DMA_FROM_DEVICE); 1348 response->sge.length = 0; 1349 smbd_disconnect_rdma_connection(sc); 1350 log_rdma_recv(ERR, "ib_post_recv failed rc=%d\n", rc); 1351 } 1352 1353 return rc; 1354 } 1355 1356 /* Perform SMBD negotiate according to [MS-SMBD] 3.1.5.2 */ 1357 static int smbd_negotiate(struct smbdirect_socket *sc) 1358 { 1359 struct smbdirect_socket_parameters *sp = &sc->parameters; 1360 int rc; 1361 struct smbdirect_recv_io *response = get_receive_buffer(sc); 1362 1363 WARN_ON_ONCE(sc->status != SMBDIRECT_SOCKET_NEGOTIATE_NEEDED); 1364 sc->status = SMBDIRECT_SOCKET_NEGOTIATE_RUNNING; 1365 1366 sc->recv_io.expected = SMBDIRECT_EXPECT_NEGOTIATE_REP; 1367 rc = smbd_post_recv(sc, response); 1368 log_rdma_event(INFO, "smbd_post_recv rc=%d iov.addr=0x%llx iov.length=%u iov.lkey=0x%x\n", 1369 rc, response->sge.addr, 
		response->sge.length, response->sge.lkey);
	if (rc) {
		put_receive_buffer(sc, response);
		return rc;
	}

	rc = smbd_post_send_negotiate_req(sc);
	if (rc)
		return rc;

	rc = wait_event_interruptible_timeout(
		sc->status_wait,
		sc->status != SMBDIRECT_SOCKET_NEGOTIATE_RUNNING,
		msecs_to_jiffies(sp->negotiate_timeout_msec));
	log_rdma_event(INFO, "wait_event_interruptible_timeout rc=%d\n", rc);

	if (sc->status == SMBDIRECT_SOCKET_CONNECTED)
		return 0;

	if (rc == 0)
		rc = -ETIMEDOUT;
	else if (rc == -ERESTARTSYS)
		rc = -EINTR;
	else
		rc = -ENOTCONN;

	return rc;
}

/*
 * Implement Connection.FragmentReassemblyBuffer defined in [MS-SMBD] 3.1.1.1
 * This is a queue for reassembling upper layer payload and presenting it to
 * the upper layer. All incoming payload goes to the reassembly queue,
 * regardless of whether reassembly is required. The upper layer code reads
 * from the queue for all incoming payloads.
 * Put a received packet to the reassembly queue
 * response: the packet received
 * data_length: the size of payload in this packet
 */
static void enqueue_reassembly(
	struct smbdirect_socket *sc,
	struct smbdirect_recv_io *response,
	int data_length)
{
	unsigned long flags;

	spin_lock_irqsave(&sc->recv_io.reassembly.lock, flags);
	list_add_tail(&response->list, &sc->recv_io.reassembly.list);
	sc->recv_io.reassembly.queue_length++;
	/*
	 * Make sure reassembly_data_length is updated after list and
	 * reassembly_queue_length are updated. On the dequeue side
	 * reassembly_data_length is checked without a lock to determine
	 * if reassembly_queue_length and list is up to date
	 */
	virt_wmb();
	sc->recv_io.reassembly.data_length += data_length;
	spin_unlock_irqrestore(&sc->recv_io.reassembly.lock, flags);
	sc->statistics.enqueue_reassembly_queue++;
}

/*
 * Get the first entry at the front of reassembly queue
 * Caller is responsible for locking
 * return value: the first entry if any, NULL if queue is empty
 */
static struct smbdirect_recv_io *_get_first_reassembly(struct smbdirect_socket *sc)
{
	struct smbdirect_recv_io *ret = NULL;

	if (!list_empty(&sc->recv_io.reassembly.list)) {
		ret = list_first_entry(
			&sc->recv_io.reassembly.list,
			struct smbdirect_recv_io, list);
	}
	return ret;
}

/*
 * Get a receive buffer
 * For each remote send, we need to post a receive. The receive buffers are
 * pre-allocated in advance.
 * return value: the receive buffer, NULL if none is available
 */
static struct smbdirect_recv_io *get_receive_buffer(struct smbdirect_socket *sc)
{
	struct smbdirect_recv_io *ret = NULL;
	unsigned long flags;

	spin_lock_irqsave(&sc->recv_io.free.lock, flags);
	if (!list_empty(&sc->recv_io.free.list)) {
		ret = list_first_entry(
			&sc->recv_io.free.list,
			struct smbdirect_recv_io, list);
		list_del(&ret->list);
		sc->statistics.get_receive_buffer++;
	}
	spin_unlock_irqrestore(&sc->recv_io.free.lock, flags);

	return ret;
}

/*
 * Return a receive buffer
 * Upon return of a receive buffer, we can post a new receive and extend
 * more receive credits to the remote peer. This is done immediately after a
 * receive buffer is returned.
1477 */ 1478 static void put_receive_buffer( 1479 struct smbdirect_socket *sc, struct smbdirect_recv_io *response) 1480 { 1481 unsigned long flags; 1482 1483 if (likely(response->sge.length != 0)) { 1484 ib_dma_unmap_single(sc->ib.dev, 1485 response->sge.addr, 1486 response->sge.length, 1487 DMA_FROM_DEVICE); 1488 response->sge.length = 0; 1489 } 1490 1491 spin_lock_irqsave(&sc->recv_io.free.lock, flags); 1492 list_add_tail(&response->list, &sc->recv_io.free.list); 1493 sc->statistics.put_receive_buffer++; 1494 spin_unlock_irqrestore(&sc->recv_io.free.lock, flags); 1495 1496 queue_work(sc->workqueue, &sc->recv_io.posted.refill_work); 1497 } 1498 1499 /* Preallocate all receive buffer on transport establishment */ 1500 static int allocate_receive_buffers(struct smbdirect_socket *sc, int num_buf) 1501 { 1502 struct smbdirect_recv_io *response; 1503 int i; 1504 1505 for (i = 0; i < num_buf; i++) { 1506 response = mempool_alloc(sc->recv_io.mem.pool, GFP_KERNEL); 1507 if (!response) 1508 goto allocate_failed; 1509 1510 response->socket = sc; 1511 response->sge.length = 0; 1512 list_add_tail(&response->list, &sc->recv_io.free.list); 1513 } 1514 1515 return 0; 1516 1517 allocate_failed: 1518 while (!list_empty(&sc->recv_io.free.list)) { 1519 response = list_first_entry( 1520 &sc->recv_io.free.list, 1521 struct smbdirect_recv_io, list); 1522 list_del(&response->list); 1523 1524 mempool_free(response, sc->recv_io.mem.pool); 1525 } 1526 return -ENOMEM; 1527 } 1528 1529 static void destroy_receive_buffers(struct smbdirect_socket *sc) 1530 { 1531 struct smbdirect_recv_io *response; 1532 1533 while ((response = get_receive_buffer(sc))) 1534 mempool_free(response, sc->recv_io.mem.pool); 1535 } 1536 1537 static void send_immediate_empty_message(struct work_struct *work) 1538 { 1539 struct smbdirect_socket *sc = 1540 container_of(work, struct smbdirect_socket, idle.immediate_work); 1541 1542 if (sc->status != SMBDIRECT_SOCKET_CONNECTED) 1543 return; 1544 1545 log_keep_alive(INFO, "send an empty message\n"); 1546 smbd_post_send_empty(sc); 1547 } 1548 1549 /* Implement idle connection timer [MS-SMBD] 3.1.6.2 */ 1550 static void idle_connection_timer(struct work_struct *work) 1551 { 1552 struct smbdirect_socket *sc = 1553 container_of(work, struct smbdirect_socket, idle.timer_work.work); 1554 struct smbdirect_socket_parameters *sp = &sc->parameters; 1555 1556 if (sc->idle.keepalive != SMBDIRECT_KEEPALIVE_NONE) { 1557 log_keep_alive(ERR, 1558 "error status sc->idle.keepalive=%d\n", 1559 sc->idle.keepalive); 1560 smbd_disconnect_rdma_connection(sc); 1561 return; 1562 } 1563 1564 if (sc->status != SMBDIRECT_SOCKET_CONNECTED) 1565 return; 1566 1567 /* 1568 * Now use the keepalive timeout (instead of keepalive interval) 1569 * in order to wait for a response 1570 */ 1571 sc->idle.keepalive = SMBDIRECT_KEEPALIVE_PENDING; 1572 mod_delayed_work(sc->workqueue, &sc->idle.timer_work, 1573 msecs_to_jiffies(sp->keepalive_timeout_msec)); 1574 log_keep_alive(INFO, "schedule send of empty idle message\n"); 1575 queue_work(sc->workqueue, &sc->idle.immediate_work); 1576 } 1577 1578 /* 1579 * Destroy the transport and related RDMA and memory resources 1580 * Need to go through all the pending counters and make sure on one is using 1581 * the transport while it is destroyed 1582 */ 1583 void smbd_destroy(struct TCP_Server_Info *server) 1584 { 1585 struct smbd_connection *info = server->smbd_conn; 1586 struct smbdirect_socket *sc; 1587 struct smbdirect_recv_io *response; 1588 unsigned long flags; 1589 1590 if (!info) { 1591 
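		/*
		 * Nothing to do: the session was already torn down and
		 * server->smbd_conn cleared by a previous smbd_destroy().
		 */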
log_rdma_event(INFO, "rdma session already destroyed\n"); 1592 return; 1593 } 1594 sc = &info->socket; 1595 1596 log_rdma_event(INFO, "cancelling and disable disconnect_work\n"); 1597 disable_work_sync(&sc->disconnect_work); 1598 1599 log_rdma_event(INFO, "destroying rdma session\n"); 1600 if (sc->status < SMBDIRECT_SOCKET_DISCONNECTING) 1601 smbd_disconnect_rdma_work(&sc->disconnect_work); 1602 if (sc->status < SMBDIRECT_SOCKET_DISCONNECTED) { 1603 log_rdma_event(INFO, "wait for transport being disconnected\n"); 1604 wait_event(sc->status_wait, sc->status == SMBDIRECT_SOCKET_DISCONNECTED); 1605 log_rdma_event(INFO, "waited for transport being disconnected\n"); 1606 } 1607 1608 /* 1609 * Wake up all waiters in all wait queues 1610 * in order to notice the broken connection. 1611 * 1612 * Most likely this was already called via 1613 * smbd_disconnect_rdma_work(), but call it again... 1614 */ 1615 smbd_disconnect_wake_up_all(sc); 1616 1617 log_rdma_event(INFO, "cancelling recv_io.posted.refill_work\n"); 1618 disable_work_sync(&sc->recv_io.posted.refill_work); 1619 1620 log_rdma_event(INFO, "destroying qp\n"); 1621 ib_drain_qp(sc->ib.qp); 1622 rdma_destroy_qp(sc->rdma.cm_id); 1623 sc->ib.qp = NULL; 1624 1625 log_rdma_event(INFO, "cancelling idle timer\n"); 1626 disable_delayed_work_sync(&sc->idle.timer_work); 1627 log_rdma_event(INFO, "cancelling send immediate work\n"); 1628 disable_work_sync(&sc->idle.immediate_work); 1629 1630 /* It's not possible for upper layer to get to reassembly */ 1631 log_rdma_event(INFO, "drain the reassembly queue\n"); 1632 do { 1633 spin_lock_irqsave(&sc->recv_io.reassembly.lock, flags); 1634 response = _get_first_reassembly(sc); 1635 if (response) { 1636 list_del(&response->list); 1637 spin_unlock_irqrestore( 1638 &sc->recv_io.reassembly.lock, flags); 1639 put_receive_buffer(sc, response); 1640 } else 1641 spin_unlock_irqrestore( 1642 &sc->recv_io.reassembly.lock, flags); 1643 } while (response); 1644 sc->recv_io.reassembly.data_length = 0; 1645 1646 log_rdma_event(INFO, "free receive buffers\n"); 1647 destroy_receive_buffers(sc); 1648 1649 log_rdma_event(INFO, "freeing mr list\n"); 1650 destroy_mr_list(sc); 1651 1652 ib_free_cq(sc->ib.send_cq); 1653 ib_free_cq(sc->ib.recv_cq); 1654 ib_dealloc_pd(sc->ib.pd); 1655 rdma_destroy_id(sc->rdma.cm_id); 1656 1657 /* free mempools */ 1658 mempool_destroy(sc->send_io.mem.pool); 1659 kmem_cache_destroy(sc->send_io.mem.cache); 1660 1661 mempool_destroy(sc->recv_io.mem.pool); 1662 kmem_cache_destroy(sc->recv_io.mem.cache); 1663 1664 sc->status = SMBDIRECT_SOCKET_DESTROYED; 1665 1666 destroy_workqueue(sc->workqueue); 1667 log_rdma_event(INFO, "rdma session destroyed\n"); 1668 kfree(info); 1669 server->smbd_conn = NULL; 1670 } 1671 1672 /* 1673 * Reconnect this SMBD connection, called from upper layer 1674 * return value: 0 on success, or actual error code 1675 */ 1676 int smbd_reconnect(struct TCP_Server_Info *server) 1677 { 1678 log_rdma_event(INFO, "reconnecting rdma session\n"); 1679 1680 if (!server->smbd_conn) { 1681 log_rdma_event(INFO, "rdma session already destroyed\n"); 1682 goto create_conn; 1683 } 1684 1685 /* 1686 * This is possible if transport is disconnected and we haven't received 1687 * notification from RDMA, but upper layer has detected timeout 1688 */ 1689 if (server->smbd_conn->socket.status == SMBDIRECT_SOCKET_CONNECTED) { 1690 log_rdma_event(INFO, "disconnecting transport\n"); 1691 smbd_destroy(server); 1692 } 1693 1694 create_conn: 1695 log_rdma_event(INFO, "creating rdma session\n"); 1696 
server->smbd_conn = smbd_get_connection( 1697 server, (struct sockaddr *) &server->dstaddr); 1698 1699 if (server->smbd_conn) { 1700 cifs_dbg(VFS, "RDMA transport re-established\n"); 1701 trace_smb3_smbd_connect_done(server->hostname, server->conn_id, &server->dstaddr); 1702 return 0; 1703 } 1704 trace_smb3_smbd_connect_err(server->hostname, server->conn_id, &server->dstaddr); 1705 return -ENOENT; 1706 } 1707 1708 static void destroy_caches(struct smbdirect_socket *sc) 1709 { 1710 destroy_receive_buffers(sc); 1711 mempool_destroy(sc->recv_io.mem.pool); 1712 kmem_cache_destroy(sc->recv_io.mem.cache); 1713 mempool_destroy(sc->send_io.mem.pool); 1714 kmem_cache_destroy(sc->send_io.mem.cache); 1715 } 1716 1717 #define MAX_NAME_LEN 80 1718 static int allocate_caches(struct smbdirect_socket *sc) 1719 { 1720 struct smbdirect_socket_parameters *sp = &sc->parameters; 1721 char name[MAX_NAME_LEN]; 1722 int rc; 1723 1724 if (WARN_ON_ONCE(sp->max_recv_size < sizeof(struct smbdirect_data_transfer))) 1725 return -ENOMEM; 1726 1727 scnprintf(name, MAX_NAME_LEN, "smbdirect_send_io_%p", sc); 1728 sc->send_io.mem.cache = 1729 kmem_cache_create( 1730 name, 1731 sizeof(struct smbdirect_send_io) + 1732 sizeof(struct smbdirect_data_transfer), 1733 0, SLAB_HWCACHE_ALIGN, NULL); 1734 if (!sc->send_io.mem.cache) 1735 return -ENOMEM; 1736 1737 sc->send_io.mem.pool = 1738 mempool_create(sp->send_credit_target, mempool_alloc_slab, 1739 mempool_free_slab, sc->send_io.mem.cache); 1740 if (!sc->send_io.mem.pool) 1741 goto out1; 1742 1743 scnprintf(name, MAX_NAME_LEN, "smbdirect_recv_io_%p", sc); 1744 1745 struct kmem_cache_args response_args = { 1746 .align = __alignof__(struct smbdirect_recv_io), 1747 .useroffset = (offsetof(struct smbdirect_recv_io, packet) + 1748 sizeof(struct smbdirect_data_transfer)), 1749 .usersize = sp->max_recv_size - sizeof(struct smbdirect_data_transfer), 1750 }; 1751 sc->recv_io.mem.cache = 1752 kmem_cache_create(name, 1753 sizeof(struct smbdirect_recv_io) + sp->max_recv_size, 1754 &response_args, SLAB_HWCACHE_ALIGN); 1755 if (!sc->recv_io.mem.cache) 1756 goto out2; 1757 1758 sc->recv_io.mem.pool = 1759 mempool_create(sp->recv_credit_max, mempool_alloc_slab, 1760 mempool_free_slab, sc->recv_io.mem.cache); 1761 if (!sc->recv_io.mem.pool) 1762 goto out3; 1763 1764 rc = allocate_receive_buffers(sc, sp->recv_credit_max); 1765 if (rc) { 1766 log_rdma_event(ERR, "failed to allocate receive buffers\n"); 1767 goto out4; 1768 } 1769 1770 return 0; 1771 1772 out4: 1773 mempool_destroy(sc->recv_io.mem.pool); 1774 out3: 1775 kmem_cache_destroy(sc->recv_io.mem.cache); 1776 out2: 1777 mempool_destroy(sc->send_io.mem.pool); 1778 out1: 1779 kmem_cache_destroy(sc->send_io.mem.cache); 1780 return -ENOMEM; 1781 } 1782 1783 /* Create a SMBD connection, called by upper layer */ 1784 static struct smbd_connection *_smbd_get_connection( 1785 struct TCP_Server_Info *server, struct sockaddr *dstaddr, int port) 1786 { 1787 int rc; 1788 struct smbd_connection *info; 1789 struct smbdirect_socket *sc; 1790 struct smbdirect_socket_parameters *sp; 1791 struct rdma_conn_param conn_param; 1792 struct ib_qp_cap qp_cap; 1793 struct ib_qp_init_attr qp_attr; 1794 struct sockaddr_in *addr_in = (struct sockaddr_in *) dstaddr; 1795 struct ib_port_immutable port_immutable; 1796 __be32 ird_ord_hdr[2]; 1797 char wq_name[80]; 1798 struct workqueue_struct *workqueue; 1799 1800 info = kzalloc(sizeof(struct smbd_connection), GFP_KERNEL); 1801 if (!info) 1802 return NULL; 1803 sc = &info->socket; 1804 scnprintf(wq_name, 
ARRAY_SIZE(wq_name), "smbd_%p", sc); 1805 workqueue = create_workqueue(wq_name); 1806 if (!workqueue) 1807 goto create_wq_failed; 1808 smbdirect_socket_init(sc); 1809 sc->workqueue = workqueue; 1810 sp = &sc->parameters; 1811 1812 INIT_WORK(&sc->disconnect_work, smbd_disconnect_rdma_work); 1813 1814 sp->resolve_addr_timeout_msec = RDMA_RESOLVE_TIMEOUT; 1815 sp->resolve_route_timeout_msec = RDMA_RESOLVE_TIMEOUT; 1816 sp->rdma_connect_timeout_msec = RDMA_RESOLVE_TIMEOUT; 1817 sp->negotiate_timeout_msec = SMBD_NEGOTIATE_TIMEOUT * 1000; 1818 sp->initiator_depth = 1; 1819 sp->responder_resources = SMBD_CM_RESPONDER_RESOURCES; 1820 sp->recv_credit_max = smbd_receive_credit_max; 1821 sp->send_credit_target = smbd_send_credit_target; 1822 sp->max_send_size = smbd_max_send_size; 1823 sp->max_fragmented_recv_size = smbd_max_fragmented_recv_size; 1824 sp->max_recv_size = smbd_max_receive_size; 1825 sp->max_frmr_depth = smbd_max_frmr_depth; 1826 sp->keepalive_interval_msec = smbd_keep_alive_interval * 1000; 1827 sp->keepalive_timeout_msec = KEEPALIVE_RECV_TIMEOUT * 1000; 1828 1829 rc = smbd_ia_open(sc, dstaddr, port); 1830 if (rc) { 1831 log_rdma_event(INFO, "smbd_ia_open rc=%d\n", rc); 1832 goto create_id_failed; 1833 } 1834 1835 if (sp->send_credit_target > sc->ib.dev->attrs.max_cqe || 1836 sp->send_credit_target > sc->ib.dev->attrs.max_qp_wr) { 1837 log_rdma_event(ERR, "consider lowering send_credit_target = %d. Possible CQE overrun, device reporting max_cqe %d max_qp_wr %d\n", 1838 sp->send_credit_target, 1839 sc->ib.dev->attrs.max_cqe, 1840 sc->ib.dev->attrs.max_qp_wr); 1841 goto config_failed; 1842 } 1843 1844 if (sp->recv_credit_max > sc->ib.dev->attrs.max_cqe || 1845 sp->recv_credit_max > sc->ib.dev->attrs.max_qp_wr) { 1846 log_rdma_event(ERR, "consider lowering receive_credit_max = %d. Possible CQE overrun, device reporting max_cqe %d max_qp_wr %d\n", 1847 sp->recv_credit_max, 1848 sc->ib.dev->attrs.max_cqe, 1849 sc->ib.dev->attrs.max_qp_wr); 1850 goto config_failed; 1851 } 1852 1853 if (sc->ib.dev->attrs.max_send_sge < SMBDIRECT_SEND_IO_MAX_SGE || 1854 sc->ib.dev->attrs.max_recv_sge < SMBDIRECT_RECV_IO_MAX_SGE) { 1855 log_rdma_event(ERR, 1856 "device %.*s max_send_sge/max_recv_sge = %d/%d too small\n", 1857 IB_DEVICE_NAME_MAX, 1858 sc->ib.dev->name, 1859 sc->ib.dev->attrs.max_send_sge, 1860 sc->ib.dev->attrs.max_recv_sge); 1861 goto config_failed; 1862 } 1863 1864 sp->responder_resources = 1865 min_t(u8, sp->responder_resources, 1866 sc->ib.dev->attrs.max_qp_rd_atom); 1867 log_rdma_mr(INFO, "responder_resources=%d\n", 1868 sp->responder_resources); 1869 1870 /* 1871 * We use allocate sp->responder_resources * 2 MRs 1872 * and each MR needs WRs for REG and INV, so 1873 * we use '* 4'. 
1874 * 1875 * +1 for ib_drain_qp() 1876 */ 1877 memset(&qp_cap, 0, sizeof(qp_cap)); 1878 qp_cap.max_send_wr = sp->send_credit_target + sp->responder_resources * 4 + 1; 1879 qp_cap.max_recv_wr = sp->recv_credit_max + 1; 1880 qp_cap.max_send_sge = SMBDIRECT_SEND_IO_MAX_SGE; 1881 qp_cap.max_recv_sge = SMBDIRECT_RECV_IO_MAX_SGE; 1882 1883 sc->ib.pd = ib_alloc_pd(sc->ib.dev, 0); 1884 if (IS_ERR(sc->ib.pd)) { 1885 rc = PTR_ERR(sc->ib.pd); 1886 sc->ib.pd = NULL; 1887 log_rdma_event(ERR, "ib_alloc_pd() returned %d\n", rc); 1888 goto alloc_pd_failed; 1889 } 1890 1891 sc->ib.send_cq = 1892 ib_alloc_cq_any(sc->ib.dev, sc, 1893 qp_cap.max_send_wr, IB_POLL_SOFTIRQ); 1894 if (IS_ERR(sc->ib.send_cq)) { 1895 sc->ib.send_cq = NULL; 1896 goto alloc_cq_failed; 1897 } 1898 1899 sc->ib.recv_cq = 1900 ib_alloc_cq_any(sc->ib.dev, sc, 1901 qp_cap.max_recv_wr, IB_POLL_SOFTIRQ); 1902 if (IS_ERR(sc->ib.recv_cq)) { 1903 sc->ib.recv_cq = NULL; 1904 goto alloc_cq_failed; 1905 } 1906 1907 memset(&qp_attr, 0, sizeof(qp_attr)); 1908 qp_attr.event_handler = smbd_qp_async_error_upcall; 1909 qp_attr.qp_context = sc; 1910 qp_attr.cap = qp_cap; 1911 qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR; 1912 qp_attr.qp_type = IB_QPT_RC; 1913 qp_attr.send_cq = sc->ib.send_cq; 1914 qp_attr.recv_cq = sc->ib.recv_cq; 1915 qp_attr.port_num = ~0; 1916 1917 rc = rdma_create_qp(sc->rdma.cm_id, sc->ib.pd, &qp_attr); 1918 if (rc) { 1919 log_rdma_event(ERR, "rdma_create_qp failed %i\n", rc); 1920 goto create_qp_failed; 1921 } 1922 sc->ib.qp = sc->rdma.cm_id->qp; 1923 1924 memset(&conn_param, 0, sizeof(conn_param)); 1925 conn_param.initiator_depth = sp->initiator_depth; 1926 conn_param.responder_resources = sp->responder_resources; 1927 1928 /* Need to send IRD/ORD in private data for iWARP */ 1929 sc->ib.dev->ops.get_port_immutable( 1930 sc->ib.dev, sc->rdma.cm_id->port_num, &port_immutable); 1931 if (port_immutable.core_cap_flags & RDMA_CORE_PORT_IWARP) { 1932 ird_ord_hdr[0] = cpu_to_be32(conn_param.responder_resources); 1933 ird_ord_hdr[1] = cpu_to_be32(conn_param.initiator_depth); 1934 conn_param.private_data = ird_ord_hdr; 1935 conn_param.private_data_len = sizeof(ird_ord_hdr); 1936 } else { 1937 conn_param.private_data = NULL; 1938 conn_param.private_data_len = 0; 1939 } 1940 1941 conn_param.retry_count = SMBD_CM_RETRY; 1942 conn_param.rnr_retry_count = SMBD_CM_RNR_RETRY; 1943 conn_param.flow_control = 0; 1944 1945 log_rdma_event(INFO, "connecting to IP %pI4 port %d\n", 1946 &addr_in->sin_addr, port); 1947 1948 WARN_ON_ONCE(sc->status != SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED); 1949 sc->status = SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING; 1950 rc = rdma_connect(sc->rdma.cm_id, &conn_param); 1951 if (rc) { 1952 log_rdma_event(ERR, "rdma_connect() failed with %i\n", rc); 1953 goto rdma_connect_failed; 1954 } 1955 1956 wait_event_interruptible_timeout( 1957 sc->status_wait, 1958 sc->status != SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING, 1959 msecs_to_jiffies(sp->rdma_connect_timeout_msec)); 1960 1961 if (sc->status != SMBDIRECT_SOCKET_NEGOTIATE_NEEDED) { 1962 log_rdma_event(ERR, "rdma_connect failed port=%d\n", port); 1963 goto rdma_connect_failed; 1964 } 1965 1966 log_rdma_event(INFO, "rdma_connect connected\n"); 1967 1968 rc = allocate_caches(sc); 1969 if (rc) { 1970 log_rdma_event(ERR, "cache allocation failed\n"); 1971 goto allocate_cache_failed; 1972 } 1973 1974 INIT_WORK(&sc->idle.immediate_work, send_immediate_empty_message); 1975 INIT_DELAYED_WORK(&sc->idle.timer_work, idle_connection_timer); 1976 /* 1977 * start with the negotiate timeout and 
SMBDIRECT_KEEPALIVE_PENDING 1978 * so that the timer will cause a disconnect. 1979 */ 1980 sc->idle.keepalive = SMBDIRECT_KEEPALIVE_PENDING; 1981 mod_delayed_work(sc->workqueue, &sc->idle.timer_work, 1982 msecs_to_jiffies(sp->negotiate_timeout_msec)); 1983 1984 INIT_WORK(&sc->recv_io.posted.refill_work, smbd_post_send_credits); 1985 1986 rc = smbd_negotiate(sc); 1987 if (rc) { 1988 log_rdma_event(ERR, "smbd_negotiate rc=%d\n", rc); 1989 goto negotiation_failed; 1990 } 1991 1992 rc = allocate_mr_list(sc); 1993 if (rc) { 1994 log_rdma_mr(ERR, "memory registration allocation failed\n"); 1995 goto allocate_mr_failed; 1996 } 1997 1998 return info; 1999 2000 allocate_mr_failed: 2001 /* At this point, we need a full transport shutdown */ 2002 server->smbd_conn = info; 2003 smbd_destroy(server); 2004 return NULL; 2005 2006 negotiation_failed: 2007 disable_delayed_work_sync(&sc->idle.timer_work); 2008 destroy_caches(sc); 2009 sc->status = SMBDIRECT_SOCKET_NEGOTIATE_FAILED; 2010 rdma_disconnect(sc->rdma.cm_id); 2011 wait_event(sc->status_wait, 2012 sc->status == SMBDIRECT_SOCKET_DISCONNECTED); 2013 2014 allocate_cache_failed: 2015 rdma_connect_failed: 2016 rdma_destroy_qp(sc->rdma.cm_id); 2017 2018 create_qp_failed: 2019 alloc_cq_failed: 2020 if (sc->ib.send_cq) 2021 ib_free_cq(sc->ib.send_cq); 2022 if (sc->ib.recv_cq) 2023 ib_free_cq(sc->ib.recv_cq); 2024 2025 ib_dealloc_pd(sc->ib.pd); 2026 2027 alloc_pd_failed: 2028 config_failed: 2029 rdma_destroy_id(sc->rdma.cm_id); 2030 2031 create_id_failed: 2032 destroy_workqueue(sc->workqueue); 2033 create_wq_failed: 2034 kfree(info); 2035 return NULL; 2036 } 2037 2038 struct smbd_connection *smbd_get_connection( 2039 struct TCP_Server_Info *server, struct sockaddr *dstaddr) 2040 { 2041 struct smbd_connection *ret; 2042 const struct smbdirect_socket_parameters *sp; 2043 int port = SMBD_PORT; 2044 2045 try_again: 2046 ret = _smbd_get_connection(server, dstaddr, port); 2047 2048 /* Try SMB_PORT if SMBD_PORT doesn't work */ 2049 if (!ret && port == SMBD_PORT) { 2050 port = SMB_PORT; 2051 goto try_again; 2052 } 2053 if (!ret) 2054 return NULL; 2055 2056 sp = &ret->socket.parameters; 2057 2058 server->rdma_readwrite_threshold = 2059 rdma_readwrite_threshold > sp->max_fragmented_send_size ? 2060 sp->max_fragmented_send_size : 2061 rdma_readwrite_threshold; 2062 2063 return ret; 2064 } 2065 2066 /* 2067 * Receive data from the transport's receive reassembly queue 2068 * All the incoming data packets are placed in the reassembly queue 2069 * msg: the msghdr to read the data into 2070 * size: the length of data to read 2071 * return value: actual data read 2072 * 2073 * Note: this implementation copies the data from the reassembly queue to the 2074 * receive buffers used by the upper layer. This is not the optimal code path. A better way 2075 * to do it is to not have the upper layer allocate its receive buffers but rather 2076 * borrow the buffer from the reassembly queue, and return it after the data is 2077 * consumed. But this would require more changes to upper layer code, and would also 2078 * need to consider packet boundaries while they are still being reassembled.
2079 */ 2080 int smbd_recv(struct smbd_connection *info, struct msghdr *msg) 2081 { 2082 struct smbdirect_socket *sc = &info->socket; 2083 struct smbdirect_recv_io *response; 2084 struct smbdirect_data_transfer *data_transfer; 2085 size_t size = iov_iter_count(&msg->msg_iter); 2086 int to_copy, to_read, data_read, offset; 2087 u32 data_length, remaining_data_length, data_offset; 2088 int rc; 2089 2090 if (WARN_ON_ONCE(iov_iter_rw(&msg->msg_iter) == WRITE)) 2091 return -EINVAL; /* It's a bug in the upper layer to get here */ 2092 2093 again: 2094 /* 2095 * No need to hold the reassembly queue lock all the time as we are 2096 * the only one reading from the front of the queue. The transport 2097 * may add more entries to the back of the queue at the same time 2098 */ 2099 log_read(INFO, "size=%zd sc->recv_io.reassembly.data_length=%d\n", size, 2100 sc->recv_io.reassembly.data_length); 2101 if (sc->recv_io.reassembly.data_length >= size) { 2102 int queue_length; 2103 int queue_removed = 0; 2104 unsigned long flags; 2105 2106 /* 2107 * Need to make sure reassembly.data_length is read before 2108 * reading reassembly.queue_length and calling 2109 * _get_first_reassembly. This call is lock free 2110 * as we never read at the end of the queue, which is being 2111 * updated in SOFTIRQ context as more data is received 2112 */ 2113 virt_rmb(); 2114 queue_length = sc->recv_io.reassembly.queue_length; 2115 data_read = 0; 2116 to_read = size; 2117 offset = sc->recv_io.reassembly.first_entry_offset; 2118 while (data_read < size) { 2119 response = _get_first_reassembly(sc); 2120 data_transfer = smbdirect_recv_io_payload(response); 2121 data_length = le32_to_cpu(data_transfer->data_length); 2122 remaining_data_length = 2123 le32_to_cpu( 2124 data_transfer->remaining_data_length); 2125 data_offset = le32_to_cpu(data_transfer->data_offset); 2126 2127 /* 2128 * The upper layer expects the RFC1002 length at the 2129 * beginning of the payload. Return it to indicate 2130 * the total length of the packet. This minimizes the 2131 * change to the upper layer packet processing logic. This 2132 * will eventually be removed when an intermediate 2133 * transport layer is added 2134 */ 2135 if (response->first_segment && size == 4) { 2136 unsigned int rfc1002_len = 2137 data_length + remaining_data_length; 2138 __be32 rfc1002_hdr = cpu_to_be32(rfc1002_len); 2139 if (copy_to_iter(&rfc1002_hdr, sizeof(rfc1002_hdr), 2140 &msg->msg_iter) != sizeof(rfc1002_hdr)) 2141 return -EFAULT; 2142 data_read = 4; 2143 response->first_segment = false; 2144 log_read(INFO, "returning rfc1002 length %d\n", 2145 rfc1002_len); 2146 goto read_rfc1002_done; 2147 } 2148 2149 to_copy = min_t(int, data_length - offset, to_read); 2150 if (copy_to_iter((char *)data_transfer + data_offset + offset, 2151 to_copy, &msg->msg_iter) != to_copy) 2152 return -EFAULT; 2153 2154 /* move on to the next buffer?
*/ 2155 if (to_copy == data_length - offset) { 2156 queue_length--; 2157 /* 2158 * No need to lock if we are not at the 2159 * end of the queue 2160 */ 2161 if (queue_length) 2162 list_del(&response->list); 2163 else { 2164 spin_lock_irqsave( 2165 &sc->recv_io.reassembly.lock, flags); 2166 list_del(&response->list); 2167 spin_unlock_irqrestore( 2168 &sc->recv_io.reassembly.lock, flags); 2169 } 2170 queue_removed++; 2171 sc->statistics.dequeue_reassembly_queue++; 2172 put_receive_buffer(sc, response); 2173 offset = 0; 2174 log_read(INFO, "put_receive_buffer offset=0\n"); 2175 } else 2176 offset += to_copy; 2177 2178 to_read -= to_copy; 2179 data_read += to_copy; 2180 2181 log_read(INFO, "_get_first_reassembly memcpy %d bytes data_transfer_length-offset=%d after that to_read=%d data_read=%d offset=%d\n", 2182 to_copy, data_length - offset, 2183 to_read, data_read, offset); 2184 } 2185 2186 spin_lock_irqsave(&sc->recv_io.reassembly.lock, flags); 2187 sc->recv_io.reassembly.data_length -= data_read; 2188 sc->recv_io.reassembly.queue_length -= queue_removed; 2189 spin_unlock_irqrestore(&sc->recv_io.reassembly.lock, flags); 2190 2191 sc->recv_io.reassembly.first_entry_offset = offset; 2192 log_read(INFO, "returning to thread data_read=%d reassembly_data_length=%d first_entry_offset=%d\n", 2193 data_read, sc->recv_io.reassembly.data_length, 2194 sc->recv_io.reassembly.first_entry_offset); 2195 read_rfc1002_done: 2196 return data_read; 2197 } 2198 2199 log_read(INFO, "wait_event on more data\n"); 2200 rc = wait_event_interruptible( 2201 sc->recv_io.reassembly.wait_queue, 2202 sc->recv_io.reassembly.data_length >= size || 2203 sc->status != SMBDIRECT_SOCKET_CONNECTED); 2204 /* Don't return any data if interrupted */ 2205 if (rc) 2206 return rc; 2207 2208 if (sc->status != SMBDIRECT_SOCKET_CONNECTED) { 2209 log_read(ERR, "disconnected\n"); 2210 return -ECONNABORTED; 2211 } 2212 2213 goto again; 2214 } 2215 2216 /* 2217 * Send data to the transport 2218 * Each rqst is transported as an SMBDirect payload 2219 * rqst: the data to write 2220 * return value: 0 if the write succeeded, otherwise error code 2221 */ 2222 int smbd_send(struct TCP_Server_Info *server, 2223 int num_rqst, struct smb_rqst *rqst_array) 2224 { 2225 struct smbd_connection *info = server->smbd_conn; 2226 struct smbdirect_socket *sc = &info->socket; 2227 struct smbdirect_socket_parameters *sp = &sc->parameters; 2228 struct smb_rqst *rqst; 2229 struct iov_iter iter; 2230 unsigned int remaining_data_length, klen; 2231 int rc, i, rqst_idx; 2232 2233 if (sc->status != SMBDIRECT_SOCKET_CONNECTED) 2234 return -EAGAIN; 2235 2236 /* 2237 * Add in the page array if there is one.
The caller needs to set 2238 * rq_tailsz to PAGE_SIZE when the buffer has multiple pages and 2239 * ends at a page boundary 2240 */ 2241 remaining_data_length = 0; 2242 for (i = 0; i < num_rqst; i++) 2243 remaining_data_length += smb_rqst_len(server, &rqst_array[i]); 2244 2245 if (unlikely(remaining_data_length > sp->max_fragmented_send_size)) { 2246 /* assertion: payload never exceeds negotiated maximum */ 2247 log_write(ERR, "payload size %d > max size %d\n", 2248 remaining_data_length, sp->max_fragmented_send_size); 2249 return -EINVAL; 2250 } 2251 2252 log_write(INFO, "num_rqst=%d total length=%u\n", 2253 num_rqst, remaining_data_length); 2254 2255 rqst_idx = 0; 2256 do { 2257 rqst = &rqst_array[rqst_idx]; 2258 2259 cifs_dbg(FYI, "Sending smb (RDMA): idx=%d smb_len=%lu\n", 2260 rqst_idx, smb_rqst_len(server, rqst)); 2261 for (i = 0; i < rqst->rq_nvec; i++) 2262 dump_smb(rqst->rq_iov[i].iov_base, rqst->rq_iov[i].iov_len); 2263 2264 log_write(INFO, "RDMA-WR[%u] nvec=%d len=%u iter=%zu rqlen=%lu\n", 2265 rqst_idx, rqst->rq_nvec, remaining_data_length, 2266 iov_iter_count(&rqst->rq_iter), smb_rqst_len(server, rqst)); 2267 2268 /* Send the metadata pages. */ 2269 klen = 0; 2270 for (i = 0; i < rqst->rq_nvec; i++) 2271 klen += rqst->rq_iov[i].iov_len; 2272 iov_iter_kvec(&iter, ITER_SOURCE, rqst->rq_iov, rqst->rq_nvec, klen); 2273 2274 rc = smbd_post_send_full_iter(sc, &iter, &remaining_data_length); 2275 if (rc < 0) 2276 break; 2277 2278 if (iov_iter_count(&rqst->rq_iter) > 0) { 2279 /* And then the data pages if there are any */ 2280 rc = smbd_post_send_full_iter(sc, &rqst->rq_iter, 2281 &remaining_data_length); 2282 if (rc < 0) 2283 break; 2284 } 2285 2286 } while (++rqst_idx < num_rqst); 2287 2288 /* 2289 * As an optimization, we don't wait for individual I/O to finish 2290 * before sending the next one. 2291 * Send them all and wait for the pending send count to reach 0, 2292 * which means all the I/Os have completed and we are good to return 2293 */ 2294 2295 wait_event(sc->send_io.pending.zero_wait_queue, 2296 atomic_read(&sc->send_io.pending.count) == 0 || 2297 sc->status != SMBDIRECT_SOCKET_CONNECTED); 2298 2299 if (sc->status != SMBDIRECT_SOCKET_CONNECTED && rc == 0) 2300 rc = -EAGAIN; 2301 2302 return rc; 2303 } 2304 2305 static void register_mr_done(struct ib_cq *cq, struct ib_wc *wc) 2306 { 2307 struct smbdirect_mr_io *mr = 2308 container_of(wc->wr_cqe, struct smbdirect_mr_io, cqe); 2309 struct smbdirect_socket *sc = mr->socket; 2310 2311 if (wc->status) { 2312 log_rdma_mr(ERR, "status=%d\n", wc->status); 2313 smbd_disconnect_rdma_connection(sc); 2314 } 2315 } 2316 2317 /* 2318 * The work queue function that recovers MRs 2319 * We need to call ib_dereg_mr() and ib_alloc_mr() before this MR can be used 2320 * again. Both calls are slow, so finish them in a workqueue. This will not 2321 * block the I/O path. 2322 * There is one workqueue that recovers MRs, so there is no need to lock, as the 2323 * I/O requests calling smbd_register_mr() will never update the links in the 2324 * mr_list.
2325 */ 2326 static void smbd_mr_recovery_work(struct work_struct *work) 2327 { 2328 struct smbdirect_socket *sc = 2329 container_of(work, struct smbdirect_socket, mr_io.recovery_work); 2330 struct smbdirect_socket_parameters *sp = &sc->parameters; 2331 struct smbdirect_mr_io *smbdirect_mr; 2332 int rc; 2333 2334 list_for_each_entry(smbdirect_mr, &sc->mr_io.all.list, list) { 2335 if (smbdirect_mr->state == SMBDIRECT_MR_ERROR) { 2336 2337 /* recover this MR entry */ 2338 rc = ib_dereg_mr(smbdirect_mr->mr); 2339 if (rc) { 2340 log_rdma_mr(ERR, 2341 "ib_dereg_mr failed rc=%x\n", 2342 rc); 2343 smbd_disconnect_rdma_connection(sc); 2344 continue; 2345 } 2346 2347 smbdirect_mr->mr = ib_alloc_mr( 2348 sc->ib.pd, sc->mr_io.type, 2349 sp->max_frmr_depth); 2350 if (IS_ERR(smbdirect_mr->mr)) { 2351 log_rdma_mr(ERR, "ib_alloc_mr failed mr_type=%x max_frmr_depth=%x\n", 2352 sc->mr_io.type, 2353 sp->max_frmr_depth); 2354 smbd_disconnect_rdma_connection(sc); 2355 continue; 2356 } 2357 } else 2358 /* This MR is being used, don't recover it */ 2359 continue; 2360 2361 smbdirect_mr->state = SMBDIRECT_MR_READY; 2362 2363 /* smbdirect_mr->state is updated by this function 2364 * and is read and updated by I/O issuing CPUs trying 2365 * to get a MR, the call to atomic_inc_return 2366 * implicates a memory barrier and guarantees this 2367 * value is updated before waking up any calls to 2368 * get_mr() from the I/O issuing CPUs 2369 */ 2370 if (atomic_inc_return(&sc->mr_io.ready.count) == 1) 2371 wake_up(&sc->mr_io.ready.wait_queue); 2372 } 2373 } 2374 2375 static void smbd_mr_disable_locked(struct smbdirect_mr_io *mr) 2376 { 2377 struct smbdirect_socket *sc = mr->socket; 2378 2379 lockdep_assert_held(&mr->mutex); 2380 2381 if (mr->state == SMBDIRECT_MR_DISABLED) 2382 return; 2383 2384 if (mr->mr) 2385 ib_dereg_mr(mr->mr); 2386 if (mr->sgt.nents) 2387 ib_dma_unmap_sg(sc->ib.dev, mr->sgt.sgl, mr->sgt.nents, mr->dir); 2388 kfree(mr->sgt.sgl); 2389 2390 mr->mr = NULL; 2391 mr->sgt.sgl = NULL; 2392 mr->sgt.nents = 0; 2393 2394 mr->state = SMBDIRECT_MR_DISABLED; 2395 } 2396 2397 static void smbd_mr_free_locked(struct kref *kref) 2398 { 2399 struct smbdirect_mr_io *mr = 2400 container_of(kref, struct smbdirect_mr_io, kref); 2401 2402 lockdep_assert_held(&mr->mutex); 2403 2404 /* 2405 * smbd_mr_disable_locked() should already be called! 2406 */ 2407 if (WARN_ON_ONCE(mr->state != SMBDIRECT_MR_DISABLED)) 2408 smbd_mr_disable_locked(mr); 2409 2410 mutex_unlock(&mr->mutex); 2411 mutex_destroy(&mr->mutex); 2412 kfree(mr); 2413 } 2414 2415 static void destroy_mr_list(struct smbdirect_socket *sc) 2416 { 2417 struct smbdirect_mr_io *mr, *tmp; 2418 LIST_HEAD(all_list); 2419 unsigned long flags; 2420 2421 disable_work_sync(&sc->mr_io.recovery_work); 2422 2423 spin_lock_irqsave(&sc->mr_io.all.lock, flags); 2424 list_splice_tail_init(&sc->mr_io.all.list, &all_list); 2425 spin_unlock_irqrestore(&sc->mr_io.all.lock, flags); 2426 2427 list_for_each_entry_safe(mr, tmp, &all_list, list) { 2428 mutex_lock(&mr->mutex); 2429 2430 smbd_mr_disable_locked(mr); 2431 list_del(&mr->list); 2432 mr->socket = NULL; 2433 2434 /* 2435 * No kref_put_mutex() as it's already locked. 2436 * 2437 * If smbd_mr_free_locked() is called 2438 * and the mutex is unlocked and mr is gone, 2439 * in that case kref_put() returned 1. 2440 * 2441 * If kref_put() returned 0 we know that 2442 * smbd_mr_free_locked() didn't 2443 * run. Not by us nor by anyone else, as we 2444 * still hold the mutex, so we need to unlock. 
2445 * 2446 * If the mr is still registered it will 2447 * be dangling (detached from the connection 2448 * waiting for smbd_deregister_mr() to be 2449 * called in order to free the memory. 2450 */ 2451 if (!kref_put(&mr->kref, smbd_mr_free_locked)) 2452 mutex_unlock(&mr->mutex); 2453 } 2454 } 2455 2456 /* 2457 * Allocate MRs used for RDMA read/write 2458 * The number of MRs will not exceed hardware capability in responder_resources 2459 * All MRs are kept in mr_list. The MR can be recovered after it's used 2460 * Recovery is done in smbd_mr_recovery_work. The content of list entry changes 2461 * as MRs are used and recovered for I/O, but the list links will not change 2462 */ 2463 static int allocate_mr_list(struct smbdirect_socket *sc) 2464 { 2465 struct smbdirect_socket_parameters *sp = &sc->parameters; 2466 struct smbdirect_mr_io *mr; 2467 int ret; 2468 u32 i; 2469 2470 if (sp->responder_resources == 0) { 2471 log_rdma_mr(ERR, "responder_resources negotiated as 0\n"); 2472 return -EINVAL; 2473 } 2474 2475 /* Allocate more MRs (2x) than hardware responder_resources */ 2476 for (i = 0; i < sp->responder_resources * 2; i++) { 2477 mr = kzalloc(sizeof(*mr), GFP_KERNEL); 2478 if (!mr) { 2479 ret = -ENOMEM; 2480 goto kzalloc_mr_failed; 2481 } 2482 2483 kref_init(&mr->kref); 2484 mutex_init(&mr->mutex); 2485 2486 mr->mr = ib_alloc_mr(sc->ib.pd, 2487 sc->mr_io.type, 2488 sp->max_frmr_depth); 2489 if (IS_ERR(mr->mr)) { 2490 ret = PTR_ERR(mr->mr); 2491 log_rdma_mr(ERR, "ib_alloc_mr failed mr_type=%x max_frmr_depth=%x\n", 2492 sc->mr_io.type, sp->max_frmr_depth); 2493 goto ib_alloc_mr_failed; 2494 } 2495 2496 mr->sgt.sgl = kcalloc(sp->max_frmr_depth, 2497 sizeof(struct scatterlist), 2498 GFP_KERNEL); 2499 if (!mr->sgt.sgl) { 2500 ret = -ENOMEM; 2501 log_rdma_mr(ERR, "failed to allocate sgl\n"); 2502 goto kcalloc_sgl_failed; 2503 } 2504 mr->state = SMBDIRECT_MR_READY; 2505 mr->socket = sc; 2506 2507 list_add_tail(&mr->list, &sc->mr_io.all.list); 2508 atomic_inc(&sc->mr_io.ready.count); 2509 } 2510 2511 INIT_WORK(&sc->mr_io.recovery_work, smbd_mr_recovery_work); 2512 2513 return 0; 2514 2515 kcalloc_sgl_failed: 2516 ib_dereg_mr(mr->mr); 2517 ib_alloc_mr_failed: 2518 mutex_destroy(&mr->mutex); 2519 kfree(mr); 2520 kzalloc_mr_failed: 2521 destroy_mr_list(sc); 2522 return ret; 2523 } 2524 2525 /* 2526 * Get a MR from mr_list. This function waits until there is at least one 2527 * MR available in the list. It may access the list while the 2528 * smbd_mr_recovery_work is recovering the MR list. This doesn't need a lock 2529 * as they never modify the same places. However, there may be several CPUs 2530 * issuing I/O trying to get MR at the same time, mr_list_lock is used to 2531 * protect this situation. 
2532 */ 2533 static struct smbdirect_mr_io *get_mr(struct smbdirect_socket *sc) 2534 { 2535 struct smbdirect_mr_io *ret; 2536 unsigned long flags; 2537 int rc; 2538 again: 2539 rc = wait_event_interruptible(sc->mr_io.ready.wait_queue, 2540 atomic_read(&sc->mr_io.ready.count) || 2541 sc->status != SMBDIRECT_SOCKET_CONNECTED); 2542 if (rc) { 2543 log_rdma_mr(ERR, "wait_event_interruptible rc=%x\n", rc); 2544 return NULL; 2545 } 2546 2547 if (sc->status != SMBDIRECT_SOCKET_CONNECTED) { 2548 log_rdma_mr(ERR, "sc->status=%x\n", sc->status); 2549 return NULL; 2550 } 2551 2552 spin_lock_irqsave(&sc->mr_io.all.lock, flags); 2553 list_for_each_entry(ret, &sc->mr_io.all.list, list) { 2554 if (ret->state == SMBDIRECT_MR_READY) { 2555 ret->state = SMBDIRECT_MR_REGISTERED; 2556 kref_get(&ret->kref); 2557 spin_unlock_irqrestore(&sc->mr_io.all.lock, flags); 2558 atomic_dec(&sc->mr_io.ready.count); 2559 atomic_inc(&sc->mr_io.used.count); 2560 return ret; 2561 } 2562 } 2563 2564 spin_unlock_irqrestore(&sc->mr_io.all.lock, flags); 2565 /* 2566 * It is possible that we could fail to get MR because other processes may 2567 * try to acquire a MR at the same time. If this is the case, retry it. 2568 */ 2569 goto again; 2570 } 2571 2572 /* 2573 * Transcribe the pages from an iterator into an MR scatterlist. 2574 */ 2575 static int smbd_iter_to_mr(struct iov_iter *iter, 2576 struct sg_table *sgt, 2577 unsigned int max_sg) 2578 { 2579 int ret; 2580 2581 memset(sgt->sgl, 0, max_sg * sizeof(struct scatterlist)); 2582 2583 ret = extract_iter_to_sg(iter, iov_iter_count(iter), sgt, max_sg, 0); 2584 WARN_ON(ret < 0); 2585 if (sgt->nents > 0) 2586 sg_mark_end(&sgt->sgl[sgt->nents - 1]); 2587 return ret; 2588 } 2589 2590 /* 2591 * Register memory for RDMA read/write 2592 * iter: the buffer to register memory with 2593 * writing: true if this is a RDMA write (SMB read), false for RDMA read 2594 * need_invalidate: true if this MR needs to be locally invalidated after I/O 2595 * return value: the MR registered, NULL if failed. 2596 */ 2597 struct smbdirect_mr_io *smbd_register_mr(struct smbd_connection *info, 2598 struct iov_iter *iter, 2599 bool writing, bool need_invalidate) 2600 { 2601 struct smbdirect_socket *sc = &info->socket; 2602 struct smbdirect_socket_parameters *sp = &sc->parameters; 2603 struct smbdirect_mr_io *mr; 2604 int rc, num_pages; 2605 struct ib_reg_wr *reg_wr; 2606 2607 num_pages = iov_iter_npages(iter, sp->max_frmr_depth + 1); 2608 if (num_pages > sp->max_frmr_depth) { 2609 log_rdma_mr(ERR, "num_pages=%d max_frmr_depth=%d\n", 2610 num_pages, sp->max_frmr_depth); 2611 WARN_ON_ONCE(1); 2612 return NULL; 2613 } 2614 2615 mr = get_mr(sc); 2616 if (!mr) { 2617 log_rdma_mr(ERR, "get_mr returning NULL\n"); 2618 return NULL; 2619 } 2620 2621 mutex_lock(&mr->mutex); 2622 2623 mr->dir = writing ? 
DMA_FROM_DEVICE : DMA_TO_DEVICE; 2624 mr->need_invalidate = need_invalidate; 2625 mr->sgt.nents = 0; 2626 mr->sgt.orig_nents = 0; 2627 2628 log_rdma_mr(INFO, "num_pages=0x%x count=0x%zx depth=%u\n", 2629 num_pages, iov_iter_count(iter), sp->max_frmr_depth); 2630 smbd_iter_to_mr(iter, &mr->sgt, sp->max_frmr_depth); 2631 2632 rc = ib_dma_map_sg(sc->ib.dev, mr->sgt.sgl, mr->sgt.nents, mr->dir); 2633 if (!rc) { 2634 log_rdma_mr(ERR, "ib_dma_map_sg num_pages=%x dir=%x rc=%x\n", 2635 num_pages, mr->dir, rc); 2636 goto dma_map_error; 2637 } 2638 2639 rc = ib_map_mr_sg(mr->mr, mr->sgt.sgl, mr->sgt.nents, NULL, PAGE_SIZE); 2640 if (rc != mr->sgt.nents) { 2641 log_rdma_mr(ERR, 2642 "ib_map_mr_sg failed rc = %d nents = %x\n", 2643 rc, mr->sgt.nents); 2644 goto map_mr_error; 2645 } 2646 2647 ib_update_fast_reg_key(mr->mr, ib_inc_rkey(mr->mr->rkey)); 2648 reg_wr = &mr->wr; 2649 reg_wr->wr.opcode = IB_WR_REG_MR; 2650 mr->cqe.done = register_mr_done; 2651 reg_wr->wr.wr_cqe = &mr->cqe; 2652 reg_wr->wr.num_sge = 0; 2653 reg_wr->wr.send_flags = IB_SEND_SIGNALED; 2654 reg_wr->mr = mr->mr; 2655 reg_wr->key = mr->mr->rkey; 2656 reg_wr->access = writing ? 2657 IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : 2658 IB_ACCESS_REMOTE_READ; 2659 2660 /* 2661 * There is no need to wait for completion of ib_post_send 2662 * for IB_WR_REG_MR. The hardware enforces a barrier and ordering of execution 2663 * on the next ib_post_send when we actually send I/O to the remote peer 2664 */ 2665 rc = ib_post_send(sc->ib.qp, &reg_wr->wr, NULL); 2666 if (!rc) { 2667 /* 2668 * get_mr() gave us a reference 2669 * via kref_get(&mr->kref), we keep that and let 2670 * the caller use smbd_deregister_mr() 2671 * to remove it again. 2672 */ 2673 mutex_unlock(&mr->mutex); 2674 return mr; 2675 } 2676 2677 log_rdma_mr(ERR, "ib_post_send failed rc=%x reg_wr->key=%x\n", 2678 rc, reg_wr->key); 2679 2680 /* On failure, attempt to recover this MR by setting it to SMBDIRECT_MR_ERROR */ 2681 map_mr_error: 2682 ib_dma_unmap_sg(sc->ib.dev, mr->sgt.sgl, mr->sgt.nents, mr->dir); 2683 2684 dma_map_error: 2685 mr->sgt.nents = 0; 2686 mr->state = SMBDIRECT_MR_ERROR; 2687 if (atomic_dec_and_test(&sc->mr_io.used.count)) 2688 wake_up(&sc->mr_io.cleanup.wait_queue); 2689 2690 smbd_disconnect_rdma_connection(sc); 2691 2692 /* 2693 * get_mr() gave us a reference 2694 * via kref_get(&mr->kref), we need to remove it again 2695 * on error. 2696 * 2697 * No kref_put_mutex() as it's already locked. 2698 * 2699 * If smbd_mr_free_locked() is called 2700 * and the mutex is unlocked and mr is gone, 2701 * in that case kref_put() returned 1. 2702 * 2703 * If kref_put() returned 0 we know that 2704 * smbd_mr_free_locked() didn't 2705 * run. Not by us nor by anyone else, as we 2706 * still hold the mutex, so we need to unlock.
2707 */ 2708 if (!kref_put(&mr->kref, smbd_mr_free_locked)) 2709 mutex_unlock(&mr->mutex); 2710 2711 return NULL; 2712 } 2713 2714 static void local_inv_done(struct ib_cq *cq, struct ib_wc *wc) 2715 { 2716 struct smbdirect_mr_io *smbdirect_mr; 2717 struct ib_cqe *cqe; 2718 2719 cqe = wc->wr_cqe; 2720 smbdirect_mr = container_of(cqe, struct smbdirect_mr_io, cqe); 2721 smbdirect_mr->state = SMBDIRECT_MR_INVALIDATED; 2722 if (wc->status != IB_WC_SUCCESS) { 2723 log_rdma_mr(ERR, "invalidate failed status=%x\n", wc->status); 2724 smbdirect_mr->state = SMBDIRECT_MR_ERROR; 2725 } 2726 complete(&smbdirect_mr->invalidate_done); 2727 } 2728 2729 /* 2730 * Deregister an MR after I/O is done 2731 * This function may wait if remote invalidation is not used 2732 * and we have to locally invalidate the buffer to prevent data from being 2733 * modified by the remote peer after the upper layer consumes it 2734 */ 2735 void smbd_deregister_mr(struct smbdirect_mr_io *mr) 2736 { 2737 struct smbdirect_socket *sc = mr->socket; 2738 2739 mutex_lock(&mr->mutex); 2740 if (mr->state == SMBDIRECT_MR_DISABLED) 2741 goto put_kref; 2742 2743 if (sc->status != SMBDIRECT_SOCKET_CONNECTED) { 2744 smbd_mr_disable_locked(mr); 2745 goto put_kref; 2746 } 2747 2748 if (mr->need_invalidate) { 2749 struct ib_send_wr *wr = &mr->inv_wr; 2750 int rc; 2751 2752 /* Need to finish local invalidation before returning */ 2753 wr->opcode = IB_WR_LOCAL_INV; 2754 mr->cqe.done = local_inv_done; 2755 wr->wr_cqe = &mr->cqe; 2756 wr->num_sge = 0; 2757 wr->ex.invalidate_rkey = mr->mr->rkey; 2758 wr->send_flags = IB_SEND_SIGNALED; 2759 2760 init_completion(&mr->invalidate_done); 2761 rc = ib_post_send(sc->ib.qp, wr, NULL); 2762 if (rc) { 2763 log_rdma_mr(ERR, "ib_post_send failed rc=%x\n", rc); 2764 smbd_mr_disable_locked(mr); 2765 smbd_disconnect_rdma_connection(sc); 2766 goto done; 2767 } 2768 wait_for_completion(&mr->invalidate_done); 2769 mr->need_invalidate = false; 2770 } else 2771 /* 2772 * For remote invalidation, just set it to SMBDIRECT_MR_INVALIDATED 2773 * and defer to mr_recovery_work to recover the MR for next use 2774 */ 2775 mr->state = SMBDIRECT_MR_INVALIDATED; 2776 2777 if (mr->sgt.nents) { 2778 ib_dma_unmap_sg(sc->ib.dev, mr->sgt.sgl, mr->sgt.nents, mr->dir); 2779 mr->sgt.nents = 0; 2780 } 2781 2782 if (mr->state == SMBDIRECT_MR_INVALIDATED) { 2783 mr->state = SMBDIRECT_MR_READY; 2784 if (atomic_inc_return(&sc->mr_io.ready.count) == 1) 2785 wake_up(&sc->mr_io.ready.wait_queue); 2786 } else 2787 /* 2788 * Schedule the work to do MR recovery for future I/Os. MR 2789 * recovery is slow and we don't want it to block the current I/O 2790 */ 2791 queue_work(sc->workqueue, &sc->mr_io.recovery_work); 2792 2793 done: 2794 if (atomic_dec_and_test(&sc->mr_io.used.count)) 2795 wake_up(&sc->mr_io.cleanup.wait_queue); 2796 2797 put_kref: 2798 /* 2799 * No kref_put_mutex() as it's already locked. 2800 * 2801 * If smbd_mr_free_locked() is called 2802 * and the mutex is unlocked and mr is gone, 2803 * in that case kref_put() returned 1. 2804 * 2805 * If kref_put() returned 0 we know that 2806 * smbd_mr_free_locked() didn't 2807 * run. Not by us nor by anyone else, as we 2808 * still hold the mutex, so we need to unlock 2809 * and keep the mr in SMBDIRECT_MR_READY or 2810 * SMBDIRECT_MR_ERROR state.
2811 */ 2812 if (!kref_put(&mr->kref, smbd_mr_free_locked)) 2813 mutex_unlock(&mr->mutex); 2814 } 2815 2816 static bool smb_set_sge(struct smb_extract_to_rdma *rdma, 2817 struct page *lowest_page, size_t off, size_t len) 2818 { 2819 struct ib_sge *sge = &rdma->sge[rdma->nr_sge]; 2820 u64 addr; 2821 2822 addr = ib_dma_map_page(rdma->device, lowest_page, 2823 off, len, rdma->direction); 2824 if (ib_dma_mapping_error(rdma->device, addr)) 2825 return false; 2826 2827 sge->addr = addr; 2828 sge->length = len; 2829 sge->lkey = rdma->local_dma_lkey; 2830 rdma->nr_sge++; 2831 return true; 2832 } 2833 2834 /* 2835 * Extract page fragments from a BVEC-class iterator and add them to an RDMA 2836 * element list. The pages are not pinned. 2837 */ 2838 static ssize_t smb_extract_bvec_to_rdma(struct iov_iter *iter, 2839 struct smb_extract_to_rdma *rdma, 2840 ssize_t maxsize) 2841 { 2842 const struct bio_vec *bv = iter->bvec; 2843 unsigned long start = iter->iov_offset; 2844 unsigned int i; 2845 ssize_t ret = 0; 2846 2847 for (i = 0; i < iter->nr_segs; i++) { 2848 size_t off, len; 2849 2850 len = bv[i].bv_len; 2851 if (start >= len) { 2852 start -= len; 2853 continue; 2854 } 2855 2856 len = min_t(size_t, maxsize, len - start); 2857 off = bv[i].bv_offset + start; 2858 2859 if (!smb_set_sge(rdma, bv[i].bv_page, off, len)) 2860 return -EIO; 2861 2862 ret += len; 2863 maxsize -= len; 2864 if (rdma->nr_sge >= rdma->max_sge || maxsize <= 0) 2865 break; 2866 start = 0; 2867 } 2868 2869 if (ret > 0) 2870 iov_iter_advance(iter, ret); 2871 return ret; 2872 } 2873 2874 /* 2875 * Extract fragments from a KVEC-class iterator and add them to an RDMA list. 2876 * This can deal with vmalloc'd buffers as well as kmalloc'd or static buffers. 2877 * The pages are not pinned. 2878 */ 2879 static ssize_t smb_extract_kvec_to_rdma(struct iov_iter *iter, 2880 struct smb_extract_to_rdma *rdma, 2881 ssize_t maxsize) 2882 { 2883 const struct kvec *kv = iter->kvec; 2884 unsigned long start = iter->iov_offset; 2885 unsigned int i; 2886 ssize_t ret = 0; 2887 2888 for (i = 0; i < iter->nr_segs; i++) { 2889 struct page *page; 2890 unsigned long kaddr; 2891 size_t off, len, seg; 2892 2893 len = kv[i].iov_len; 2894 if (start >= len) { 2895 start -= len; 2896 continue; 2897 } 2898 2899 kaddr = (unsigned long)kv[i].iov_base + start; 2900 off = kaddr & ~PAGE_MASK; 2901 len = min_t(size_t, maxsize, len - start); 2902 kaddr &= PAGE_MASK; 2903 2904 maxsize -= len; 2905 do { 2906 seg = min_t(size_t, len, PAGE_SIZE - off); 2907 2908 if (is_vmalloc_or_module_addr((void *)kaddr)) 2909 page = vmalloc_to_page((void *)kaddr); 2910 else 2911 page = virt_to_page((void *)kaddr); 2912 2913 if (!smb_set_sge(rdma, page, off, seg)) 2914 return -EIO; 2915 2916 ret += seg; 2917 len -= seg; 2918 kaddr += PAGE_SIZE; 2919 off = 0; 2920 } while (len > 0 && rdma->nr_sge < rdma->max_sge); 2921 2922 if (rdma->nr_sge >= rdma->max_sge || maxsize <= 0) 2923 break; 2924 start = 0; 2925 } 2926 2927 if (ret > 0) 2928 iov_iter_advance(iter, ret); 2929 return ret; 2930 } 2931 2932 /* 2933 * Extract folio fragments from a FOLIOQ-class iterator and add them to an RDMA 2934 * list. The folios are not pinned. 
2935 */ 2936 static ssize_t smb_extract_folioq_to_rdma(struct iov_iter *iter, 2937 struct smb_extract_to_rdma *rdma, 2938 ssize_t maxsize) 2939 { 2940 const struct folio_queue *folioq = iter->folioq; 2941 unsigned int slot = iter->folioq_slot; 2942 ssize_t ret = 0; 2943 size_t offset = iter->iov_offset; 2944 2945 BUG_ON(!folioq); 2946 2947 if (slot >= folioq_nr_slots(folioq)) { 2948 folioq = folioq->next; 2949 if (WARN_ON_ONCE(!folioq)) 2950 return -EIO; 2951 slot = 0; 2952 } 2953 2954 do { 2955 struct folio *folio = folioq_folio(folioq, slot); 2956 size_t fsize = folioq_folio_size(folioq, slot); 2957 2958 if (offset < fsize) { 2959 size_t part = umin(maxsize, fsize - offset); 2960 2961 if (!smb_set_sge(rdma, folio_page(folio, 0), offset, part)) 2962 return -EIO; 2963 2964 offset += part; 2965 ret += part; 2966 maxsize -= part; 2967 } 2968 2969 if (offset >= fsize) { 2970 offset = 0; 2971 slot++; 2972 if (slot >= folioq_nr_slots(folioq)) { 2973 if (!folioq->next) { 2974 WARN_ON_ONCE(ret < iter->count); 2975 break; 2976 } 2977 folioq = folioq->next; 2978 slot = 0; 2979 } 2980 } 2981 } while (rdma->nr_sge < rdma->max_sge && maxsize > 0); 2982 2983 iter->folioq = folioq; 2984 iter->folioq_slot = slot; 2985 iter->iov_offset = offset; 2986 iter->count -= ret; 2987 return ret; 2988 } 2989 2990 /* 2991 * Extract page fragments from up to the given amount of the source iterator 2992 * and build up an RDMA list that refers to all of those bits. The RDMA list 2993 * is appended to, up to the maximum number of elements set in the parameter 2994 * block. 2995 * 2996 * The extracted page fragments are not pinned or ref'd in any way; if an 2997 * IOVEC/UBUF-type iterator is to be used, it should be converted to a 2998 * BVEC-type iterator and the pages pinned, ref'd or otherwise held in some 2999 * way. 3000 */ 3001 static ssize_t smb_extract_iter_to_rdma(struct iov_iter *iter, size_t len, 3002 struct smb_extract_to_rdma *rdma) 3003 { 3004 ssize_t ret; 3005 int before = rdma->nr_sge; 3006 3007 switch (iov_iter_type(iter)) { 3008 case ITER_BVEC: 3009 ret = smb_extract_bvec_to_rdma(iter, rdma, len); 3010 break; 3011 case ITER_KVEC: 3012 ret = smb_extract_kvec_to_rdma(iter, rdma, len); 3013 break; 3014 case ITER_FOLIOQ: 3015 ret = smb_extract_folioq_to_rdma(iter, rdma, len); 3016 break; 3017 default: 3018 WARN_ON_ONCE(1); 3019 return -EIO; 3020 } 3021 3022 if (ret < 0) { 3023 while (rdma->nr_sge > before) { 3024 struct ib_sge *sge = &rdma->sge[--rdma->nr_sge]; 3025 3026 ib_dma_unmap_single(rdma->device, sge->addr, sge->length, 3027 rdma->direction); 3028 sge->addr = 0; 3029 } 3030 } 3031 3032 return ret; 3033 } 3034
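/*
 * Illustrative usage sketch (not part of the transport logic): how a caller
 * might feed a source iterator into smb_extract_iter_to_rdma(). Here "dev",
 * "pd" and "iter" are assumed to stand for an already-opened ib_device, its
 * ib_pd and a BVEC/KVEC/FOLIOQ iterator; the SGE array bounds the extraction.
 *
 *	struct ib_sge sges[8];
 *	struct smb_extract_to_rdma rdma = {
 *		.sge		= sges,
 *		.nr_sge		= 0,
 *		.max_sge	= ARRAY_SIZE(sges),
 *		.device		= dev,
 *		.local_dma_lkey	= pd->local_dma_lkey,
 *		.direction	= DMA_TO_DEVICE,
 *	};
 *	ssize_t n = smb_extract_iter_to_rdma(iter, iov_iter_count(iter), &rdma);
 *
 * On success, n bytes of the iterator are described by the DMA-mapped entries
 * rdma.sge[0..rdma.nr_sge - 1]; on error the helper has already unmapped any
 * entries it added before failing.
 */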