/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Copyright (c) 2025 Stefan Metzmacher
 */

#ifndef __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_SOCKET_H__
#define __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_SOCKET_H__

#include <rdma/rw.h>

enum smbdirect_socket_status {
	SMBDIRECT_SOCKET_CREATED,
	SMBDIRECT_SOCKET_RESOLVE_ADDR_NEEDED,
	SMBDIRECT_SOCKET_RESOLVE_ADDR_RUNNING,
	SMBDIRECT_SOCKET_RESOLVE_ADDR_FAILED,
	SMBDIRECT_SOCKET_RESOLVE_ROUTE_NEEDED,
	SMBDIRECT_SOCKET_RESOLVE_ROUTE_RUNNING,
	SMBDIRECT_SOCKET_RESOLVE_ROUTE_FAILED,
	SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED,
	SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING,
	SMBDIRECT_SOCKET_RDMA_CONNECT_FAILED,
	SMBDIRECT_SOCKET_NEGOTIATE_NEEDED,
	SMBDIRECT_SOCKET_NEGOTIATE_RUNNING,
	SMBDIRECT_SOCKET_NEGOTIATE_FAILED,
	SMBDIRECT_SOCKET_CONNECTED,
	SMBDIRECT_SOCKET_ERROR,
	SMBDIRECT_SOCKET_DISCONNECTING,
	SMBDIRECT_SOCKET_DISCONNECTED,
	SMBDIRECT_SOCKET_DESTROYED
};

static __always_inline
const char *smbdirect_socket_status_string(enum smbdirect_socket_status status)
{
	switch (status) {
	case SMBDIRECT_SOCKET_CREATED:
		return "CREATED";
	case SMBDIRECT_SOCKET_RESOLVE_ADDR_NEEDED:
		return "RESOLVE_ADDR_NEEDED";
	case SMBDIRECT_SOCKET_RESOLVE_ADDR_RUNNING:
		return "RESOLVE_ADDR_RUNNING";
	case SMBDIRECT_SOCKET_RESOLVE_ADDR_FAILED:
		return "RESOLVE_ADDR_FAILED";
	case SMBDIRECT_SOCKET_RESOLVE_ROUTE_NEEDED:
		return "RESOLVE_ROUTE_NEEDED";
	case SMBDIRECT_SOCKET_RESOLVE_ROUTE_RUNNING:
		return "RESOLVE_ROUTE_RUNNING";
	case SMBDIRECT_SOCKET_RESOLVE_ROUTE_FAILED:
		return "RESOLVE_ROUTE_FAILED";
	case SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED:
		return "RDMA_CONNECT_NEEDED";
	case SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING:
		return "RDMA_CONNECT_RUNNING";
	case SMBDIRECT_SOCKET_RDMA_CONNECT_FAILED:
		return "RDMA_CONNECT_FAILED";
	case SMBDIRECT_SOCKET_NEGOTIATE_NEEDED:
		return "NEGOTIATE_NEEDED";
	case SMBDIRECT_SOCKET_NEGOTIATE_RUNNING:
		return "NEGOTIATE_RUNNING";
	case SMBDIRECT_SOCKET_NEGOTIATE_FAILED:
		return "NEGOTIATE_FAILED";
	case SMBDIRECT_SOCKET_CONNECTED:
		return "CONNECTED";
	case SMBDIRECT_SOCKET_ERROR:
		return "ERROR";
	case SMBDIRECT_SOCKET_DISCONNECTING:
		return "DISCONNECTING";
	case SMBDIRECT_SOCKET_DISCONNECTED:
		return "DISCONNECTED";
	case SMBDIRECT_SOCKET_DESTROYED:
		return "DESTROYED";
	}

	return "<unknown>";
}

/*
 * This can be used with %1pe to print errors as strings or '0'.
 * It also avoids warnings like "warn: passing zero to 'ERR_PTR'"
 * from smatch -p=kernel --pedantic.
 */
static __always_inline
const void * __must_check SMBDIRECT_DEBUG_ERR_PTR(long error)
{
	if (error == 0)
		return NULL;
	return ERR_PTR(error);
}
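/*
 * Example (illustrative sketch, not used by this header itself):
 * the two helpers above are intended for debug output, e.g. with a
 * hypothetical "struct smbdirect_socket *sc":
 *
 *	pr_info("smbdirect status=%s first_error=%1pe\n",
 *		smbdirect_socket_status_string(sc->status),
 *		SMBDIRECT_DEBUG_ERR_PTR(sc->first_error));
 *
 * As described above, %1pe then prints the symbolic error name or '0'.
 */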
enum smbdirect_keepalive_status {
	SMBDIRECT_KEEPALIVE_NONE,
	SMBDIRECT_KEEPALIVE_PENDING,
	SMBDIRECT_KEEPALIVE_SENT
};

struct smbdirect_socket {
	enum smbdirect_socket_status status;
	wait_queue_head_t status_wait;
	int first_error;

	/*
	 * This points to the workqueue to
	 * be used for this socket.
	 * It can be per socket (on the client)
	 * or point to a global workqueue (on the server).
	 */
	struct workqueue_struct *workqueue;

	struct work_struct disconnect_work;

	/* RDMA related */
	struct {
		struct rdma_cm_id *cm_id;
		/*
		 * This is for iWARP MPA v1
		 */
		bool legacy_iwarp;
	} rdma;

	/* IB verbs related */
	struct {
		struct ib_pd *pd;
		struct ib_cq *send_cq;
		struct ib_cq *recv_cq;

		/*
		 * shortcuts for rdma.cm_id->{qp,device}
		 */
		struct ib_qp *qp;
		struct ib_device *dev;
	} ib;

	struct smbdirect_socket_parameters parameters;

	/*
	 * The state for connect/negotiation
	 */
	struct {
		spinlock_t lock;
		struct work_struct work;
	} connect;

	/*
	 * The state for keepalive and timeout handling
	 */
	struct {
		enum smbdirect_keepalive_status keepalive;
		struct work_struct immediate_work;
		struct delayed_work timer_work;
	} idle;

	/*
	 * The state for posted send buffers
	 */
	struct {
		/*
		 * Memory pools for preallocating
		 * smbdirect_send_io buffers
		 */
		struct {
			struct kmem_cache *cache;
			mempool_t *pool;
		} mem;

		/*
		 * This coordinates smbdirect_send_batch instances.
		 *
		 * There's only one possible credit, which means
		 * only one instance is running at a time.
		 */
		struct {
			atomic_t count;
			wait_queue_head_t wait_queue;
		} bcredits;

		/*
		 * The local credit state for ib_post_send()
		 */
		struct {
			atomic_t count;
			wait_queue_head_t wait_queue;
		} lcredits;

		/*
		 * The remote credit state for the send side
		 */
		struct {
			atomic_t count;
			wait_queue_head_t wait_queue;
		} credits;

		/*
		 * The state about posted/pending sends
		 */
		struct {
			atomic_t count;
			/*
			 * woken when count is decremented
			 */
			wait_queue_head_t dec_wait_queue;
			/*
			 * woken when count reaches zero
			 */
			wait_queue_head_t zero_wait_queue;
		} pending;
	} send_io;

	/*
	 * The state for posted receive buffers
	 */
	struct {
		/*
		 * The type of PDU we are expecting
		 */
		enum {
			SMBDIRECT_EXPECT_NEGOTIATE_REQ = 1,
			SMBDIRECT_EXPECT_NEGOTIATE_REP = 2,
			SMBDIRECT_EXPECT_DATA_TRANSFER = 3,
		} expected;

		/*
		 * Memory pools for preallocating
		 * smbdirect_recv_io buffers
		 */
		struct {
			struct kmem_cache *cache;
			mempool_t *pool;
		} mem;

		/*
		 * The list of free smbdirect_recv_io
		 * structures
		 */
		struct {
			struct list_head list;
			spinlock_t lock;
		} free;

		/*
		 * The state for posted recv_io messages
		 * and the refill work struct.
		 */
		struct {
			atomic_t count;
			struct work_struct refill_work;
		} posted;

		/*
		 * The credit state for the recv side
		 */
		struct {
			u16 target;
			atomic_t available;
			atomic_t count;
		} credits;

		/*
		 * The list of arrived non-empty smbdirect_recv_io
		 * structures.
		 *
		 * This represents the reassembly queue.
		 */
		struct {
			struct list_head list;
			spinlock_t lock;
			wait_queue_head_t wait_queue;
			/* total data length of reassembly queue */
			int data_length;
			int queue_length;
			/* the offset to first buffer in reassembly queue */
			int first_entry_offset;
			/*
			 * Indicates whether we have received a full packet
			 * on the connection. This is used to identify the
			 * first SMBD packet of an assembled payload (SMB
			 * packet) in the reassembly queue, so we can return
			 * an RFC1002 length to the upper layer to indicate
			 * the length of the SMB packet received.
			 */
			bool full_packet_received;
		} reassembly;
	} recv_io;

	/*
	 * The state for memory registrations on the client
	 */
	struct {
		enum ib_mr_type type;

		/*
		 * The list of free smbdirect_mr_io
		 * structures
		 */
		struct {
			struct list_head list;
			spinlock_t lock;
		} all;

		/*
		 * The number of available MRs ready for memory registration
		 */
		struct {
			atomic_t count;
			wait_queue_head_t wait_queue;
		} ready;

		/*
		 * The number of used MRs
		 */
		struct {
			atomic_t count;
		} used;

		struct work_struct recovery_work;

		/* Used by transport to wait until all MRs are returned */
		struct {
			wait_queue_head_t wait_queue;
		} cleanup;
	} mr_io;

	/*
	 * The state for RDMA read/write requests on the server
	 */
	struct {
		/*
		 * The credit state for RDMA read/write operations
		 */
		struct {
			/*
			 * The maximum number of rw credits
			 */
			size_t max;
			/*
			 * The number of pages per credit
			 */
			size_t num_pages;
			atomic_t count;
			wait_queue_head_t wait_queue;
		} credits;
	} rw_io;

	/*
	 * For debug purposes
	 */
	struct {
		u64 get_receive_buffer;
		u64 put_receive_buffer;
		u64 enqueue_reassembly_queue;
		u64 dequeue_reassembly_queue;
		u64 send_empty;
	} statistics;
};
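/*
 * Example (illustrative sketch, not mandated by this header): status
 * changes are announced via status_wait, so a caller that needs a fully
 * established connection could wait with something like:
 *
 *	wait_event_interruptible(sc->status_wait,
 *				 sc->status == SMBDIRECT_SOCKET_CONNECTED ||
 *				 sc->status == SMBDIRECT_SOCKET_ERROR);
 *
 * The exact wait condition and any timeout handling are up to the user
 * of this header.
 */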
static void __smbdirect_socket_disabled_work(struct work_struct *work)
{
	/*
	 * Should never be called as disable_[delayed_]work_sync() was used.
	 */
	WARN_ON_ONCE(1);
}

static __always_inline void smbdirect_socket_init(struct smbdirect_socket *sc)
{
	/*
	 * This also sets status = SMBDIRECT_SOCKET_CREATED
	 */
	BUILD_BUG_ON(SMBDIRECT_SOCKET_CREATED != 0);
	memset(sc, 0, sizeof(*sc));

	init_waitqueue_head(&sc->status_wait);

	INIT_WORK(&sc->disconnect_work, __smbdirect_socket_disabled_work);
	disable_work_sync(&sc->disconnect_work);

	spin_lock_init(&sc->connect.lock);
	INIT_WORK(&sc->connect.work, __smbdirect_socket_disabled_work);
	disable_work_sync(&sc->connect.work);

	INIT_WORK(&sc->idle.immediate_work, __smbdirect_socket_disabled_work);
	disable_work_sync(&sc->idle.immediate_work);
	INIT_DELAYED_WORK(&sc->idle.timer_work, __smbdirect_socket_disabled_work);
	disable_delayed_work_sync(&sc->idle.timer_work);

	atomic_set(&sc->send_io.bcredits.count, 0);
	init_waitqueue_head(&sc->send_io.bcredits.wait_queue);

	atomic_set(&sc->send_io.lcredits.count, 0);
	init_waitqueue_head(&sc->send_io.lcredits.wait_queue);

	atomic_set(&sc->send_io.credits.count, 0);
	init_waitqueue_head(&sc->send_io.credits.wait_queue);

	atomic_set(&sc->send_io.pending.count, 0);
	init_waitqueue_head(&sc->send_io.pending.dec_wait_queue);
	init_waitqueue_head(&sc->send_io.pending.zero_wait_queue);

	INIT_LIST_HEAD(&sc->recv_io.free.list);
	spin_lock_init(&sc->recv_io.free.lock);

	atomic_set(&sc->recv_io.posted.count, 0);
	INIT_WORK(&sc->recv_io.posted.refill_work, __smbdirect_socket_disabled_work);
	disable_work_sync(&sc->recv_io.posted.refill_work);

	atomic_set(&sc->recv_io.credits.available, 0);
	atomic_set(&sc->recv_io.credits.count, 0);

	INIT_LIST_HEAD(&sc->recv_io.reassembly.list);
	spin_lock_init(&sc->recv_io.reassembly.lock);
	init_waitqueue_head(&sc->recv_io.reassembly.wait_queue);

	atomic_set(&sc->rw_io.credits.count, 0);
	init_waitqueue_head(&sc->rw_io.credits.wait_queue);

	spin_lock_init(&sc->mr_io.all.lock);
	INIT_LIST_HEAD(&sc->mr_io.all.list);
	atomic_set(&sc->mr_io.ready.count, 0);
	init_waitqueue_head(&sc->mr_io.ready.wait_queue);
	atomic_set(&sc->mr_io.used.count, 0);
	INIT_WORK(&sc->mr_io.recovery_work, __smbdirect_socket_disabled_work);
	disable_work_sync(&sc->mr_io.recovery_work);
	init_waitqueue_head(&sc->mr_io.cleanup.wait_queue);
}
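/*
 * Example (illustrative sketch, not a required calling convention):
 * smbdirect_socket_init() only installs safe defaults: all work items
 * are disabled and point at __smbdirect_socket_disabled_work(). The
 * user of this header is expected to hook up its own workqueue and
 * re-initialize the work items with real handlers before using them:
 *
 *	smbdirect_socket_init(sc);
 *	sc->workqueue = my_workqueue;
 *	INIT_WORK(&sc->disconnect_work, my_disconnect_work);
 *
 * "my_workqueue" and "my_disconnect_work" are hypothetical placeholders
 * provided by the caller.
 */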
#define __SMBDIRECT_CHECK_STATUS_FAILED(__sc, __expected_status, __error_cmd, __unexpected_cmd) ({ \
	bool __failed = false; \
	if (unlikely((__sc)->first_error)) { \
		__failed = true; \
		__error_cmd \
	} else if (unlikely((__sc)->status != (__expected_status))) { \
		__failed = true; \
		__unexpected_cmd \
	} \
	__failed; \
})

#define __SMBDIRECT_CHECK_STATUS_WARN(__sc, __expected_status, __unexpected_cmd) \
	__SMBDIRECT_CHECK_STATUS_FAILED(__sc, __expected_status, \
		, \
	{ \
		const struct sockaddr_storage *__src = NULL; \
		const struct sockaddr_storage *__dst = NULL; \
		if ((__sc)->rdma.cm_id) { \
			__src = &(__sc)->rdma.cm_id->route.addr.src_addr; \
			__dst = &(__sc)->rdma.cm_id->route.addr.dst_addr; \
		} \
		WARN_ONCE(1, \
			"expected[%s] != %s first_error=%1pe local=%pISpsfc remote=%pISpsfc\n", \
			smbdirect_socket_status_string(__expected_status), \
			smbdirect_socket_status_string((__sc)->status), \
			SMBDIRECT_DEBUG_ERR_PTR((__sc)->first_error), \
			__src, __dst); \
		__unexpected_cmd \
	})

#define SMBDIRECT_CHECK_STATUS_WARN(__sc, __expected_status) \
	__SMBDIRECT_CHECK_STATUS_WARN(__sc, __expected_status, /* nothing */)

#define SMBDIRECT_CHECK_STATUS_DISCONNECT(__sc, __expected_status) \
	__SMBDIRECT_CHECK_STATUS_WARN(__sc, __expected_status, \
		__SMBDIRECT_SOCKET_DISCONNECT(__sc);)

struct smbdirect_send_io {
	struct smbdirect_socket *socket;
	struct ib_cqe cqe;

	/*
	 * The SGE entries for this work request
	 *
	 * The first one points to the packet header
	 */
#define SMBDIRECT_SEND_IO_MAX_SGE 6
	size_t num_sge;
	struct ib_sge sge[SMBDIRECT_SEND_IO_MAX_SGE];

	/*
	 * Link to the list of sibling smbdirect_send_io
	 * messages.
	 */
	struct list_head sibling_list;
	struct ib_send_wr wr;

	/* SMBD packet header follows this structure */
	u8 packet[];
};

struct smbdirect_send_batch {
	/*
	 * List of smbdirect_send_io messages
	 */
	struct list_head msg_list;
	/*
	 * Number of list entries
	 */
	size_t wr_cnt;

	/*
	 * Possible remote key invalidation state
	 */
	bool need_invalidate_rkey;
	u32 remote_key;

	int credit;
};

struct smbdirect_recv_io {
	struct smbdirect_socket *socket;
	struct ib_cqe cqe;

	/*
	 * For now we only use a single SGE
	 * as we have just one large buffer
	 * per posted recv.
	 */
#define SMBDIRECT_RECV_IO_MAX_SGE 1
	struct ib_sge sge;

	/* Link to free or reassembly list */
	struct list_head list;

	/* Indicates whether this is the first packet of a payload */
	bool first_segment;

	/* SMBD packet header and payload follow this structure */
	u8 packet[];
};

enum smbdirect_mr_state {
	SMBDIRECT_MR_READY,
	SMBDIRECT_MR_REGISTERED,
	SMBDIRECT_MR_INVALIDATED,
	SMBDIRECT_MR_ERROR,
	SMBDIRECT_MR_DISABLED
};

struct smbdirect_mr_io {
	struct smbdirect_socket *socket;
	struct ib_cqe cqe;

	/*
	 * We can have up to two references:
	 * 1. by the connection
	 * 2. by the registration
	 */
	struct kref kref;
	struct mutex mutex;

	struct list_head list;

	enum smbdirect_mr_state state;
	struct ib_mr *mr;
	struct sg_table sgt;
	enum dma_data_direction dir;
	union {
		struct ib_reg_wr wr;
		struct ib_send_wr inv_wr;
	};

	bool need_invalidate;
	struct completion invalidate_done;
};

struct smbdirect_rw_io {
	struct smbdirect_socket *socket;
	struct ib_cqe cqe;

	struct list_head list;

	int error;
	struct completion *completion;

	struct rdma_rw_ctx rdma_ctx;
	struct sg_table sgt;
	struct scatterlist sg_list[];
};

#endif /* __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_SOCKET_H__ */
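/*
 * Example (illustrative sketch): a work or completion handler that must
 * only run on a fully connected socket could bail out early like this:
 *
 *	if (SMBDIRECT_CHECK_STATUS_DISCONNECT(sc, SMBDIRECT_SOCKET_CONNECTED))
 *		return;
 *
 * On an unexpected status this warns once (including the local and
 * remote addresses) and invokes __SMBDIRECT_SOCKET_DISCONNECT(), which
 * is expected to be provided by the code including this header.
 */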