1 /* SPDX-License-Identifier: GPL-2.0-or-later */ 2 /* 3 * Copyright (c) 2025 Stefan Metzmacher 4 */ 5 6 #ifndef __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_SOCKET_H__ 7 #define __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_SOCKET_H__ 8 9 #include <rdma/rw.h> 10 11 enum smbdirect_socket_status { 12 SMBDIRECT_SOCKET_CREATED, 13 SMBDIRECT_SOCKET_RESOLVE_ADDR_NEEDED, 14 SMBDIRECT_SOCKET_RESOLVE_ADDR_RUNNING, 15 SMBDIRECT_SOCKET_RESOLVE_ADDR_FAILED, 16 SMBDIRECT_SOCKET_RESOLVE_ROUTE_NEEDED, 17 SMBDIRECT_SOCKET_RESOLVE_ROUTE_RUNNING, 18 SMBDIRECT_SOCKET_RESOLVE_ROUTE_FAILED, 19 SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED, 20 SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING, 21 SMBDIRECT_SOCKET_RDMA_CONNECT_FAILED, 22 SMBDIRECT_SOCKET_NEGOTIATE_NEEDED, 23 SMBDIRECT_SOCKET_NEGOTIATE_RUNNING, 24 SMBDIRECT_SOCKET_NEGOTIATE_FAILED, 25 SMBDIRECT_SOCKET_CONNECTED, 26 SMBDIRECT_SOCKET_ERROR, 27 SMBDIRECT_SOCKET_DISCONNECTING, 28 SMBDIRECT_SOCKET_DISCONNECTED, 29 SMBDIRECT_SOCKET_DESTROYED 30 }; 31 32 static __always_inline 33 const char *smbdirect_socket_status_string(enum smbdirect_socket_status status) 34 { 35 switch (status) { 36 case SMBDIRECT_SOCKET_CREATED: 37 return "CREATED"; 38 case SMBDIRECT_SOCKET_RESOLVE_ADDR_NEEDED: 39 return "RESOLVE_ADDR_NEEDED"; 40 case SMBDIRECT_SOCKET_RESOLVE_ADDR_RUNNING: 41 return "RESOLVE_ADDR_RUNNING"; 42 case SMBDIRECT_SOCKET_RESOLVE_ADDR_FAILED: 43 return "RESOLVE_ADDR_FAILED"; 44 case SMBDIRECT_SOCKET_RESOLVE_ROUTE_NEEDED: 45 return "RESOLVE_ROUTE_NEEDED"; 46 case SMBDIRECT_SOCKET_RESOLVE_ROUTE_RUNNING: 47 return "RESOLVE_ROUTE_RUNNING"; 48 case SMBDIRECT_SOCKET_RESOLVE_ROUTE_FAILED: 49 return "RESOLVE_ROUTE_FAILED"; 50 case SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED: 51 return "RDMA_CONNECT_NEEDED"; 52 case SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING: 53 return "RDMA_CONNECT_RUNNING"; 54 case SMBDIRECT_SOCKET_RDMA_CONNECT_FAILED: 55 return "RDMA_CONNECT_FAILED"; 56 case SMBDIRECT_SOCKET_NEGOTIATE_NEEDED: 57 return "NEGOTIATE_NEEDED"; 58 case SMBDIRECT_SOCKET_NEGOTIATE_RUNNING: 59 return "NEGOTIATE_RUNNING"; 60 case SMBDIRECT_SOCKET_NEGOTIATE_FAILED: 61 return "NEGOTIATE_FAILED"; 62 case SMBDIRECT_SOCKET_CONNECTED: 63 return "CONNECTED"; 64 case SMBDIRECT_SOCKET_ERROR: 65 return "ERROR"; 66 case SMBDIRECT_SOCKET_DISCONNECTING: 67 return "DISCONNECTING"; 68 case SMBDIRECT_SOCKET_DISCONNECTED: 69 return "DISCONNECTED"; 70 case SMBDIRECT_SOCKET_DESTROYED: 71 return "DESTROYED"; 72 } 73 74 return "<unknown>"; 75 } 76 77 /* 78 * This can be used with %1pe to print errors as strings or '0' 79 * And it avoids warnings like: warn: passing zero to 'ERR_PTR' 80 * from smatch -p=kernel --pedantic 81 */ 82 static __always_inline 83 const void * __must_check SMBDIRECT_DEBUG_ERR_PTR(long error) 84 { 85 if (error == 0) 86 return NULL; 87 return ERR_PTR(error); 88 } 89 90 enum smbdirect_keepalive_status { 91 SMBDIRECT_KEEPALIVE_NONE, 92 SMBDIRECT_KEEPALIVE_PENDING, 93 SMBDIRECT_KEEPALIVE_SENT 94 }; 95 96 struct smbdirect_socket { 97 enum smbdirect_socket_status status; 98 wait_queue_head_t status_wait; 99 int first_error; 100 101 /* 102 * This points to the workqueue to 103 * be used for this socket. 104 * It can be per socket (on the client) 105 * or point to a global workqueue (on the server) 106 */ 107 struct workqueue_struct *workqueue; 108 109 struct work_struct disconnect_work; 110 111 /* RDMA related */ 112 struct { 113 struct rdma_cm_id *cm_id; 114 /* 115 * This is for iWarp MPA v1 116 */ 117 bool legacy_iwarp; 118 } rdma; 119 120 /* IB verbs related */ 121 struct { 122 struct ib_pd *pd; 123 struct ib_cq *send_cq; 124 struct ib_cq *recv_cq; 125 126 /* 127 * shortcuts for rdma.cm_id->{qp,device}; 128 */ 129 struct ib_qp *qp; 130 struct ib_device *dev; 131 } ib; 132 133 struct smbdirect_socket_parameters parameters; 134 135 /* 136 * The state for connect/negotiation 137 */ 138 struct { 139 spinlock_t lock; 140 struct work_struct work; 141 } connect; 142 143 /* 144 * The state for keepalive and timeout handling 145 */ 146 struct { 147 enum smbdirect_keepalive_status keepalive; 148 struct work_struct immediate_work; 149 struct delayed_work timer_work; 150 } idle; 151 152 /* 153 * The state for posted send buffers 154 */ 155 struct { 156 /* 157 * Memory pools for preallocating 158 * smbdirect_send_io buffers 159 */ 160 struct { 161 struct kmem_cache *cache; 162 mempool_t *pool; 163 } mem; 164 165 /* 166 * The local credit state for ib_post_send() 167 */ 168 struct { 169 atomic_t count; 170 wait_queue_head_t wait_queue; 171 } lcredits; 172 173 /* 174 * The remote credit state for the send side 175 */ 176 struct { 177 atomic_t count; 178 wait_queue_head_t wait_queue; 179 } credits; 180 181 /* 182 * The state about posted/pending sends 183 */ 184 struct { 185 atomic_t count; 186 /* 187 * woken when count is decremented 188 */ 189 wait_queue_head_t dec_wait_queue; 190 /* 191 * woken when count reached zero 192 */ 193 wait_queue_head_t zero_wait_queue; 194 } pending; 195 } send_io; 196 197 /* 198 * The state for posted receive buffers 199 */ 200 struct { 201 /* 202 * The type of PDU we are expecting 203 */ 204 enum { 205 SMBDIRECT_EXPECT_NEGOTIATE_REQ = 1, 206 SMBDIRECT_EXPECT_NEGOTIATE_REP = 2, 207 SMBDIRECT_EXPECT_DATA_TRANSFER = 3, 208 } expected; 209 210 /* 211 * Memory pools for preallocating 212 * smbdirect_recv_io buffers 213 */ 214 struct { 215 struct kmem_cache *cache; 216 mempool_t *pool; 217 } mem; 218 219 /* 220 * The list of free smbdirect_recv_io 221 * structures 222 */ 223 struct { 224 struct list_head list; 225 spinlock_t lock; 226 } free; 227 228 /* 229 * The state for posted recv_io messages 230 * and the refill work struct. 231 */ 232 struct { 233 atomic_t count; 234 struct work_struct refill_work; 235 } posted; 236 237 /* 238 * The credit state for the recv side 239 */ 240 struct { 241 u16 target; 242 atomic_t count; 243 } credits; 244 245 /* 246 * The list of arrived non-empty smbdirect_recv_io 247 * structures 248 * 249 * This represents the reassembly queue. 250 */ 251 struct { 252 struct list_head list; 253 spinlock_t lock; 254 wait_queue_head_t wait_queue; 255 /* total data length of reassembly queue */ 256 int data_length; 257 int queue_length; 258 /* the offset to first buffer in reassembly queue */ 259 int first_entry_offset; 260 /* 261 * Indicate if we have received a full packet on the 262 * connection This is used to identify the first SMBD 263 * packet of a assembled payload (SMB packet) in 264 * reassembly queue so we can return a RFC1002 length to 265 * upper layer to indicate the length of the SMB packet 266 * received 267 */ 268 bool full_packet_received; 269 } reassembly; 270 } recv_io; 271 272 /* 273 * The state for Memory registrations on the client 274 */ 275 struct { 276 enum ib_mr_type type; 277 278 /* 279 * The list of free smbdirect_mr_io 280 * structures 281 */ 282 struct { 283 struct list_head list; 284 spinlock_t lock; 285 } all; 286 287 /* 288 * The number of available MRs ready for memory registration 289 */ 290 struct { 291 atomic_t count; 292 wait_queue_head_t wait_queue; 293 } ready; 294 295 /* 296 * The number of used MRs 297 */ 298 struct { 299 atomic_t count; 300 } used; 301 302 struct work_struct recovery_work; 303 304 /* Used by transport to wait until all MRs are returned */ 305 struct { 306 wait_queue_head_t wait_queue; 307 } cleanup; 308 } mr_io; 309 310 /* 311 * The state for RDMA read/write requests on the server 312 */ 313 struct { 314 /* 315 * The credit state for the send side 316 */ 317 struct { 318 /* 319 * The maximum number of rw credits 320 */ 321 size_t max; 322 /* 323 * The number of pages per credit 324 */ 325 size_t num_pages; 326 atomic_t count; 327 wait_queue_head_t wait_queue; 328 } credits; 329 } rw_io; 330 331 /* 332 * For debug purposes 333 */ 334 struct { 335 u64 get_receive_buffer; 336 u64 put_receive_buffer; 337 u64 enqueue_reassembly_queue; 338 u64 dequeue_reassembly_queue; 339 u64 send_empty; 340 } statistics; 341 }; 342 343 static void __smbdirect_socket_disabled_work(struct work_struct *work) 344 { 345 /* 346 * Should never be called as disable_[delayed_]work_sync() was used. 347 */ 348 WARN_ON_ONCE(1); 349 } 350 351 static __always_inline void smbdirect_socket_init(struct smbdirect_socket *sc) 352 { 353 /* 354 * This also sets status = SMBDIRECT_SOCKET_CREATED 355 */ 356 BUILD_BUG_ON(SMBDIRECT_SOCKET_CREATED != 0); 357 memset(sc, 0, sizeof(*sc)); 358 359 init_waitqueue_head(&sc->status_wait); 360 361 INIT_WORK(&sc->disconnect_work, __smbdirect_socket_disabled_work); 362 disable_work_sync(&sc->disconnect_work); 363 364 spin_lock_init(&sc->connect.lock); 365 INIT_WORK(&sc->connect.work, __smbdirect_socket_disabled_work); 366 disable_work_sync(&sc->connect.work); 367 368 INIT_WORK(&sc->idle.immediate_work, __smbdirect_socket_disabled_work); 369 disable_work_sync(&sc->idle.immediate_work); 370 INIT_DELAYED_WORK(&sc->idle.timer_work, __smbdirect_socket_disabled_work); 371 disable_delayed_work_sync(&sc->idle.timer_work); 372 373 atomic_set(&sc->send_io.lcredits.count, 0); 374 init_waitqueue_head(&sc->send_io.lcredits.wait_queue); 375 376 atomic_set(&sc->send_io.credits.count, 0); 377 init_waitqueue_head(&sc->send_io.credits.wait_queue); 378 379 atomic_set(&sc->send_io.pending.count, 0); 380 init_waitqueue_head(&sc->send_io.pending.dec_wait_queue); 381 init_waitqueue_head(&sc->send_io.pending.zero_wait_queue); 382 383 INIT_LIST_HEAD(&sc->recv_io.free.list); 384 spin_lock_init(&sc->recv_io.free.lock); 385 386 atomic_set(&sc->recv_io.posted.count, 0); 387 INIT_WORK(&sc->recv_io.posted.refill_work, __smbdirect_socket_disabled_work); 388 disable_work_sync(&sc->recv_io.posted.refill_work); 389 390 atomic_set(&sc->recv_io.credits.count, 0); 391 392 INIT_LIST_HEAD(&sc->recv_io.reassembly.list); 393 spin_lock_init(&sc->recv_io.reassembly.lock); 394 init_waitqueue_head(&sc->recv_io.reassembly.wait_queue); 395 396 atomic_set(&sc->rw_io.credits.count, 0); 397 init_waitqueue_head(&sc->rw_io.credits.wait_queue); 398 399 spin_lock_init(&sc->mr_io.all.lock); 400 INIT_LIST_HEAD(&sc->mr_io.all.list); 401 atomic_set(&sc->mr_io.ready.count, 0); 402 init_waitqueue_head(&sc->mr_io.ready.wait_queue); 403 atomic_set(&sc->mr_io.used.count, 0); 404 INIT_WORK(&sc->mr_io.recovery_work, __smbdirect_socket_disabled_work); 405 disable_work_sync(&sc->mr_io.recovery_work); 406 init_waitqueue_head(&sc->mr_io.cleanup.wait_queue); 407 } 408 409 #define __SMBDIRECT_CHECK_STATUS_FAILED(__sc, __expected_status, __error_cmd, __unexpected_cmd) ({ \ 410 bool __failed = false; \ 411 if (unlikely((__sc)->first_error)) { \ 412 __failed = true; \ 413 __error_cmd \ 414 } else if (unlikely((__sc)->status != (__expected_status))) { \ 415 __failed = true; \ 416 __unexpected_cmd \ 417 } \ 418 __failed; \ 419 }) 420 421 #define __SMBDIRECT_CHECK_STATUS_WARN(__sc, __expected_status, __unexpected_cmd) \ 422 __SMBDIRECT_CHECK_STATUS_FAILED(__sc, __expected_status, \ 423 , \ 424 { \ 425 const struct sockaddr_storage *__src = NULL; \ 426 const struct sockaddr_storage *__dst = NULL; \ 427 if ((__sc)->rdma.cm_id) { \ 428 __src = &(__sc)->rdma.cm_id->route.addr.src_addr; \ 429 __dst = &(__sc)->rdma.cm_id->route.addr.dst_addr; \ 430 } \ 431 WARN_ONCE(1, \ 432 "expected[%s] != %s first_error=%1pe local=%pISpsfc remote=%pISpsfc\n", \ 433 smbdirect_socket_status_string(__expected_status), \ 434 smbdirect_socket_status_string((__sc)->status), \ 435 SMBDIRECT_DEBUG_ERR_PTR((__sc)->first_error), \ 436 __src, __dst); \ 437 __unexpected_cmd \ 438 }) 439 440 #define SMBDIRECT_CHECK_STATUS_WARN(__sc, __expected_status) \ 441 __SMBDIRECT_CHECK_STATUS_WARN(__sc, __expected_status, /* nothing */) 442 443 #define SMBDIRECT_CHECK_STATUS_DISCONNECT(__sc, __expected_status) \ 444 __SMBDIRECT_CHECK_STATUS_WARN(__sc, __expected_status, \ 445 __SMBDIRECT_SOCKET_DISCONNECT(__sc);) 446 447 struct smbdirect_send_io { 448 struct smbdirect_socket *socket; 449 struct ib_cqe cqe; 450 451 /* 452 * The SGE entries for this work request 453 * 454 * The first points to the packet header 455 */ 456 #define SMBDIRECT_SEND_IO_MAX_SGE 6 457 size_t num_sge; 458 struct ib_sge sge[SMBDIRECT_SEND_IO_MAX_SGE]; 459 460 /* 461 * Link to the list of sibling smbdirect_send_io 462 * messages. 463 */ 464 struct list_head sibling_list; 465 struct ib_send_wr wr; 466 467 /* SMBD packet header follows this structure */ 468 u8 packet[]; 469 }; 470 471 struct smbdirect_send_batch { 472 /* 473 * List of smbdirect_send_io messages 474 */ 475 struct list_head msg_list; 476 /* 477 * Number of list entries 478 */ 479 size_t wr_cnt; 480 481 /* 482 * Possible remote key invalidation state 483 */ 484 bool need_invalidate_rkey; 485 u32 remote_key; 486 }; 487 488 struct smbdirect_recv_io { 489 struct smbdirect_socket *socket; 490 struct ib_cqe cqe; 491 492 /* 493 * For now we only use a single SGE 494 * as we have just one large buffer 495 * per posted recv. 496 */ 497 #define SMBDIRECT_RECV_IO_MAX_SGE 1 498 struct ib_sge sge; 499 500 /* Link to free or reassembly list */ 501 struct list_head list; 502 503 /* Indicate if this is the 1st packet of a payload */ 504 bool first_segment; 505 506 /* SMBD packet header and payload follows this structure */ 507 u8 packet[]; 508 }; 509 510 enum smbdirect_mr_state { 511 SMBDIRECT_MR_READY, 512 SMBDIRECT_MR_REGISTERED, 513 SMBDIRECT_MR_INVALIDATED, 514 SMBDIRECT_MR_ERROR, 515 SMBDIRECT_MR_DISABLED 516 }; 517 518 struct smbdirect_mr_io { 519 struct smbdirect_socket *socket; 520 struct ib_cqe cqe; 521 522 /* 523 * We can have up to two references: 524 * 1. by the connection 525 * 2. by the registration 526 */ 527 struct kref kref; 528 struct mutex mutex; 529 530 struct list_head list; 531 532 enum smbdirect_mr_state state; 533 struct ib_mr *mr; 534 struct sg_table sgt; 535 enum dma_data_direction dir; 536 union { 537 struct ib_reg_wr wr; 538 struct ib_send_wr inv_wr; 539 }; 540 541 bool need_invalidate; 542 struct completion invalidate_done; 543 }; 544 545 struct smbdirect_rw_io { 546 struct smbdirect_socket *socket; 547 struct ib_cqe cqe; 548 549 struct list_head list; 550 551 int error; 552 struct completion *completion; 553 554 struct rdma_rw_ctx rdma_ctx; 555 struct sg_table sgt; 556 struct scatterlist sg_list[]; 557 }; 558 559 #endif /* __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_SOCKET_H__ */ 560