/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 *   Copyright (c) 2025 Stefan Metzmacher
 */

#ifndef __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_SOCKET_H__
#define __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_SOCKET_H__

#include <rdma/rw.h>

/*
 * The status values of a struct smbdirect_socket.
 *
 * SMBDIRECT_SOCKET_CREATED has to remain the first (zero) value:
 * smbdirect_socket_init() relies on memset() to set it and
 * enforces that with a BUILD_BUG_ON().
 *
 * Every value added here also needs a case in
 * smbdirect_socket_status_string().
 */
enum smbdirect_socket_status {
	SMBDIRECT_SOCKET_CREATED,
	SMBDIRECT_SOCKET_RESOLVE_ADDR_NEEDED,
	SMBDIRECT_SOCKET_RESOLVE_ADDR_RUNNING,
	SMBDIRECT_SOCKET_RESOLVE_ADDR_FAILED,
	SMBDIRECT_SOCKET_RESOLVE_ROUTE_NEEDED,
	SMBDIRECT_SOCKET_RESOLVE_ROUTE_RUNNING,
	SMBDIRECT_SOCKET_RESOLVE_ROUTE_FAILED,
	SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED,
	SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING,
	SMBDIRECT_SOCKET_RDMA_CONNECT_FAILED,
	SMBDIRECT_SOCKET_NEGOTIATE_NEEDED,
	SMBDIRECT_SOCKET_NEGOTIATE_RUNNING,
	SMBDIRECT_SOCKET_NEGOTIATE_FAILED,
	SMBDIRECT_SOCKET_CONNECTED,
	SMBDIRECT_SOCKET_ERROR,
	SMBDIRECT_SOCKET_DISCONNECTING,
	SMBDIRECT_SOCKET_DISCONNECTED,
	SMBDIRECT_SOCKET_DESTROYED
};

/*
 * Return the name of @status without the SMBDIRECT_SOCKET_ prefix,
 * for use in debug and warning messages.
 *
 * The switch has no default case on purpose, a value that matches
 * none of the cases falls through to the "<unknown>" return below.
 */
static __always_inline
const char *smbdirect_socket_status_string(enum smbdirect_socket_status status)
{
	switch (status) {
	case SMBDIRECT_SOCKET_CREATED:
		return "CREATED";
	case SMBDIRECT_SOCKET_RESOLVE_ADDR_NEEDED:
		return "RESOLVE_ADDR_NEEDED";
	case SMBDIRECT_SOCKET_RESOLVE_ADDR_RUNNING:
		return "RESOLVE_ADDR_RUNNING";
	case SMBDIRECT_SOCKET_RESOLVE_ADDR_FAILED:
		return "RESOLVE_ADDR_FAILED";
	case SMBDIRECT_SOCKET_RESOLVE_ROUTE_NEEDED:
		return "RESOLVE_ROUTE_NEEDED";
	case SMBDIRECT_SOCKET_RESOLVE_ROUTE_RUNNING:
		return "RESOLVE_ROUTE_RUNNING";
	case SMBDIRECT_SOCKET_RESOLVE_ROUTE_FAILED:
		return "RESOLVE_ROUTE_FAILED";
	case SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED:
		return "RDMA_CONNECT_NEEDED";
	case SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING:
		return "RDMA_CONNECT_RUNNING";
	case SMBDIRECT_SOCKET_RDMA_CONNECT_FAILED:
		return "RDMA_CONNECT_FAILED";
	case SMBDIRECT_SOCKET_NEGOTIATE_NEEDED:
		return "NEGOTIATE_NEEDED";
	case SMBDIRECT_SOCKET_NEGOTIATE_RUNNING:
		return "NEGOTIATE_RUNNING";
	case SMBDIRECT_SOCKET_NEGOTIATE_FAILED:
		return "NEGOTIATE_FAILED";
	case SMBDIRECT_SOCKET_CONNECTED:
		return "CONNECTED";
	case SMBDIRECT_SOCKET_ERROR:
		return "ERROR";
	case SMBDIRECT_SOCKET_DISCONNECTING:
		return "DISCONNECTING";
	case SMBDIRECT_SOCKET_DISCONNECTED:
		return "DISCONNECTED";
	case SMBDIRECT_SOCKET_DESTROYED:
		return "DESTROYED";
	}

	return "<unknown>";
}

/*
 * This can be used with %1pe to print errors as strings or '0'
 * And it avoids warnings like: warn: passing zero to 'ERR_PTR'
 * from smatch -p=kernel --pedantic
 *
 * Returns NULL if @error is 0, otherwise ERR_PTR(@error).
 */
static __always_inline
const void * __must_check SMBDIRECT_DEBUG_ERR_PTR(long error)
{
	if (error == 0)
		return NULL;
	return ERR_PTR(error);
}

/*
 * The keepalive state stored in smbdirect_socket.idle.keepalive
 */
enum smbdirect_keepalive_status {
	SMBDIRECT_KEEPALIVE_NONE,
	SMBDIRECT_KEEPALIVE_PENDING,
	SMBDIRECT_KEEPALIVE_SENT
};

/*
 * The state of a single SMB Direct socket.
 *
 * Use smbdirect_socket_init() to bring it into a defined
 * initial state before any member is used.
 */
struct smbdirect_socket {
	enum smbdirect_socket_status status;
	wait_queue_head_t status_wait;
	/*
	 * A non-zero value makes __SMBDIRECT_CHECK_STATUS_FAILED()
	 * fail before status is even compared.
	 */
	int first_error;

	/*
	 * This points to the workqueue to
	 * be used for this socket.
	 * It can be per socket (on the client)
	 * or point to a global workqueue (on the server)
	 */
	struct workqueue_struct *workqueue;

	struct work_struct disconnect_work;

	/* RDMA related */
	struct {
		struct rdma_cm_id *cm_id;
		/*
		 * This is for iWarp MPA v1
		 */
		bool legacy_iwarp;
	} rdma;

	/* IB verbs related */
	struct {
		struct ib_pd *pd;
		struct ib_cq *send_cq;
		struct ib_cq *recv_cq;

		/*
		 * shortcuts for rdma.cm_id->{qp,device};
		 */
		struct ib_qp *qp;
		struct ib_device *dev;
	} ib;

	/*
	 * Embedded by value, so the full definition of
	 * struct smbdirect_socket_parameters is required here.
	 *
	 * NOTE(review): it is not defined in this header,
	 * presumably the includer has to provide it before
	 * including this file - confirm.
	 */
	struct smbdirect_socket_parameters parameters;

	/*
	 * The state for keepalive and timeout handling
	 */
	struct {
		enum smbdirect_keepalive_status keepalive;
		struct work_struct immediate_work;
		struct delayed_work timer_work;
	} idle;

	/*
	 * The state for posted send buffers
	 */
	struct {
		/*
		 * Memory pools for preallocating
		 * smbdirect_send_io buffers
		 *
		 * Both pointers are left as NULL by
		 * smbdirect_socket_init().
		 */
		struct {
			struct kmem_cache	*cache;
			mempool_t		*pool;
		} mem;

		/*
		 * The local credit state for ib_post_send()
		 */
		struct {
			atomic_t count;
			wait_queue_head_t wait_queue;
		} lcredits;

		/*
		 * The remote credit state for the send side
		 */
		struct {
			atomic_t count;
			wait_queue_head_t wait_queue;
		} credits;

		/*
		 * The state about posted/pending sends
		 */
		struct {
			atomic_t count;
			/*
			 * woken when count is decremented
			 */
			wait_queue_head_t dec_wait_queue;
			/*
			 * woken when count reached zero
			 */
			wait_queue_head_t zero_wait_queue;
		} pending;
	} send_io;

	/*
	 * The state for posted receive buffers
	 */
	struct {
		/*
		 * The type of PDU we are expecting
		 *
		 * The values start at 1, so the zeroed state left
		 * by smbdirect_socket_init() matches none of them.
		 */
		enum {
			SMBDIRECT_EXPECT_NEGOTIATE_REQ = 1,
			SMBDIRECT_EXPECT_NEGOTIATE_REP = 2,
			SMBDIRECT_EXPECT_DATA_TRANSFER = 3,
		} expected;

		/*
		 * Memory pools for preallocating
		 * smbdirect_recv_io buffers
		 *
		 * Both pointers are left as NULL by
		 * smbdirect_socket_init().
		 */
		struct {
			struct kmem_cache	*cache;
			mempool_t		*pool;
		} mem;

		/*
		 * The list of free smbdirect_recv_io
		 * structures
		 */
		struct {
			struct list_head list;
			spinlock_t lock;
		} free;

		/*
		 * The state for posted recv_io messages
		 * and the refill work struct.
		 */
		struct {
			atomic_t count;
			struct work_struct refill_work;
		} posted;

		/*
		 * The credit state for the recv side
		 */
		struct {
			u16 target;
			atomic_t count;
		} credits;

		/*
		 * The list of arrived non-empty smbdirect_recv_io
		 * structures
		 *
		 * This represents the reassembly queue.
		 */
		struct {
			struct list_head list;
			spinlock_t lock;
			wait_queue_head_t wait_queue;
			/* total data length of reassembly queue */
			int data_length;
			int queue_length;
			/* the offset to first buffer in reassembly queue */
			int first_entry_offset;
			/*
			 * Indicates if we have received a full packet on
			 * the connection. This is used to identify the
			 * first SMBD packet of an assembled payload
			 * (SMB packet) in the reassembly queue, so we can
			 * return an RFC1002 length to the upper layer to
			 * indicate the length of the SMB packet received.
			 */
			bool full_packet_received;
		} reassembly;
	} recv_io;

	/*
	 * The state for Memory registrations on the client
	 */
	struct {
		enum ib_mr_type type;

		/*
		 * The list of free smbdirect_mr_io
		 * structures
		 *
		 * NOTE(review): the member is named 'all', so this
		 * presumably links every smbdirect_mr_io of the
		 * socket and not only the free ones - confirm
		 * against the users of this list.
		 */
		struct {
			struct list_head list;
			spinlock_t lock;
		} all;

		/*
		 * The number of available MRs ready for memory registration
		 */
		struct {
			atomic_t count;
			wait_queue_head_t wait_queue;
		} ready;

		/*
		 * The number of used MRs
		 */
		struct {
			atomic_t count;
		} used;

		struct work_struct recovery_work;

		/* Used by transport to wait until all MRs are returned */
		struct {
			wait_queue_head_t wait_queue;
		} cleanup;
	} mr_io;

	/*
	 * The state for RDMA read/write requests on the server
	 */
	struct {
		/*
		 * The credit state for the send side
		 */
		struct {
			/*
			 * The maximum number of rw credits
			 */
			size_t max;
			/*
			 * The number of pages per credit
			 */
			size_t num_pages;
			atomic_t count;
			wait_queue_head_t wait_queue;
		} credits;
	} rw_io;

	/*
	 * For debug purposes
	 */
	struct {
		u64 get_receive_buffer;
		u64 put_receive_buffer;
		u64 enqueue_reassembly_queue;
		u64 dequeue_reassembly_queue;
		u64 send_empty;
	} statistics;
};

/*
 * The placeholder work function installed by smbdirect_socket_init()
 * for every work item of the socket.
 *
 * It only warns (once) if it is ever executed.
 */
static void __smbdirect_socket_disabled_work(struct work_struct *work)
{
	/*
	 * Should never be called as disable_[delayed_]work_sync() was used.
	 */
	WARN_ON_ONCE(1);
}

/*
 * Bring @sc into a defined initial state.
 *
 * The whole structure is zeroed first, then all wait queues,
 * list heads, spinlocks and counters are initialized.
 *
 * Every work item gets __smbdirect_socket_disabled_work() as
 * function and is disabled right away.
 *
 * Nothing is allocated here: workqueue, rdma, ib, parameters,
 * the mem pools and statistics are just left zeroed.
 *
 * NOTE(review): callers presumably re-initialize and enable the
 * work items with the real functions later - confirm.
 */
static __always_inline void smbdirect_socket_init(struct smbdirect_socket *sc)
{
	/*
	 * This also sets status = SMBDIRECT_SOCKET_CREATED
	 */
	BUILD_BUG_ON(SMBDIRECT_SOCKET_CREATED != 0);
	memset(sc, 0, sizeof(*sc));

	init_waitqueue_head(&sc->status_wait);

	/*
	 * Each INIT_[DELAYED_]WORK() below is directly followed by the
	 * matching disable_[delayed_]work_sync(), keep them paired.
	 */
	INIT_WORK(&sc->disconnect_work, __smbdirect_socket_disabled_work);
	disable_work_sync(&sc->disconnect_work);

	INIT_WORK(&sc->idle.immediate_work, __smbdirect_socket_disabled_work);
	disable_work_sync(&sc->idle.immediate_work);
	INIT_DELAYED_WORK(&sc->idle.timer_work, __smbdirect_socket_disabled_work);
	disable_delayed_work_sync(&sc->idle.timer_work);

	/*
	 * The counters are already 0 from the memset() above,
	 * the atomic_set() calls just make that explicit.
	 */
	atomic_set(&sc->send_io.lcredits.count, 0);
	init_waitqueue_head(&sc->send_io.lcredits.wait_queue);

	atomic_set(&sc->send_io.credits.count, 0);
	init_waitqueue_head(&sc->send_io.credits.wait_queue);

	atomic_set(&sc->send_io.pending.count, 0);
	init_waitqueue_head(&sc->send_io.pending.dec_wait_queue);
	init_waitqueue_head(&sc->send_io.pending.zero_wait_queue);

	INIT_LIST_HEAD(&sc->recv_io.free.list);
	spin_lock_init(&sc->recv_io.free.lock);

	atomic_set(&sc->recv_io.posted.count, 0);
	INIT_WORK(&sc->recv_io.posted.refill_work, __smbdirect_socket_disabled_work);
	disable_work_sync(&sc->recv_io.posted.refill_work);

	atomic_set(&sc->recv_io.credits.count, 0);

	INIT_LIST_HEAD(&sc->recv_io.reassembly.list);
	spin_lock_init(&sc->recv_io.reassembly.lock);
	init_waitqueue_head(&sc->recv_io.reassembly.wait_queue);

	atomic_set(&sc->rw_io.credits.count, 0);
	init_waitqueue_head(&sc->rw_io.credits.wait_queue);

	spin_lock_init(&sc->mr_io.all.lock);
	INIT_LIST_HEAD(&sc->mr_io.all.list);
	atomic_set(&sc->mr_io.ready.count, 0);
	init_waitqueue_head(&sc->mr_io.ready.wait_queue);
	atomic_set(&sc->mr_io.used.count, 0);
	INIT_WORK(&sc->mr_io.recovery_work, __smbdirect_socket_disabled_work);
	disable_work_sync(&sc->mr_io.recovery_work);
	init_waitqueue_head(&sc->mr_io.cleanup.wait_queue);
}

/*
 * Check that @__sc has no first_error and is in @__expected_status.
 *
 * This is a statement expression evaluating to a bool:
 *
 * - true, after running @__error_cmd, if first_error is non-zero
 * - true, after running @__unexpected_cmd, if first_error is zero
 *   but status differs from @__expected_status
 * - false otherwise
 *
 * @__error_cmd and @__unexpected_cmd are statement fragments pasted
 * into the branches as they are, they may be empty.
 *
 * Note that @__sc is evaluated more than once, so it must not
 * have side effects.
 */
#define __SMBDIRECT_CHECK_STATUS_FAILED(__sc, __expected_status, __error_cmd, __unexpected_cmd) ({ \
	bool __failed = false; \
	if (unlikely((__sc)->first_error)) { \
		__failed = true; \
		__error_cmd \
	} else if (unlikely((__sc)->status != (__expected_status))) { \
		__failed = true; \
		__unexpected_cmd \
	} \
	__failed; \
})

/*
 * Like __SMBDIRECT_CHECK_STATUS_FAILED() with an empty @__error_cmd,
 * so a non-zero first_error fails silently.
 *
 * An unexpected status triggers a WARN_ONCE() with the expected and
 * the current status, first_error and the local and remote address
 * (both passed as NULL if there is no rdma.cm_id), followed by
 * @__unexpected_cmd.
 *
 * The warning is only reached when first_error was just read as
 * zero, it is read again for the message.
 *
 * NOTE(review): a non-zero value in the message would presumably
 * mean it was set in between - confirm.
 */
#define __SMBDIRECT_CHECK_STATUS_WARN(__sc, __expected_status, __unexpected_cmd) \
	__SMBDIRECT_CHECK_STATUS_FAILED(__sc, __expected_status, \
	, \
	{ \
		const struct sockaddr_storage *__src = NULL; \
		const struct sockaddr_storage *__dst = NULL; \
		if ((__sc)->rdma.cm_id) { \
			__src = &(__sc)->rdma.cm_id->route.addr.src_addr; \
			__dst = &(__sc)->rdma.cm_id->route.addr.dst_addr; \
		} \
		WARN_ONCE(1, \
			  "expected[%s] != %s first_error=%1pe local=%pISpsfc remote=%pISpsfc\n", \
			  smbdirect_socket_status_string(__expected_status), \
			  smbdirect_socket_status_string((__sc)->status), \
			  SMBDIRECT_DEBUG_ERR_PTR((__sc)->first_error), \
			  __src, __dst); \
		__unexpected_cmd \
	})

/*
 * Warn once on an unexpected status, without any further action.
 * Evaluates to true if the check failed.
 */
#define SMBDIRECT_CHECK_STATUS_WARN(__sc, __expected_status) \
	__SMBDIRECT_CHECK_STATUS_WARN(__sc, __expected_status, /* nothing */)

/*
 * Warn once on an unexpected status and then invoke
 * __SMBDIRECT_SOCKET_DISCONNECT(__sc).
 * Evaluates to true if the check failed.
 *
 * __SMBDIRECT_SOCKET_DISCONNECT() is not defined in this header,
 * so it has to be defined by the user of this macro.
 */
#define SMBDIRECT_CHECK_STATUS_DISCONNECT(__sc, __expected_status) \
	__SMBDIRECT_CHECK_STATUS_WARN(__sc, __expected_status, \
		__SMBDIRECT_SOCKET_DISCONNECT(__sc);)

/*
 * The state of a single send work request.
 *
 * The packet[] member has to be the last one.
 */
struct smbdirect_send_io {
	struct smbdirect_socket *socket;
	struct ib_cqe cqe;

	/*
	 * The SGE entries for this work request
	 *
	 * The first points to the packet header
	 */
#define SMBDIRECT_SEND_IO_MAX_SGE 6
	size_t num_sge;
	struct ib_sge sge[SMBDIRECT_SEND_IO_MAX_SGE];

	/*
	 * Link to the list of sibling smbdirect_send_io
	 * messages.
	 */
	struct list_head sibling_list;
	struct ib_send_wr wr;

	/* SMBD packet header follows this structure */
	u8 packet[];
};

/*
 * A batch of smbdirect_send_io messages
 */
struct smbdirect_send_batch {
	/*
	 * List of smbdirect_send_io messages
	 */
	struct list_head msg_list;
	/*
	 * Number of list entries
	 */
	size_t wr_cnt;

	/*
	 * Possible remote key invalidation state
	 */
	bool need_invalidate_rkey;
	u32 remote_key;
};

/*
 * The state of a single receive buffer.
 *
 * The packet[] member has to be the last one.
 */
struct smbdirect_recv_io {
	struct smbdirect_socket *socket;
	struct ib_cqe cqe;

	/*
	 * For now we only use a single SGE
	 * as we have just one large buffer
	 * per posted recv.
	 */
#define SMBDIRECT_RECV_IO_MAX_SGE 1
	struct ib_sge sge;

	/* Link to free or reassembly list */
	struct list_head list;

	/* Indicate if this is the 1st packet of a payload */
	bool first_segment;

	/* SMBD packet header and payload follows this structure */
	u8 packet[];
};

/*
 * The state values stored in smbdirect_mr_io.state
 */
enum smbdirect_mr_state {
	SMBDIRECT_MR_READY,
	SMBDIRECT_MR_REGISTERED,
	SMBDIRECT_MR_INVALIDATED,
	SMBDIRECT_MR_ERROR,
	SMBDIRECT_MR_DISABLED
};

/*
 * The state of a single memory registration
 */
struct smbdirect_mr_io {
	struct smbdirect_socket *socket;
	struct ib_cqe cqe;

	/*
	 * We can have up to two references:
	 * 1. by the connection
	 * 2. by the registration
	 */
	struct kref kref;
	struct mutex mutex;

	struct list_head list;

	enum smbdirect_mr_state state;
	struct ib_mr *mr;
	struct sg_table sgt;
	enum dma_data_direction dir;
	/*
	 * wr and inv_wr share the same storage,
	 * only one of them can be in use at a time.
	 */
	union {
		struct ib_reg_wr wr;
		struct ib_send_wr inv_wr;
	};

	bool need_invalidate;
	struct completion invalidate_done;
};

/*
 * The state of a single RDMA read/write request.
 *
 * The sg_list[] member has to be the last one.
 */
struct smbdirect_rw_io {
	struct smbdirect_socket *socket;
	struct ib_cqe cqe;

	struct list_head list;

	int error;
	struct completion *completion;

	struct rdma_rw_ctx rdma_ctx;
	struct sg_table sgt;
	/* the scatterlist entries follow this structure */
	struct scatterlist sg_list[];
};

#endif /* __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_SOCKET_H__ */