1 /* SPDX-License-Identifier: GPL-2.0-or-later */ 2 /* 3 * Copyright (c) 2025 Stefan Metzmacher 4 */ 5 6 #ifndef __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_SOCKET_H__ 7 #define __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_SOCKET_H__ 8 9 #include <rdma/rw.h> 10 11 enum smbdirect_socket_status { 12 SMBDIRECT_SOCKET_CREATED, 13 SMBDIRECT_SOCKET_RESOLVE_ADDR_NEEDED, 14 SMBDIRECT_SOCKET_RESOLVE_ADDR_RUNNING, 15 SMBDIRECT_SOCKET_RESOLVE_ADDR_FAILED, 16 SMBDIRECT_SOCKET_RESOLVE_ROUTE_NEEDED, 17 SMBDIRECT_SOCKET_RESOLVE_ROUTE_RUNNING, 18 SMBDIRECT_SOCKET_RESOLVE_ROUTE_FAILED, 19 SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED, 20 SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING, 21 SMBDIRECT_SOCKET_RDMA_CONNECT_FAILED, 22 SMBDIRECT_SOCKET_NEGOTIATE_NEEDED, 23 SMBDIRECT_SOCKET_NEGOTIATE_RUNNING, 24 SMBDIRECT_SOCKET_NEGOTIATE_FAILED, 25 SMBDIRECT_SOCKET_CONNECTED, 26 SMBDIRECT_SOCKET_ERROR, 27 SMBDIRECT_SOCKET_DISCONNECTING, 28 SMBDIRECT_SOCKET_DISCONNECTED, 29 SMBDIRECT_SOCKET_DESTROYED 30 }; 31 32 static __always_inline 33 const char *smbdirect_socket_status_string(enum smbdirect_socket_status status) 34 { 35 switch (status) { 36 case SMBDIRECT_SOCKET_CREATED: 37 return "CREATED"; 38 case SMBDIRECT_SOCKET_RESOLVE_ADDR_NEEDED: 39 return "RESOLVE_ADDR_NEEDED"; 40 case SMBDIRECT_SOCKET_RESOLVE_ADDR_RUNNING: 41 return "RESOLVE_ADDR_RUNNING"; 42 case SMBDIRECT_SOCKET_RESOLVE_ADDR_FAILED: 43 return "RESOLVE_ADDR_FAILED"; 44 case SMBDIRECT_SOCKET_RESOLVE_ROUTE_NEEDED: 45 return "RESOLVE_ROUTE_NEEDED"; 46 case SMBDIRECT_SOCKET_RESOLVE_ROUTE_RUNNING: 47 return "RESOLVE_ROUTE_RUNNING"; 48 case SMBDIRECT_SOCKET_RESOLVE_ROUTE_FAILED: 49 return "RESOLVE_ROUTE_FAILED"; 50 case SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED: 51 return "RDMA_CONNECT_NEEDED"; 52 case SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING: 53 return "RDMA_CONNECT_RUNNING"; 54 case SMBDIRECT_SOCKET_RDMA_CONNECT_FAILED: 55 return "RDMA_CONNECT_FAILED"; 56 case SMBDIRECT_SOCKET_NEGOTIATE_NEEDED: 57 return "NEGOTIATE_NEEDED"; 58 case SMBDIRECT_SOCKET_NEGOTIATE_RUNNING: 59 return "NEGOTIATE_RUNNING"; 60 case SMBDIRECT_SOCKET_NEGOTIATE_FAILED: 61 return "NEGOTIATE_FAILED"; 62 case SMBDIRECT_SOCKET_CONNECTED: 63 return "CONNECTED"; 64 case SMBDIRECT_SOCKET_ERROR: 65 return "ERROR"; 66 case SMBDIRECT_SOCKET_DISCONNECTING: 67 return "DISCONNECTING"; 68 case SMBDIRECT_SOCKET_DISCONNECTED: 69 return "DISCONNECTED"; 70 case SMBDIRECT_SOCKET_DESTROYED: 71 return "DESTROYED"; 72 } 73 74 return "<unknown>"; 75 } 76 77 enum smbdirect_keepalive_status { 78 SMBDIRECT_KEEPALIVE_NONE, 79 SMBDIRECT_KEEPALIVE_PENDING, 80 SMBDIRECT_KEEPALIVE_SENT 81 }; 82 83 struct smbdirect_socket { 84 enum smbdirect_socket_status status; 85 wait_queue_head_t status_wait; 86 int first_error; 87 88 /* 89 * This points to the workqueue to 90 * be used for this socket. 91 * It can be per socket (on the client) 92 * or point to a global workqueue (on the server) 93 */ 94 struct workqueue_struct *workqueue; 95 96 struct work_struct disconnect_work; 97 98 /* RDMA related */ 99 struct { 100 struct rdma_cm_id *cm_id; 101 /* 102 * This is for iWarp MPA v1 103 */ 104 bool legacy_iwarp; 105 } rdma; 106 107 /* IB verbs related */ 108 struct { 109 struct ib_pd *pd; 110 struct ib_cq *send_cq; 111 struct ib_cq *recv_cq; 112 113 /* 114 * shortcuts for rdma.cm_id->{qp,device}; 115 */ 116 struct ib_qp *qp; 117 struct ib_device *dev; 118 } ib; 119 120 struct smbdirect_socket_parameters parameters; 121 122 /* 123 * The state for keepalive and timeout handling 124 */ 125 struct { 126 enum smbdirect_keepalive_status keepalive; 127 struct work_struct immediate_work; 128 struct delayed_work timer_work; 129 } idle; 130 131 /* 132 * The state for posted send buffers 133 */ 134 struct { 135 /* 136 * Memory pools for preallocating 137 * smbdirect_send_io buffers 138 */ 139 struct { 140 struct kmem_cache *cache; 141 mempool_t *pool; 142 } mem; 143 144 /* 145 * The local credit state for ib_post_send() 146 */ 147 struct { 148 atomic_t count; 149 wait_queue_head_t wait_queue; 150 } lcredits; 151 152 /* 153 * The remote credit state for the send side 154 */ 155 struct { 156 atomic_t count; 157 wait_queue_head_t wait_queue; 158 } credits; 159 160 /* 161 * The state about posted/pending sends 162 */ 163 struct { 164 atomic_t count; 165 /* 166 * woken when count is decremented 167 */ 168 wait_queue_head_t dec_wait_queue; 169 /* 170 * woken when count reached zero 171 */ 172 wait_queue_head_t zero_wait_queue; 173 } pending; 174 } send_io; 175 176 /* 177 * The state for posted receive buffers 178 */ 179 struct { 180 /* 181 * The type of PDU we are expecting 182 */ 183 enum { 184 SMBDIRECT_EXPECT_NEGOTIATE_REQ = 1, 185 SMBDIRECT_EXPECT_NEGOTIATE_REP = 2, 186 SMBDIRECT_EXPECT_DATA_TRANSFER = 3, 187 } expected; 188 189 /* 190 * Memory pools for preallocating 191 * smbdirect_recv_io buffers 192 */ 193 struct { 194 struct kmem_cache *cache; 195 mempool_t *pool; 196 } mem; 197 198 /* 199 * The list of free smbdirect_recv_io 200 * structures 201 */ 202 struct { 203 struct list_head list; 204 spinlock_t lock; 205 } free; 206 207 /* 208 * The state for posted recv_io messages 209 * and the refill work struct. 210 */ 211 struct { 212 atomic_t count; 213 struct work_struct refill_work; 214 } posted; 215 216 /* 217 * The credit state for the recv side 218 */ 219 struct { 220 u16 target; 221 atomic_t count; 222 } credits; 223 224 /* 225 * The list of arrived non-empty smbdirect_recv_io 226 * structures 227 * 228 * This represents the reassembly queue. 229 */ 230 struct { 231 struct list_head list; 232 spinlock_t lock; 233 wait_queue_head_t wait_queue; 234 /* total data length of reassembly queue */ 235 int data_length; 236 int queue_length; 237 /* the offset to first buffer in reassembly queue */ 238 int first_entry_offset; 239 /* 240 * Indicate if we have received a full packet on the 241 * connection This is used to identify the first SMBD 242 * packet of a assembled payload (SMB packet) in 243 * reassembly queue so we can return a RFC1002 length to 244 * upper layer to indicate the length of the SMB packet 245 * received 246 */ 247 bool full_packet_received; 248 } reassembly; 249 } recv_io; 250 251 /* 252 * The state for Memory registrations on the client 253 */ 254 struct { 255 enum ib_mr_type type; 256 257 /* 258 * The list of free smbdirect_mr_io 259 * structures 260 */ 261 struct { 262 struct list_head list; 263 spinlock_t lock; 264 } all; 265 266 /* 267 * The number of available MRs ready for memory registration 268 */ 269 struct { 270 atomic_t count; 271 wait_queue_head_t wait_queue; 272 } ready; 273 274 /* 275 * The number of used MRs 276 */ 277 struct { 278 atomic_t count; 279 } used; 280 281 struct work_struct recovery_work; 282 283 /* Used by transport to wait until all MRs are returned */ 284 struct { 285 wait_queue_head_t wait_queue; 286 } cleanup; 287 } mr_io; 288 289 /* 290 * The state for RDMA read/write requests on the server 291 */ 292 struct { 293 /* 294 * The credit state for the send side 295 */ 296 struct { 297 /* 298 * The maximum number of rw credits 299 */ 300 size_t max; 301 /* 302 * The number of pages per credit 303 */ 304 size_t num_pages; 305 atomic_t count; 306 wait_queue_head_t wait_queue; 307 } credits; 308 } rw_io; 309 310 /* 311 * For debug purposes 312 */ 313 struct { 314 u64 get_receive_buffer; 315 u64 put_receive_buffer; 316 u64 enqueue_reassembly_queue; 317 u64 dequeue_reassembly_queue; 318 u64 send_empty; 319 } statistics; 320 }; 321 322 static void __smbdirect_socket_disabled_work(struct work_struct *work) 323 { 324 /* 325 * Should never be called as disable_[delayed_]work_sync() was used. 326 */ 327 WARN_ON_ONCE(1); 328 } 329 330 static __always_inline void smbdirect_socket_init(struct smbdirect_socket *sc) 331 { 332 /* 333 * This also sets status = SMBDIRECT_SOCKET_CREATED 334 */ 335 BUILD_BUG_ON(SMBDIRECT_SOCKET_CREATED != 0); 336 memset(sc, 0, sizeof(*sc)); 337 338 init_waitqueue_head(&sc->status_wait); 339 340 INIT_WORK(&sc->disconnect_work, __smbdirect_socket_disabled_work); 341 disable_work_sync(&sc->disconnect_work); 342 343 INIT_WORK(&sc->idle.immediate_work, __smbdirect_socket_disabled_work); 344 disable_work_sync(&sc->idle.immediate_work); 345 INIT_DELAYED_WORK(&sc->idle.timer_work, __smbdirect_socket_disabled_work); 346 disable_delayed_work_sync(&sc->idle.timer_work); 347 348 atomic_set(&sc->send_io.lcredits.count, 0); 349 init_waitqueue_head(&sc->send_io.lcredits.wait_queue); 350 351 atomic_set(&sc->send_io.credits.count, 0); 352 init_waitqueue_head(&sc->send_io.credits.wait_queue); 353 354 atomic_set(&sc->send_io.pending.count, 0); 355 init_waitqueue_head(&sc->send_io.pending.dec_wait_queue); 356 init_waitqueue_head(&sc->send_io.pending.zero_wait_queue); 357 358 INIT_LIST_HEAD(&sc->recv_io.free.list); 359 spin_lock_init(&sc->recv_io.free.lock); 360 361 atomic_set(&sc->recv_io.posted.count, 0); 362 INIT_WORK(&sc->recv_io.posted.refill_work, __smbdirect_socket_disabled_work); 363 disable_work_sync(&sc->recv_io.posted.refill_work); 364 365 atomic_set(&sc->recv_io.credits.count, 0); 366 367 INIT_LIST_HEAD(&sc->recv_io.reassembly.list); 368 spin_lock_init(&sc->recv_io.reassembly.lock); 369 init_waitqueue_head(&sc->recv_io.reassembly.wait_queue); 370 371 atomic_set(&sc->rw_io.credits.count, 0); 372 init_waitqueue_head(&sc->rw_io.credits.wait_queue); 373 374 spin_lock_init(&sc->mr_io.all.lock); 375 INIT_LIST_HEAD(&sc->mr_io.all.list); 376 atomic_set(&sc->mr_io.ready.count, 0); 377 init_waitqueue_head(&sc->mr_io.ready.wait_queue); 378 atomic_set(&sc->mr_io.used.count, 0); 379 INIT_WORK(&sc->mr_io.recovery_work, __smbdirect_socket_disabled_work); 380 disable_work_sync(&sc->mr_io.recovery_work); 381 init_waitqueue_head(&sc->mr_io.cleanup.wait_queue); 382 } 383 384 struct smbdirect_send_io { 385 struct smbdirect_socket *socket; 386 struct ib_cqe cqe; 387 388 /* 389 * The SGE entries for this work request 390 * 391 * The first points to the packet header 392 */ 393 #define SMBDIRECT_SEND_IO_MAX_SGE 6 394 size_t num_sge; 395 struct ib_sge sge[SMBDIRECT_SEND_IO_MAX_SGE]; 396 397 /* 398 * Link to the list of sibling smbdirect_send_io 399 * messages. 400 */ 401 struct list_head sibling_list; 402 struct ib_send_wr wr; 403 404 /* SMBD packet header follows this structure */ 405 u8 packet[]; 406 }; 407 408 struct smbdirect_send_batch { 409 /* 410 * List of smbdirect_send_io messages 411 */ 412 struct list_head msg_list; 413 /* 414 * Number of list entries 415 */ 416 size_t wr_cnt; 417 418 /* 419 * Possible remote key invalidation state 420 */ 421 bool need_invalidate_rkey; 422 u32 remote_key; 423 }; 424 425 struct smbdirect_recv_io { 426 struct smbdirect_socket *socket; 427 struct ib_cqe cqe; 428 429 /* 430 * For now we only use a single SGE 431 * as we have just one large buffer 432 * per posted recv. 433 */ 434 #define SMBDIRECT_RECV_IO_MAX_SGE 1 435 struct ib_sge sge; 436 437 /* Link to free or reassembly list */ 438 struct list_head list; 439 440 /* Indicate if this is the 1st packet of a payload */ 441 bool first_segment; 442 443 /* SMBD packet header and payload follows this structure */ 444 u8 packet[]; 445 }; 446 447 enum smbdirect_mr_state { 448 SMBDIRECT_MR_READY, 449 SMBDIRECT_MR_REGISTERED, 450 SMBDIRECT_MR_INVALIDATED, 451 SMBDIRECT_MR_ERROR, 452 SMBDIRECT_MR_DISABLED 453 }; 454 455 struct smbdirect_mr_io { 456 struct smbdirect_socket *socket; 457 struct ib_cqe cqe; 458 459 /* 460 * We can have up to two references: 461 * 1. by the connection 462 * 2. by the registration 463 */ 464 struct kref kref; 465 struct mutex mutex; 466 467 struct list_head list; 468 469 enum smbdirect_mr_state state; 470 struct ib_mr *mr; 471 struct sg_table sgt; 472 enum dma_data_direction dir; 473 union { 474 struct ib_reg_wr wr; 475 struct ib_send_wr inv_wr; 476 }; 477 478 bool need_invalidate; 479 struct completion invalidate_done; 480 }; 481 482 struct smbdirect_rw_io { 483 struct smbdirect_socket *socket; 484 struct ib_cqe cqe; 485 486 struct list_head list; 487 488 int error; 489 struct completion *completion; 490 491 struct rdma_rw_ctx rdma_ctx; 492 struct sg_table sgt; 493 struct scatterlist sg_list[]; 494 }; 495 496 #endif /* __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_SOCKET_H__ */ 497