/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 *   Copyright (c) 2025 Stefan Metzmacher
 */
5
6 #ifndef __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_SOCKET_H__
7 #define __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_SOCKET_H__
8
9 #include <rdma/rw.h>
10
/*
 * Connection states of an smbdirect_socket.
 *
 * SMBDIRECT_SOCKET_CREATED has to remain the first enumerator (value 0):
 * smbdirect_socket_init() clears the whole socket with memset() and
 * checks this assumption with BUILD_BUG_ON().
 *
 * smbdirect_socket_status_string() has one case per enumerator and
 * needs to be extended together with this list.
 */
enum smbdirect_socket_status {
	SMBDIRECT_SOCKET_CREATED,
	SMBDIRECT_SOCKET_RESOLVE_ADDR_NEEDED,
	SMBDIRECT_SOCKET_RESOLVE_ADDR_RUNNING,
	SMBDIRECT_SOCKET_RESOLVE_ADDR_FAILED,
	SMBDIRECT_SOCKET_RESOLVE_ROUTE_NEEDED,
	SMBDIRECT_SOCKET_RESOLVE_ROUTE_RUNNING,
	SMBDIRECT_SOCKET_RESOLVE_ROUTE_FAILED,
	SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED,
	SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING,
	SMBDIRECT_SOCKET_RDMA_CONNECT_FAILED,
	SMBDIRECT_SOCKET_NEGOTIATE_NEEDED,
	SMBDIRECT_SOCKET_NEGOTIATE_RUNNING,
	SMBDIRECT_SOCKET_NEGOTIATE_FAILED,
	SMBDIRECT_SOCKET_CONNECTED,
	SMBDIRECT_SOCKET_ERROR,
	SMBDIRECT_SOCKET_DISCONNECTING,
	SMBDIRECT_SOCKET_DISCONNECTED,
	SMBDIRECT_SOCKET_DESTROYED
};
31
32 static __always_inline
smbdirect_socket_status_string(enum smbdirect_socket_status status)33 const char *smbdirect_socket_status_string(enum smbdirect_socket_status status)
34 {
35 switch (status) {
36 case SMBDIRECT_SOCKET_CREATED:
37 return "CREATED";
38 case SMBDIRECT_SOCKET_RESOLVE_ADDR_NEEDED:
39 return "RESOLVE_ADDR_NEEDED";
40 case SMBDIRECT_SOCKET_RESOLVE_ADDR_RUNNING:
41 return "RESOLVE_ADDR_RUNNING";
42 case SMBDIRECT_SOCKET_RESOLVE_ADDR_FAILED:
43 return "RESOLVE_ADDR_FAILED";
44 case SMBDIRECT_SOCKET_RESOLVE_ROUTE_NEEDED:
45 return "RESOLVE_ROUTE_NEEDED";
46 case SMBDIRECT_SOCKET_RESOLVE_ROUTE_RUNNING:
47 return "RESOLVE_ROUTE_RUNNING";
48 case SMBDIRECT_SOCKET_RESOLVE_ROUTE_FAILED:
49 return "RESOLVE_ROUTE_FAILED";
50 case SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED:
51 return "RDMA_CONNECT_NEEDED";
52 case SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING:
53 return "RDMA_CONNECT_RUNNING";
54 case SMBDIRECT_SOCKET_RDMA_CONNECT_FAILED:
55 return "RDMA_CONNECT_FAILED";
56 case SMBDIRECT_SOCKET_NEGOTIATE_NEEDED:
57 return "NEGOTIATE_NEEDED";
58 case SMBDIRECT_SOCKET_NEGOTIATE_RUNNING:
59 return "NEGOTIATE_RUNNING";
60 case SMBDIRECT_SOCKET_NEGOTIATE_FAILED:
61 return "NEGOTIATE_FAILED";
62 case SMBDIRECT_SOCKET_CONNECTED:
63 return "CONNECTED";
64 case SMBDIRECT_SOCKET_ERROR:
65 return "ERROR";
66 case SMBDIRECT_SOCKET_DISCONNECTING:
67 return "DISCONNECTING";
68 case SMBDIRECT_SOCKET_DISCONNECTED:
69 return "DISCONNECTED";
70 case SMBDIRECT_SOCKET_DESTROYED:
71 return "DESTROYED";
72 }
73
74 return "<unknown>";
75 }
76
77 /*
78 * This can be used with %1pe to print errors as strings or '0'
79 * And it avoids warnings like: warn: passing zero to 'ERR_PTR'
80 * from smatch -p=kernel --pedantic
81 */
82 static __always_inline
SMBDIRECT_DEBUG_ERR_PTR(long error)83 const void * __must_check SMBDIRECT_DEBUG_ERR_PTR(long error)
84 {
85 if (error == 0)
86 return NULL;
87 return ERR_PTR(error);
88 }
89
/*
 * Keepalive state, stored in smbdirect_socket.idle.keepalive.
 *
 * SMBDIRECT_KEEPALIVE_NONE is 0 and therefore the state a socket has
 * after smbdirect_socket_init() zeroed it.
 */
enum smbdirect_keepalive_status {
	SMBDIRECT_KEEPALIVE_NONE,
	SMBDIRECT_KEEPALIVE_PENDING,
	SMBDIRECT_KEEPALIVE_SENT
};
95
96 struct smbdirect_socket {
97 enum smbdirect_socket_status status;
98 wait_queue_head_t status_wait;
99 int first_error;
100
101 /*
102 * This points to the workqueue to
103 * be used for this socket.
104 * It can be per socket (on the client)
105 * or point to a global workqueue (on the server)
106 */
107 struct workqueue_struct *workqueue;
108
109 struct work_struct disconnect_work;
110
111 /* RDMA related */
112 struct {
113 struct rdma_cm_id *cm_id;
114 /*
115 * This is for iWarp MPA v1
116 */
117 bool legacy_iwarp;
118 } rdma;
119
120 /* IB verbs related */
121 struct {
122 struct ib_pd *pd;
123 struct ib_cq *send_cq;
124 struct ib_cq *recv_cq;
125
126 /*
127 * shortcuts for rdma.cm_id->{qp,device};
128 */
129 struct ib_qp *qp;
130 struct ib_device *dev;
131 } ib;
132
133 struct smbdirect_socket_parameters parameters;
134
135 /*
136 * The state for connect/negotiation
137 */
138 struct {
139 spinlock_t lock;
140 struct work_struct work;
141 } connect;
142
143 /*
144 * The state for keepalive and timeout handling
145 */
146 struct {
147 enum smbdirect_keepalive_status keepalive;
148 struct work_struct immediate_work;
149 struct delayed_work timer_work;
150 } idle;
151
152 /*
153 * The state for posted send buffers
154 */
155 struct {
156 /*
157 * Memory pools for preallocating
158 * smbdirect_send_io buffers
159 */
160 struct {
161 struct kmem_cache *cache;
162 mempool_t *pool;
163 } mem;
164
165 /*
166 * The local credit state for ib_post_send()
167 */
168 struct {
169 atomic_t count;
170 wait_queue_head_t wait_queue;
171 } lcredits;
172
173 /*
174 * The remote credit state for the send side
175 */
176 struct {
177 atomic_t count;
178 wait_queue_head_t wait_queue;
179 } credits;
180
181 /*
182 * The state about posted/pending sends
183 */
184 struct {
185 atomic_t count;
186 /*
187 * woken when count is decremented
188 */
189 wait_queue_head_t dec_wait_queue;
190 /*
191 * woken when count reached zero
192 */
193 wait_queue_head_t zero_wait_queue;
194 } pending;
195 } send_io;
196
197 /*
198 * The state for posted receive buffers
199 */
200 struct {
201 /*
202 * The type of PDU we are expecting
203 */
204 enum {
205 SMBDIRECT_EXPECT_NEGOTIATE_REQ = 1,
206 SMBDIRECT_EXPECT_NEGOTIATE_REP = 2,
207 SMBDIRECT_EXPECT_DATA_TRANSFER = 3,
208 } expected;
209
210 /*
211 * Memory pools for preallocating
212 * smbdirect_recv_io buffers
213 */
214 struct {
215 struct kmem_cache *cache;
216 mempool_t *pool;
217 } mem;
218
219 /*
220 * The list of free smbdirect_recv_io
221 * structures
222 */
223 struct {
224 struct list_head list;
225 spinlock_t lock;
226 } free;
227
228 /*
229 * The state for posted recv_io messages
230 * and the refill work struct.
231 */
232 struct {
233 atomic_t count;
234 struct work_struct refill_work;
235 } posted;
236
237 /*
238 * The credit state for the recv side
239 */
240 struct {
241 u16 target;
242 atomic_t count;
243 } credits;
244
245 /*
246 * The list of arrived non-empty smbdirect_recv_io
247 * structures
248 *
249 * This represents the reassembly queue.
250 */
251 struct {
252 struct list_head list;
253 spinlock_t lock;
254 wait_queue_head_t wait_queue;
255 /* total data length of reassembly queue */
256 int data_length;
257 int queue_length;
258 /* the offset to first buffer in reassembly queue */
259 int first_entry_offset;
260 /*
261 * Indicate if we have received a full packet on the
262 * connection This is used to identify the first SMBD
263 * packet of a assembled payload (SMB packet) in
264 * reassembly queue so we can return a RFC1002 length to
265 * upper layer to indicate the length of the SMB packet
266 * received
267 */
268 bool full_packet_received;
269 } reassembly;
270 } recv_io;
271
272 /*
273 * The state for Memory registrations on the client
274 */
275 struct {
276 enum ib_mr_type type;
277
278 /*
279 * The list of free smbdirect_mr_io
280 * structures
281 */
282 struct {
283 struct list_head list;
284 spinlock_t lock;
285 } all;
286
287 /*
288 * The number of available MRs ready for memory registration
289 */
290 struct {
291 atomic_t count;
292 wait_queue_head_t wait_queue;
293 } ready;
294
295 /*
296 * The number of used MRs
297 */
298 struct {
299 atomic_t count;
300 } used;
301
302 struct work_struct recovery_work;
303
304 /* Used by transport to wait until all MRs are returned */
305 struct {
306 wait_queue_head_t wait_queue;
307 } cleanup;
308 } mr_io;
309
310 /*
311 * The state for RDMA read/write requests on the server
312 */
313 struct {
314 /*
315 * The credit state for the send side
316 */
317 struct {
318 /*
319 * The maximum number of rw credits
320 */
321 size_t max;
322 /*
323 * The number of pages per credit
324 */
325 size_t num_pages;
326 atomic_t count;
327 wait_queue_head_t wait_queue;
328 } credits;
329 } rw_io;
330
331 /*
332 * For debug purposes
333 */
334 struct {
335 u64 get_receive_buffer;
336 u64 put_receive_buffer;
337 u64 enqueue_reassembly_queue;
338 u64 dequeue_reassembly_queue;
339 u64 send_empty;
340 } statistics;
341 };
342
/*
 * Placeholder work handler installed by smbdirect_socket_init() for
 * every work item of a socket.
 *
 * @work is unused; reaching this function only triggers a one-time
 * warning.
 */
static void __smbdirect_socket_disabled_work(struct work_struct *work)
{
	/*
	 * Should never be called as disable_[delayed_]work_sync() was used.
	 */
	WARN_ON_ONCE(1);
}
350
smbdirect_socket_init(struct smbdirect_socket * sc)351 static __always_inline void smbdirect_socket_init(struct smbdirect_socket *sc)
352 {
353 /*
354 * This also sets status = SMBDIRECT_SOCKET_CREATED
355 */
356 BUILD_BUG_ON(SMBDIRECT_SOCKET_CREATED != 0);
357 memset(sc, 0, sizeof(*sc));
358
359 init_waitqueue_head(&sc->status_wait);
360
361 INIT_WORK(&sc->disconnect_work, __smbdirect_socket_disabled_work);
362 disable_work_sync(&sc->disconnect_work);
363
364 spin_lock_init(&sc->connect.lock);
365 INIT_WORK(&sc->connect.work, __smbdirect_socket_disabled_work);
366 disable_work_sync(&sc->connect.work);
367
368 INIT_WORK(&sc->idle.immediate_work, __smbdirect_socket_disabled_work);
369 disable_work_sync(&sc->idle.immediate_work);
370 INIT_DELAYED_WORK(&sc->idle.timer_work, __smbdirect_socket_disabled_work);
371 disable_delayed_work_sync(&sc->idle.timer_work);
372
373 atomic_set(&sc->send_io.lcredits.count, 0);
374 init_waitqueue_head(&sc->send_io.lcredits.wait_queue);
375
376 atomic_set(&sc->send_io.credits.count, 0);
377 init_waitqueue_head(&sc->send_io.credits.wait_queue);
378
379 atomic_set(&sc->send_io.pending.count, 0);
380 init_waitqueue_head(&sc->send_io.pending.dec_wait_queue);
381 init_waitqueue_head(&sc->send_io.pending.zero_wait_queue);
382
383 INIT_LIST_HEAD(&sc->recv_io.free.list);
384 spin_lock_init(&sc->recv_io.free.lock);
385
386 atomic_set(&sc->recv_io.posted.count, 0);
387 INIT_WORK(&sc->recv_io.posted.refill_work, __smbdirect_socket_disabled_work);
388 disable_work_sync(&sc->recv_io.posted.refill_work);
389
390 atomic_set(&sc->recv_io.credits.count, 0);
391
392 INIT_LIST_HEAD(&sc->recv_io.reassembly.list);
393 spin_lock_init(&sc->recv_io.reassembly.lock);
394 init_waitqueue_head(&sc->recv_io.reassembly.wait_queue);
395
396 atomic_set(&sc->rw_io.credits.count, 0);
397 init_waitqueue_head(&sc->rw_io.credits.wait_queue);
398
399 spin_lock_init(&sc->mr_io.all.lock);
400 INIT_LIST_HEAD(&sc->mr_io.all.list);
401 atomic_set(&sc->mr_io.ready.count, 0);
402 init_waitqueue_head(&sc->mr_io.ready.wait_queue);
403 atomic_set(&sc->mr_io.used.count, 0);
404 INIT_WORK(&sc->mr_io.recovery_work, __smbdirect_socket_disabled_work);
405 disable_work_sync(&sc->mr_io.recovery_work);
406 init_waitqueue_head(&sc->mr_io.cleanup.wait_queue);
407 }
408
/*
 * Check that a socket is error free and in the expected state.
 *
 * Evaluates to true ("failed") if either
 *  - (__sc)->first_error is non-zero, in which case __error_cmd is
 *    expanded, or
 *  - (__sc)->status differs from __expected_status, in which case
 *    __unexpected_cmd is expanded.
 * Otherwise it evaluates to false and none of the commands run.
 *
 * __error_cmd and __unexpected_cmd are pasted as statements and may be
 * empty. __sc is evaluated more than once, so it must not have side
 * effects.
 */
#define __SMBDIRECT_CHECK_STATUS_FAILED(__sc, __expected_status, __error_cmd, __unexpected_cmd) ({ \
	bool __failed = false; \
	if (unlikely((__sc)->first_error)) { \
		__failed = true; \
		__error_cmd \
	} else if (unlikely((__sc)->status != (__expected_status))) { \
		__failed = true; \
		__unexpected_cmd \
	} \
	__failed; \
})
420
/*
 * Like __SMBDIRECT_CHECK_STATUS_FAILED(), but with an empty error
 * command and a WARN_ONCE() on an unexpected status.
 *
 * The warning prints the expected and the current status, first_error
 * and, when rdma.cm_id is set, the local and remote address of the
 * connection (NULL pointers are passed otherwise). __unexpected_cmd is
 * expanded after the warning.
 *
 * The warning branch is only reached when first_error was read as
 * zero, hence SMBDIRECT_DEBUG_ERR_PTR() instead of ERR_PTR().
 */
#define __SMBDIRECT_CHECK_STATUS_WARN(__sc, __expected_status, __unexpected_cmd) \
	__SMBDIRECT_CHECK_STATUS_FAILED(__sc, __expected_status, \
	, \
	{ \
		const struct sockaddr_storage *__src = NULL; \
		const struct sockaddr_storage *__dst = NULL; \
		if ((__sc)->rdma.cm_id) { \
			__src = &(__sc)->rdma.cm_id->route.addr.src_addr; \
			__dst = &(__sc)->rdma.cm_id->route.addr.dst_addr; \
		} \
		WARN_ONCE(1, \
			  "expected[%s] != %s first_error=%1pe local=%pISpsfc remote=%pISpsfc\n", \
			  smbdirect_socket_status_string(__expected_status), \
			  smbdirect_socket_status_string((__sc)->status), \
			  SMBDIRECT_DEBUG_ERR_PTR((__sc)->first_error), \
			  __src, __dst); \
		__unexpected_cmd \
	})
439
/*
 * Evaluates to true if __sc has first_error set or is not in
 * __expected_status; an unexpected status also triggers a one-time
 * warning. No further action is taken.
 */
#define SMBDIRECT_CHECK_STATUS_WARN(__sc, __expected_status) \
	__SMBDIRECT_CHECK_STATUS_WARN(__sc, __expected_status, /* nothing */)
442
/*
 * Same as SMBDIRECT_CHECK_STATUS_WARN(), but an unexpected status
 * additionally invokes __SMBDIRECT_SOCKET_DISCONNECT(__sc) after the
 * warning.
 *
 * __SMBDIRECT_SOCKET_DISCONNECT() is not defined in this header; the
 * including file has to provide it before using this macro.
 */
#define SMBDIRECT_CHECK_STATUS_DISCONNECT(__sc, __expected_status) \
	__SMBDIRECT_CHECK_STATUS_WARN(__sc, __expected_status, \
		__SMBDIRECT_SOCKET_DISCONNECT(__sc);)
446
447 struct smbdirect_send_io {
448 struct smbdirect_socket *socket;
449 struct ib_cqe cqe;
450
451 /*
452 * The SGE entries for this work request
453 *
454 * The first points to the packet header
455 */
456 #define SMBDIRECT_SEND_IO_MAX_SGE 6
457 size_t num_sge;
458 struct ib_sge sge[SMBDIRECT_SEND_IO_MAX_SGE];
459
460 /*
461 * Link to the list of sibling smbdirect_send_io
462 * messages.
463 */
464 struct list_head sibling_list;
465 struct ib_send_wr wr;
466
467 /* SMBD packet header follows this structure */
468 u8 packet[];
469 };
470
471 struct smbdirect_send_batch {
472 /*
473 * List of smbdirect_send_io messages
474 */
475 struct list_head msg_list;
476 /*
477 * Number of list entries
478 */
479 size_t wr_cnt;
480
481 /*
482 * Possible remote key invalidation state
483 */
484 bool need_invalidate_rkey;
485 u32 remote_key;
486 };
487
488 struct smbdirect_recv_io {
489 struct smbdirect_socket *socket;
490 struct ib_cqe cqe;
491
492 /*
493 * For now we only use a single SGE
494 * as we have just one large buffer
495 * per posted recv.
496 */
497 #define SMBDIRECT_RECV_IO_MAX_SGE 1
498 struct ib_sge sge;
499
500 /* Link to free or reassembly list */
501 struct list_head list;
502
503 /* Indicate if this is the 1st packet of a payload */
504 bool first_segment;
505
506 /* SMBD packet header and payload follows this structure */
507 u8 packet[];
508 };
509
/*
 * State of a memory registration, stored in smbdirect_mr_io.state.
 */
enum smbdirect_mr_state {
	SMBDIRECT_MR_READY,
	SMBDIRECT_MR_REGISTERED,
	SMBDIRECT_MR_INVALIDATED,
	SMBDIRECT_MR_ERROR,
	SMBDIRECT_MR_DISABLED
};
517
518 struct smbdirect_mr_io {
519 struct smbdirect_socket *socket;
520 struct ib_cqe cqe;
521
522 /*
523 * We can have up to two references:
524 * 1. by the connection
525 * 2. by the registration
526 */
527 struct kref kref;
528 struct mutex mutex;
529
530 struct list_head list;
531
532 enum smbdirect_mr_state state;
533 struct ib_mr *mr;
534 struct sg_table sgt;
535 enum dma_data_direction dir;
536 union {
537 struct ib_reg_wr wr;
538 struct ib_send_wr inv_wr;
539 };
540
541 bool need_invalidate;
542 struct completion invalidate_done;
543 };
544
545 struct smbdirect_rw_io {
546 struct smbdirect_socket *socket;
547 struct ib_cqe cqe;
548
549 struct list_head list;
550
551 int error;
552 struct completion *completion;
553
554 struct rdma_rw_ctx rdma_ctx;
555 struct sg_table sgt;
556 struct scatterlist sg_list[];
557 };
558
559 #endif /* __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_SOCKET_H__ */
560