/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Copyright (c) 2025 Stefan Metzmacher
 */
5
6 #ifndef __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_SOCKET_H__
7 #define __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_SOCKET_H__
8
9 #include <rdma/rw.h>
10
/*
 * Status values of a struct smbdirect_socket.
 *
 * SMBDIRECT_SOCKET_CREATED has to stay the first enumerator (value 0):
 * smbdirect_socket_init() zero-fills the socket and has a BUILD_BUG_ON()
 * checking that this leaves the status at SMBDIRECT_SOCKET_CREATED.
 */
enum smbdirect_socket_status {
	SMBDIRECT_SOCKET_CREATED,
	SMBDIRECT_SOCKET_RESOLVE_ADDR_NEEDED,
	SMBDIRECT_SOCKET_RESOLVE_ADDR_RUNNING,
	SMBDIRECT_SOCKET_RESOLVE_ADDR_FAILED,
	SMBDIRECT_SOCKET_RESOLVE_ROUTE_NEEDED,
	SMBDIRECT_SOCKET_RESOLVE_ROUTE_RUNNING,
	SMBDIRECT_SOCKET_RESOLVE_ROUTE_FAILED,
	SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED,
	SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING,
	SMBDIRECT_SOCKET_RDMA_CONNECT_FAILED,
	SMBDIRECT_SOCKET_NEGOTIATE_NEEDED,
	SMBDIRECT_SOCKET_NEGOTIATE_RUNNING,
	SMBDIRECT_SOCKET_NEGOTIATE_FAILED,
	SMBDIRECT_SOCKET_CONNECTED,
	SMBDIRECT_SOCKET_ERROR,
	SMBDIRECT_SOCKET_DISCONNECTING,
	SMBDIRECT_SOCKET_DISCONNECTED,
	SMBDIRECT_SOCKET_DESTROYED
};
31
32 static __always_inline
smbdirect_socket_status_string(enum smbdirect_socket_status status)33 const char *smbdirect_socket_status_string(enum smbdirect_socket_status status)
34 {
35 switch (status) {
36 case SMBDIRECT_SOCKET_CREATED:
37 return "CREATED";
38 case SMBDIRECT_SOCKET_RESOLVE_ADDR_NEEDED:
39 return "RESOLVE_ADDR_NEEDED";
40 case SMBDIRECT_SOCKET_RESOLVE_ADDR_RUNNING:
41 return "RESOLVE_ADDR_RUNNING";
42 case SMBDIRECT_SOCKET_RESOLVE_ADDR_FAILED:
43 return "RESOLVE_ADDR_FAILED";
44 case SMBDIRECT_SOCKET_RESOLVE_ROUTE_NEEDED:
45 return "RESOLVE_ROUTE_NEEDED";
46 case SMBDIRECT_SOCKET_RESOLVE_ROUTE_RUNNING:
47 return "RESOLVE_ROUTE_RUNNING";
48 case SMBDIRECT_SOCKET_RESOLVE_ROUTE_FAILED:
49 return "RESOLVE_ROUTE_FAILED";
50 case SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED:
51 return "RDMA_CONNECT_NEEDED";
52 case SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING:
53 return "RDMA_CONNECT_RUNNING";
54 case SMBDIRECT_SOCKET_RDMA_CONNECT_FAILED:
55 return "RDMA_CONNECT_FAILED";
56 case SMBDIRECT_SOCKET_NEGOTIATE_NEEDED:
57 return "NEGOTIATE_NEEDED";
58 case SMBDIRECT_SOCKET_NEGOTIATE_RUNNING:
59 return "NEGOTIATE_RUNNING";
60 case SMBDIRECT_SOCKET_NEGOTIATE_FAILED:
61 return "NEGOTIATE_FAILED";
62 case SMBDIRECT_SOCKET_CONNECTED:
63 return "CONNECTED";
64 case SMBDIRECT_SOCKET_ERROR:
65 return "ERROR";
66 case SMBDIRECT_SOCKET_DISCONNECTING:
67 return "DISCONNECTING";
68 case SMBDIRECT_SOCKET_DISCONNECTED:
69 return "DISCONNECTED";
70 case SMBDIRECT_SOCKET_DESTROYED:
71 return "DESTROYED";
72 }
73
74 return "<unknown>";
75 }
76
77 /*
78 * This can be used with %1pe to print errors as strings or '0'
79 * And it avoids warnings like: warn: passing zero to 'ERR_PTR'
80 * from smatch -p=kernel --pedantic
81 */
82 static __always_inline
SMBDIRECT_DEBUG_ERR_PTR(long error)83 const void * __must_check SMBDIRECT_DEBUG_ERR_PTR(long error)
84 {
85 if (error == 0)
86 return NULL;
87 return ERR_PTR(error);
88 }
89
/*
 * Keepalive state, stored in smbdirect_socket.idle.keepalive.
 *
 * SMBDIRECT_KEEPALIVE_NONE is 0, which is the value the field has
 * after smbdirect_socket_init() zero-fills the socket.
 */
enum smbdirect_keepalive_status {
	SMBDIRECT_KEEPALIVE_NONE,
	SMBDIRECT_KEEPALIVE_PENDING,
	SMBDIRECT_KEEPALIVE_SENT
};
95
96 struct smbdirect_socket {
97 enum smbdirect_socket_status status;
98 wait_queue_head_t status_wait;
99 int first_error;
100
101 /*
102 * This points to the workqueue to
103 * be used for this socket.
104 * It can be per socket (on the client)
105 * or point to a global workqueue (on the server)
106 */
107 struct workqueue_struct *workqueue;
108
109 struct work_struct disconnect_work;
110
111 /* RDMA related */
112 struct {
113 struct rdma_cm_id *cm_id;
114 /*
115 * This is for iWarp MPA v1
116 */
117 bool legacy_iwarp;
118 } rdma;
119
120 /* IB verbs related */
121 struct {
122 struct ib_pd *pd;
123 struct ib_cq *send_cq;
124 struct ib_cq *recv_cq;
125
126 /*
127 * shortcuts for rdma.cm_id->{qp,device};
128 */
129 struct ib_qp *qp;
130 struct ib_device *dev;
131 } ib;
132
133 struct smbdirect_socket_parameters parameters;
134
135 /*
136 * The state for keepalive and timeout handling
137 */
138 struct {
139 enum smbdirect_keepalive_status keepalive;
140 struct work_struct immediate_work;
141 struct delayed_work timer_work;
142 } idle;
143
144 /*
145 * The state for posted send buffers
146 */
147 struct {
148 /*
149 * Memory pools for preallocating
150 * smbdirect_send_io buffers
151 */
152 struct {
153 struct kmem_cache *cache;
154 mempool_t *pool;
155 } mem;
156
157 /*
158 * The local credit state for ib_post_send()
159 */
160 struct {
161 atomic_t count;
162 wait_queue_head_t wait_queue;
163 } lcredits;
164
165 /*
166 * The remote credit state for the send side
167 */
168 struct {
169 atomic_t count;
170 wait_queue_head_t wait_queue;
171 } credits;
172
173 /*
174 * The state about posted/pending sends
175 */
176 struct {
177 atomic_t count;
178 /*
179 * woken when count is decremented
180 */
181 wait_queue_head_t dec_wait_queue;
182 /*
183 * woken when count reached zero
184 */
185 wait_queue_head_t zero_wait_queue;
186 } pending;
187 } send_io;
188
189 /*
190 * The state for posted receive buffers
191 */
192 struct {
193 /*
194 * The type of PDU we are expecting
195 */
196 enum {
197 SMBDIRECT_EXPECT_NEGOTIATE_REQ = 1,
198 SMBDIRECT_EXPECT_NEGOTIATE_REP = 2,
199 SMBDIRECT_EXPECT_DATA_TRANSFER = 3,
200 } expected;
201
202 /*
203 * Memory pools for preallocating
204 * smbdirect_recv_io buffers
205 */
206 struct {
207 struct kmem_cache *cache;
208 mempool_t *pool;
209 } mem;
210
211 /*
212 * The list of free smbdirect_recv_io
213 * structures
214 */
215 struct {
216 struct list_head list;
217 spinlock_t lock;
218 } free;
219
220 /*
221 * The state for posted recv_io messages
222 * and the refill work struct.
223 */
224 struct {
225 atomic_t count;
226 struct work_struct refill_work;
227 } posted;
228
229 /*
230 * The credit state for the recv side
231 */
232 struct {
233 u16 target;
234 atomic_t count;
235 } credits;
236
237 /*
238 * The list of arrived non-empty smbdirect_recv_io
239 * structures
240 *
241 * This represents the reassembly queue.
242 */
243 struct {
244 struct list_head list;
245 spinlock_t lock;
246 wait_queue_head_t wait_queue;
247 /* total data length of reassembly queue */
248 int data_length;
249 int queue_length;
250 /* the offset to first buffer in reassembly queue */
251 int first_entry_offset;
252 /*
253 * Indicate if we have received a full packet on the
254 * connection This is used to identify the first SMBD
255 * packet of a assembled payload (SMB packet) in
256 * reassembly queue so we can return a RFC1002 length to
257 * upper layer to indicate the length of the SMB packet
258 * received
259 */
260 bool full_packet_received;
261 } reassembly;
262 } recv_io;
263
264 /*
265 * The state for Memory registrations on the client
266 */
267 struct {
268 enum ib_mr_type type;
269
270 /*
271 * The list of free smbdirect_mr_io
272 * structures
273 */
274 struct {
275 struct list_head list;
276 spinlock_t lock;
277 } all;
278
279 /*
280 * The number of available MRs ready for memory registration
281 */
282 struct {
283 atomic_t count;
284 wait_queue_head_t wait_queue;
285 } ready;
286
287 /*
288 * The number of used MRs
289 */
290 struct {
291 atomic_t count;
292 } used;
293
294 struct work_struct recovery_work;
295
296 /* Used by transport to wait until all MRs are returned */
297 struct {
298 wait_queue_head_t wait_queue;
299 } cleanup;
300 } mr_io;
301
302 /*
303 * The state for RDMA read/write requests on the server
304 */
305 struct {
306 /*
307 * The credit state for the send side
308 */
309 struct {
310 /*
311 * The maximum number of rw credits
312 */
313 size_t max;
314 /*
315 * The number of pages per credit
316 */
317 size_t num_pages;
318 atomic_t count;
319 wait_queue_head_t wait_queue;
320 } credits;
321 } rw_io;
322
323 /*
324 * For debug purposes
325 */
326 struct {
327 u64 get_receive_buffer;
328 u64 put_receive_buffer;
329 u64 enqueue_reassembly_queue;
330 u64 dequeue_reassembly_queue;
331 u64 send_empty;
332 } statistics;
333 };
334
/*
 * Placeholder work handler installed by smbdirect_socket_init().
 *
 * smbdirect_socket_init() disables every work item right after
 * installing this handler, so it is not expected to run; if it does
 * run anyway it emits a one-time warning and does nothing else.
 */
static void __smbdirect_socket_disabled_work(struct work_struct *work)
{
	/*
	 * Should never be called as disable_[delayed_]work_sync() was used.
	 */
	WARN_ON_ONCE(1);
}
342
smbdirect_socket_init(struct smbdirect_socket * sc)343 static __always_inline void smbdirect_socket_init(struct smbdirect_socket *sc)
344 {
345 /*
346 * This also sets status = SMBDIRECT_SOCKET_CREATED
347 */
348 BUILD_BUG_ON(SMBDIRECT_SOCKET_CREATED != 0);
349 memset(sc, 0, sizeof(*sc));
350
351 init_waitqueue_head(&sc->status_wait);
352
353 INIT_WORK(&sc->disconnect_work, __smbdirect_socket_disabled_work);
354 disable_work_sync(&sc->disconnect_work);
355
356 INIT_WORK(&sc->idle.immediate_work, __smbdirect_socket_disabled_work);
357 disable_work_sync(&sc->idle.immediate_work);
358 INIT_DELAYED_WORK(&sc->idle.timer_work, __smbdirect_socket_disabled_work);
359 disable_delayed_work_sync(&sc->idle.timer_work);
360
361 atomic_set(&sc->send_io.lcredits.count, 0);
362 init_waitqueue_head(&sc->send_io.lcredits.wait_queue);
363
364 atomic_set(&sc->send_io.credits.count, 0);
365 init_waitqueue_head(&sc->send_io.credits.wait_queue);
366
367 atomic_set(&sc->send_io.pending.count, 0);
368 init_waitqueue_head(&sc->send_io.pending.dec_wait_queue);
369 init_waitqueue_head(&sc->send_io.pending.zero_wait_queue);
370
371 INIT_LIST_HEAD(&sc->recv_io.free.list);
372 spin_lock_init(&sc->recv_io.free.lock);
373
374 atomic_set(&sc->recv_io.posted.count, 0);
375 INIT_WORK(&sc->recv_io.posted.refill_work, __smbdirect_socket_disabled_work);
376 disable_work_sync(&sc->recv_io.posted.refill_work);
377
378 atomic_set(&sc->recv_io.credits.count, 0);
379
380 INIT_LIST_HEAD(&sc->recv_io.reassembly.list);
381 spin_lock_init(&sc->recv_io.reassembly.lock);
382 init_waitqueue_head(&sc->recv_io.reassembly.wait_queue);
383
384 atomic_set(&sc->rw_io.credits.count, 0);
385 init_waitqueue_head(&sc->rw_io.credits.wait_queue);
386
387 spin_lock_init(&sc->mr_io.all.lock);
388 INIT_LIST_HEAD(&sc->mr_io.all.list);
389 atomic_set(&sc->mr_io.ready.count, 0);
390 init_waitqueue_head(&sc->mr_io.ready.wait_queue);
391 atomic_set(&sc->mr_io.used.count, 0);
392 INIT_WORK(&sc->mr_io.recovery_work, __smbdirect_socket_disabled_work);
393 disable_work_sync(&sc->mr_io.recovery_work);
394 init_waitqueue_head(&sc->mr_io.cleanup.wait_queue);
395 }
396
/*
 * Check that @__sc has no recorded error and is in @__expected_status.
 *
 * This is a statement expression that evaluates to a bool:
 *  - true, after running @__error_cmd, if (@__sc)->first_error is
 *    non-zero;
 *  - otherwise true, after running @__unexpected_cmd, if
 *    (@__sc)->status differs from @__expected_status;
 *  - false otherwise, without running either command.
 *
 * @__error_cmd and @__unexpected_cmd are pasted as statements and may
 * be empty. @__sc is evaluated more than once, so it must not have
 * side effects.
 */
#define __SMBDIRECT_CHECK_STATUS_FAILED(__sc, __expected_status, __error_cmd, __unexpected_cmd) ({ \
	bool __failed = false; \
	if (unlikely((__sc)->first_error)) { \
		__failed = true; \
		__error_cmd \
	} else if (unlikely((__sc)->status != (__expected_status))) { \
		__failed = true; \
		__unexpected_cmd \
	} \
	__failed; \
})
408
/*
 * Like __SMBDIRECT_CHECK_STATUS_FAILED(), but warn on an unexpected
 * status.
 *
 * Nothing extra is done when first_error is already set (the error
 * command is empty). If first_error is zero and the status is not
 * @__expected_status, a WARN_ONCE() prints the expected and the
 * current status together with first_error and the local/remote
 * addresses taken from rdma.cm_id (NULL pointers are passed if there
 * is no cm_id), and then @__unexpected_cmd is run.
 *
 * @__sc and @__expected_status are evaluated more than once.
 */
#define __SMBDIRECT_CHECK_STATUS_WARN(__sc, __expected_status, __unexpected_cmd) \
	__SMBDIRECT_CHECK_STATUS_FAILED(__sc, __expected_status, \
	, \
	{ \
		const struct sockaddr_storage *__src = NULL; \
		const struct sockaddr_storage *__dst = NULL; \
		if ((__sc)->rdma.cm_id) { \
			__src = &(__sc)->rdma.cm_id->route.addr.src_addr; \
			__dst = &(__sc)->rdma.cm_id->route.addr.dst_addr; \
		} \
		WARN_ONCE(1, \
			  "expected[%s] != %s first_error=%1pe local=%pISpsfc remote=%pISpsfc\n", \
			  smbdirect_socket_status_string(__expected_status), \
			  smbdirect_socket_status_string((__sc)->status), \
			  SMBDIRECT_DEBUG_ERR_PTR((__sc)->first_error), \
			  __src, __dst); \
		__unexpected_cmd \
	})
427
/*
 * Evaluates to true if @__sc has first_error set or is not in
 * @__expected_status; the latter case also triggers a WARN_ONCE().
 * No further action is taken.
 */
#define SMBDIRECT_CHECK_STATUS_WARN(__sc, __expected_status) \
	__SMBDIRECT_CHECK_STATUS_WARN(__sc, __expected_status, /* nothing */)
430
/*
 * Same as SMBDIRECT_CHECK_STATUS_WARN(), but additionally invokes
 * __SMBDIRECT_SOCKET_DISCONNECT(@__sc) after the warning when the
 * status is unexpected.
 *
 * __SMBDIRECT_SOCKET_DISCONNECT() is not defined in this header, so it
 * has to be provided by the code using this macro.
 */
#define SMBDIRECT_CHECK_STATUS_DISCONNECT(__sc, __expected_status) \
	__SMBDIRECT_CHECK_STATUS_WARN(__sc, __expected_status, \
		__SMBDIRECT_SOCKET_DISCONNECT(__sc);)
434
435 struct smbdirect_send_io {
436 struct smbdirect_socket *socket;
437 struct ib_cqe cqe;
438
439 /*
440 * The SGE entries for this work request
441 *
442 * The first points to the packet header
443 */
444 #define SMBDIRECT_SEND_IO_MAX_SGE 6
445 size_t num_sge;
446 struct ib_sge sge[SMBDIRECT_SEND_IO_MAX_SGE];
447
448 /*
449 * Link to the list of sibling smbdirect_send_io
450 * messages.
451 */
452 struct list_head sibling_list;
453 struct ib_send_wr wr;
454
455 /* SMBD packet header follows this structure */
456 u8 packet[];
457 };
458
459 struct smbdirect_send_batch {
460 /*
461 * List of smbdirect_send_io messages
462 */
463 struct list_head msg_list;
464 /*
465 * Number of list entries
466 */
467 size_t wr_cnt;
468
469 /*
470 * Possible remote key invalidation state
471 */
472 bool need_invalidate_rkey;
473 u32 remote_key;
474 };
475
476 struct smbdirect_recv_io {
477 struct smbdirect_socket *socket;
478 struct ib_cqe cqe;
479
480 /*
481 * For now we only use a single SGE
482 * as we have just one large buffer
483 * per posted recv.
484 */
485 #define SMBDIRECT_RECV_IO_MAX_SGE 1
486 struct ib_sge sge;
487
488 /* Link to free or reassembly list */
489 struct list_head list;
490
491 /* Indicate if this is the 1st packet of a payload */
492 bool first_segment;
493
494 /* SMBD packet header and payload follows this structure */
495 u8 packet[];
496 };
497
/*
 * State of a struct smbdirect_mr_io, stored in its 'state' member.
 */
enum smbdirect_mr_state {
	SMBDIRECT_MR_READY,
	SMBDIRECT_MR_REGISTERED,
	SMBDIRECT_MR_INVALIDATED,
	SMBDIRECT_MR_ERROR,
	SMBDIRECT_MR_DISABLED
};
505
506 struct smbdirect_mr_io {
507 struct smbdirect_socket *socket;
508 struct ib_cqe cqe;
509
510 /*
511 * We can have up to two references:
512 * 1. by the connection
513 * 2. by the registration
514 */
515 struct kref kref;
516 struct mutex mutex;
517
518 struct list_head list;
519
520 enum smbdirect_mr_state state;
521 struct ib_mr *mr;
522 struct sg_table sgt;
523 enum dma_data_direction dir;
524 union {
525 struct ib_reg_wr wr;
526 struct ib_send_wr inv_wr;
527 };
528
529 bool need_invalidate;
530 struct completion invalidate_done;
531 };
532
533 struct smbdirect_rw_io {
534 struct smbdirect_socket *socket;
535 struct ib_cqe cqe;
536
537 struct list_head list;
538
539 int error;
540 struct completion *completion;
541
542 struct rdma_rw_ctx rdma_ctx;
543 struct sg_table sgt;
544 struct scatterlist sg_list[];
545 };
546
547 #endif /* __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_SOCKET_H__ */
548