/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 *   Copyright (c) 2025 Stefan Metzmacher
 */

#ifndef __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_SOCKET_H__
#define __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_SOCKET_H__

#include <rdma/rw.h>

enum smbdirect_socket_status {
	SMBDIRECT_SOCKET_CREATED,
	SMBDIRECT_SOCKET_RESOLVE_ADDR_NEEDED,
	SMBDIRECT_SOCKET_RESOLVE_ADDR_RUNNING,
	SMBDIRECT_SOCKET_RESOLVE_ADDR_FAILED,
	SMBDIRECT_SOCKET_RESOLVE_ROUTE_NEEDED,
	SMBDIRECT_SOCKET_RESOLVE_ROUTE_RUNNING,
	SMBDIRECT_SOCKET_RESOLVE_ROUTE_FAILED,
	SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED,
	SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING,
	SMBDIRECT_SOCKET_RDMA_CONNECT_FAILED,
	SMBDIRECT_SOCKET_NEGOTIATE_NEEDED,
	SMBDIRECT_SOCKET_NEGOTIATE_RUNNING,
	SMBDIRECT_SOCKET_NEGOTIATE_FAILED,
	SMBDIRECT_SOCKET_CONNECTED,
	SMBDIRECT_SOCKET_ERROR,
	SMBDIRECT_SOCKET_DISCONNECTING,
	SMBDIRECT_SOCKET_DISCONNECTED,
	SMBDIRECT_SOCKET_DESTROYED
};

static __always_inline
const char *smbdirect_socket_status_string(enum smbdirect_socket_status status)
{
	switch (status) {
	case SMBDIRECT_SOCKET_CREATED:
		return "CREATED";
	case SMBDIRECT_SOCKET_RESOLVE_ADDR_NEEDED:
		return "RESOLVE_ADDR_NEEDED";
	case SMBDIRECT_SOCKET_RESOLVE_ADDR_RUNNING:
		return "RESOLVE_ADDR_RUNNING";
	case SMBDIRECT_SOCKET_RESOLVE_ADDR_FAILED:
		return "RESOLVE_ADDR_FAILED";
	case SMBDIRECT_SOCKET_RESOLVE_ROUTE_NEEDED:
		return "RESOLVE_ROUTE_NEEDED";
	case SMBDIRECT_SOCKET_RESOLVE_ROUTE_RUNNING:
		return "RESOLVE_ROUTE_RUNNING";
	case SMBDIRECT_SOCKET_RESOLVE_ROUTE_FAILED:
		return "RESOLVE_ROUTE_FAILED";
	case SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED:
		return "RDMA_CONNECT_NEEDED";
	case SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING:
		return "RDMA_CONNECT_RUNNING";
	case SMBDIRECT_SOCKET_RDMA_CONNECT_FAILED:
		return "RDMA_CONNECT_FAILED";
	case SMBDIRECT_SOCKET_NEGOTIATE_NEEDED:
		return "NEGOTIATE_NEEDED";
	case SMBDIRECT_SOCKET_NEGOTIATE_RUNNING:
		return "NEGOTIATE_RUNNING";
	case SMBDIRECT_SOCKET_NEGOTIATE_FAILED:
		return "NEGOTIATE_FAILED";
	case SMBDIRECT_SOCKET_CONNECTED:
		return "CONNECTED";
	case SMBDIRECT_SOCKET_ERROR:
		return "ERROR";
	case SMBDIRECT_SOCKET_DISCONNECTING:
		return "DISCONNECTING";
	case SMBDIRECT_SOCKET_DISCONNECTED:
		return "DISCONNECTED";
	case SMBDIRECT_SOCKET_DESTROYED:
		return "DESTROYED";
	}

	return "<unknown>";
}

/*
 * This can be used with %1pe to print errors as strings or '0'.
 * It also avoids warnings like "warn: passing zero to 'ERR_PTR'"
 * from smatch -p=kernel --pedantic.
 */
static __always_inline
const void * __must_check SMBDIRECT_DEBUG_ERR_PTR(long error)
{
	if (error == 0)
		return NULL;
	return ERR_PTR(error);
}
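
/*
 * Example (illustrative only, not used by the code below): combining the
 * two helpers above in a log message, assuming a hypothetical error value
 * `rc` returned by a failed connection attempt:
 *
 *	pr_err("smbdirect: negotiate failed: %1pe (status=%s)\n",
 *	       SMBDIRECT_DEBUG_ERR_PTR(rc),
 *	       smbdirect_socket_status_string(SMBDIRECT_SOCKET_NEGOTIATE_FAILED));
 */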

enum smbdirect_keepalive_status {
	SMBDIRECT_KEEPALIVE_NONE,
	SMBDIRECT_KEEPALIVE_PENDING,
	SMBDIRECT_KEEPALIVE_SENT
};

struct smbdirect_socket {
	enum smbdirect_socket_status status;
	wait_queue_head_t status_wait;
	int first_error;

	/*
	 * This points to the workqueue to
	 * be used for this socket.
	 * It can be per socket (on the client)
	 * or point to a global workqueue (on the server)
	 */
	struct workqueue_struct *workqueue;

	struct work_struct disconnect_work;

	/* RDMA related */
	struct {
		struct rdma_cm_id *cm_id;
		/*
		 * This is for iWarp MPA v1
		 */
		bool legacy_iwarp;
	} rdma;

	/* IB verbs related */
	struct {
		struct ib_pd *pd;
		struct ib_cq *send_cq;
		struct ib_cq *recv_cq;

		/*
		 * shortcuts for rdma.cm_id->{qp,device}
		 */
		struct ib_qp *qp;
		struct ib_device *dev;
	} ib;

	struct smbdirect_socket_parameters parameters;

	/*
	 * The state for connect/negotiation
	 */
	struct {
		spinlock_t lock;
		struct work_struct work;
	} connect;

	/*
	 * The state for keepalive and timeout handling
	 */
	struct {
		enum smbdirect_keepalive_status keepalive;
		struct work_struct immediate_work;
		struct delayed_work timer_work;
	} idle;

	/*
	 * The state for posted send buffers
	 */
	struct {
		/*
		 * Memory pools for preallocating
		 * smbdirect_send_io buffers
		 */
		struct {
			struct kmem_cache	*cache;
			mempool_t		*pool;
		} mem;

		/*
		 * Coordination for smbdirect_send_batch.
		 *
		 * There is only one possible credit, which means
		 * only one instance runs at a time.
		 */
		struct {
			atomic_t count;
			wait_queue_head_t wait_queue;
		} bcredits;

		/*
		 * The local credit state for ib_post_send()
		 */
		struct {
			atomic_t count;
			wait_queue_head_t wait_queue;
		} lcredits;

		/*
		 * The remote credit state for the send side
		 */
		struct {
			atomic_t count;
			wait_queue_head_t wait_queue;
		} credits;

		/*
		 * The state of posted/pending sends
		 */
		struct {
			atomic_t count;
			/*
			 * woken when count is decremented
			 */
			wait_queue_head_t dec_wait_queue;
			/*
			 * woken when count reaches zero
			 */
			wait_queue_head_t zero_wait_queue;
		} pending;
	} send_io;

	/*
	 * The state for posted receive buffers
	 */
	struct {
		/*
		 * The type of PDU we are expecting
		 */
		enum {
			SMBDIRECT_EXPECT_NEGOTIATE_REQ = 1,
			SMBDIRECT_EXPECT_NEGOTIATE_REP = 2,
			SMBDIRECT_EXPECT_DATA_TRANSFER = 3,
		} expected;

		/*
		 * Memory pools for preallocating
		 * smbdirect_recv_io buffers
		 */
		struct {
			struct kmem_cache	*cache;
			mempool_t		*pool;
		} mem;

		/*
		 * The list of free smbdirect_recv_io
		 * structures
		 */
		struct {
			struct list_head list;
			spinlock_t lock;
		} free;

		/*
		 * The state for posted recv_io messages
		 * and the refill work struct.
		 */
		struct {
			atomic_t count;
			struct work_struct refill_work;
		} posted;

		/*
		 * The credit state for the recv side
		 */
		struct {
			u16 target;
			atomic_t available;
			atomic_t count;
		} credits;

		/*
		 * The list of arrived non-empty smbdirect_recv_io
		 * structures
		 *
		 * This represents the reassembly queue.
		 */
		struct {
			struct list_head list;
			spinlock_t lock;
			wait_queue_head_t wait_queue;
			/* total data length of reassembly queue */
			int data_length;
			int queue_length;
			/* offset into the first buffer in the reassembly queue */
			int first_entry_offset;
			/*
			 * Indicates whether we have received a full packet on
			 * the connection. This is used to identify the first
			 * SMBD packet of an assembled payload (SMB packet) in
			 * the reassembly queue, so we can return an RFC1002
			 * length to the upper layer to indicate the length of
			 * the received SMB packet.
			 */
			bool full_packet_received;
		} reassembly;
	} recv_io;

	/*
	 * The state for memory registrations on the client
	 */
	struct {
		enum ib_mr_type type;

		/*
		 * The list of all smbdirect_mr_io
		 * structures
		 */
		struct {
			struct list_head list;
			spinlock_t lock;
		} all;

		/*
		 * The number of available MRs ready for memory registration
		 */
		struct {
			atomic_t count;
			wait_queue_head_t wait_queue;
		} ready;

		/*
		 * The number of used MRs
		 */
		struct {
			atomic_t count;
		} used;

		struct work_struct recovery_work;

		/* Used by transport to wait until all MRs are returned */
		struct {
			wait_queue_head_t wait_queue;
		} cleanup;
	} mr_io;

	/*
	 * The state for RDMA read/write requests on the server
	 */
	struct {
		/*
		 * The credit state for RDMA read/write operations
		 */
		struct {
			/*
			 * The maximum number of rw credits
			 */
			size_t max;
			/*
			 * The number of pages per credit
			 */
			size_t num_pages;
			atomic_t count;
			wait_queue_head_t wait_queue;
		} credits;
	} rw_io;

	/*
	 * For debug purposes
	 */
	struct {
		u64 get_receive_buffer;
		u64 put_receive_buffer;
		u64 enqueue_reassembly_queue;
		u64 dequeue_reassembly_queue;
		u64 send_empty;
	} statistics;
};

static void __smbdirect_socket_disabled_work(struct work_struct *work)
{
	/*
	 * Should never be called as disable_[delayed_]work_sync() was used.
	 */
	WARN_ON_ONCE(1);
}

static __always_inline void smbdirect_socket_init(struct smbdirect_socket *sc)
{
	/*
	 * This also sets status = SMBDIRECT_SOCKET_CREATED
	 */
	BUILD_BUG_ON(SMBDIRECT_SOCKET_CREATED != 0);
	memset(sc, 0, sizeof(*sc));

	init_waitqueue_head(&sc->status_wait);

	INIT_WORK(&sc->disconnect_work, __smbdirect_socket_disabled_work);
	disable_work_sync(&sc->disconnect_work);

	spin_lock_init(&sc->connect.lock);
	INIT_WORK(&sc->connect.work, __smbdirect_socket_disabled_work);
	disable_work_sync(&sc->connect.work);

	INIT_WORK(&sc->idle.immediate_work, __smbdirect_socket_disabled_work);
	disable_work_sync(&sc->idle.immediate_work);
	INIT_DELAYED_WORK(&sc->idle.timer_work, __smbdirect_socket_disabled_work);
	disable_delayed_work_sync(&sc->idle.timer_work);

	atomic_set(&sc->send_io.bcredits.count, 0);
	init_waitqueue_head(&sc->send_io.bcredits.wait_queue);

	atomic_set(&sc->send_io.lcredits.count, 0);
	init_waitqueue_head(&sc->send_io.lcredits.wait_queue);

	atomic_set(&sc->send_io.credits.count, 0);
	init_waitqueue_head(&sc->send_io.credits.wait_queue);

	atomic_set(&sc->send_io.pending.count, 0);
	init_waitqueue_head(&sc->send_io.pending.dec_wait_queue);
	init_waitqueue_head(&sc->send_io.pending.zero_wait_queue);

	INIT_LIST_HEAD(&sc->recv_io.free.list);
	spin_lock_init(&sc->recv_io.free.lock);

	atomic_set(&sc->recv_io.posted.count, 0);
	INIT_WORK(&sc->recv_io.posted.refill_work, __smbdirect_socket_disabled_work);
	disable_work_sync(&sc->recv_io.posted.refill_work);

	atomic_set(&sc->recv_io.credits.available, 0);
	atomic_set(&sc->recv_io.credits.count, 0);

	INIT_LIST_HEAD(&sc->recv_io.reassembly.list);
	spin_lock_init(&sc->recv_io.reassembly.lock);
	init_waitqueue_head(&sc->recv_io.reassembly.wait_queue);

	atomic_set(&sc->rw_io.credits.count, 0);
	init_waitqueue_head(&sc->rw_io.credits.wait_queue);

	spin_lock_init(&sc->mr_io.all.lock);
	INIT_LIST_HEAD(&sc->mr_io.all.list);
	atomic_set(&sc->mr_io.ready.count, 0);
	init_waitqueue_head(&sc->mr_io.ready.wait_queue);
	atomic_set(&sc->mr_io.used.count, 0);
	INIT_WORK(&sc->mr_io.recovery_work, __smbdirect_socket_disabled_work);
	disable_work_sync(&sc->mr_io.recovery_work);
	init_waitqueue_head(&sc->mr_io.cleanup.wait_queue);
}
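
/*
 * Illustrative sketch (hypothetical handler and workqueue names, not part
 * of this header): a transport typically follows smbdirect_socket_init()
 * by pointing the disabled work items at its real handlers, choosing a
 * workqueue and starting the connect state machine, roughly like:
 *
 *	smbdirect_socket_init(sc);
 *	sc->workqueue = my_workqueue;
 *	INIT_WORK(&sc->disconnect_work, my_disconnect_work);
 *	INIT_WORK(&sc->connect.work, my_connect_work);
 *	INIT_WORK(&sc->idle.immediate_work, my_send_immediate_work);
 *	INIT_DELAYED_WORK(&sc->idle.timer_work, my_idle_timer_work);
 *	INIT_WORK(&sc->recv_io.posted.refill_work, my_post_recv_credits_work);
 *	INIT_WORK(&sc->mr_io.recovery_work, my_mr_recovery_work);
 *	sc->status = SMBDIRECT_SOCKET_RESOLVE_ADDR_NEEDED; // client side
 */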

#define __SMBDIRECT_CHECK_STATUS_FAILED(__sc, __expected_status, __error_cmd, __unexpected_cmd) ({ \
	bool __failed = false; \
	if (unlikely((__sc)->first_error)) { \
		__failed = true; \
		__error_cmd \
	} else if (unlikely((__sc)->status != (__expected_status))) { \
		__failed = true; \
		__unexpected_cmd \
	} \
	__failed; \
})

#define __SMBDIRECT_CHECK_STATUS_WARN(__sc, __expected_status, __unexpected_cmd) \
	__SMBDIRECT_CHECK_STATUS_FAILED(__sc, __expected_status, \
	, \
	{ \
		const struct sockaddr_storage *__src = NULL; \
		const struct sockaddr_storage *__dst = NULL; \
		if ((__sc)->rdma.cm_id) { \
			__src = &(__sc)->rdma.cm_id->route.addr.src_addr; \
			__dst = &(__sc)->rdma.cm_id->route.addr.dst_addr; \
		} \
		WARN_ONCE(1, \
			"expected[%s] != %s first_error=%1pe local=%pISpsfc remote=%pISpsfc\n", \
			smbdirect_socket_status_string(__expected_status), \
			smbdirect_socket_status_string((__sc)->status), \
			SMBDIRECT_DEBUG_ERR_PTR((__sc)->first_error), \
			__src, __dst); \
		__unexpected_cmd \
	})

#define SMBDIRECT_CHECK_STATUS_WARN(__sc, __expected_status) \
	__SMBDIRECT_CHECK_STATUS_WARN(__sc, __expected_status, /* nothing */)

#define SMBDIRECT_CHECK_STATUS_DISCONNECT(__sc, __expected_status) \
	__SMBDIRECT_CHECK_STATUS_WARN(__sc, __expected_status, \
		__SMBDIRECT_SOCKET_DISCONNECT(__sc);)
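
/*
 * Example (illustrative only): a completion or callback path might verify
 * that the socket is still in the state it expects, e.g. in a hypothetical
 * negotiate completion handler:
 *
 *	if (SMBDIRECT_CHECK_STATUS_WARN(sc, SMBDIRECT_SOCKET_NEGOTIATE_RUNNING))
 *		return;
 *	sc->status = SMBDIRECT_SOCKET_CONNECTED;
 *	wake_up(&sc->status_wait);
 *
 * SMBDIRECT_CHECK_STATUS_DISCONNECT() behaves the same but additionally
 * invokes __SMBDIRECT_SOCKET_DISCONNECT(), which this header does not
 * define itself.
 */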

struct smbdirect_send_io {
	struct smbdirect_socket *socket;
	struct ib_cqe cqe;

	/*
	 * The SGE entries for this work request
	 *
	 * The first points to the packet header
	 */
#define SMBDIRECT_SEND_IO_MAX_SGE 6
	size_t num_sge;
	struct ib_sge sge[SMBDIRECT_SEND_IO_MAX_SGE];

	/*
	 * Link to the list of sibling smbdirect_send_io
	 * messages.
	 */
	struct list_head sibling_list;
	struct ib_send_wr wr;

	/* SMBD packet header follows this structure */
	u8 packet[];
};
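
/*
 * Illustrative sketch (not an upstream helper): the first SGE of an
 * smbdirect_send_io describes the packet header stored in the trailing
 * packet[] array, assuming it has already been DMA-mapped to a
 * hypothetical `header_addr` with length `header_len`:
 *
 *	msg->num_sge = 1;
 *	msg->sge[0].addr   = header_addr;
 *	msg->sge[0].length = header_len;
 *	msg->sge[0].lkey   = sc->ib.pd->local_dma_lkey;
 *	msg->wr.sg_list = msg->sge;
 *	msg->wr.num_sge = msg->num_sge;
 *	msg->wr.opcode  = IB_WR_SEND;
 */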

struct smbdirect_send_batch {
	/*
	 * List of smbdirect_send_io messages
	 */
	struct list_head msg_list;
	/*
	 * Number of list entries
	 */
	size_t wr_cnt;

	/*
	 * Possible remote key invalidation state
	 */
	bool need_invalidate_rkey;
	u32 remote_key;

	int credit;
};
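
/*
 * Illustrative sketch (hypothetical stack usage): a sender might build a
 * batch, queue smbdirect_send_io messages on it via their sibling_list and
 * post them all at once:
 *
 *	struct smbdirect_send_batch batch = {
 *		.msg_list = LIST_HEAD_INIT(batch.msg_list),
 *		.need_invalidate_rkey = false,
 *	};
 *
 *	list_add_tail(&msg->sibling_list, &batch.msg_list);
 *	batch.wr_cnt++;
 */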

struct smbdirect_recv_io {
	struct smbdirect_socket *socket;
	struct ib_cqe cqe;

	/*
	 * For now we only use a single SGE
	 * as we have just one large buffer
	 * per posted recv.
	 */
#define SMBDIRECT_RECV_IO_MAX_SGE 1
	struct ib_sge sge;

	/* Link to free or reassembly list */
	struct list_head list;

	/* Indicates whether this is the first packet of a payload */
	bool first_segment;

	/* SMBD packet header and payload follow this structure */
	u8 packet[];
};
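
/*
 * Illustrative sketch (assumes struct smbdirect_data_transfer from
 * smbdirect_pdu.h): a consumer of the reassembly queue typically locates
 * the payload behind the SMBD header stored in packet[]:
 *
 *	struct smbdirect_data_transfer *dt =
 *		(struct smbdirect_data_transfer *)recv->packet;
 *	u32 data_offset = le32_to_cpu(dt->data_offset);
 *	u32 data_length = le32_to_cpu(dt->data_length);
 *	const u8 *payload = recv->packet + data_offset;
 */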

enum smbdirect_mr_state {
	SMBDIRECT_MR_READY,
	SMBDIRECT_MR_REGISTERED,
	SMBDIRECT_MR_INVALIDATED,
	SMBDIRECT_MR_ERROR,
	SMBDIRECT_MR_DISABLED
};

struct smbdirect_mr_io {
	struct smbdirect_socket *socket;
	struct ib_cqe cqe;

	/*
	 * We can have up to two references:
	 * 1. by the connection
	 * 2. by the registration
	 */
	struct kref kref;
	struct mutex mutex;

	struct list_head list;

	enum smbdirect_mr_state state;
	struct ib_mr *mr;
	struct sg_table sgt;
	enum dma_data_direction dir;
	union {
		struct ib_reg_wr wr;
		struct ib_send_wr inv_wr;
	};

	bool need_invalidate;
	struct completion invalidate_done;
};
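
/*
 * Illustrative sketch (hypothetical release function): because an
 * smbdirect_mr_io can be referenced by both the connection and an active
 * registration, it is only freed once the last kref is dropped:
 *
 *	static void example_mr_free(struct kref *kref)
 *	{
 *		struct smbdirect_mr_io *mr =
 *			container_of(kref, struct smbdirect_mr_io, kref);
 *
 *		ib_dereg_mr(mr->mr);
 *		kfree(mr);
 *	}
 *
 *	kref_put(&mr->kref, example_mr_free);
 */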

struct smbdirect_rw_io {
	struct smbdirect_socket *socket;
	struct ib_cqe cqe;

	struct list_head list;

	int error;
	struct completion *completion;

	struct rdma_rw_ctx rdma_ctx;
	struct sg_table sgt;
	struct scatterlist sg_list[];
};
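
/*
 * Illustrative sketch (hypothetical allocation): the trailing sg_list[]
 * flexible array is sized for the number of scatterlist entries a request
 * needs, e.g.:
 *
 *	msg = kzalloc(struct_size(msg, sg_list, nents), GFP_KERNEL);
 *	if (!msg)
 *		return -ENOMEM;
 *	sg_init_table(msg->sg_list, nents);
 *	msg->sgt.sgl = msg->sg_list;
 *	msg->sgt.orig_nents = nents;
 */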

#endif /* __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_SOCKET_H__ */