xref: /linux/fs/smb/common/smbdirect/smbdirect_socket.h (revision 566771afc7a81e343da9939f0bd848d3622e2501)
1 /* SPDX-License-Identifier: GPL-2.0-or-later */
2 /*
3  *   Copyright (c) 2025 Stefan Metzmacher
4  */
5 
6 #ifndef __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_SOCKET_H__
7 #define __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_SOCKET_H__
8 
9 #include <rdma/rw.h>
10 
/*
 * The life-cycle states of an SMB-Direct connection.
 *
 * The RESOLVE_ADDR/RESOLVE_ROUTE/RDMA_CONNECT/NEGOTIATE triples each
 * model one connection-setup step as NEEDED -> RUNNING -> (next step
 * or FAILED); the transitions themselves are driven by the client and
 * server transport code, not by this header.  CONNECTED is the
 * operational state, the remaining states describe error handling
 * and teardown.
 */
enum smbdirect_socket_status {
	/* must remain 0: smbdirect_socket_init() relies on memset() */
	SMBDIRECT_SOCKET_CREATED,
	SMBDIRECT_SOCKET_RESOLVE_ADDR_NEEDED,
	SMBDIRECT_SOCKET_RESOLVE_ADDR_RUNNING,
	SMBDIRECT_SOCKET_RESOLVE_ADDR_FAILED,
	SMBDIRECT_SOCKET_RESOLVE_ROUTE_NEEDED,
	SMBDIRECT_SOCKET_RESOLVE_ROUTE_RUNNING,
	SMBDIRECT_SOCKET_RESOLVE_ROUTE_FAILED,
	SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED,
	SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING,
	SMBDIRECT_SOCKET_RDMA_CONNECT_FAILED,
	SMBDIRECT_SOCKET_NEGOTIATE_NEEDED,
	SMBDIRECT_SOCKET_NEGOTIATE_RUNNING,
	SMBDIRECT_SOCKET_NEGOTIATE_FAILED,
	/* the SMB-Direct negotiation completed, I/O is possible */
	SMBDIRECT_SOCKET_CONNECTED,
	SMBDIRECT_SOCKET_ERROR,
	SMBDIRECT_SOCKET_DISCONNECTING,
	SMBDIRECT_SOCKET_DISCONNECTED,
	SMBDIRECT_SOCKET_DESTROYED
};
31 
32 static __always_inline
smbdirect_socket_status_string(enum smbdirect_socket_status status)33 const char *smbdirect_socket_status_string(enum smbdirect_socket_status status)
34 {
35 	switch (status) {
36 	case SMBDIRECT_SOCKET_CREATED:
37 		return "CREATED";
38 	case SMBDIRECT_SOCKET_RESOLVE_ADDR_NEEDED:
39 		return "RESOLVE_ADDR_NEEDED";
40 	case SMBDIRECT_SOCKET_RESOLVE_ADDR_RUNNING:
41 		return "RESOLVE_ADDR_RUNNING";
42 	case SMBDIRECT_SOCKET_RESOLVE_ADDR_FAILED:
43 		return "RESOLVE_ADDR_FAILED";
44 	case SMBDIRECT_SOCKET_RESOLVE_ROUTE_NEEDED:
45 		return "RESOLVE_ROUTE_NEEDED";
46 	case SMBDIRECT_SOCKET_RESOLVE_ROUTE_RUNNING:
47 		return "RESOLVE_ROUTE_RUNNING";
48 	case SMBDIRECT_SOCKET_RESOLVE_ROUTE_FAILED:
49 		return "RESOLVE_ROUTE_FAILED";
50 	case SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED:
51 		return "RDMA_CONNECT_NEEDED";
52 	case SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING:
53 		return "RDMA_CONNECT_RUNNING";
54 	case SMBDIRECT_SOCKET_RDMA_CONNECT_FAILED:
55 		return "RDMA_CONNECT_FAILED";
56 	case SMBDIRECT_SOCKET_NEGOTIATE_NEEDED:
57 		return "NEGOTIATE_NEEDED";
58 	case SMBDIRECT_SOCKET_NEGOTIATE_RUNNING:
59 		return "NEGOTIATE_RUNNING";
60 	case SMBDIRECT_SOCKET_NEGOTIATE_FAILED:
61 		return "NEGOTIATE_FAILED";
62 	case SMBDIRECT_SOCKET_CONNECTED:
63 		return "CONNECTED";
64 	case SMBDIRECT_SOCKET_ERROR:
65 		return "ERROR";
66 	case SMBDIRECT_SOCKET_DISCONNECTING:
67 		return "DISCONNECTING";
68 	case SMBDIRECT_SOCKET_DISCONNECTED:
69 		return "DISCONNECTED";
70 	case SMBDIRECT_SOCKET_DESTROYED:
71 		return "DESTROYED";
72 	}
73 
74 	return "<unknown>";
75 }
76 
/*
 * Keepalive progress on an idle connection.
 *
 * NOTE(review): the names suggest NONE -> PENDING (a keepalive
 * request should be sent) -> SENT (waiting for the peer's reply);
 * confirm the exact transitions in the transport code that owns
 * smbdirect_socket.idle.
 */
enum smbdirect_keepalive_status {
	SMBDIRECT_KEEPALIVE_NONE,
	SMBDIRECT_KEEPALIVE_PENDING,
	SMBDIRECT_KEEPALIVE_SENT
};
82 
/*
 * The full state of an SMB-Direct connection, shared between the
 * client and server implementations in fs/smb.
 */
struct smbdirect_socket {
	/* the current state in the connection state machine */
	enum smbdirect_socket_status status;
	/* presumably woken on status changes — confirm in the transport code */
	wait_queue_head_t status_wait;
	/* the first error recorded on this socket; 0 if none so far */
	int first_error;

	/*
	 * This points to the workqueue to
	 * be used for this socket.
	 * It can be per socket (on the client)
	 * or point to a global workqueue (on the server)
	 */
	struct workqueue_struct *workqueue;

	/*
	 * Work item for disconnect handling; initialized disabled with a
	 * stub handler, the real handler is installed by the socket's user
	 * (see smbdirect_socket_init()).
	 */
	struct work_struct disconnect_work;

	/* RDMA related */
	struct {
		struct rdma_cm_id *cm_id;
		/*
		 * This is for iWarp MPA v1
		 */
		bool legacy_iwarp;
	} rdma;

	/* IB verbs related */
	struct {
		struct ib_pd *pd;
		struct ib_cq *send_cq;
		struct ib_cq *recv_cq;

		/*
		 * shortcuts for rdma.cm_id->{qp,device};
		 */
		struct ib_qp *qp;
		struct ib_device *dev;
	} ib;

	/* the parameters of this connection (declared elsewhere) */
	struct smbdirect_socket_parameters parameters;

	/*
	 * The state for keepalive and timeout handling
	 */
	struct {
		enum smbdirect_keepalive_status keepalive;
		struct work_struct immediate_work;
		struct delayed_work timer_work;
	} idle;

	/*
	 * The state for posted send buffers
	 */
	struct {
		/*
		 * Memory pools for preallocating
		 * smbdirect_send_io buffers
		 */
		struct {
			struct kmem_cache	*cache;
			mempool_t		*pool;
		} mem;

		/*
		 * The local credit state for ib_post_send()
		 */
		struct {
			atomic_t count;
			wait_queue_head_t wait_queue;
		} lcredits;

		/*
		 * The remote credit state for the send side
		 */
		struct {
			atomic_t count;
			wait_queue_head_t wait_queue;
		} credits;

		/*
		 * The state about posted/pending sends
		 */
		struct {
			atomic_t count;
			/*
			 * woken when count is decremented
			 */
			wait_queue_head_t dec_wait_queue;
			/*
			 * woken when count reached zero
			 */
			wait_queue_head_t zero_wait_queue;
		} pending;
	} send_io;

	/*
	 * The state for posted receive buffers
	 */
	struct {
		/*
		 * The type of PDU we are expecting
		 */
		enum {
			SMBDIRECT_EXPECT_NEGOTIATE_REQ = 1,
			SMBDIRECT_EXPECT_NEGOTIATE_REP = 2,
			SMBDIRECT_EXPECT_DATA_TRANSFER = 3,
		} expected;

		/*
		 * Memory pools for preallocating
		 * smbdirect_recv_io buffers
		 */
		struct {
			struct kmem_cache	*cache;
			mempool_t		*pool;
		} mem;

		/*
		 * The list of free smbdirect_recv_io
		 * structures
		 */
		struct {
			struct list_head list;
			spinlock_t lock;
		} free;

		/*
		 * The state for posted recv_io messages
		 * and the refill work struct.
		 */
		struct {
			atomic_t count;
			struct work_struct refill_work;
		} posted;

		/*
		 * The credit state for the recv side
		 */
		struct {
			u16 target;
			atomic_t count;
		} credits;

		/*
		 * The list of arrived non-empty smbdirect_recv_io
		 * structures
		 *
		 * This represents the reassembly queue.
		 */
		struct {
			struct list_head list;
			spinlock_t lock;
			wait_queue_head_t wait_queue;
			/* total data length of reassembly queue */
			int data_length;
			int queue_length;
			/* the offset to first buffer in reassembly queue */
			int first_entry_offset;
			/*
			 * Indicate if we have received a full packet on the
			 * connection This is used to identify the first SMBD
			 * packet of a assembled payload (SMB packet) in
			 * reassembly queue so we can return a RFC1002 length to
			 * upper layer to indicate the length of the SMB packet
			 * received
			 */
			bool full_packet_received;
		} reassembly;
	} recv_io;

	/*
	 * The state for Memory registrations on the client
	 */
	struct {
		enum ib_mr_type type;

		/*
		 * The list of free smbdirect_mr_io
		 * structures
		 */
		struct {
			struct list_head list;
			spinlock_t lock;
		} all;

		/*
		 * The number of available MRs ready for memory registration
		 */
		struct {
			atomic_t count;
			wait_queue_head_t wait_queue;
		} ready;

		/*
		 * The number of used MRs
		 */
		struct {
			atomic_t count;
		} used;

		struct work_struct recovery_work;

		/* Used by transport to wait until all MRs are returned */
		struct {
			wait_queue_head_t wait_queue;
		} cleanup;
	} mr_io;

	/*
	 * The state for RDMA read/write requests on the server
	 */
	struct {
		/*
		 * The credit state for the send side
		 */
		struct {
			/*
			 * The maximum number of rw credits
			 */
			size_t max;
			/*
			 * The number of pages per credit
			 */
			size_t num_pages;
			atomic_t count;
			wait_queue_head_t wait_queue;
		} credits;
	} rw_io;

	/*
	 * For debug purposes
	 */
	struct {
		u64 get_receive_buffer;
		u64 put_receive_buffer;
		u64 enqueue_reassembly_queue;
		u64 dequeue_reassembly_queue;
		u64 send_empty;
	} statistics;
};
321 
/*
 * Stub handler installed on every work item by smbdirect_socket_init().
 * The items are disabled right after initialization, so this must
 * never run; the real handlers are installed later by the socket's
 * user.
 */
static void __smbdirect_socket_disabled_work(struct work_struct *work)
{
	/*
	 * Should never be called as disable_[delayed_]work_sync() was used.
	 */
	WARN_ON_ONCE(1);
}
329 
smbdirect_socket_init(struct smbdirect_socket * sc)330 static __always_inline void smbdirect_socket_init(struct smbdirect_socket *sc)
331 {
332 	/*
333 	 * This also sets status = SMBDIRECT_SOCKET_CREATED
334 	 */
335 	BUILD_BUG_ON(SMBDIRECT_SOCKET_CREATED != 0);
336 	memset(sc, 0, sizeof(*sc));
337 
338 	init_waitqueue_head(&sc->status_wait);
339 
340 	INIT_WORK(&sc->disconnect_work, __smbdirect_socket_disabled_work);
341 	disable_work_sync(&sc->disconnect_work);
342 
343 	INIT_WORK(&sc->idle.immediate_work, __smbdirect_socket_disabled_work);
344 	disable_work_sync(&sc->idle.immediate_work);
345 	INIT_DELAYED_WORK(&sc->idle.timer_work, __smbdirect_socket_disabled_work);
346 	disable_delayed_work_sync(&sc->idle.timer_work);
347 
348 	atomic_set(&sc->send_io.lcredits.count, 0);
349 	init_waitqueue_head(&sc->send_io.lcredits.wait_queue);
350 
351 	atomic_set(&sc->send_io.credits.count, 0);
352 	init_waitqueue_head(&sc->send_io.credits.wait_queue);
353 
354 	atomic_set(&sc->send_io.pending.count, 0);
355 	init_waitqueue_head(&sc->send_io.pending.dec_wait_queue);
356 	init_waitqueue_head(&sc->send_io.pending.zero_wait_queue);
357 
358 	INIT_LIST_HEAD(&sc->recv_io.free.list);
359 	spin_lock_init(&sc->recv_io.free.lock);
360 
361 	atomic_set(&sc->recv_io.posted.count, 0);
362 	INIT_WORK(&sc->recv_io.posted.refill_work, __smbdirect_socket_disabled_work);
363 	disable_work_sync(&sc->recv_io.posted.refill_work);
364 
365 	atomic_set(&sc->recv_io.credits.count, 0);
366 
367 	INIT_LIST_HEAD(&sc->recv_io.reassembly.list);
368 	spin_lock_init(&sc->recv_io.reassembly.lock);
369 	init_waitqueue_head(&sc->recv_io.reassembly.wait_queue);
370 
371 	atomic_set(&sc->rw_io.credits.count, 0);
372 	init_waitqueue_head(&sc->rw_io.credits.wait_queue);
373 
374 	spin_lock_init(&sc->mr_io.all.lock);
375 	INIT_LIST_HEAD(&sc->mr_io.all.list);
376 	atomic_set(&sc->mr_io.ready.count, 0);
377 	init_waitqueue_head(&sc->mr_io.ready.wait_queue);
378 	atomic_set(&sc->mr_io.used.count, 0);
379 	INIT_WORK(&sc->mr_io.recovery_work, __smbdirect_socket_disabled_work);
380 	disable_work_sync(&sc->mr_io.recovery_work);
381 	init_waitqueue_head(&sc->mr_io.cleanup.wait_queue);
382 }
383 
/*
 * One send work request; instances are preallocated via
 * smbdirect_socket.send_io.mem, with the SMBD packet header
 * carried in the trailing flexible array.
 */
struct smbdirect_send_io {
	/* the connection this request belongs to */
	struct smbdirect_socket *socket;
	/* completion queue entry for this work request */
	struct ib_cqe cqe;

	/*
	 * The SGE entries for this work request
	 *
	 * The first points to the packet header
	 */
#define SMBDIRECT_SEND_IO_MAX_SGE 6
	size_t num_sge;
	struct ib_sge sge[SMBDIRECT_SEND_IO_MAX_SGE];

	/*
	 * Link to the list of sibling smbdirect_send_io
	 * messages.
	 */
	struct list_head sibling_list;
	struct ib_send_wr wr;

	/* SMBD packet header follows this structure */
	u8 packet[];
};
407 
/*
 * A batch of smbdirect_send_io messages that are submitted
 * together (linked via smbdirect_send_io.sibling_list).
 */
struct smbdirect_send_batch {
	/*
	 * List of smbdirect_send_io messages
	 */
	struct list_head msg_list;
	/*
	 * Number of list entries
	 */
	size_t wr_cnt;

	/*
	 * Possible remote key invalidation state
	 */
	bool need_invalidate_rkey;
	u32 remote_key;
};
424 
/*
 * One posted receive buffer; instances are preallocated via
 * smbdirect_socket.recv_io.mem and travel between the free
 * list and the reassembly queue.
 */
struct smbdirect_recv_io {
	/* the connection this buffer belongs to */
	struct smbdirect_socket *socket;
	/* completion queue entry for this work request */
	struct ib_cqe cqe;

	/*
	 * For now we only use a single SGE
	 * as we have just one large buffer
	 * per posted recv.
	 */
#define SMBDIRECT_RECV_IO_MAX_SGE 1
	struct ib_sge sge;

	/* Link to free or reassembly list */
	struct list_head list;

	/* Indicate if this is the 1st packet of a payload */
	bool first_segment;

	/* SMBD packet header and payload follows this structure */
	u8 packet[];
};
446 
/*
 * The state of a single memory registration (smbdirect_mr_io).
 * NOTE(review): the transitions between these states are driven
 * by the client MR code, not visible in this header.
 */
enum smbdirect_mr_state {
	SMBDIRECT_MR_READY,
	SMBDIRECT_MR_REGISTERED,
	SMBDIRECT_MR_INVALIDATED,
	SMBDIRECT_MR_ERROR,
	SMBDIRECT_MR_DISABLED
};
454 
/*
 * A memory registration used on the client side
 * (see smbdirect_socket.mr_io).
 */
struct smbdirect_mr_io {
	/* the connection this registration belongs to */
	struct smbdirect_socket *socket;
	/* completion queue entry for the reg/inv work requests */
	struct ib_cqe cqe;

	/*
	 * We can have up to two references:
	 * 1. by the connection
	 * 2. by the registration
	 */
	struct kref kref;
	struct mutex mutex;

	/* link into smbdirect_socket.mr_io.all.list */
	struct list_head list;

	enum smbdirect_mr_state state;
	struct ib_mr *mr;
	struct sg_table sgt;
	/* DMA direction of the mapped buffers */
	enum dma_data_direction dir;
	/* registration and invalidation requests are mutually exclusive */
	union {
		struct ib_reg_wr wr;
		struct ib_send_wr inv_wr;
	};

	bool need_invalidate;
	struct completion invalidate_done;
};
481 
/*
 * One RDMA read/write request, used on the server side
 * (see smbdirect_socket.rw_io).
 */
struct smbdirect_rw_io {
	/* the connection this request belongs to */
	struct smbdirect_socket *socket;
	/* completion queue entry for this work request */
	struct ib_cqe cqe;

	struct list_head list;

	/* result of the operation; 0 on success */
	int error;
	/* signalled when the operation finished */
	struct completion *completion;

	struct rdma_rw_ctx rdma_ctx;
	struct sg_table sgt;
	/* scatterlist storage; NOTE(review): presumably backing sgt — confirm in users */
	struct scatterlist sg_list[];
};
495 
496 #endif /* __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_SOCKET_H__ */
497