xref: /linux/net/smc/smc_close.c (revision e3b9f1e81de2083f359bacd2a94bf1c024f2ede0)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  *  Shared Memory Communications over RDMA (SMC-R) and RoCE
4  *
5  *  Socket Closing - normal and abnormal
6  *
7  *  Copyright IBM Corp. 2016
8  *
9  *  Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
10  */
11 
12 #include <linux/workqueue.h>
13 #include <linux/sched/signal.h>
14 
15 #include <net/sock.h>
16 
17 #include "smc.h"
18 #include "smc_tx.h"
19 #include "smc_cdc.h"
20 #include "smc_close.h"
21 
/* Upper bound (5 * HZ jiffies) on how long smc_close_wait_listen_clcsock()
 * waits for smc->clcsock to be cleared while a listen socket is closed.
 */
#define SMC_CLOSE_WAIT_LISTEN_CLCSOCK_TIME	(5 * HZ)
23 
24 static void smc_close_cleanup_listen(struct sock *parent)
25 {
26 	struct sock *sk;
27 
28 	/* Close non-accepted connections */
29 	while ((sk = smc_accept_dequeue(parent, NULL)))
30 		smc_close_non_accepted(sk);
31 }
32 
33 static void smc_close_wait_listen_clcsock(struct smc_sock *smc)
34 {
35 	DEFINE_WAIT_FUNC(wait, woken_wake_function);
36 	struct sock *sk = &smc->sk;
37 	signed long timeout;
38 
39 	timeout = SMC_CLOSE_WAIT_LISTEN_CLCSOCK_TIME;
40 	add_wait_queue(sk_sleep(sk), &wait);
41 	do {
42 		release_sock(sk);
43 		if (smc->clcsock)
44 			timeout = wait_woken(&wait, TASK_UNINTERRUPTIBLE,
45 					     timeout);
46 		sched_annotate_sleep();
47 		lock_sock(sk);
48 		if (!smc->clcsock)
49 			break;
50 	} while (timeout);
51 	remove_wait_queue(sk_sleep(sk), &wait);
52 }
53 
54 /* wait for sndbuf data being transmitted */
55 static void smc_close_stream_wait(struct smc_sock *smc, long timeout)
56 {
57 	DEFINE_WAIT_FUNC(wait, woken_wake_function);
58 	struct sock *sk = &smc->sk;
59 
60 	if (!timeout)
61 		return;
62 
63 	if (!smc_tx_prepared_sends(&smc->conn))
64 		return;
65 
66 	smc->wait_close_tx_prepared = 1;
67 	add_wait_queue(sk_sleep(sk), &wait);
68 	while (!signal_pending(current) && timeout) {
69 		int rc;
70 
71 		rc = sk_wait_event(sk, &timeout,
72 				   !smc_tx_prepared_sends(&smc->conn) ||
73 				   (sk->sk_err == ECONNABORTED) ||
74 				   (sk->sk_err == ECONNRESET),
75 				   &wait);
76 		if (rc)
77 			break;
78 	}
79 	remove_wait_queue(sk_sleep(sk), &wait);
80 	smc->wait_close_tx_prepared = 0;
81 }
82 
83 void smc_close_wake_tx_prepared(struct smc_sock *smc)
84 {
85 	if (smc->wait_close_tx_prepared)
86 		/* wake up socket closing */
87 		smc->sk.sk_state_change(&smc->sk);
88 }
89 
90 static int smc_close_wr(struct smc_connection *conn)
91 {
92 	conn->local_tx_ctrl.conn_state_flags.peer_done_writing = 1;
93 
94 	return smc_cdc_get_slot_and_msg_send(conn);
95 }
96 
97 static int smc_close_final(struct smc_connection *conn)
98 {
99 	if (atomic_read(&conn->bytes_to_rcv))
100 		conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
101 	else
102 		conn->local_tx_ctrl.conn_state_flags.peer_conn_closed = 1;
103 
104 	return smc_cdc_get_slot_and_msg_send(conn);
105 }
106 
107 static int smc_close_abort(struct smc_connection *conn)
108 {
109 	conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
110 
111 	return smc_cdc_get_slot_and_msg_send(conn);
112 }
113 
114 /* terminate smc socket abnormally - active abort
115  * link group is terminated, i.e. RDMA communication no longer possible
116  */
117 static void smc_close_active_abort(struct smc_sock *smc)
118 {
119 	struct sock *sk = &smc->sk;
120 
121 	struct smc_cdc_conn_state_flags *txflags =
122 		&smc->conn.local_tx_ctrl.conn_state_flags;
123 
124 	sk->sk_err = ECONNABORTED;
125 	if (smc->clcsock && smc->clcsock->sk) {
126 		smc->clcsock->sk->sk_err = ECONNABORTED;
127 		smc->clcsock->sk->sk_state_change(smc->clcsock->sk);
128 	}
129 	switch (sk->sk_state) {
130 	case SMC_INIT:
131 	case SMC_ACTIVE:
132 		sk->sk_state = SMC_PEERABORTWAIT;
133 		release_sock(sk);
134 		cancel_delayed_work_sync(&smc->conn.tx_work);
135 		lock_sock(sk);
136 		sock_put(sk); /* passive closing */
137 		break;
138 	case SMC_APPCLOSEWAIT1:
139 	case SMC_APPCLOSEWAIT2:
140 		if (!smc_cdc_rxed_any_close(&smc->conn))
141 			sk->sk_state = SMC_PEERABORTWAIT;
142 		else
143 			sk->sk_state = SMC_CLOSED;
144 		release_sock(sk);
145 		cancel_delayed_work_sync(&smc->conn.tx_work);
146 		lock_sock(sk);
147 		break;
148 	case SMC_PEERCLOSEWAIT1:
149 	case SMC_PEERCLOSEWAIT2:
150 		if (!txflags->peer_conn_closed) {
151 			/* just SHUTDOWN_SEND done */
152 			sk->sk_state = SMC_PEERABORTWAIT;
153 		} else {
154 			sk->sk_state = SMC_CLOSED;
155 		}
156 		sock_put(sk); /* passive closing */
157 		break;
158 	case SMC_PROCESSABORT:
159 	case SMC_APPFINCLOSEWAIT:
160 		sk->sk_state = SMC_CLOSED;
161 		break;
162 	case SMC_PEERFINCLOSEWAIT:
163 		sock_put(sk); /* passive closing */
164 		break;
165 	case SMC_PEERABORTWAIT:
166 	case SMC_CLOSED:
167 		break;
168 	}
169 
170 	sock_set_flag(sk, SOCK_DEAD);
171 	sk->sk_state_change(sk);
172 }
173 
174 static inline bool smc_close_sent_any_close(struct smc_connection *conn)
175 {
176 	return conn->local_tx_ctrl.conn_state_flags.peer_conn_abort ||
177 	       conn->local_tx_ctrl.conn_state_flags.peer_conn_closed;
178 }
179 
180 int smc_close_active(struct smc_sock *smc)
181 {
182 	struct smc_cdc_conn_state_flags *txflags =
183 		&smc->conn.local_tx_ctrl.conn_state_flags;
184 	struct smc_connection *conn = &smc->conn;
185 	struct sock *sk = &smc->sk;
186 	int old_state;
187 	long timeout;
188 	int rc = 0;
189 
190 	timeout = current->flags & PF_EXITING ?
191 		  0 : sock_flag(sk, SOCK_LINGER) ?
192 		      sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT;
193 
194 	old_state = sk->sk_state;
195 again:
196 	switch (sk->sk_state) {
197 	case SMC_INIT:
198 		sk->sk_state = SMC_CLOSED;
199 		break;
200 	case SMC_LISTEN:
201 		sk->sk_state = SMC_CLOSED;
202 		sk->sk_state_change(sk); /* wake up accept */
203 		if (smc->clcsock && smc->clcsock->sk) {
204 			rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR);
205 			/* wake up kernel_accept of smc_tcp_listen_worker */
206 			smc->clcsock->sk->sk_data_ready(smc->clcsock->sk);
207 			smc_close_wait_listen_clcsock(smc);
208 		}
209 		smc_close_cleanup_listen(sk);
210 		break;
211 	case SMC_ACTIVE:
212 		smc_close_stream_wait(smc, timeout);
213 		release_sock(sk);
214 		cancel_delayed_work_sync(&conn->tx_work);
215 		lock_sock(sk);
216 		if (sk->sk_state == SMC_ACTIVE) {
217 			/* send close request */
218 			rc = smc_close_final(conn);
219 			if (rc)
220 				break;
221 			sk->sk_state = SMC_PEERCLOSEWAIT1;
222 		} else {
223 			/* peer event has changed the state */
224 			goto again;
225 		}
226 		break;
227 	case SMC_APPFINCLOSEWAIT:
228 		/* socket already shutdown wr or both (active close) */
229 		if (txflags->peer_done_writing &&
230 		    !smc_close_sent_any_close(conn)) {
231 			/* just shutdown wr done, send close request */
232 			rc = smc_close_final(conn);
233 			if (rc)
234 				break;
235 		}
236 		sk->sk_state = SMC_CLOSED;
237 		break;
238 	case SMC_APPCLOSEWAIT1:
239 	case SMC_APPCLOSEWAIT2:
240 		if (!smc_cdc_rxed_any_close(conn))
241 			smc_close_stream_wait(smc, timeout);
242 		release_sock(sk);
243 		cancel_delayed_work_sync(&conn->tx_work);
244 		lock_sock(sk);
245 		if (sk->sk_state != SMC_APPCLOSEWAIT1 &&
246 		    sk->sk_state != SMC_APPCLOSEWAIT2)
247 			goto again;
248 		/* confirm close from peer */
249 		rc = smc_close_final(conn);
250 		if (rc)
251 			break;
252 		if (smc_cdc_rxed_any_close(conn)) {
253 			/* peer has closed the socket already */
254 			sk->sk_state = SMC_CLOSED;
255 			sock_put(sk); /* postponed passive closing */
256 		} else {
257 			/* peer has just issued a shutdown write */
258 			sk->sk_state = SMC_PEERFINCLOSEWAIT;
259 		}
260 		break;
261 	case SMC_PEERCLOSEWAIT1:
262 	case SMC_PEERCLOSEWAIT2:
263 		if (txflags->peer_done_writing &&
264 		    !smc_close_sent_any_close(conn)) {
265 			/* just shutdown wr done, send close request */
266 			rc = smc_close_final(conn);
267 			if (rc)
268 				break;
269 		}
270 		/* peer sending PeerConnectionClosed will cause transition */
271 		break;
272 	case SMC_PEERFINCLOSEWAIT:
273 		/* peer sending PeerConnectionClosed will cause transition */
274 		break;
275 	case SMC_PROCESSABORT:
276 		smc_close_abort(conn);
277 		sk->sk_state = SMC_CLOSED;
278 		break;
279 	case SMC_PEERABORTWAIT:
280 	case SMC_CLOSED:
281 		/* nothing to do, add tracing in future patch */
282 		break;
283 	}
284 
285 	if (old_state != sk->sk_state)
286 		sk->sk_state_change(sk);
287 	return rc;
288 }
289 
290 static void smc_close_passive_abort_received(struct smc_sock *smc)
291 {
292 	struct smc_cdc_conn_state_flags *txflags =
293 		&smc->conn.local_tx_ctrl.conn_state_flags;
294 	struct sock *sk = &smc->sk;
295 
296 	switch (sk->sk_state) {
297 	case SMC_INIT:
298 	case SMC_ACTIVE:
299 	case SMC_APPCLOSEWAIT1:
300 		sk->sk_state = SMC_PROCESSABORT;
301 		sock_put(sk); /* passive closing */
302 		break;
303 	case SMC_APPFINCLOSEWAIT:
304 		sk->sk_state = SMC_PROCESSABORT;
305 		break;
306 	case SMC_PEERCLOSEWAIT1:
307 	case SMC_PEERCLOSEWAIT2:
308 		if (txflags->peer_done_writing &&
309 		    !smc_close_sent_any_close(&smc->conn))
310 			/* just shutdown, but not yet closed locally */
311 			sk->sk_state = SMC_PROCESSABORT;
312 		else
313 			sk->sk_state = SMC_CLOSED;
314 		sock_put(sk); /* passive closing */
315 		break;
316 	case SMC_APPCLOSEWAIT2:
317 	case SMC_PEERFINCLOSEWAIT:
318 		sk->sk_state = SMC_CLOSED;
319 		sock_put(sk); /* passive closing */
320 		break;
321 	case SMC_PEERABORTWAIT:
322 		sk->sk_state = SMC_CLOSED;
323 		break;
324 	case SMC_PROCESSABORT:
325 	/* nothing to do, add tracing in future patch */
326 		break;
327 	}
328 }
329 
330 /* Either some kind of closing has been received: peer_conn_closed,
331  * peer_conn_abort, or peer_done_writing
332  * or the link group of the connection terminates abnormally.
333  */
334 static void smc_close_passive_work(struct work_struct *work)
335 {
336 	struct smc_connection *conn = container_of(work,
337 						   struct smc_connection,
338 						   close_work);
339 	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
340 	struct smc_cdc_conn_state_flags *rxflags;
341 	struct sock *sk = &smc->sk;
342 	int old_state;
343 
344 	lock_sock(sk);
345 	old_state = sk->sk_state;
346 
347 	if (!conn->alert_token_local) {
348 		/* abnormal termination */
349 		smc_close_active_abort(smc);
350 		goto wakeup;
351 	}
352 
353 	rxflags = &conn->local_rx_ctrl.conn_state_flags;
354 	if (rxflags->peer_conn_abort) {
355 		/* peer has not received all data */
356 		smc_close_passive_abort_received(smc);
357 		release_sock(&smc->sk);
358 		cancel_delayed_work_sync(&conn->tx_work);
359 		lock_sock(&smc->sk);
360 		goto wakeup;
361 	}
362 
363 	switch (sk->sk_state) {
364 	case SMC_INIT:
365 		if (atomic_read(&conn->bytes_to_rcv) ||
366 		    (rxflags->peer_done_writing &&
367 		     !smc_cdc_rxed_any_close(conn))) {
368 			sk->sk_state = SMC_APPCLOSEWAIT1;
369 		} else {
370 			sk->sk_state = SMC_CLOSED;
371 			sock_put(sk); /* passive closing */
372 		}
373 		break;
374 	case SMC_ACTIVE:
375 		sk->sk_state = SMC_APPCLOSEWAIT1;
376 		/* postpone sock_put() for passive closing to cover
377 		 * received SEND_SHUTDOWN as well
378 		 */
379 		break;
380 	case SMC_PEERCLOSEWAIT1:
381 		if (rxflags->peer_done_writing)
382 			sk->sk_state = SMC_PEERCLOSEWAIT2;
383 		/* fall through */
384 		/* to check for closing */
385 	case SMC_PEERCLOSEWAIT2:
386 		if (!smc_cdc_rxed_any_close(conn))
387 			break;
388 		if (sock_flag(sk, SOCK_DEAD) &&
389 		    smc_close_sent_any_close(conn)) {
390 			/* smc_release has already been called locally */
391 			sk->sk_state = SMC_CLOSED;
392 		} else {
393 			/* just shutdown, but not yet closed locally */
394 			sk->sk_state = SMC_APPFINCLOSEWAIT;
395 		}
396 		sock_put(sk); /* passive closing */
397 		break;
398 	case SMC_PEERFINCLOSEWAIT:
399 		if (smc_cdc_rxed_any_close(conn)) {
400 			sk->sk_state = SMC_CLOSED;
401 			sock_put(sk); /* passive closing */
402 		}
403 		break;
404 	case SMC_APPCLOSEWAIT1:
405 	case SMC_APPCLOSEWAIT2:
406 		/* postpone sock_put() for passive closing to cover
407 		 * received SEND_SHUTDOWN as well
408 		 */
409 		break;
410 	case SMC_APPFINCLOSEWAIT:
411 	case SMC_PEERABORTWAIT:
412 	case SMC_PROCESSABORT:
413 	case SMC_CLOSED:
414 		/* nothing to do, add tracing in future patch */
415 		break;
416 	}
417 
418 wakeup:
419 	sk->sk_data_ready(sk); /* wakeup blocked rcvbuf consumers */
420 	sk->sk_write_space(sk); /* wakeup blocked sndbuf producers */
421 
422 	if (old_state != sk->sk_state) {
423 		sk->sk_state_change(sk);
424 		if ((sk->sk_state == SMC_CLOSED) &&
425 		    (sock_flag(sk, SOCK_DEAD) || !sk->sk_socket))
426 			smc_conn_free(conn);
427 	}
428 	release_sock(sk);
429 	sock_put(sk); /* sock_hold done by schedulers of close_work */
430 }
431 
432 int smc_close_shutdown_write(struct smc_sock *smc)
433 {
434 	struct smc_connection *conn = &smc->conn;
435 	struct sock *sk = &smc->sk;
436 	int old_state;
437 	long timeout;
438 	int rc = 0;
439 
440 	timeout = current->flags & PF_EXITING ?
441 		  0 : sock_flag(sk, SOCK_LINGER) ?
442 		      sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT;
443 
444 	old_state = sk->sk_state;
445 again:
446 	switch (sk->sk_state) {
447 	case SMC_ACTIVE:
448 		smc_close_stream_wait(smc, timeout);
449 		release_sock(sk);
450 		cancel_delayed_work_sync(&conn->tx_work);
451 		lock_sock(sk);
452 		if (sk->sk_state != SMC_ACTIVE)
453 			goto again;
454 		/* send close wr request */
455 		rc = smc_close_wr(conn);
456 		if (rc)
457 			break;
458 		sk->sk_state = SMC_PEERCLOSEWAIT1;
459 		break;
460 	case SMC_APPCLOSEWAIT1:
461 		/* passive close */
462 		if (!smc_cdc_rxed_any_close(conn))
463 			smc_close_stream_wait(smc, timeout);
464 		release_sock(sk);
465 		cancel_delayed_work_sync(&conn->tx_work);
466 		lock_sock(sk);
467 		if (sk->sk_state != SMC_APPCLOSEWAIT1)
468 			goto again;
469 		/* confirm close from peer */
470 		rc = smc_close_wr(conn);
471 		if (rc)
472 			break;
473 		sk->sk_state = SMC_APPCLOSEWAIT2;
474 		break;
475 	case SMC_APPCLOSEWAIT2:
476 	case SMC_PEERFINCLOSEWAIT:
477 	case SMC_PEERCLOSEWAIT1:
478 	case SMC_PEERCLOSEWAIT2:
479 	case SMC_APPFINCLOSEWAIT:
480 	case SMC_PROCESSABORT:
481 	case SMC_PEERABORTWAIT:
482 		/* nothing to do, add tracing in future patch */
483 		break;
484 	}
485 
486 	if (old_state != sk->sk_state)
487 		sk->sk_state_change(sk);
488 	return rc;
489 }
490 
/* Initialize close properties on connection establishment.
 * Binds conn.close_work to smc_close_passive_work(), which then runs
 * whenever that work item is scheduled.
 */
void smc_close_init(struct smc_sock *smc)
{
	INIT_WORK(&smc->conn.close_work, smc_close_passive_work);
}
496