1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Shared Memory Communications over RDMA (SMC-R) and RoCE 4 * 5 * Socket Closing - normal and abnormal 6 * 7 * Copyright IBM Corp. 2016 8 * 9 * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com> 10 */ 11 12 #include <linux/workqueue.h> 13 #include <linux/sched/signal.h> 14 15 #include <net/sock.h> 16 #include <net/tcp.h> 17 18 #include "smc.h" 19 #include "smc_tx.h" 20 #include "smc_cdc.h" 21 #include "smc_close.h" 22 23 /* release the clcsock that is assigned to the smc_sock */ 24 void smc_clcsock_release(struct smc_sock *smc) 25 { 26 struct socket *tcp; 27 28 if (smc->listen_smc && current_work() != &smc->smc_listen_work) 29 cancel_work_sync(&smc->smc_listen_work); 30 mutex_lock(&smc->clcsock_release_lock); 31 if (smc->clcsock) { 32 tcp = smc->clcsock; 33 smc->clcsock = NULL; 34 sock_release(tcp); 35 } 36 mutex_unlock(&smc->clcsock_release_lock); 37 } 38 39 static void smc_close_cleanup_listen(struct sock *parent) 40 { 41 struct sock *sk; 42 43 /* Close non-accepted connections */ 44 while ((sk = smc_accept_dequeue(parent, NULL))) 45 smc_close_non_accepted(sk); 46 } 47 48 /* wait for sndbuf data being transmitted */ 49 static void smc_close_stream_wait(struct smc_sock *smc, long timeout) 50 { 51 DEFINE_WAIT_FUNC(wait, woken_wake_function); 52 struct sock *sk = &smc->sk; 53 54 if (!timeout) 55 return; 56 57 if (!smc_tx_prepared_sends(&smc->conn)) 58 return; 59 60 /* Send out corked data remaining in sndbuf */ 61 smc_tx_pending(&smc->conn); 62 63 smc->wait_close_tx_prepared = 1; 64 add_wait_queue(sk_sleep(sk), &wait); 65 while (!signal_pending(current) && timeout) { 66 int rc; 67 68 rc = sk_wait_event(sk, &timeout, 69 !smc_tx_prepared_sends(&smc->conn) || 70 READ_ONCE(sk->sk_err) == ECONNABORTED || 71 READ_ONCE(sk->sk_err) == ECONNRESET || 72 smc->conn.killed, 73 &wait); 74 if (rc) 75 break; 76 } 77 remove_wait_queue(sk_sleep(sk), &wait); 78 smc->wait_close_tx_prepared = 0; 79 } 80 81 void smc_close_wake_tx_prepared(struct smc_sock *smc) 82 { 83 if (smc->wait_close_tx_prepared) 84 /* wake up socket closing */ 85 smc->sk.sk_state_change(&smc->sk); 86 } 87 88 static int smc_close_wr(struct smc_connection *conn) 89 { 90 conn->local_tx_ctrl.conn_state_flags.peer_done_writing = 1; 91 92 return smc_cdc_get_slot_and_msg_send(conn); 93 } 94 95 static int smc_close_final(struct smc_connection *conn) 96 { 97 if (atomic_read(&conn->bytes_to_rcv)) 98 conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1; 99 else 100 conn->local_tx_ctrl.conn_state_flags.peer_conn_closed = 1; 101 if (conn->killed) 102 return -EPIPE; 103 104 return smc_cdc_get_slot_and_msg_send(conn); 105 } 106 107 int smc_close_abort(struct smc_connection *conn) 108 { 109 conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1; 110 111 return smc_cdc_get_slot_and_msg_send(conn); 112 } 113 114 static void smc_close_cancel_work(struct smc_sock *smc) 115 { 116 struct sock *sk = &smc->sk; 117 118 release_sock(sk); 119 if (cancel_work_sync(&smc->conn.close_work)) 120 sock_put(sk); 121 cancel_delayed_work_sync(&smc->conn.tx_work); 122 lock_sock(sk); 123 } 124 125 /* terminate smc socket abnormally - active abort 126 * link group is terminated, i.e. RDMA communication no longer possible 127 */ 128 void smc_close_active_abort(struct smc_sock *smc) 129 { 130 struct sock *sk = &smc->sk; 131 bool release_clcsock = false; 132 133 if (sk->sk_state != SMC_INIT && smc->clcsock && smc->clcsock->sk) { 134 sk->sk_err = ECONNABORTED; 135 if (smc->clcsock && smc->clcsock->sk) 136 tcp_abort(smc->clcsock->sk, ECONNABORTED); 137 } 138 switch (sk->sk_state) { 139 case SMC_ACTIVE: 140 case SMC_APPCLOSEWAIT1: 141 case SMC_APPCLOSEWAIT2: 142 sk->sk_state = SMC_PEERABORTWAIT; 143 smc_close_cancel_work(smc); 144 if (sk->sk_state != SMC_PEERABORTWAIT) 145 break; 146 sk->sk_state = SMC_CLOSED; 147 sock_put(sk); /* (postponed) passive closing */ 148 break; 149 case SMC_PEERCLOSEWAIT1: 150 case SMC_PEERCLOSEWAIT2: 151 case SMC_PEERFINCLOSEWAIT: 152 sk->sk_state = SMC_PEERABORTWAIT; 153 smc_close_cancel_work(smc); 154 if (sk->sk_state != SMC_PEERABORTWAIT) 155 break; 156 sk->sk_state = SMC_CLOSED; 157 smc_conn_free(&smc->conn); 158 release_clcsock = true; 159 sock_put(sk); /* passive closing */ 160 break; 161 case SMC_PROCESSABORT: 162 case SMC_APPFINCLOSEWAIT: 163 sk->sk_state = SMC_PEERABORTWAIT; 164 smc_close_cancel_work(smc); 165 if (sk->sk_state != SMC_PEERABORTWAIT) 166 break; 167 sk->sk_state = SMC_CLOSED; 168 smc_conn_free(&smc->conn); 169 release_clcsock = true; 170 break; 171 case SMC_INIT: 172 case SMC_PEERABORTWAIT: 173 case SMC_CLOSED: 174 break; 175 } 176 177 smc_sock_set_flag(sk, SOCK_DEAD); 178 sk->sk_state_change(sk); 179 180 if (release_clcsock) { 181 release_sock(sk); 182 smc_clcsock_release(smc); 183 lock_sock(sk); 184 } 185 } 186 187 static inline bool smc_close_sent_any_close(struct smc_connection *conn) 188 { 189 return conn->local_tx_ctrl.conn_state_flags.peer_conn_abort || 190 conn->local_tx_ctrl.conn_state_flags.peer_conn_closed; 191 } 192 193 int smc_close_active(struct smc_sock *smc) 194 { 195 struct smc_cdc_conn_state_flags *txflags = 196 &smc->conn.local_tx_ctrl.conn_state_flags; 197 struct smc_connection *conn = &smc->conn; 198 struct sock *sk = &smc->sk; 199 int old_state; 200 long timeout; 201 int rc = 0; 202 int rc1 = 0; 203 204 timeout = current->flags & PF_EXITING ? 205 0 : sock_flag(sk, SOCK_LINGER) ? 206 sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT; 207 208 old_state = sk->sk_state; 209 again: 210 switch (sk->sk_state) { 211 case SMC_INIT: 212 sk->sk_state = SMC_CLOSED; 213 break; 214 case SMC_LISTEN: 215 sk->sk_state = SMC_CLOSED; 216 sk->sk_state_change(sk); /* wake up accept */ 217 if (smc->clcsock && smc->clcsock->sk) { 218 write_lock_bh(&smc->clcsock->sk->sk_callback_lock); 219 smc_clcsock_restore_cb(&smc->clcsock->sk->sk_data_ready, 220 &smc->clcsk_data_ready); 221 smc->clcsock->sk->sk_user_data = NULL; 222 write_unlock_bh(&smc->clcsock->sk->sk_callback_lock); 223 rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR); 224 } 225 smc_close_cleanup_listen(sk); 226 release_sock(sk); 227 flush_work(&smc->tcp_listen_work); 228 lock_sock(sk); 229 break; 230 case SMC_ACTIVE: 231 smc_close_stream_wait(smc, timeout); 232 release_sock(sk); 233 cancel_delayed_work_sync(&conn->tx_work); 234 lock_sock(sk); 235 if (sk->sk_state == SMC_ACTIVE) { 236 /* send close request */ 237 rc = smc_close_final(conn); 238 sk->sk_state = SMC_PEERCLOSEWAIT1; 239 240 /* actively shutdown clcsock before peer close it, 241 * prevent peer from entering TIME_WAIT state. 242 */ 243 if (smc->clcsock && smc->clcsock->sk) { 244 rc1 = kernel_sock_shutdown(smc->clcsock, 245 SHUT_RDWR); 246 rc = rc ? rc : rc1; 247 } 248 } else { 249 /* peer event has changed the state */ 250 goto again; 251 } 252 break; 253 case SMC_APPFINCLOSEWAIT: 254 /* socket already shutdown wr or both (active close) */ 255 if (txflags->peer_done_writing && 256 !smc_close_sent_any_close(conn)) { 257 /* just shutdown wr done, send close request */ 258 rc = smc_close_final(conn); 259 } 260 sk->sk_state = SMC_CLOSED; 261 break; 262 case SMC_APPCLOSEWAIT1: 263 case SMC_APPCLOSEWAIT2: 264 if (!smc_cdc_rxed_any_close(conn)) 265 smc_close_stream_wait(smc, timeout); 266 release_sock(sk); 267 cancel_delayed_work_sync(&conn->tx_work); 268 lock_sock(sk); 269 if (sk->sk_state != SMC_APPCLOSEWAIT1 && 270 sk->sk_state != SMC_APPCLOSEWAIT2) 271 goto again; 272 /* confirm close from peer */ 273 rc = smc_close_final(conn); 274 if (smc_cdc_rxed_any_close(conn)) { 275 /* peer has closed the socket already */ 276 sk->sk_state = SMC_CLOSED; 277 sock_put(sk); /* postponed passive closing */ 278 } else { 279 /* peer has just issued a shutdown write */ 280 sk->sk_state = SMC_PEERFINCLOSEWAIT; 281 } 282 break; 283 case SMC_PEERCLOSEWAIT1: 284 case SMC_PEERCLOSEWAIT2: 285 if (txflags->peer_done_writing && 286 !smc_close_sent_any_close(conn)) { 287 /* just shutdown wr done, send close request */ 288 rc = smc_close_final(conn); 289 } 290 /* peer sending PeerConnectionClosed will cause transition */ 291 break; 292 case SMC_PEERFINCLOSEWAIT: 293 /* peer sending PeerConnectionClosed will cause transition */ 294 break; 295 case SMC_PROCESSABORT: 296 rc = smc_close_abort(conn); 297 sk->sk_state = SMC_CLOSED; 298 break; 299 case SMC_PEERABORTWAIT: 300 sk->sk_state = SMC_CLOSED; 301 break; 302 case SMC_CLOSED: 303 /* nothing to do, add tracing in future patch */ 304 break; 305 } 306 307 if (old_state != sk->sk_state) 308 sk->sk_state_change(sk); 309 return rc; 310 } 311 312 static void smc_close_passive_abort_received(struct smc_sock *smc) 313 { 314 struct smc_cdc_conn_state_flags *txflags = 315 &smc->conn.local_tx_ctrl.conn_state_flags; 316 struct sock *sk = &smc->sk; 317 318 switch (sk->sk_state) { 319 case SMC_INIT: 320 case SMC_ACTIVE: 321 case SMC_APPCLOSEWAIT1: 322 sk->sk_state = SMC_PROCESSABORT; 323 sock_put(sk); /* passive closing */ 324 break; 325 case SMC_APPFINCLOSEWAIT: 326 sk->sk_state = SMC_PROCESSABORT; 327 break; 328 case SMC_PEERCLOSEWAIT1: 329 case SMC_PEERCLOSEWAIT2: 330 if (txflags->peer_done_writing && 331 !smc_close_sent_any_close(&smc->conn)) 332 /* just shutdown, but not yet closed locally */ 333 sk->sk_state = SMC_PROCESSABORT; 334 else 335 sk->sk_state = SMC_CLOSED; 336 sock_put(sk); /* passive closing */ 337 break; 338 case SMC_APPCLOSEWAIT2: 339 case SMC_PEERFINCLOSEWAIT: 340 sk->sk_state = SMC_CLOSED; 341 sock_put(sk); /* passive closing */ 342 break; 343 case SMC_PEERABORTWAIT: 344 sk->sk_state = SMC_CLOSED; 345 break; 346 case SMC_PROCESSABORT: 347 /* nothing to do, add tracing in future patch */ 348 break; 349 } 350 } 351 352 /* Either some kind of closing has been received: peer_conn_closed, 353 * peer_conn_abort, or peer_done_writing 354 * or the link group of the connection terminates abnormally. 355 */ 356 static void smc_close_passive_work(struct work_struct *work) 357 { 358 struct smc_connection *conn = container_of(work, 359 struct smc_connection, 360 close_work); 361 struct smc_sock *smc = container_of(conn, struct smc_sock, conn); 362 struct smc_cdc_conn_state_flags *rxflags; 363 bool release_clcsock = false; 364 struct sock *sk = &smc->sk; 365 int old_state; 366 367 lock_sock(sk); 368 old_state = sk->sk_state; 369 370 rxflags = &conn->local_rx_ctrl.conn_state_flags; 371 if (rxflags->peer_conn_abort) { 372 /* peer has not received all data */ 373 smc_close_passive_abort_received(smc); 374 release_sock(sk); 375 cancel_delayed_work_sync(&conn->tx_work); 376 lock_sock(sk); 377 goto wakeup; 378 } 379 380 switch (sk->sk_state) { 381 case SMC_INIT: 382 sk->sk_state = SMC_APPCLOSEWAIT1; 383 break; 384 case SMC_ACTIVE: 385 sk->sk_state = SMC_APPCLOSEWAIT1; 386 /* postpone sock_put() for passive closing to cover 387 * received SEND_SHUTDOWN as well 388 */ 389 break; 390 case SMC_PEERCLOSEWAIT1: 391 if (rxflags->peer_done_writing) 392 sk->sk_state = SMC_PEERCLOSEWAIT2; 393 fallthrough; 394 /* to check for closing */ 395 case SMC_PEERCLOSEWAIT2: 396 if (!smc_cdc_rxed_any_close(conn)) 397 break; 398 if (sock_flag(sk, SOCK_DEAD) && 399 smc_close_sent_any_close(conn)) { 400 /* smc_release has already been called locally */ 401 sk->sk_state = SMC_CLOSED; 402 } else { 403 /* just shutdown, but not yet closed locally */ 404 sk->sk_state = SMC_APPFINCLOSEWAIT; 405 } 406 sock_put(sk); /* passive closing */ 407 break; 408 case SMC_PEERFINCLOSEWAIT: 409 if (smc_cdc_rxed_any_close(conn)) { 410 sk->sk_state = SMC_CLOSED; 411 sock_put(sk); /* passive closing */ 412 } 413 break; 414 case SMC_APPCLOSEWAIT1: 415 case SMC_APPCLOSEWAIT2: 416 /* postpone sock_put() for passive closing to cover 417 * received SEND_SHUTDOWN as well 418 */ 419 break; 420 case SMC_APPFINCLOSEWAIT: 421 case SMC_PEERABORTWAIT: 422 case SMC_PROCESSABORT: 423 case SMC_CLOSED: 424 /* nothing to do, add tracing in future patch */ 425 break; 426 } 427 428 wakeup: 429 sk->sk_data_ready(sk); /* wakeup blocked rcvbuf consumers */ 430 sk->sk_write_space(sk); /* wakeup blocked sndbuf producers */ 431 432 if (old_state != sk->sk_state) { 433 sk->sk_state_change(sk); 434 if ((sk->sk_state == SMC_CLOSED) && 435 (sock_flag(sk, SOCK_DEAD) || !sk->sk_socket)) { 436 smc_conn_free(conn); 437 if (smc->clcsock) 438 release_clcsock = true; 439 } 440 } 441 release_sock(sk); 442 if (release_clcsock) 443 smc_clcsock_release(smc); 444 sock_put(sk); /* sock_hold done by schedulers of close_work */ 445 } 446 447 int smc_close_shutdown_write(struct smc_sock *smc) 448 { 449 struct smc_connection *conn = &smc->conn; 450 struct sock *sk = &smc->sk; 451 int old_state; 452 long timeout; 453 int rc = 0; 454 455 timeout = current->flags & PF_EXITING ? 456 0 : sock_flag(sk, SOCK_LINGER) ? 457 sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT; 458 459 old_state = sk->sk_state; 460 again: 461 switch (sk->sk_state) { 462 case SMC_ACTIVE: 463 smc_close_stream_wait(smc, timeout); 464 release_sock(sk); 465 cancel_delayed_work_sync(&conn->tx_work); 466 lock_sock(sk); 467 if (sk->sk_state != SMC_ACTIVE) 468 goto again; 469 /* send close wr request */ 470 rc = smc_close_wr(conn); 471 sk->sk_state = SMC_PEERCLOSEWAIT1; 472 break; 473 case SMC_APPCLOSEWAIT1: 474 /* passive close */ 475 if (!smc_cdc_rxed_any_close(conn)) 476 smc_close_stream_wait(smc, timeout); 477 release_sock(sk); 478 cancel_delayed_work_sync(&conn->tx_work); 479 lock_sock(sk); 480 if (sk->sk_state != SMC_APPCLOSEWAIT1) 481 goto again; 482 /* confirm close from peer */ 483 rc = smc_close_wr(conn); 484 sk->sk_state = SMC_APPCLOSEWAIT2; 485 break; 486 case SMC_APPCLOSEWAIT2: 487 case SMC_PEERFINCLOSEWAIT: 488 case SMC_PEERCLOSEWAIT1: 489 case SMC_PEERCLOSEWAIT2: 490 case SMC_APPFINCLOSEWAIT: 491 case SMC_PROCESSABORT: 492 case SMC_PEERABORTWAIT: 493 /* nothing to do, add tracing in future patch */ 494 break; 495 } 496 497 if (old_state != sk->sk_state) 498 sk->sk_state_change(sk); 499 return rc; 500 } 501 502 /* Initialize close properties on connection establishment. */ 503 void smc_close_init(struct smc_sock *smc) 504 { 505 INIT_WORK(&smc->conn.close_work, smc_close_passive_work); 506 } 507