1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Shared Memory Communications over RDMA (SMC-R) and RoCE 4 * 5 * Socket Closing - normal and abnormal 6 * 7 * Copyright IBM Corp. 2016 8 * 9 * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com> 10 */ 11 12 #include <linux/workqueue.h> 13 #include <linux/sched/signal.h> 14 15 #include <net/sock.h> 16 #include <net/tcp.h> 17 18 #include "smc.h" 19 #include "smc_tx.h" 20 #include "smc_cdc.h" 21 #include "smc_close.h" 22 23 /* release the clcsock that is assigned to the smc_sock */ 24 void smc_clcsock_release(struct smc_sock *smc) 25 { 26 struct socket *tcp; 27 28 if (smc->listen_smc && current_work() != &smc->smc_listen_work) 29 cancel_work_sync(&smc->smc_listen_work); 30 mutex_lock(&smc->clcsock_release_lock); 31 if (smc->clcsock) { 32 tcp = smc->clcsock; 33 smc->clcsock = NULL; 34 sock_release(tcp); 35 } 36 mutex_unlock(&smc->clcsock_release_lock); 37 } 38 39 static void smc_close_cleanup_listen(struct sock *parent) 40 { 41 struct sock *sk; 42 43 /* Close non-accepted connections */ 44 while ((sk = smc_accept_dequeue(parent, NULL))) 45 smc_close_non_accepted(sk); 46 } 47 48 /* wait for sndbuf data being transmitted */ 49 static void smc_close_stream_wait(struct smc_sock *smc, long timeout) 50 { 51 DEFINE_WAIT_FUNC(wait, woken_wake_function); 52 struct sock *sk = &smc->sk; 53 54 if (!timeout) 55 return; 56 57 if (!smc_tx_prepared_sends(&smc->conn)) 58 return; 59 60 /* Send out corked data remaining in sndbuf */ 61 smc_tx_pending(&smc->conn); 62 63 smc->wait_close_tx_prepared = 1; 64 add_wait_queue(sk_sleep(sk), &wait); 65 while (!signal_pending(current) && timeout) { 66 int rc; 67 68 rc = sk_wait_event(sk, &timeout, 69 !smc_tx_prepared_sends(&smc->conn) || 70 READ_ONCE(sk->sk_err) == ECONNABORTED || 71 READ_ONCE(sk->sk_err) == ECONNRESET || 72 smc->conn.killed, 73 &wait); 74 if (rc) 75 break; 76 } 77 remove_wait_queue(sk_sleep(sk), &wait); 78 smc->wait_close_tx_prepared = 0; 79 } 80 81 void smc_close_wake_tx_prepared(struct smc_sock *smc) 82 { 83 if (smc->wait_close_tx_prepared) 84 /* wake up socket closing */ 85 smc->sk.sk_state_change(&smc->sk); 86 } 87 88 static int smc_close_wr(struct smc_connection *conn) 89 { 90 conn->local_tx_ctrl.conn_state_flags.peer_done_writing = 1; 91 92 return smc_cdc_get_slot_and_msg_send(conn); 93 } 94 95 static int smc_close_final(struct smc_connection *conn) 96 { 97 if (atomic_read(&conn->bytes_to_rcv)) 98 conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1; 99 else 100 conn->local_tx_ctrl.conn_state_flags.peer_conn_closed = 1; 101 if (conn->killed) 102 return -EPIPE; 103 104 return smc_cdc_get_slot_and_msg_send(conn); 105 } 106 107 int smc_close_abort(struct smc_connection *conn) 108 { 109 conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1; 110 111 return smc_cdc_get_slot_and_msg_send(conn); 112 } 113 114 static void smc_close_cancel_work(struct smc_sock *smc) 115 { 116 struct sock *sk = &smc->sk; 117 118 release_sock(sk); 119 cancel_work_sync(&smc->conn.close_work); 120 cancel_delayed_work_sync(&smc->conn.tx_work); 121 lock_sock(sk); 122 } 123 124 /* terminate smc socket abnormally - active abort 125 * link group is terminated, i.e. RDMA communication no longer possible 126 */ 127 void smc_close_active_abort(struct smc_sock *smc) 128 { 129 struct sock *sk = &smc->sk; 130 bool release_clcsock = false; 131 132 if (sk->sk_state != SMC_INIT && smc->clcsock && smc->clcsock->sk) { 133 sk->sk_err = ECONNABORTED; 134 if (smc->clcsock && smc->clcsock->sk) 135 tcp_abort(smc->clcsock->sk, ECONNABORTED); 136 } 137 switch (sk->sk_state) { 138 case SMC_ACTIVE: 139 case SMC_APPCLOSEWAIT1: 140 case SMC_APPCLOSEWAIT2: 141 sk->sk_state = SMC_PEERABORTWAIT; 142 smc_close_cancel_work(smc); 143 if (sk->sk_state != SMC_PEERABORTWAIT) 144 break; 145 sk->sk_state = SMC_CLOSED; 146 sock_put(sk); /* (postponed) passive closing */ 147 break; 148 case SMC_PEERCLOSEWAIT1: 149 case SMC_PEERCLOSEWAIT2: 150 case SMC_PEERFINCLOSEWAIT: 151 sk->sk_state = SMC_PEERABORTWAIT; 152 smc_close_cancel_work(smc); 153 if (sk->sk_state != SMC_PEERABORTWAIT) 154 break; 155 sk->sk_state = SMC_CLOSED; 156 smc_conn_free(&smc->conn); 157 release_clcsock = true; 158 sock_put(sk); /* passive closing */ 159 break; 160 case SMC_PROCESSABORT: 161 case SMC_APPFINCLOSEWAIT: 162 sk->sk_state = SMC_PEERABORTWAIT; 163 smc_close_cancel_work(smc); 164 if (sk->sk_state != SMC_PEERABORTWAIT) 165 break; 166 sk->sk_state = SMC_CLOSED; 167 smc_conn_free(&smc->conn); 168 release_clcsock = true; 169 break; 170 case SMC_INIT: 171 case SMC_PEERABORTWAIT: 172 case SMC_CLOSED: 173 break; 174 } 175 176 sock_set_flag(sk, SOCK_DEAD); 177 sk->sk_state_change(sk); 178 179 if (release_clcsock) { 180 release_sock(sk); 181 smc_clcsock_release(smc); 182 lock_sock(sk); 183 } 184 } 185 186 static inline bool smc_close_sent_any_close(struct smc_connection *conn) 187 { 188 return conn->local_tx_ctrl.conn_state_flags.peer_conn_abort || 189 conn->local_tx_ctrl.conn_state_flags.peer_conn_closed; 190 } 191 192 int smc_close_active(struct smc_sock *smc) 193 { 194 struct smc_cdc_conn_state_flags *txflags = 195 &smc->conn.local_tx_ctrl.conn_state_flags; 196 struct smc_connection *conn = &smc->conn; 197 struct sock *sk = &smc->sk; 198 int old_state; 199 long timeout; 200 int rc = 0; 201 int rc1 = 0; 202 203 timeout = current->flags & PF_EXITING ? 204 0 : sock_flag(sk, SOCK_LINGER) ? 205 sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT; 206 207 old_state = sk->sk_state; 208 again: 209 switch (sk->sk_state) { 210 case SMC_INIT: 211 sk->sk_state = SMC_CLOSED; 212 break; 213 case SMC_LISTEN: 214 sk->sk_state = SMC_CLOSED; 215 sk->sk_state_change(sk); /* wake up accept */ 216 if (smc->clcsock && smc->clcsock->sk) { 217 write_lock_bh(&smc->clcsock->sk->sk_callback_lock); 218 smc_clcsock_restore_cb(&smc->clcsock->sk->sk_data_ready, 219 &smc->clcsk_data_ready); 220 smc->clcsock->sk->sk_user_data = NULL; 221 write_unlock_bh(&smc->clcsock->sk->sk_callback_lock); 222 rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR); 223 } 224 smc_close_cleanup_listen(sk); 225 release_sock(sk); 226 flush_work(&smc->tcp_listen_work); 227 lock_sock(sk); 228 break; 229 case SMC_ACTIVE: 230 smc_close_stream_wait(smc, timeout); 231 release_sock(sk); 232 cancel_delayed_work_sync(&conn->tx_work); 233 lock_sock(sk); 234 if (sk->sk_state == SMC_ACTIVE) { 235 /* send close request */ 236 rc = smc_close_final(conn); 237 sk->sk_state = SMC_PEERCLOSEWAIT1; 238 239 /* actively shutdown clcsock before peer close it, 240 * prevent peer from entering TIME_WAIT state. 241 */ 242 if (smc->clcsock && smc->clcsock->sk) { 243 rc1 = kernel_sock_shutdown(smc->clcsock, 244 SHUT_RDWR); 245 rc = rc ? rc : rc1; 246 } 247 } else { 248 /* peer event has changed the state */ 249 goto again; 250 } 251 break; 252 case SMC_APPFINCLOSEWAIT: 253 /* socket already shutdown wr or both (active close) */ 254 if (txflags->peer_done_writing && 255 !smc_close_sent_any_close(conn)) { 256 /* just shutdown wr done, send close request */ 257 rc = smc_close_final(conn); 258 } 259 sk->sk_state = SMC_CLOSED; 260 break; 261 case SMC_APPCLOSEWAIT1: 262 case SMC_APPCLOSEWAIT2: 263 if (!smc_cdc_rxed_any_close(conn)) 264 smc_close_stream_wait(smc, timeout); 265 release_sock(sk); 266 cancel_delayed_work_sync(&conn->tx_work); 267 lock_sock(sk); 268 if (sk->sk_state != SMC_APPCLOSEWAIT1 && 269 sk->sk_state != SMC_APPCLOSEWAIT2) 270 goto again; 271 /* confirm close from peer */ 272 rc = smc_close_final(conn); 273 if (smc_cdc_rxed_any_close(conn)) { 274 /* peer has closed the socket already */ 275 sk->sk_state = SMC_CLOSED; 276 sock_put(sk); /* postponed passive closing */ 277 } else { 278 /* peer has just issued a shutdown write */ 279 sk->sk_state = SMC_PEERFINCLOSEWAIT; 280 } 281 break; 282 case SMC_PEERCLOSEWAIT1: 283 case SMC_PEERCLOSEWAIT2: 284 if (txflags->peer_done_writing && 285 !smc_close_sent_any_close(conn)) { 286 /* just shutdown wr done, send close request */ 287 rc = smc_close_final(conn); 288 } 289 /* peer sending PeerConnectionClosed will cause transition */ 290 break; 291 case SMC_PEERFINCLOSEWAIT: 292 /* peer sending PeerConnectionClosed will cause transition */ 293 break; 294 case SMC_PROCESSABORT: 295 rc = smc_close_abort(conn); 296 sk->sk_state = SMC_CLOSED; 297 break; 298 case SMC_PEERABORTWAIT: 299 sk->sk_state = SMC_CLOSED; 300 break; 301 case SMC_CLOSED: 302 /* nothing to do, add tracing in future patch */ 303 break; 304 } 305 306 if (old_state != sk->sk_state) 307 sk->sk_state_change(sk); 308 return rc; 309 } 310 311 static void smc_close_passive_abort_received(struct smc_sock *smc) 312 { 313 struct smc_cdc_conn_state_flags *txflags = 314 &smc->conn.local_tx_ctrl.conn_state_flags; 315 struct sock *sk = &smc->sk; 316 317 switch (sk->sk_state) { 318 case SMC_INIT: 319 case SMC_ACTIVE: 320 case SMC_APPCLOSEWAIT1: 321 sk->sk_state = SMC_PROCESSABORT; 322 sock_put(sk); /* passive closing */ 323 break; 324 case SMC_APPFINCLOSEWAIT: 325 sk->sk_state = SMC_PROCESSABORT; 326 break; 327 case SMC_PEERCLOSEWAIT1: 328 case SMC_PEERCLOSEWAIT2: 329 if (txflags->peer_done_writing && 330 !smc_close_sent_any_close(&smc->conn)) 331 /* just shutdown, but not yet closed locally */ 332 sk->sk_state = SMC_PROCESSABORT; 333 else 334 sk->sk_state = SMC_CLOSED; 335 sock_put(sk); /* passive closing */ 336 break; 337 case SMC_APPCLOSEWAIT2: 338 case SMC_PEERFINCLOSEWAIT: 339 sk->sk_state = SMC_CLOSED; 340 sock_put(sk); /* passive closing */ 341 break; 342 case SMC_PEERABORTWAIT: 343 sk->sk_state = SMC_CLOSED; 344 break; 345 case SMC_PROCESSABORT: 346 /* nothing to do, add tracing in future patch */ 347 break; 348 } 349 } 350 351 /* Either some kind of closing has been received: peer_conn_closed, 352 * peer_conn_abort, or peer_done_writing 353 * or the link group of the connection terminates abnormally. 354 */ 355 static void smc_close_passive_work(struct work_struct *work) 356 { 357 struct smc_connection *conn = container_of(work, 358 struct smc_connection, 359 close_work); 360 struct smc_sock *smc = container_of(conn, struct smc_sock, conn); 361 struct smc_cdc_conn_state_flags *rxflags; 362 bool release_clcsock = false; 363 struct sock *sk = &smc->sk; 364 int old_state; 365 366 lock_sock(sk); 367 old_state = sk->sk_state; 368 369 rxflags = &conn->local_rx_ctrl.conn_state_flags; 370 if (rxflags->peer_conn_abort) { 371 /* peer has not received all data */ 372 smc_close_passive_abort_received(smc); 373 release_sock(sk); 374 cancel_delayed_work_sync(&conn->tx_work); 375 lock_sock(sk); 376 goto wakeup; 377 } 378 379 switch (sk->sk_state) { 380 case SMC_INIT: 381 sk->sk_state = SMC_APPCLOSEWAIT1; 382 break; 383 case SMC_ACTIVE: 384 sk->sk_state = SMC_APPCLOSEWAIT1; 385 /* postpone sock_put() for passive closing to cover 386 * received SEND_SHUTDOWN as well 387 */ 388 break; 389 case SMC_PEERCLOSEWAIT1: 390 if (rxflags->peer_done_writing) 391 sk->sk_state = SMC_PEERCLOSEWAIT2; 392 fallthrough; 393 /* to check for closing */ 394 case SMC_PEERCLOSEWAIT2: 395 if (!smc_cdc_rxed_any_close(conn)) 396 break; 397 if (sock_flag(sk, SOCK_DEAD) && 398 smc_close_sent_any_close(conn)) { 399 /* smc_release has already been called locally */ 400 sk->sk_state = SMC_CLOSED; 401 } else { 402 /* just shutdown, but not yet closed locally */ 403 sk->sk_state = SMC_APPFINCLOSEWAIT; 404 } 405 sock_put(sk); /* passive closing */ 406 break; 407 case SMC_PEERFINCLOSEWAIT: 408 if (smc_cdc_rxed_any_close(conn)) { 409 sk->sk_state = SMC_CLOSED; 410 sock_put(sk); /* passive closing */ 411 } 412 break; 413 case SMC_APPCLOSEWAIT1: 414 case SMC_APPCLOSEWAIT2: 415 /* postpone sock_put() for passive closing to cover 416 * received SEND_SHUTDOWN as well 417 */ 418 break; 419 case SMC_APPFINCLOSEWAIT: 420 case SMC_PEERABORTWAIT: 421 case SMC_PROCESSABORT: 422 case SMC_CLOSED: 423 /* nothing to do, add tracing in future patch */ 424 break; 425 } 426 427 wakeup: 428 sk->sk_data_ready(sk); /* wakeup blocked rcvbuf consumers */ 429 sk->sk_write_space(sk); /* wakeup blocked sndbuf producers */ 430 431 if (old_state != sk->sk_state) { 432 sk->sk_state_change(sk); 433 if ((sk->sk_state == SMC_CLOSED) && 434 (sock_flag(sk, SOCK_DEAD) || !sk->sk_socket)) { 435 smc_conn_free(conn); 436 if (smc->clcsock) 437 release_clcsock = true; 438 } 439 } 440 release_sock(sk); 441 if (release_clcsock) 442 smc_clcsock_release(smc); 443 sock_put(sk); /* sock_hold done by schedulers of close_work */ 444 } 445 446 int smc_close_shutdown_write(struct smc_sock *smc) 447 { 448 struct smc_connection *conn = &smc->conn; 449 struct sock *sk = &smc->sk; 450 int old_state; 451 long timeout; 452 int rc = 0; 453 454 timeout = current->flags & PF_EXITING ? 455 0 : sock_flag(sk, SOCK_LINGER) ? 456 sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT; 457 458 old_state = sk->sk_state; 459 again: 460 switch (sk->sk_state) { 461 case SMC_ACTIVE: 462 smc_close_stream_wait(smc, timeout); 463 release_sock(sk); 464 cancel_delayed_work_sync(&conn->tx_work); 465 lock_sock(sk); 466 if (sk->sk_state != SMC_ACTIVE) 467 goto again; 468 /* send close wr request */ 469 rc = smc_close_wr(conn); 470 sk->sk_state = SMC_PEERCLOSEWAIT1; 471 break; 472 case SMC_APPCLOSEWAIT1: 473 /* passive close */ 474 if (!smc_cdc_rxed_any_close(conn)) 475 smc_close_stream_wait(smc, timeout); 476 release_sock(sk); 477 cancel_delayed_work_sync(&conn->tx_work); 478 lock_sock(sk); 479 if (sk->sk_state != SMC_APPCLOSEWAIT1) 480 goto again; 481 /* confirm close from peer */ 482 rc = smc_close_wr(conn); 483 sk->sk_state = SMC_APPCLOSEWAIT2; 484 break; 485 case SMC_APPCLOSEWAIT2: 486 case SMC_PEERFINCLOSEWAIT: 487 case SMC_PEERCLOSEWAIT1: 488 case SMC_PEERCLOSEWAIT2: 489 case SMC_APPFINCLOSEWAIT: 490 case SMC_PROCESSABORT: 491 case SMC_PEERABORTWAIT: 492 /* nothing to do, add tracing in future patch */ 493 break; 494 } 495 496 if (old_state != sk->sk_state) 497 sk->sk_state_change(sk); 498 return rc; 499 } 500 501 /* Initialize close properties on connection establishment. */ 502 void smc_close_init(struct smc_sock *smc) 503 { 504 INIT_WORK(&smc->conn.close_work, smc_close_passive_work); 505 } 506