// SPDX-License-Identifier: GPL-2.0
/*
 * Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 * Socket Closing - normal and abnormal
 *
 * Copyright IBM Corp. 2016
 *
 * Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
 */

#include <linux/workqueue.h>
#include <linux/sched/signal.h>

#include <net/sock.h>
#include <net/tcp.h>

#include "smc.h"
#include "smc_tx.h"
#include "smc_cdc.h"
#include "smc_close.h"

#define SMC_CLOSE_WAIT_LISTEN_CLCSOCK_TIME	(5 * HZ)

/* release the clcsock that is assigned to the smc_sock */
void smc_clcsock_release(struct smc_sock *smc)
{
	struct socket *tcp;

	if (smc->listen_smc && current_work() != &smc->smc_listen_work)
		cancel_work_sync(&smc->smc_listen_work);
	mutex_lock(&smc->clcsock_release_lock);
	if (smc->clcsock) {
		tcp = smc->clcsock;
		smc->clcsock = NULL;
		sock_release(tcp);
	}
	mutex_unlock(&smc->clcsock_release_lock);
}

static void smc_close_cleanup_listen(struct sock *parent)
{
	struct sock *sk;

	/* Close non-accepted connections */
	while ((sk = smc_accept_dequeue(parent, NULL)))
		smc_close_non_accepted(sk);
}

/* wait for sndbuf data being transmitted */
static void smc_close_stream_wait(struct smc_sock *smc, long timeout)
{
	DEFINE_WAIT_FUNC(wait, woken_wake_function);
	struct sock *sk = &smc->sk;

	if (!timeout)
		return;

	if (!smc_tx_prepared_sends(&smc->conn))
		return;

	smc->wait_close_tx_prepared = 1;
	add_wait_queue(sk_sleep(sk), &wait);
	while (!signal_pending(current) && timeout) {
		int rc;

		rc = sk_wait_event(sk, &timeout,
				   !smc_tx_prepared_sends(&smc->conn) ||
				   sk->sk_err == ECONNABORTED ||
				   sk->sk_err == ECONNRESET ||
				   smc->conn.killed,
				   &wait);
		if (rc)
			break;
	}
	remove_wait_queue(sk_sleep(sk), &wait);
	smc->wait_close_tx_prepared = 0;
}

void smc_close_wake_tx_prepared(struct smc_sock *smc)
{
	if (smc->wait_close_tx_prepared)
		/* wake up socket closing */
		smc->sk.sk_state_change(&smc->sk);
}

static int smc_close_wr(struct smc_connection *conn)
{
	conn->local_tx_ctrl.conn_state_flags.peer_done_writing = 1;

	return smc_cdc_get_slot_and_msg_send(conn);
}

static int smc_close_final(struct smc_connection *conn)
{
	if (atomic_read(&conn->bytes_to_rcv))
		conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
	else
		conn->local_tx_ctrl.conn_state_flags.peer_conn_closed = 1;
	if (conn->killed)
		return -EPIPE;

	return smc_cdc_get_slot_and_msg_send(conn);
}

int smc_close_abort(struct smc_connection *conn)
{
	conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;

	return smc_cdc_get_slot_and_msg_send(conn);
}
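
/* The three helpers above encode the close intent in the CDC connection
 * state flags carried by every CDC message:
 * - smc_close_wr() announces a write shutdown (peer_done_writing), i.e.
 *   a half-close; the connection can still receive data.
 * - smc_close_final() announces a regular close (peer_conn_closed); if
 *   unconsumed data is still pending in the receive buffer
 *   (bytes_to_rcv != 0), the close is converted into an abort instead,
 *   because that data can no longer be delivered to the application.
 * - smc_close_abort() announces an abnormal termination unconditionally
 *   (peer_conn_abort).
 * All three transmit the flags to the peer via
 * smc_cdc_get_slot_and_msg_send().
 */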

/* terminate smc socket abnormally - active abort
 * link group is terminated, i.e. RDMA communication no longer possible
 */
void smc_close_active_abort(struct smc_sock *smc)
{
	struct sock *sk = &smc->sk;
	bool release_clcsock = false;

	if (sk->sk_state != SMC_INIT && smc->clcsock && smc->clcsock->sk) {
		sk->sk_err = ECONNABORTED;
		if (smc->clcsock && smc->clcsock->sk)
			tcp_abort(smc->clcsock->sk, ECONNABORTED);
	}
	switch (sk->sk_state) {
	case SMC_ACTIVE:
		sk->sk_state = SMC_PEERABORTWAIT;
		release_sock(sk);
		cancel_delayed_work_sync(&smc->conn.tx_work);
		lock_sock(sk);
		sk->sk_state = SMC_CLOSED;
		sock_put(sk); /* passive closing */
		break;
	case SMC_APPCLOSEWAIT1:
	case SMC_APPCLOSEWAIT2:
		release_sock(sk);
		cancel_delayed_work_sync(&smc->conn.tx_work);
		lock_sock(sk);
		sk->sk_state = SMC_CLOSED;
		sock_put(sk); /* postponed passive closing */
		break;
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
	case SMC_PEERFINCLOSEWAIT:
		sk->sk_state = SMC_CLOSED;
		smc_conn_free(&smc->conn);
		release_clcsock = true;
		sock_put(sk); /* passive closing */
		break;
	case SMC_PROCESSABORT:
	case SMC_APPFINCLOSEWAIT:
		sk->sk_state = SMC_CLOSED;
		break;
	case SMC_INIT:
	case SMC_PEERABORTWAIT:
	case SMC_CLOSED:
		break;
	}

	sock_set_flag(sk, SOCK_DEAD);
	sk->sk_state_change(sk);

	if (release_clcsock) {
		release_sock(sk);
		smc_clcsock_release(smc);
		lock_sock(sk);
	}
}

static inline bool smc_close_sent_any_close(struct smc_connection *conn)
{
	return conn->local_tx_ctrl.conn_state_flags.peer_conn_abort ||
	       conn->local_tx_ctrl.conn_state_flags.peer_conn_closed;
}
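
/* Active close of the SMC socket: advances the close state machine from
 * the current state and, where required, announces the close to the peer
 * via smc_close_final()/smc_close_abort(). Must be called with the sock
 * lock held; the lock is dropped and re-acquired internally around stream
 * waits and tx_work cancellation. A sketch of the expected calling
 * pattern (the actual callers are the release/shutdown paths in
 * af_smc.c):
 *
 *	lock_sock(sk);
 *	rc = smc_close_active(smc);	// may drop/re-take the sock lock
 *	release_sock(sk);
 */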
int smc_close_active(struct smc_sock *smc)
{
	struct smc_cdc_conn_state_flags *txflags =
		&smc->conn.local_tx_ctrl.conn_state_flags;
	struct smc_connection *conn = &smc->conn;
	struct sock *sk = &smc->sk;
	int old_state;
	long timeout;
	int rc = 0;

	timeout = current->flags & PF_EXITING ?
		  0 : sock_flag(sk, SOCK_LINGER) ?
		      sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT;

	old_state = sk->sk_state;
again:
	switch (sk->sk_state) {
	case SMC_INIT:
		sk->sk_state = SMC_CLOSED;
		break;
	case SMC_LISTEN:
		sk->sk_state = SMC_CLOSED;
		sk->sk_state_change(sk); /* wake up accept */
		if (smc->clcsock && smc->clcsock->sk) {
			rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR);
			/* wake up kernel_accept of smc_tcp_listen_worker */
			smc->clcsock->sk->sk_data_ready(smc->clcsock->sk);
		}
		smc_close_cleanup_listen(sk);
		release_sock(sk);
		flush_work(&smc->tcp_listen_work);
		lock_sock(sk);
		break;
	case SMC_ACTIVE:
		smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(sk);
		if (sk->sk_state == SMC_ACTIVE) {
			/* send close request */
			rc = smc_close_final(conn);
			sk->sk_state = SMC_PEERCLOSEWAIT1;
		} else {
			/* peer event has changed the state */
			goto again;
		}
		break;
	case SMC_APPFINCLOSEWAIT:
		/* socket already shutdown wr or both (active close) */
		if (txflags->peer_done_writing &&
		    !smc_close_sent_any_close(conn)) {
			/* just shutdown wr done, send close request */
			rc = smc_close_final(conn);
		}
		sk->sk_state = SMC_CLOSED;
		break;
	case SMC_APPCLOSEWAIT1:
	case SMC_APPCLOSEWAIT2:
		if (!smc_cdc_rxed_any_close(conn))
			smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(sk);
		if (sk->sk_state != SMC_APPCLOSEWAIT1 &&
		    sk->sk_state != SMC_APPCLOSEWAIT2)
			goto again;
		/* confirm close from peer */
		rc = smc_close_final(conn);
		if (smc_cdc_rxed_any_close(conn)) {
			/* peer has closed the socket already */
			sk->sk_state = SMC_CLOSED;
			sock_put(sk); /* postponed passive closing */
		} else {
			/* peer has just issued a shutdown write */
			sk->sk_state = SMC_PEERFINCLOSEWAIT;
		}
		break;
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
		if (txflags->peer_done_writing &&
		    !smc_close_sent_any_close(conn)) {
			/* just shutdown wr done, send close request */
			rc = smc_close_final(conn);
		}
		/* peer sending PeerConnectionClosed will cause transition */
		break;
	case SMC_PEERFINCLOSEWAIT:
		/* peer sending PeerConnectionClosed will cause transition */
		break;
	case SMC_PROCESSABORT:
		rc = smc_close_abort(conn);
		sk->sk_state = SMC_CLOSED;
		break;
	case SMC_PEERABORTWAIT:
		sk->sk_state = SMC_CLOSED;
		break;
	case SMC_CLOSED:
		/* nothing to do, add tracing in future patch */
		break;
	}

	if (old_state != sk->sk_state)
		sk->sk_state_change(sk);
	return rc;
}
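
/* Handle an abort announced by the peer (peer_conn_abort seen in the
 * received CDC flags). Only called from smc_close_passive_work() below,
 * with the sock lock held. Depending on how far the local close has
 * already progressed, the socket moves to SMC_PROCESSABORT (local
 * cleanup still outstanding) or directly to SMC_CLOSED.
 */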
static void smc_close_passive_abort_received(struct smc_sock *smc)
{
	struct smc_cdc_conn_state_flags *txflags =
		&smc->conn.local_tx_ctrl.conn_state_flags;
	struct sock *sk = &smc->sk;

	switch (sk->sk_state) {
	case SMC_INIT:
	case SMC_ACTIVE:
	case SMC_APPCLOSEWAIT1:
		sk->sk_state = SMC_PROCESSABORT;
		sock_put(sk); /* passive closing */
		break;
	case SMC_APPFINCLOSEWAIT:
		sk->sk_state = SMC_PROCESSABORT;
		break;
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
		if (txflags->peer_done_writing &&
		    !smc_close_sent_any_close(&smc->conn))
			/* just shutdown, but not yet closed locally */
			sk->sk_state = SMC_PROCESSABORT;
		else
			sk->sk_state = SMC_CLOSED;
		sock_put(sk); /* passive closing */
		break;
	case SMC_APPCLOSEWAIT2:
	case SMC_PEERFINCLOSEWAIT:
		sk->sk_state = SMC_CLOSED;
		sock_put(sk); /* passive closing */
		break;
	case SMC_PEERABORTWAIT:
		sk->sk_state = SMC_CLOSED;
		break;
	case SMC_PROCESSABORT:
		/* nothing to do, add tracing in future patch */
		break;
	}
}

/* Either some kind of closing has been received: peer_conn_closed,
 * peer_conn_abort, or peer_done_writing,
 * or the link group of the connection terminates abnormally.
 */
static void smc_close_passive_work(struct work_struct *work)
{
	struct smc_connection *conn = container_of(work,
						   struct smc_connection,
						   close_work);
	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
	struct smc_cdc_conn_state_flags *rxflags;
	bool release_clcsock = false;
	struct sock *sk = &smc->sk;
	int old_state;

	lock_sock(sk);
	old_state = sk->sk_state;

	rxflags = &conn->local_rx_ctrl.conn_state_flags;
	if (rxflags->peer_conn_abort) {
		/* peer has not received all data */
		smc_close_passive_abort_received(smc);
		release_sock(&smc->sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(&smc->sk);
		goto wakeup;
	}

	switch (sk->sk_state) {
	case SMC_INIT:
		sk->sk_state = SMC_APPCLOSEWAIT1;
		break;
	case SMC_ACTIVE:
		sk->sk_state = SMC_APPCLOSEWAIT1;
		/* postpone sock_put() for passive closing to cover
		 * received SEND_SHUTDOWN as well
		 */
		break;
	case SMC_PEERCLOSEWAIT1:
		if (rxflags->peer_done_writing)
			sk->sk_state = SMC_PEERCLOSEWAIT2;
		/* fall through */
		/* to check for closing */
	case SMC_PEERCLOSEWAIT2:
		if (!smc_cdc_rxed_any_close(conn))
			break;
		if (sock_flag(sk, SOCK_DEAD) &&
		    smc_close_sent_any_close(conn)) {
			/* smc_release has already been called locally */
			sk->sk_state = SMC_CLOSED;
		} else {
			/* just shutdown, but not yet closed locally */
			sk->sk_state = SMC_APPFINCLOSEWAIT;
		}
		sock_put(sk); /* passive closing */
		break;
	case SMC_PEERFINCLOSEWAIT:
		if (smc_cdc_rxed_any_close(conn)) {
			sk->sk_state = SMC_CLOSED;
			sock_put(sk); /* passive closing */
		}
		break;
	case SMC_APPCLOSEWAIT1:
	case SMC_APPCLOSEWAIT2:
		/* postpone sock_put() for passive closing to cover
		 * received SEND_SHUTDOWN as well
		 */
		break;
	case SMC_APPFINCLOSEWAIT:
	case SMC_PEERABORTWAIT:
	case SMC_PROCESSABORT:
	case SMC_CLOSED:
		/* nothing to do, add tracing in future patch */
		break;
	}

wakeup:
	sk->sk_data_ready(sk); /* wakeup blocked rcvbuf consumers */
	sk->sk_write_space(sk); /* wakeup blocked sndbuf producers */

	if (old_state != sk->sk_state) {
		sk->sk_state_change(sk);
		if ((sk->sk_state == SMC_CLOSED) &&
		    (sock_flag(sk, SOCK_DEAD) || !sk->sk_socket)) {
			smc_conn_free(conn);
			if (smc->clcsock)
				release_clcsock = true;
		}
	}
	release_sock(sk);
	if (release_clcsock)
		smc_clcsock_release(smc);
	sock_put(sk); /* sock_hold done by schedulers of close_work */
}
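
/* Active shutdown of the write side only, i.e. shutdown(SHUT_WR). In
 * SMC_ACTIVE the half-close is announced to the peer via smc_close_wr();
 * in SMC_APPCLOSEWAIT1 (the peer closed first) it confirms the peer's
 * close while keeping the read side open. The remaining states need no
 * additional CDC message, since a close or abort indication has already
 * been sent or is being processed.
 */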
int smc_close_shutdown_write(struct smc_sock *smc)
{
	struct smc_connection *conn = &smc->conn;
	struct sock *sk = &smc->sk;
	int old_state;
	long timeout;
	int rc = 0;

	timeout = current->flags & PF_EXITING ?
		  0 : sock_flag(sk, SOCK_LINGER) ?
		      sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT;

	old_state = sk->sk_state;
again:
	switch (sk->sk_state) {
	case SMC_ACTIVE:
		smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(sk);
		if (sk->sk_state != SMC_ACTIVE)
			goto again;
		/* send close wr request */
		rc = smc_close_wr(conn);
		sk->sk_state = SMC_PEERCLOSEWAIT1;
		break;
	case SMC_APPCLOSEWAIT1:
		/* passive close */
		if (!smc_cdc_rxed_any_close(conn))
			smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(sk);
		if (sk->sk_state != SMC_APPCLOSEWAIT1)
			goto again;
		/* confirm close from peer */
		rc = smc_close_wr(conn);
		sk->sk_state = SMC_APPCLOSEWAIT2;
		break;
	case SMC_APPCLOSEWAIT2:
	case SMC_PEERFINCLOSEWAIT:
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
	case SMC_APPFINCLOSEWAIT:
	case SMC_PROCESSABORT:
	case SMC_PEERABORTWAIT:
		/* nothing to do, add tracing in future patch */
		break;
	}

	if (old_state != sk->sk_state)
		sk->sk_state_change(sk);
	return rc;
}

/* Initialize close properties on connection establishment. */
void smc_close_init(struct smc_sock *smc)
{
	INIT_WORK(&smc->conn.close_work, smc_close_passive_work);
}
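
/* Overview of the entry points into the close state machine in this file:
 * - smc_close_active():         regular active close (release and full
 *                               shutdown)
 * - smc_close_shutdown_write(): active write shutdown (SHUT_WR)
 * - smc_close_active_abort():   abnormal local termination, e.g. when
 *                               the link group is terminated
 * - smc_close_passive_work():   passive closing, run via conn->close_work;
 *                               its schedulers take a sock_hold()
 *                               reference that the worker drops when done
 */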