1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Shared Memory Communications over RDMA (SMC-R) and RoCE 4 * 5 * Socket Closing - normal and abnormal 6 * 7 * Copyright IBM Corp. 2016 8 * 9 * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com> 10 */ 11 12 #include <linux/workqueue.h> 13 #include <linux/sched/signal.h> 14 15 #include <net/sock.h> 16 17 #include "smc.h" 18 #include "smc_tx.h" 19 #include "smc_cdc.h" 20 #include "smc_close.h" 21 22 #define SMC_CLOSE_WAIT_LISTEN_CLCSOCK_TIME (5 * HZ) 23 24 /* release the clcsock that is assigned to the smc_sock */ 25 void smc_clcsock_release(struct smc_sock *smc) 26 { 27 struct socket *tcp; 28 29 if (smc->listen_smc && current_work() != &smc->smc_listen_work) 30 cancel_work_sync(&smc->smc_listen_work); 31 mutex_lock(&smc->clcsock_release_lock); 32 if (smc->clcsock) { 33 tcp = smc->clcsock; 34 smc->clcsock = NULL; 35 sock_release(tcp); 36 } 37 mutex_unlock(&smc->clcsock_release_lock); 38 } 39 40 static void smc_close_cleanup_listen(struct sock *parent) 41 { 42 struct sock *sk; 43 44 /* Close non-accepted connections */ 45 while ((sk = smc_accept_dequeue(parent, NULL))) 46 smc_close_non_accepted(sk); 47 } 48 49 /* wait for sndbuf data being transmitted */ 50 static void smc_close_stream_wait(struct smc_sock *smc, long timeout) 51 { 52 DEFINE_WAIT_FUNC(wait, woken_wake_function); 53 struct sock *sk = &smc->sk; 54 55 if (!timeout) 56 return; 57 58 if (!smc_tx_prepared_sends(&smc->conn)) 59 return; 60 61 smc->wait_close_tx_prepared = 1; 62 add_wait_queue(sk_sleep(sk), &wait); 63 while (!signal_pending(current) && timeout) { 64 int rc; 65 66 rc = sk_wait_event(sk, &timeout, 67 !smc_tx_prepared_sends(&smc->conn) || 68 (sk->sk_err == ECONNABORTED) || 69 (sk->sk_err == ECONNRESET), 70 &wait); 71 if (rc) 72 break; 73 } 74 remove_wait_queue(sk_sleep(sk), &wait); 75 smc->wait_close_tx_prepared = 0; 76 } 77 78 void smc_close_wake_tx_prepared(struct smc_sock *smc) 79 { 80 if (smc->wait_close_tx_prepared) 81 /* wake up socket closing */ 82 smc->sk.sk_state_change(&smc->sk); 83 } 84 85 static int smc_close_wr(struct smc_connection *conn) 86 { 87 conn->local_tx_ctrl.conn_state_flags.peer_done_writing = 1; 88 89 return smc_cdc_get_slot_and_msg_send(conn); 90 } 91 92 static int smc_close_final(struct smc_connection *conn) 93 { 94 if (atomic_read(&conn->bytes_to_rcv)) 95 conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1; 96 else 97 conn->local_tx_ctrl.conn_state_flags.peer_conn_closed = 1; 98 99 return smc_cdc_get_slot_and_msg_send(conn); 100 } 101 102 static int smc_close_abort(struct smc_connection *conn) 103 { 104 conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1; 105 106 return smc_cdc_get_slot_and_msg_send(conn); 107 } 108 109 /* terminate smc socket abnormally - active abort 110 * link group is terminated, i.e. RDMA communication no longer possible 111 */ 112 static void smc_close_active_abort(struct smc_sock *smc) 113 { 114 struct sock *sk = &smc->sk; 115 116 struct smc_cdc_conn_state_flags *txflags = 117 &smc->conn.local_tx_ctrl.conn_state_flags; 118 119 if (sk->sk_state != SMC_INIT && smc->clcsock && smc->clcsock->sk) { 120 sk->sk_err = ECONNABORTED; 121 if (smc->clcsock && smc->clcsock->sk) { 122 smc->clcsock->sk->sk_err = ECONNABORTED; 123 smc->clcsock->sk->sk_state_change(smc->clcsock->sk); 124 } 125 } 126 switch (sk->sk_state) { 127 case SMC_ACTIVE: 128 sk->sk_state = SMC_PEERABORTWAIT; 129 release_sock(sk); 130 cancel_delayed_work_sync(&smc->conn.tx_work); 131 lock_sock(sk); 132 sock_put(sk); /* passive closing */ 133 break; 134 case SMC_APPCLOSEWAIT1: 135 case SMC_APPCLOSEWAIT2: 136 if (!smc_cdc_rxed_any_close(&smc->conn)) 137 sk->sk_state = SMC_PEERABORTWAIT; 138 else 139 sk->sk_state = SMC_CLOSED; 140 release_sock(sk); 141 cancel_delayed_work_sync(&smc->conn.tx_work); 142 lock_sock(sk); 143 break; 144 case SMC_PEERCLOSEWAIT1: 145 case SMC_PEERCLOSEWAIT2: 146 if (!txflags->peer_conn_closed) { 147 /* just SHUTDOWN_SEND done */ 148 sk->sk_state = SMC_PEERABORTWAIT; 149 } else { 150 sk->sk_state = SMC_CLOSED; 151 } 152 sock_put(sk); /* passive closing */ 153 break; 154 case SMC_PROCESSABORT: 155 case SMC_APPFINCLOSEWAIT: 156 sk->sk_state = SMC_CLOSED; 157 break; 158 case SMC_PEERFINCLOSEWAIT: 159 sock_put(sk); /* passive closing */ 160 break; 161 case SMC_INIT: 162 case SMC_PEERABORTWAIT: 163 case SMC_CLOSED: 164 break; 165 } 166 167 sock_set_flag(sk, SOCK_DEAD); 168 sk->sk_state_change(sk); 169 } 170 171 static inline bool smc_close_sent_any_close(struct smc_connection *conn) 172 { 173 return conn->local_tx_ctrl.conn_state_flags.peer_conn_abort || 174 conn->local_tx_ctrl.conn_state_flags.peer_conn_closed; 175 } 176 177 int smc_close_active(struct smc_sock *smc) 178 { 179 struct smc_cdc_conn_state_flags *txflags = 180 &smc->conn.local_tx_ctrl.conn_state_flags; 181 struct smc_connection *conn = &smc->conn; 182 struct sock *sk = &smc->sk; 183 int old_state; 184 long timeout; 185 int rc = 0; 186 187 timeout = current->flags & PF_EXITING ? 188 0 : sock_flag(sk, SOCK_LINGER) ? 189 sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT; 190 191 old_state = sk->sk_state; 192 again: 193 switch (sk->sk_state) { 194 case SMC_INIT: 195 sk->sk_state = SMC_CLOSED; 196 break; 197 case SMC_LISTEN: 198 sk->sk_state = SMC_CLOSED; 199 sk->sk_state_change(sk); /* wake up accept */ 200 if (smc->clcsock && smc->clcsock->sk) { 201 rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR); 202 /* wake up kernel_accept of smc_tcp_listen_worker */ 203 smc->clcsock->sk->sk_data_ready(smc->clcsock->sk); 204 } 205 smc_close_cleanup_listen(sk); 206 release_sock(sk); 207 flush_work(&smc->tcp_listen_work); 208 lock_sock(sk); 209 break; 210 case SMC_ACTIVE: 211 smc_close_stream_wait(smc, timeout); 212 release_sock(sk); 213 cancel_delayed_work_sync(&conn->tx_work); 214 lock_sock(sk); 215 if (sk->sk_state == SMC_ACTIVE) { 216 /* send close request */ 217 rc = smc_close_final(conn); 218 if (rc) 219 break; 220 sk->sk_state = SMC_PEERCLOSEWAIT1; 221 } else { 222 /* peer event has changed the state */ 223 goto again; 224 } 225 break; 226 case SMC_APPFINCLOSEWAIT: 227 /* socket already shutdown wr or both (active close) */ 228 if (txflags->peer_done_writing && 229 !smc_close_sent_any_close(conn)) { 230 /* just shutdown wr done, send close request */ 231 rc = smc_close_final(conn); 232 if (rc) 233 break; 234 } 235 sk->sk_state = SMC_CLOSED; 236 break; 237 case SMC_APPCLOSEWAIT1: 238 case SMC_APPCLOSEWAIT2: 239 if (!smc_cdc_rxed_any_close(conn)) 240 smc_close_stream_wait(smc, timeout); 241 release_sock(sk); 242 cancel_delayed_work_sync(&conn->tx_work); 243 lock_sock(sk); 244 if (sk->sk_state != SMC_APPCLOSEWAIT1 && 245 sk->sk_state != SMC_APPCLOSEWAIT2) 246 goto again; 247 /* confirm close from peer */ 248 rc = smc_close_final(conn); 249 if (rc) 250 break; 251 if (smc_cdc_rxed_any_close(conn)) { 252 /* peer has closed the socket already */ 253 sk->sk_state = SMC_CLOSED; 254 sock_put(sk); /* postponed passive closing */ 255 } else { 256 /* peer has just issued a shutdown write */ 257 sk->sk_state = SMC_PEERFINCLOSEWAIT; 258 } 259 break; 260 case SMC_PEERCLOSEWAIT1: 261 case SMC_PEERCLOSEWAIT2: 262 if (txflags->peer_done_writing && 263 !smc_close_sent_any_close(conn)) { 264 /* just shutdown wr done, send close request */ 265 rc = smc_close_final(conn); 266 if (rc) 267 break; 268 } 269 /* peer sending PeerConnectionClosed will cause transition */ 270 break; 271 case SMC_PEERFINCLOSEWAIT: 272 /* peer sending PeerConnectionClosed will cause transition */ 273 break; 274 case SMC_PROCESSABORT: 275 smc_close_abort(conn); 276 sk->sk_state = SMC_CLOSED; 277 break; 278 case SMC_PEERABORTWAIT: 279 case SMC_CLOSED: 280 /* nothing to do, add tracing in future patch */ 281 break; 282 } 283 284 if (old_state != sk->sk_state) 285 sk->sk_state_change(sk); 286 return rc; 287 } 288 289 static void smc_close_passive_abort_received(struct smc_sock *smc) 290 { 291 struct smc_cdc_conn_state_flags *txflags = 292 &smc->conn.local_tx_ctrl.conn_state_flags; 293 struct sock *sk = &smc->sk; 294 295 switch (sk->sk_state) { 296 case SMC_INIT: 297 case SMC_ACTIVE: 298 case SMC_APPCLOSEWAIT1: 299 sk->sk_state = SMC_PROCESSABORT; 300 sock_put(sk); /* passive closing */ 301 break; 302 case SMC_APPFINCLOSEWAIT: 303 sk->sk_state = SMC_PROCESSABORT; 304 break; 305 case SMC_PEERCLOSEWAIT1: 306 case SMC_PEERCLOSEWAIT2: 307 if (txflags->peer_done_writing && 308 !smc_close_sent_any_close(&smc->conn)) 309 /* just shutdown, but not yet closed locally */ 310 sk->sk_state = SMC_PROCESSABORT; 311 else 312 sk->sk_state = SMC_CLOSED; 313 sock_put(sk); /* passive closing */ 314 break; 315 case SMC_APPCLOSEWAIT2: 316 case SMC_PEERFINCLOSEWAIT: 317 sk->sk_state = SMC_CLOSED; 318 sock_put(sk); /* passive closing */ 319 break; 320 case SMC_PEERABORTWAIT: 321 sk->sk_state = SMC_CLOSED; 322 break; 323 case SMC_PROCESSABORT: 324 /* nothing to do, add tracing in future patch */ 325 break; 326 } 327 } 328 329 /* Either some kind of closing has been received: peer_conn_closed, 330 * peer_conn_abort, or peer_done_writing 331 * or the link group of the connection terminates abnormally. 332 */ 333 static void smc_close_passive_work(struct work_struct *work) 334 { 335 struct smc_connection *conn = container_of(work, 336 struct smc_connection, 337 close_work); 338 struct smc_sock *smc = container_of(conn, struct smc_sock, conn); 339 struct smc_cdc_conn_state_flags *rxflags; 340 bool release_clcsock = false; 341 struct sock *sk = &smc->sk; 342 int old_state; 343 344 lock_sock(sk); 345 old_state = sk->sk_state; 346 347 if (!conn->alert_token_local) { 348 /* abnormal termination */ 349 smc_close_active_abort(smc); 350 goto wakeup; 351 } 352 353 rxflags = &conn->local_rx_ctrl.conn_state_flags; 354 if (rxflags->peer_conn_abort) { 355 /* peer has not received all data */ 356 smc_close_passive_abort_received(smc); 357 release_sock(&smc->sk); 358 cancel_delayed_work_sync(&conn->tx_work); 359 lock_sock(&smc->sk); 360 goto wakeup; 361 } 362 363 switch (sk->sk_state) { 364 case SMC_INIT: 365 sk->sk_state = SMC_APPCLOSEWAIT1; 366 break; 367 case SMC_ACTIVE: 368 sk->sk_state = SMC_APPCLOSEWAIT1; 369 /* postpone sock_put() for passive closing to cover 370 * received SEND_SHUTDOWN as well 371 */ 372 break; 373 case SMC_PEERCLOSEWAIT1: 374 if (rxflags->peer_done_writing) 375 sk->sk_state = SMC_PEERCLOSEWAIT2; 376 /* fall through */ 377 /* to check for closing */ 378 case SMC_PEERCLOSEWAIT2: 379 if (!smc_cdc_rxed_any_close(conn)) 380 break; 381 if (sock_flag(sk, SOCK_DEAD) && 382 smc_close_sent_any_close(conn)) { 383 /* smc_release has already been called locally */ 384 sk->sk_state = SMC_CLOSED; 385 } else { 386 /* just shutdown, but not yet closed locally */ 387 sk->sk_state = SMC_APPFINCLOSEWAIT; 388 } 389 sock_put(sk); /* passive closing */ 390 break; 391 case SMC_PEERFINCLOSEWAIT: 392 if (smc_cdc_rxed_any_close(conn)) { 393 sk->sk_state = SMC_CLOSED; 394 sock_put(sk); /* passive closing */ 395 } 396 break; 397 case SMC_APPCLOSEWAIT1: 398 case SMC_APPCLOSEWAIT2: 399 /* postpone sock_put() for passive closing to cover 400 * received SEND_SHUTDOWN as well 401 */ 402 break; 403 case SMC_APPFINCLOSEWAIT: 404 case SMC_PEERABORTWAIT: 405 case SMC_PROCESSABORT: 406 case SMC_CLOSED: 407 /* nothing to do, add tracing in future patch */ 408 break; 409 } 410 411 wakeup: 412 sk->sk_data_ready(sk); /* wakeup blocked rcvbuf consumers */ 413 sk->sk_write_space(sk); /* wakeup blocked sndbuf producers */ 414 415 if (old_state != sk->sk_state) { 416 sk->sk_state_change(sk); 417 if ((sk->sk_state == SMC_CLOSED) && 418 (sock_flag(sk, SOCK_DEAD) || !sk->sk_socket)) { 419 smc_conn_free(conn); 420 if (smc->clcsock) 421 release_clcsock = true; 422 } 423 } 424 release_sock(sk); 425 if (release_clcsock) 426 smc_clcsock_release(smc); 427 sock_put(sk); /* sock_hold done by schedulers of close_work */ 428 } 429 430 int smc_close_shutdown_write(struct smc_sock *smc) 431 { 432 struct smc_connection *conn = &smc->conn; 433 struct sock *sk = &smc->sk; 434 int old_state; 435 long timeout; 436 int rc = 0; 437 438 timeout = current->flags & PF_EXITING ? 439 0 : sock_flag(sk, SOCK_LINGER) ? 440 sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT; 441 442 old_state = sk->sk_state; 443 again: 444 switch (sk->sk_state) { 445 case SMC_ACTIVE: 446 smc_close_stream_wait(smc, timeout); 447 release_sock(sk); 448 cancel_delayed_work_sync(&conn->tx_work); 449 lock_sock(sk); 450 if (sk->sk_state != SMC_ACTIVE) 451 goto again; 452 /* send close wr request */ 453 rc = smc_close_wr(conn); 454 if (rc) 455 break; 456 sk->sk_state = SMC_PEERCLOSEWAIT1; 457 break; 458 case SMC_APPCLOSEWAIT1: 459 /* passive close */ 460 if (!smc_cdc_rxed_any_close(conn)) 461 smc_close_stream_wait(smc, timeout); 462 release_sock(sk); 463 cancel_delayed_work_sync(&conn->tx_work); 464 lock_sock(sk); 465 if (sk->sk_state != SMC_APPCLOSEWAIT1) 466 goto again; 467 /* confirm close from peer */ 468 rc = smc_close_wr(conn); 469 if (rc) 470 break; 471 sk->sk_state = SMC_APPCLOSEWAIT2; 472 break; 473 case SMC_APPCLOSEWAIT2: 474 case SMC_PEERFINCLOSEWAIT: 475 case SMC_PEERCLOSEWAIT1: 476 case SMC_PEERCLOSEWAIT2: 477 case SMC_APPFINCLOSEWAIT: 478 case SMC_PROCESSABORT: 479 case SMC_PEERABORTWAIT: 480 /* nothing to do, add tracing in future patch */ 481 break; 482 } 483 484 if (old_state != sk->sk_state) 485 sk->sk_state_change(sk); 486 return rc; 487 } 488 489 /* Initialize close properties on connection establishment. */ 490 void smc_close_init(struct smc_sock *smc) 491 { 492 INIT_WORK(&smc->conn.close_work, smc_close_passive_work); 493 } 494