// SPDX-License-Identifier: GPL-2.0
/*
 * Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 * Socket Closing - normal and abnormal
 *
 * Copyright IBM Corp. 2016
 *
 * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com>
 */

#include <linux/workqueue.h>
#include <linux/sched/signal.h>

#include <net/sock.h>

#include "smc.h"
#include "smc_tx.h"
#include "smc_cdc.h"
#include "smc_close.h"

#define SMC_CLOSE_WAIT_LISTEN_CLCSOCK_TIME	(5 * HZ)

static void smc_close_cleanup_listen(struct sock *parent)
{
	struct sock *sk;

	/* Close non-accepted connections */
	while ((sk = smc_accept_dequeue(parent, NULL)))
		smc_close_non_accepted(sk);
}

static void smc_close_wait_listen_clcsock(struct smc_sock *smc)
{
	DEFINE_WAIT_FUNC(wait, woken_wake_function);
	struct sock *sk = &smc->sk;
	signed long timeout;

	timeout = SMC_CLOSE_WAIT_LISTEN_CLCSOCK_TIME;
	add_wait_queue(sk_sleep(sk), &wait);
	do {
		release_sock(sk);
		if (smc->clcsock)
			timeout = wait_woken(&wait, TASK_UNINTERRUPTIBLE,
					     timeout);
		sched_annotate_sleep();
		lock_sock(sk);
		if (!smc->clcsock)
			break;
	} while (timeout);
	remove_wait_queue(sk_sleep(sk), &wait);
}

/* wait for sndbuf data being transmitted */
static void smc_close_stream_wait(struct smc_sock *smc, long timeout)
{
	DEFINE_WAIT_FUNC(wait, woken_wake_function);
	struct sock *sk = &smc->sk;

	if (!timeout)
		return;

	if (!smc_tx_prepared_sends(&smc->conn))
		return;

	smc->wait_close_tx_prepared = 1;
	add_wait_queue(sk_sleep(sk), &wait);
	while (!signal_pending(current) && timeout) {
		int rc;

		rc = sk_wait_event(sk, &timeout,
				   !smc_tx_prepared_sends(&smc->conn) ||
				   (sk->sk_err == ECONNABORTED) ||
				   (sk->sk_err == ECONNRESET),
				   &wait);
		if (rc)
			break;
	}
	remove_wait_queue(sk_sleep(sk), &wait);
	smc->wait_close_tx_prepared = 0;
}

void smc_close_wake_tx_prepared(struct smc_sock *smc)
{
	if (smc->wait_close_tx_prepared)
		/* wake up socket closing */
		smc->sk.sk_state_change(&smc->sk);
}
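/* Helpers to announce closing to the peer. Each one sets the respective
 * connection-state flag in local_tx_ctrl and sends a CDC message that
 * carries it:
 * - smc_close_wr():    peer_done_writing - local write side is shut down
 * - smc_close_final(): peer_conn_closed, or peer_conn_abort if unread
 *                      receive data would be dismissed
 * - smc_close_abort(): peer_conn_abort - abnormal termination
 */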
static int smc_close_wr(struct smc_connection *conn)
{
	conn->local_tx_ctrl.conn_state_flags.peer_done_writing = 1;

	return smc_cdc_get_slot_and_msg_send(conn);
}

static int smc_close_final(struct smc_connection *conn)
{
	if (atomic_read(&conn->bytes_to_rcv))
		conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
	else
		conn->local_tx_ctrl.conn_state_flags.peer_conn_closed = 1;

	return smc_cdc_get_slot_and_msg_send(conn);
}

static int smc_close_abort(struct smc_connection *conn)
{
	conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;

	return smc_cdc_get_slot_and_msg_send(conn);
}

/* terminate smc socket abnormally - active abort
 * link group is terminated, i.e. RDMA communication no longer possible
 */
static void smc_close_active_abort(struct smc_sock *smc)
{
	struct sock *sk = &smc->sk;

	struct smc_cdc_conn_state_flags *txflags =
		&smc->conn.local_tx_ctrl.conn_state_flags;

	sk->sk_err = ECONNABORTED;
	if (smc->clcsock && smc->clcsock->sk) {
		smc->clcsock->sk->sk_err = ECONNABORTED;
		smc->clcsock->sk->sk_state_change(smc->clcsock->sk);
	}
	switch (sk->sk_state) {
	case SMC_INIT:
	case SMC_ACTIVE:
		sk->sk_state = SMC_PEERABORTWAIT;
		release_sock(sk);
		cancel_delayed_work_sync(&smc->conn.tx_work);
		lock_sock(sk);
		sock_put(sk); /* passive closing */
		break;
	case SMC_APPCLOSEWAIT1:
	case SMC_APPCLOSEWAIT2:
		if (!smc_cdc_rxed_any_close(&smc->conn))
			sk->sk_state = SMC_PEERABORTWAIT;
		else
			sk->sk_state = SMC_CLOSED;
		release_sock(sk);
		cancel_delayed_work_sync(&smc->conn.tx_work);
		lock_sock(sk);
		break;
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
		if (!txflags->peer_conn_closed) {
			/* just SHUTDOWN_SEND done */
			sk->sk_state = SMC_PEERABORTWAIT;
		} else {
			sk->sk_state = SMC_CLOSED;
		}
		sock_put(sk); /* passive closing */
		break;
	case SMC_PROCESSABORT:
	case SMC_APPFINCLOSEWAIT:
		sk->sk_state = SMC_CLOSED;
		break;
	case SMC_PEERFINCLOSEWAIT:
		sock_put(sk); /* passive closing */
		break;
	case SMC_PEERABORTWAIT:
	case SMC_CLOSED:
		break;
	}

	sock_set_flag(sk, SOCK_DEAD);
	sk->sk_state_change(sk);
}

static inline bool smc_close_sent_any_close(struct smc_connection *conn)
{
	return conn->local_tx_ctrl.conn_state_flags.peer_conn_abort ||
	       conn->local_tx_ctrl.conn_state_flags.peer_conn_closed;
}
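/* Active close: perform the close actions for the current socket state
 * and advance the close state machine. Expects the sock lock to be held
 * by the caller; it is dropped temporarily to cancel pending tx work.
 * Returns 0 or the error code from sending the closing CDC message.
 */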
int smc_close_active(struct smc_sock *smc)
{
	struct smc_cdc_conn_state_flags *txflags =
		&smc->conn.local_tx_ctrl.conn_state_flags;
	struct smc_connection *conn = &smc->conn;
	struct sock *sk = &smc->sk;
	int old_state;
	long timeout;
	int rc = 0;

	timeout = current->flags & PF_EXITING ?
		  0 : sock_flag(sk, SOCK_LINGER) ?
		      sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT;

	old_state = sk->sk_state;
again:
	switch (sk->sk_state) {
	case SMC_INIT:
		sk->sk_state = SMC_CLOSED;
		break;
	case SMC_LISTEN:
		sk->sk_state = SMC_CLOSED;
		sk->sk_state_change(sk); /* wake up accept */
		if (smc->clcsock && smc->clcsock->sk) {
			rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR);
			/* wake up kernel_accept of smc_tcp_listen_worker */
			smc->clcsock->sk->sk_data_ready(smc->clcsock->sk);
			smc_close_wait_listen_clcsock(smc);
		}
		smc_close_cleanup_listen(sk);
		break;
	case SMC_ACTIVE:
		smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(sk);
		if (sk->sk_state == SMC_ACTIVE) {
			/* send close request */
			rc = smc_close_final(conn);
			if (rc)
				break;
			sk->sk_state = SMC_PEERCLOSEWAIT1;
		} else {
			/* peer event has changed the state */
			goto again;
		}
		break;
	case SMC_APPFINCLOSEWAIT:
		/* socket already shutdown wr or both (active close) */
		if (txflags->peer_done_writing &&
		    !smc_close_sent_any_close(conn)) {
			/* just shutdown wr done, send close request */
			rc = smc_close_final(conn);
			if (rc)
				break;
		}
		sk->sk_state = SMC_CLOSED;
		break;
	case SMC_APPCLOSEWAIT1:
	case SMC_APPCLOSEWAIT2:
		if (!smc_cdc_rxed_any_close(conn))
			smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(sk);
		if (sk->sk_state != SMC_APPCLOSEWAIT1 &&
		    sk->sk_state != SMC_APPCLOSEWAIT2)
			goto again;
		/* confirm close from peer */
		rc = smc_close_final(conn);
		if (rc)
			break;
		if (smc_cdc_rxed_any_close(conn)) {
			/* peer has closed the socket already */
			sk->sk_state = SMC_CLOSED;
			sock_put(sk); /* postponed passive closing */
		} else {
			/* peer has just issued a shutdown write */
			sk->sk_state = SMC_PEERFINCLOSEWAIT;
		}
		break;
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
		if (txflags->peer_done_writing &&
		    !smc_close_sent_any_close(conn)) {
			/* just shutdown wr done, send close request */
			rc = smc_close_final(conn);
			if (rc)
				break;
		}
		/* peer sending PeerConnectionClosed will cause transition */
		break;
	case SMC_PEERFINCLOSEWAIT:
		/* peer sending PeerConnectionClosed will cause transition */
		break;
	case SMC_PROCESSABORT:
		smc_close_abort(conn);
		sk->sk_state = SMC_CLOSED;
		break;
	case SMC_PEERABORTWAIT:
	case SMC_CLOSED:
		/* nothing to do, add tracing in future patch */
		break;
	}

	if (old_state != sk->sk_state)
		sk->sk_state_change(sk);
	return rc;
}
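/* A peer_conn_abort has been received; adapt the socket state according
 * to how far the local close processing has progressed.
 */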
static void smc_close_passive_abort_received(struct smc_sock *smc)
{
	struct smc_cdc_conn_state_flags *txflags =
		&smc->conn.local_tx_ctrl.conn_state_flags;
	struct sock *sk = &smc->sk;

	switch (sk->sk_state) {
	case SMC_INIT:
	case SMC_ACTIVE:
	case SMC_APPCLOSEWAIT1:
		sk->sk_state = SMC_PROCESSABORT;
		sock_put(sk); /* passive closing */
		break;
	case SMC_APPFINCLOSEWAIT:
		sk->sk_state = SMC_PROCESSABORT;
		break;
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
		if (txflags->peer_done_writing &&
		    !smc_close_sent_any_close(&smc->conn))
			/* just shutdown, but not yet closed locally */
			sk->sk_state = SMC_PROCESSABORT;
		else
			sk->sk_state = SMC_CLOSED;
		sock_put(sk); /* passive closing */
		break;
	case SMC_APPCLOSEWAIT2:
	case SMC_PEERFINCLOSEWAIT:
		sk->sk_state = SMC_CLOSED;
		sock_put(sk); /* passive closing */
		break;
	case SMC_PEERABORTWAIT:
		sk->sk_state = SMC_CLOSED;
		break;
	case SMC_PROCESSABORT:
		/* nothing to do, add tracing in future patch */
		break;
	}
}

/* Either some kind of closing has been received: peer_conn_closed,
 * peer_conn_abort, or peer_done_writing
 * or the link group of the connection terminates abnormally.
 */
static void smc_close_passive_work(struct work_struct *work)
{
	struct smc_connection *conn = container_of(work,
						   struct smc_connection,
						   close_work);
	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
	struct smc_cdc_conn_state_flags *rxflags;
	struct sock *sk = &smc->sk;
	int old_state;

	lock_sock(sk);
	old_state = sk->sk_state;

	if (!conn->alert_token_local) {
		/* abnormal termination */
		smc_close_active_abort(smc);
		goto wakeup;
	}

	rxflags = &conn->local_rx_ctrl.conn_state_flags;
	if (rxflags->peer_conn_abort) {
		/* peer has not received all data */
		smc_close_passive_abort_received(smc);
		release_sock(&smc->sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(&smc->sk);
		goto wakeup;
	}

	switch (sk->sk_state) {
	case SMC_INIT:
		if (atomic_read(&conn->bytes_to_rcv) ||
		    (rxflags->peer_done_writing &&
		     !smc_cdc_rxed_any_close(conn))) {
			sk->sk_state = SMC_APPCLOSEWAIT1;
		} else {
			sk->sk_state = SMC_CLOSED;
			sock_put(sk); /* passive closing */
		}
		break;
	case SMC_ACTIVE:
		sk->sk_state = SMC_APPCLOSEWAIT1;
		/* postpone sock_put() for passive closing to cover
		 * received SEND_SHUTDOWN as well
		 */
		break;
	case SMC_PEERCLOSEWAIT1:
		if (rxflags->peer_done_writing)
			sk->sk_state = SMC_PEERCLOSEWAIT2;
		/* fall through */
		/* to check for closing */
	case SMC_PEERCLOSEWAIT2:
		if (!smc_cdc_rxed_any_close(conn))
			break;
		if (sock_flag(sk, SOCK_DEAD) &&
		    smc_close_sent_any_close(conn)) {
			/* smc_release has already been called locally */
			sk->sk_state = SMC_CLOSED;
		} else {
			/* just shutdown, but not yet closed locally */
			sk->sk_state = SMC_APPFINCLOSEWAIT;
		}
		sock_put(sk); /* passive closing */
		break;
	case SMC_PEERFINCLOSEWAIT:
		if (smc_cdc_rxed_any_close(conn)) {
			sk->sk_state = SMC_CLOSED;
			sock_put(sk); /* passive closing */
		}
		break;
	case SMC_APPCLOSEWAIT1:
	case SMC_APPCLOSEWAIT2:
		/* postpone sock_put() for passive closing to cover
		 * received SEND_SHUTDOWN as well
		 */
		break;
	case SMC_APPFINCLOSEWAIT:
	case SMC_PEERABORTWAIT:
	case SMC_PROCESSABORT:
	case SMC_CLOSED:
		/* nothing to do, add tracing in future patch */
		break;
	}

wakeup:
	sk->sk_data_ready(sk); /* wakeup blocked rcvbuf consumers */
	sk->sk_write_space(sk); /* wakeup blocked sndbuf producers */

	if (old_state != sk->sk_state) {
		sk->sk_state_change(sk);
		if ((sk->sk_state == SMC_CLOSED) &&
		    (sock_flag(sk, SOCK_DEAD) || !sk->sk_socket))
			smc_conn_free(conn);
	}
	release_sock(sk);
	sock_put(sk); /* sock_hold done by schedulers of close_work */
}
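/* Shut down the write side of the connection: wait until pending sends
 * are transmitted, announce peer_done_writing via smc_close_wr(), and
 * advance the socket state for both active shutdown and passive close.
 */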
int smc_close_shutdown_write(struct smc_sock *smc)
{
	struct smc_connection *conn = &smc->conn;
	struct sock *sk = &smc->sk;
	int old_state;
	long timeout;
	int rc = 0;

	timeout = current->flags & PF_EXITING ?
		  0 : sock_flag(sk, SOCK_LINGER) ?
		      sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT;

	old_state = sk->sk_state;
again:
	switch (sk->sk_state) {
	case SMC_ACTIVE:
		smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(sk);
		if (sk->sk_state != SMC_ACTIVE)
			goto again;
		/* send close wr request */
		rc = smc_close_wr(conn);
		if (rc)
			break;
		sk->sk_state = SMC_PEERCLOSEWAIT1;
		break;
	case SMC_APPCLOSEWAIT1:
		/* passive close */
		if (!smc_cdc_rxed_any_close(conn))
			smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(sk);
		if (sk->sk_state != SMC_APPCLOSEWAIT1)
			goto again;
		/* confirm close from peer */
		rc = smc_close_wr(conn);
		if (rc)
			break;
		sk->sk_state = SMC_APPCLOSEWAIT2;
		break;
	case SMC_APPCLOSEWAIT2:
	case SMC_PEERFINCLOSEWAIT:
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
	case SMC_APPFINCLOSEWAIT:
	case SMC_PROCESSABORT:
	case SMC_PEERABORTWAIT:
		/* nothing to do, add tracing in future patch */
		break;
	}

	if (old_state != sk->sk_state)
		sk->sk_state_change(sk);
	return rc;
}

/* Initialize close properties on connection establishment. */
void smc_close_init(struct smc_sock *smc)
{
	INIT_WORK(&smc->conn.close_work, smc_close_passive_work);
}