1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Shared Memory Communications over RDMA (SMC-R) and RoCE 4 * 5 * Manage send buffer. 6 * Producer: 7 * Copy user space data into send buffer, if send buffer space available. 8 * Consumer: 9 * Trigger RDMA write into RMBE of peer and send CDC, if RMBE space available. 10 * 11 * Copyright IBM Corp. 2016 12 * 13 * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com> 14 */ 15 16 #include <linux/net.h> 17 #include <linux/rcupdate.h> 18 #include <linux/workqueue.h> 19 #include <linux/sched/signal.h> 20 21 #include <net/sock.h> 22 #include <net/tcp.h> 23 24 #include "smc.h" 25 #include "smc_wr.h" 26 #include "smc_cdc.h" 27 #include "smc_close.h" 28 #include "smc_ism.h" 29 #include "smc_tx.h" 30 #include "smc_stats.h" 31 #include "smc_tracepoint.h" 32 33 #define SMC_TX_WORK_DELAY 0 34 35 /***************************** sndbuf producer *******************************/ 36 37 /* callback implementation for sk.sk_write_space() 38 * to wakeup sndbuf producers that blocked with smc_tx_wait(). 39 * called under sk_socket lock. 40 */ 41 static void smc_tx_write_space(struct sock *sk) 42 { 43 struct socket *sock = sk->sk_socket; 44 struct smc_sock *smc = smc_sk(sk); 45 struct socket_wq *wq; 46 47 /* similar to sk_stream_write_space */ 48 if (atomic_read(&smc->conn.sndbuf_space) && sock) { 49 if (test_bit(SOCK_NOSPACE, &sock->flags)) 50 SMC_STAT_RMB_TX_FULL(smc, !smc->conn.lnk); 51 clear_bit(SOCK_NOSPACE, &sock->flags); 52 rcu_read_lock(); 53 wq = rcu_dereference(sk->sk_wq); 54 if (skwq_has_sleeper(wq)) 55 wake_up_interruptible_poll(&wq->wait, 56 EPOLLOUT | EPOLLWRNORM | 57 EPOLLWRBAND); 58 if (wq && wq->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN)) 59 sock_wake_async(wq, SOCK_WAKE_SPACE, POLL_OUT); 60 rcu_read_unlock(); 61 } 62 } 63 64 /* Wakeup sndbuf producers that blocked with smc_tx_wait(). 65 * Cf. tcp_data_snd_check()=>tcp_check_space()=>tcp_new_space(). 66 */ 67 void smc_tx_sndbuf_nonfull(struct smc_sock *smc) 68 { 69 if (smc->sk.sk_socket && 70 test_bit(SOCK_NOSPACE, &smc->sk.sk_socket->flags)) 71 smc->sk.sk_write_space(&smc->sk); 72 } 73 74 /* blocks sndbuf producer until at least one byte of free space available 75 * or urgent Byte was consumed 76 */ 77 static int smc_tx_wait(struct smc_sock *smc, int flags) 78 { 79 DEFINE_WAIT_FUNC(wait, woken_wake_function); 80 struct smc_connection *conn = &smc->conn; 81 struct sock *sk = &smc->sk; 82 long timeo; 83 int rc = 0; 84 85 /* similar to sk_stream_wait_memory */ 86 timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); 87 add_wait_queue(sk_sleep(sk), &wait); 88 while (1) { 89 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); 90 if (sk->sk_err || 91 (sk->sk_shutdown & SEND_SHUTDOWN) || 92 conn->killed || 93 conn->local_tx_ctrl.conn_state_flags.peer_done_writing) { 94 rc = -EPIPE; 95 break; 96 } 97 if (smc_cdc_rxed_any_close(conn)) { 98 rc = -ECONNRESET; 99 break; 100 } 101 if (!timeo) { 102 /* ensure EPOLLOUT is subsequently generated */ 103 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); 104 rc = -EAGAIN; 105 break; 106 } 107 if (signal_pending(current)) { 108 rc = sock_intr_errno(timeo); 109 break; 110 } 111 sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); 112 if (atomic_read(&conn->sndbuf_space) && !conn->urg_tx_pend) 113 break; /* at least 1 byte of free & no urgent data */ 114 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); 115 sk_wait_event(sk, &timeo, 116 sk->sk_err || 117 (sk->sk_shutdown & SEND_SHUTDOWN) || 118 smc_cdc_rxed_any_close(conn) || 119 (atomic_read(&conn->sndbuf_space) && 120 !conn->urg_tx_pend), 121 &wait); 122 } 123 remove_wait_queue(sk_sleep(sk), &wait); 124 return rc; 125 } 126 127 static bool smc_tx_is_corked(struct smc_sock *smc) 128 { 129 struct tcp_sock *tp = tcp_sk(smc->clcsock->sk); 130 131 return (tp->nonagle & TCP_NAGLE_CORK) ? true : false; 132 } 133 134 /* sndbuf producer: main API called by socket layer. 135 * called under sock lock. 136 */ 137 int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len) 138 { 139 size_t copylen, send_done = 0, send_remaining = len; 140 size_t chunk_len, chunk_off, chunk_len_sum; 141 struct smc_connection *conn = &smc->conn; 142 union smc_host_cursor prep; 143 struct sock *sk = &smc->sk; 144 char *sndbuf_base; 145 int tx_cnt_prep; 146 int writespace; 147 int rc, chunk; 148 149 /* This should be in poll */ 150 sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); 151 152 if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) { 153 rc = -EPIPE; 154 goto out_err; 155 } 156 157 if (sk->sk_state == SMC_INIT) 158 return -ENOTCONN; 159 160 if (len > conn->sndbuf_desc->len) 161 SMC_STAT_RMB_TX_SIZE_SMALL(smc, !conn->lnk); 162 163 if (len > conn->peer_rmbe_size) 164 SMC_STAT_RMB_TX_PEER_SIZE_SMALL(smc, !conn->lnk); 165 166 if (msg->msg_flags & MSG_OOB) 167 SMC_STAT_INC(smc, urg_data_cnt); 168 169 while (msg_data_left(msg)) { 170 if (smc->sk.sk_shutdown & SEND_SHUTDOWN || 171 (smc->sk.sk_err == ECONNABORTED) || 172 conn->killed) 173 return -EPIPE; 174 if (smc_cdc_rxed_any_close(conn)) 175 return send_done ?: -ECONNRESET; 176 177 if (msg->msg_flags & MSG_OOB) 178 conn->local_tx_ctrl.prod_flags.urg_data_pending = 1; 179 180 if (!atomic_read(&conn->sndbuf_space) || conn->urg_tx_pend) { 181 if (send_done) 182 return send_done; 183 rc = smc_tx_wait(smc, msg->msg_flags); 184 if (rc) 185 goto out_err; 186 continue; 187 } 188 189 /* initialize variables for 1st iteration of subsequent loop */ 190 /* could be just 1 byte, even after smc_tx_wait above */ 191 writespace = atomic_read(&conn->sndbuf_space); 192 /* not more than what user space asked for */ 193 copylen = min_t(size_t, send_remaining, writespace); 194 /* determine start of sndbuf */ 195 sndbuf_base = conn->sndbuf_desc->cpu_addr; 196 smc_curs_copy(&prep, &conn->tx_curs_prep, conn); 197 tx_cnt_prep = prep.count; 198 /* determine chunks where to write into sndbuf */ 199 /* either unwrapped case, or 1st chunk of wrapped case */ 200 chunk_len = min_t(size_t, copylen, conn->sndbuf_desc->len - 201 tx_cnt_prep); 202 chunk_len_sum = chunk_len; 203 chunk_off = tx_cnt_prep; 204 smc_sndbuf_sync_sg_for_cpu(conn); 205 for (chunk = 0; chunk < 2; chunk++) { 206 rc = memcpy_from_msg(sndbuf_base + chunk_off, 207 msg, chunk_len); 208 if (rc) { 209 smc_sndbuf_sync_sg_for_device(conn); 210 if (send_done) 211 return send_done; 212 goto out_err; 213 } 214 send_done += chunk_len; 215 send_remaining -= chunk_len; 216 217 if (chunk_len_sum == copylen) 218 break; /* either on 1st or 2nd iteration */ 219 /* prepare next (== 2nd) iteration */ 220 chunk_len = copylen - chunk_len; /* remainder */ 221 chunk_len_sum += chunk_len; 222 chunk_off = 0; /* modulo offset in send ring buffer */ 223 } 224 smc_sndbuf_sync_sg_for_device(conn); 225 /* update cursors */ 226 smc_curs_add(conn->sndbuf_desc->len, &prep, copylen); 227 smc_curs_copy(&conn->tx_curs_prep, &prep, conn); 228 /* increased in send tasklet smc_cdc_tx_handler() */ 229 smp_mb__before_atomic(); 230 atomic_sub(copylen, &conn->sndbuf_space); 231 /* guarantee 0 <= sndbuf_space <= sndbuf_desc->len */ 232 smp_mb__after_atomic(); 233 /* since we just produced more new data into sndbuf, 234 * trigger sndbuf consumer: RDMA write into peer RMBE and CDC 235 */ 236 if ((msg->msg_flags & MSG_OOB) && !send_remaining) 237 conn->urg_tx_pend = true; 238 if ((msg->msg_flags & MSG_MORE || smc_tx_is_corked(smc) || 239 msg->msg_flags & MSG_SENDPAGE_NOTLAST) && 240 (atomic_read(&conn->sndbuf_space))) 241 /* for a corked socket defer the RDMA writes if 242 * sndbuf_space is still available. The applications 243 * should known how/when to uncork it. 244 */ 245 continue; 246 smc_tx_sndbuf_nonempty(conn); 247 248 trace_smc_tx_sendmsg(smc, copylen); 249 } /* while (msg_data_left(msg)) */ 250 251 return send_done; 252 253 out_err: 254 rc = sk_stream_error(sk, msg->msg_flags, rc); 255 /* make sure we wake any epoll edge trigger waiter */ 256 if (unlikely(rc == -EAGAIN)) 257 sk->sk_write_space(sk); 258 return rc; 259 } 260 261 int smc_tx_sendpage(struct smc_sock *smc, struct page *page, int offset, 262 size_t size, int flags) 263 { 264 struct msghdr msg = {.msg_flags = flags}; 265 char *kaddr = kmap(page); 266 struct kvec iov; 267 int rc; 268 269 iov.iov_base = kaddr + offset; 270 iov.iov_len = size; 271 iov_iter_kvec(&msg.msg_iter, WRITE, &iov, 1, size); 272 rc = smc_tx_sendmsg(smc, &msg, size); 273 kunmap(page); 274 return rc; 275 } 276 277 /***************************** sndbuf consumer *******************************/ 278 279 /* sndbuf consumer: actual data transfer of one target chunk with ISM write */ 280 int smcd_tx_ism_write(struct smc_connection *conn, void *data, size_t len, 281 u32 offset, int signal) 282 { 283 struct smc_ism_position pos; 284 int rc; 285 286 memset(&pos, 0, sizeof(pos)); 287 pos.token = conn->peer_token; 288 pos.index = conn->peer_rmbe_idx; 289 pos.offset = conn->tx_off + offset; 290 pos.signal = signal; 291 rc = smc_ism_write(conn->lgr->smcd, &pos, data, len); 292 if (rc) 293 conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1; 294 return rc; 295 } 296 297 /* sndbuf consumer: actual data transfer of one target chunk with RDMA write */ 298 static int smc_tx_rdma_write(struct smc_connection *conn, int peer_rmbe_offset, 299 int num_sges, struct ib_rdma_wr *rdma_wr) 300 { 301 struct smc_link_group *lgr = conn->lgr; 302 struct smc_link *link = conn->lnk; 303 int rc; 304 305 rdma_wr->wr.wr_id = smc_wr_tx_get_next_wr_id(link); 306 rdma_wr->wr.num_sge = num_sges; 307 rdma_wr->remote_addr = 308 lgr->rtokens[conn->rtoken_idx][link->link_idx].dma_addr + 309 /* RMBE within RMB */ 310 conn->tx_off + 311 /* offset within RMBE */ 312 peer_rmbe_offset; 313 rdma_wr->rkey = lgr->rtokens[conn->rtoken_idx][link->link_idx].rkey; 314 rc = ib_post_send(link->roce_qp, &rdma_wr->wr, NULL); 315 if (rc) 316 smcr_link_down_cond_sched(link); 317 return rc; 318 } 319 320 /* sndbuf consumer */ 321 static inline void smc_tx_advance_cursors(struct smc_connection *conn, 322 union smc_host_cursor *prod, 323 union smc_host_cursor *sent, 324 size_t len) 325 { 326 smc_curs_add(conn->peer_rmbe_size, prod, len); 327 /* increased in recv tasklet smc_cdc_msg_rcv() */ 328 smp_mb__before_atomic(); 329 /* data in flight reduces usable snd_wnd */ 330 atomic_sub(len, &conn->peer_rmbe_space); 331 /* guarantee 0 <= peer_rmbe_space <= peer_rmbe_size */ 332 smp_mb__after_atomic(); 333 smc_curs_add(conn->sndbuf_desc->len, sent, len); 334 } 335 336 /* SMC-R helper for smc_tx_rdma_writes() */ 337 static int smcr_tx_rdma_writes(struct smc_connection *conn, size_t len, 338 size_t src_off, size_t src_len, 339 size_t dst_off, size_t dst_len, 340 struct smc_rdma_wr *wr_rdma_buf) 341 { 342 struct smc_link *link = conn->lnk; 343 344 dma_addr_t dma_addr = 345 sg_dma_address(conn->sndbuf_desc->sgt[link->link_idx].sgl); 346 int src_len_sum = src_len, dst_len_sum = dst_len; 347 int sent_count = src_off; 348 int srcchunk, dstchunk; 349 int num_sges; 350 int rc; 351 352 for (dstchunk = 0; dstchunk < 2; dstchunk++) { 353 struct ib_sge *sge = 354 wr_rdma_buf->wr_tx_rdma[dstchunk].wr.sg_list; 355 356 num_sges = 0; 357 for (srcchunk = 0; srcchunk < 2; srcchunk++) { 358 sge[srcchunk].addr = dma_addr + src_off; 359 sge[srcchunk].length = src_len; 360 num_sges++; 361 362 src_off += src_len; 363 if (src_off >= conn->sndbuf_desc->len) 364 src_off -= conn->sndbuf_desc->len; 365 /* modulo in send ring */ 366 if (src_len_sum == dst_len) 367 break; /* either on 1st or 2nd iteration */ 368 /* prepare next (== 2nd) iteration */ 369 src_len = dst_len - src_len; /* remainder */ 370 src_len_sum += src_len; 371 } 372 rc = smc_tx_rdma_write(conn, dst_off, num_sges, 373 &wr_rdma_buf->wr_tx_rdma[dstchunk]); 374 if (rc) 375 return rc; 376 if (dst_len_sum == len) 377 break; /* either on 1st or 2nd iteration */ 378 /* prepare next (== 2nd) iteration */ 379 dst_off = 0; /* modulo offset in RMBE ring buffer */ 380 dst_len = len - dst_len; /* remainder */ 381 dst_len_sum += dst_len; 382 src_len = min_t(int, dst_len, conn->sndbuf_desc->len - 383 sent_count); 384 src_len_sum = src_len; 385 } 386 return 0; 387 } 388 389 /* SMC-D helper for smc_tx_rdma_writes() */ 390 static int smcd_tx_rdma_writes(struct smc_connection *conn, size_t len, 391 size_t src_off, size_t src_len, 392 size_t dst_off, size_t dst_len) 393 { 394 int src_len_sum = src_len, dst_len_sum = dst_len; 395 int srcchunk, dstchunk; 396 int rc; 397 398 for (dstchunk = 0; dstchunk < 2; dstchunk++) { 399 for (srcchunk = 0; srcchunk < 2; srcchunk++) { 400 void *data = conn->sndbuf_desc->cpu_addr + src_off; 401 402 rc = smcd_tx_ism_write(conn, data, src_len, dst_off + 403 sizeof(struct smcd_cdc_msg), 0); 404 if (rc) 405 return rc; 406 dst_off += src_len; 407 src_off += src_len; 408 if (src_off >= conn->sndbuf_desc->len) 409 src_off -= conn->sndbuf_desc->len; 410 /* modulo in send ring */ 411 if (src_len_sum == dst_len) 412 break; /* either on 1st or 2nd iteration */ 413 /* prepare next (== 2nd) iteration */ 414 src_len = dst_len - src_len; /* remainder */ 415 src_len_sum += src_len; 416 } 417 if (dst_len_sum == len) 418 break; /* either on 1st or 2nd iteration */ 419 /* prepare next (== 2nd) iteration */ 420 dst_off = 0; /* modulo offset in RMBE ring buffer */ 421 dst_len = len - dst_len; /* remainder */ 422 dst_len_sum += dst_len; 423 src_len = min_t(int, dst_len, conn->sndbuf_desc->len - src_off); 424 src_len_sum = src_len; 425 } 426 return 0; 427 } 428 429 /* sndbuf consumer: prepare all necessary (src&dst) chunks of data transmit; 430 * usable snd_wnd as max transmit 431 */ 432 static int smc_tx_rdma_writes(struct smc_connection *conn, 433 struct smc_rdma_wr *wr_rdma_buf) 434 { 435 size_t len, src_len, dst_off, dst_len; /* current chunk values */ 436 union smc_host_cursor sent, prep, prod, cons; 437 struct smc_cdc_producer_flags *pflags; 438 int to_send, rmbespace; 439 int rc; 440 441 /* source: sndbuf */ 442 smc_curs_copy(&sent, &conn->tx_curs_sent, conn); 443 smc_curs_copy(&prep, &conn->tx_curs_prep, conn); 444 /* cf. wmem_alloc - (snd_max - snd_una) */ 445 to_send = smc_curs_diff(conn->sndbuf_desc->len, &sent, &prep); 446 if (to_send <= 0) 447 return 0; 448 449 /* destination: RMBE */ 450 /* cf. snd_wnd */ 451 rmbespace = atomic_read(&conn->peer_rmbe_space); 452 if (rmbespace <= 0) { 453 struct smc_sock *smc = container_of(conn, struct smc_sock, 454 conn); 455 SMC_STAT_RMB_TX_PEER_FULL(smc, !conn->lnk); 456 return 0; 457 } 458 smc_curs_copy(&prod, &conn->local_tx_ctrl.prod, conn); 459 smc_curs_copy(&cons, &conn->local_rx_ctrl.cons, conn); 460 461 /* if usable snd_wnd closes ask peer to advertise once it opens again */ 462 pflags = &conn->local_tx_ctrl.prod_flags; 463 pflags->write_blocked = (to_send >= rmbespace); 464 /* cf. usable snd_wnd */ 465 len = min(to_send, rmbespace); 466 467 /* initialize variables for first iteration of subsequent nested loop */ 468 dst_off = prod.count; 469 if (prod.wrap == cons.wrap) { 470 /* the filled destination area is unwrapped, 471 * hence the available free destination space is wrapped 472 * and we need 2 destination chunks of sum len; start with 1st 473 * which is limited by what's available in sndbuf 474 */ 475 dst_len = min_t(size_t, 476 conn->peer_rmbe_size - prod.count, len); 477 } else { 478 /* the filled destination area is wrapped, 479 * hence the available free destination space is unwrapped 480 * and we need a single destination chunk of entire len 481 */ 482 dst_len = len; 483 } 484 /* dst_len determines the maximum src_len */ 485 if (sent.count + dst_len <= conn->sndbuf_desc->len) { 486 /* unwrapped src case: single chunk of entire dst_len */ 487 src_len = dst_len; 488 } else { 489 /* wrapped src case: 2 chunks of sum dst_len; start with 1st: */ 490 src_len = conn->sndbuf_desc->len - sent.count; 491 } 492 493 if (conn->lgr->is_smcd) 494 rc = smcd_tx_rdma_writes(conn, len, sent.count, src_len, 495 dst_off, dst_len); 496 else 497 rc = smcr_tx_rdma_writes(conn, len, sent.count, src_len, 498 dst_off, dst_len, wr_rdma_buf); 499 if (rc) 500 return rc; 501 502 if (conn->urg_tx_pend && len == to_send) 503 pflags->urg_data_present = 1; 504 smc_tx_advance_cursors(conn, &prod, &sent, len); 505 /* update connection's cursors with advanced local cursors */ 506 smc_curs_copy(&conn->local_tx_ctrl.prod, &prod, conn); 507 /* dst: peer RMBE */ 508 smc_curs_copy(&conn->tx_curs_sent, &sent, conn);/* src: local sndbuf */ 509 510 return 0; 511 } 512 513 /* Wakeup sndbuf consumers from any context (IRQ or process) 514 * since there is more data to transmit; usable snd_wnd as max transmit 515 */ 516 static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn) 517 { 518 struct smc_cdc_producer_flags *pflags = &conn->local_tx_ctrl.prod_flags; 519 struct smc_link *link = conn->lnk; 520 struct smc_rdma_wr *wr_rdma_buf; 521 struct smc_cdc_tx_pend *pend; 522 struct smc_wr_buf *wr_buf; 523 int rc; 524 525 if (!link || !smc_wr_tx_link_hold(link)) 526 return -ENOLINK; 527 rc = smc_cdc_get_free_slot(conn, link, &wr_buf, &wr_rdma_buf, &pend); 528 if (rc < 0) { 529 smc_wr_tx_link_put(link); 530 if (rc == -EBUSY) { 531 struct smc_sock *smc = 532 container_of(conn, struct smc_sock, conn); 533 534 if (smc->sk.sk_err == ECONNABORTED) 535 return sock_error(&smc->sk); 536 if (conn->killed) 537 return -EPIPE; 538 rc = 0; 539 mod_delayed_work(conn->lgr->tx_wq, &conn->tx_work, 540 SMC_TX_WORK_DELAY); 541 } 542 return rc; 543 } 544 545 spin_lock_bh(&conn->send_lock); 546 if (link != conn->lnk) { 547 /* link of connection changed, tx_work will restart */ 548 smc_wr_tx_put_slot(link, 549 (struct smc_wr_tx_pend_priv *)pend); 550 rc = -ENOLINK; 551 goto out_unlock; 552 } 553 if (!pflags->urg_data_present) { 554 rc = smc_tx_rdma_writes(conn, wr_rdma_buf); 555 if (rc) { 556 smc_wr_tx_put_slot(link, 557 (struct smc_wr_tx_pend_priv *)pend); 558 goto out_unlock; 559 } 560 } 561 562 rc = smc_cdc_msg_send(conn, wr_buf, pend); 563 if (!rc && pflags->urg_data_present) { 564 pflags->urg_data_pending = 0; 565 pflags->urg_data_present = 0; 566 } 567 568 out_unlock: 569 spin_unlock_bh(&conn->send_lock); 570 smc_wr_tx_link_put(link); 571 return rc; 572 } 573 574 static int smcd_tx_sndbuf_nonempty(struct smc_connection *conn) 575 { 576 struct smc_cdc_producer_flags *pflags = &conn->local_tx_ctrl.prod_flags; 577 int rc = 0; 578 579 spin_lock_bh(&conn->send_lock); 580 if (!pflags->urg_data_present) 581 rc = smc_tx_rdma_writes(conn, NULL); 582 if (!rc) 583 rc = smcd_cdc_msg_send(conn); 584 585 if (!rc && pflags->urg_data_present) { 586 pflags->urg_data_pending = 0; 587 pflags->urg_data_present = 0; 588 } 589 spin_unlock_bh(&conn->send_lock); 590 return rc; 591 } 592 593 int smc_tx_sndbuf_nonempty(struct smc_connection *conn) 594 { 595 int rc; 596 597 if (conn->killed || 598 conn->local_rx_ctrl.conn_state_flags.peer_conn_abort) 599 return -EPIPE; /* connection being aborted */ 600 if (conn->lgr->is_smcd) 601 rc = smcd_tx_sndbuf_nonempty(conn); 602 else 603 rc = smcr_tx_sndbuf_nonempty(conn); 604 605 if (!rc) { 606 /* trigger socket release if connection is closing */ 607 struct smc_sock *smc = container_of(conn, struct smc_sock, 608 conn); 609 smc_close_wake_tx_prepared(smc); 610 } 611 return rc; 612 } 613 614 /* Wakeup sndbuf consumers from process context 615 * since there is more data to transmit. The caller 616 * must hold sock lock. 617 */ 618 void smc_tx_pending(struct smc_connection *conn) 619 { 620 struct smc_sock *smc = container_of(conn, struct smc_sock, conn); 621 int rc; 622 623 if (smc->sk.sk_err) 624 return; 625 626 rc = smc_tx_sndbuf_nonempty(conn); 627 if (!rc && conn->local_rx_ctrl.prod_flags.write_blocked && 628 !atomic_read(&conn->bytes_to_rcv)) 629 conn->local_rx_ctrl.prod_flags.write_blocked = 0; 630 } 631 632 /* Wakeup sndbuf consumers from process context 633 * since there is more data to transmit in locked 634 * sock. 635 */ 636 void smc_tx_work(struct work_struct *work) 637 { 638 struct smc_connection *conn = container_of(to_delayed_work(work), 639 struct smc_connection, 640 tx_work); 641 struct smc_sock *smc = container_of(conn, struct smc_sock, conn); 642 643 lock_sock(&smc->sk); 644 smc_tx_pending(conn); 645 release_sock(&smc->sk); 646 } 647 648 void smc_tx_consumer_update(struct smc_connection *conn, bool force) 649 { 650 union smc_host_cursor cfed, cons, prod; 651 int sender_free = conn->rmb_desc->len; 652 int to_confirm; 653 654 smc_curs_copy(&cons, &conn->local_tx_ctrl.cons, conn); 655 smc_curs_copy(&cfed, &conn->rx_curs_confirmed, conn); 656 to_confirm = smc_curs_diff(conn->rmb_desc->len, &cfed, &cons); 657 if (to_confirm > conn->rmbe_update_limit) { 658 smc_curs_copy(&prod, &conn->local_rx_ctrl.prod, conn); 659 sender_free = conn->rmb_desc->len - 660 smc_curs_diff_large(conn->rmb_desc->len, 661 &cfed, &prod); 662 } 663 664 if (conn->local_rx_ctrl.prod_flags.cons_curs_upd_req || 665 force || 666 ((to_confirm > conn->rmbe_update_limit) && 667 ((sender_free <= (conn->rmb_desc->len / 2)) || 668 conn->local_rx_ctrl.prod_flags.write_blocked))) { 669 if (conn->killed || 670 conn->local_rx_ctrl.conn_state_flags.peer_conn_abort) 671 return; 672 if ((smc_cdc_get_slot_and_msg_send(conn) < 0) && 673 !conn->killed) { 674 queue_delayed_work(conn->lgr->tx_wq, &conn->tx_work, 675 SMC_TX_WORK_DELAY); 676 return; 677 } 678 } 679 if (conn->local_rx_ctrl.prod_flags.write_blocked && 680 !atomic_read(&conn->bytes_to_rcv)) 681 conn->local_rx_ctrl.prod_flags.write_blocked = 0; 682 } 683 684 /***************************** send initialize *******************************/ 685 686 /* Initialize send properties on connection establishment. NB: not __init! */ 687 void smc_tx_init(struct smc_sock *smc) 688 { 689 smc->sk.sk_write_space = smc_tx_write_space; 690 } 691