1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* RxRPC packet transmission 3 * 4 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. 5 * Written by David Howells (dhowells@redhat.com) 6 */ 7 8 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 9 10 #include <linux/net.h> 11 #include <linux/gfp.h> 12 #include <linux/skbuff.h> 13 #include <linux/export.h> 14 #include <net/sock.h> 15 #include <net/af_rxrpc.h> 16 #include <net/udp.h> 17 #include "ar-internal.h" 18 19 extern int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len); 20 21 ssize_t do_udp_sendmsg(struct socket *socket, struct msghdr *msg, size_t len) 22 { 23 struct sockaddr *sa = msg->msg_name; 24 struct sock *sk = socket->sk; 25 26 if (IS_ENABLED(CONFIG_AF_RXRPC_IPV6)) { 27 if (sa->sa_family == AF_INET6) { 28 if (sk->sk_family != AF_INET6) { 29 pr_warn("AF_INET6 address on AF_INET socket\n"); 30 return -ENOPROTOOPT; 31 } 32 return udpv6_sendmsg(sk, msg, len); 33 } 34 } 35 return udp_sendmsg(sk, msg, len); 36 } 37 38 struct rxrpc_abort_buffer { 39 struct rxrpc_wire_header whdr; 40 __be32 abort_code; 41 }; 42 43 static const char rxrpc_keepalive_string[] = ""; 44 45 /* 46 * Increase Tx backoff on transmission failure and clear it on success. 47 */ 48 static void rxrpc_tx_backoff(struct rxrpc_call *call, int ret) 49 { 50 if (ret < 0) { 51 if (call->tx_backoff < 1000) 52 call->tx_backoff += 100; 53 } else { 54 call->tx_backoff = 0; 55 } 56 } 57 58 /* 59 * Arrange for a keepalive ping a certain time after we last transmitted. This 60 * lets the far side know we're still interested in this call and helps keep 61 * the route through any intervening firewall open. 62 * 63 * Receiving a response to the ping will prevent the ->expect_rx_by timer from 64 * expiring. 65 */ 66 static void rxrpc_set_keepalive(struct rxrpc_call *call, ktime_t now) 67 { 68 ktime_t delay = ms_to_ktime(READ_ONCE(call->next_rx_timo) / 6); 69 70 call->keepalive_at = ktime_add(ktime_get_real(), delay); 71 trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_keepalive); 72 } 73 74 /* 75 * Allocate transmission buffers for an ACK and attach them to local->kv[]. 76 */ 77 static int rxrpc_alloc_ack(struct rxrpc_call *call, size_t sack_size) 78 { 79 struct rxrpc_wire_header *whdr; 80 struct rxrpc_acktrailer *trailer; 81 struct rxrpc_ackpacket *ack; 82 struct kvec *kv = call->local->kvec; 83 gfp_t gfp = rcu_read_lock_held() ? GFP_ATOMIC | __GFP_NOWARN : GFP_NOFS; 84 void *buf, *buf2 = NULL; 85 u8 *filler; 86 87 buf = page_frag_alloc(&call->local->tx_alloc, 88 sizeof(*whdr) + sizeof(*ack) + 1 + 3 + sizeof(*trailer), gfp); 89 if (!buf) 90 return -ENOMEM; 91 92 if (sack_size) { 93 buf2 = page_frag_alloc(&call->local->tx_alloc, sack_size, gfp); 94 if (!buf2) { 95 page_frag_free(buf); 96 return -ENOMEM; 97 } 98 } 99 100 whdr = buf; 101 ack = buf + sizeof(*whdr); 102 filler = buf + sizeof(*whdr) + sizeof(*ack) + 1; 103 trailer = buf + sizeof(*whdr) + sizeof(*ack) + 1 + 3; 104 105 kv[0].iov_base = whdr; 106 kv[0].iov_len = sizeof(*whdr) + sizeof(*ack); 107 kv[1].iov_base = buf2; 108 kv[1].iov_len = sack_size; 109 kv[2].iov_base = filler; 110 kv[2].iov_len = 3 + sizeof(*trailer); 111 return 3; /* Number of kvec[] used. */ 112 } 113 114 static void rxrpc_free_ack(struct rxrpc_call *call) 115 { 116 page_frag_free(call->local->kvec[0].iov_base); 117 if (call->local->kvec[1].iov_base) 118 page_frag_free(call->local->kvec[1].iov_base); 119 } 120 121 /* 122 * Record the beginning of an RTT probe. 123 */ 124 static void rxrpc_begin_rtt_probe(struct rxrpc_call *call, rxrpc_serial_t serial, 125 ktime_t now, enum rxrpc_rtt_tx_trace why) 126 { 127 unsigned long avail = call->rtt_avail; 128 int rtt_slot = 9; 129 130 if (!(avail & RXRPC_CALL_RTT_AVAIL_MASK)) 131 goto no_slot; 132 133 rtt_slot = __ffs(avail & RXRPC_CALL_RTT_AVAIL_MASK); 134 if (!test_and_clear_bit(rtt_slot, &call->rtt_avail)) 135 goto no_slot; 136 137 call->rtt_serial[rtt_slot] = serial; 138 call->rtt_sent_at[rtt_slot] = now; 139 smp_wmb(); /* Write data before avail bit */ 140 set_bit(rtt_slot + RXRPC_CALL_RTT_PEND_SHIFT, &call->rtt_avail); 141 142 trace_rxrpc_rtt_tx(call, why, rtt_slot, serial); 143 return; 144 145 no_slot: 146 trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_no_slot, rtt_slot, serial); 147 } 148 149 /* 150 * Fill out an ACK packet. 151 */ 152 static int rxrpc_fill_out_ack(struct rxrpc_call *call, int nr_kv, u8 ack_reason, 153 rxrpc_serial_t serial_to_ack, rxrpc_serial_t *_ack_serial) 154 { 155 struct kvec *kv = call->local->kvec; 156 struct rxrpc_wire_header *whdr = kv[0].iov_base; 157 struct rxrpc_acktrailer *trailer = kv[2].iov_base + 3; 158 struct rxrpc_ackpacket *ack = (struct rxrpc_ackpacket *)(whdr + 1); 159 unsigned int qsize, sack, wrap, to, max_mtu, if_mtu; 160 rxrpc_seq_t window, wtop; 161 ktime_t now = ktime_get_real(); 162 int rsize; 163 u8 *filler = kv[2].iov_base; 164 u8 *sackp = kv[1].iov_base; 165 166 rxrpc_inc_stat(call->rxnet, stat_tx_ack_fill); 167 168 window = call->ackr_window; 169 wtop = call->ackr_wtop; 170 sack = call->ackr_sack_base % RXRPC_SACK_SIZE; 171 172 *_ack_serial = rxrpc_get_next_serial(call->conn); 173 174 whdr->epoch = htonl(call->conn->proto.epoch); 175 whdr->cid = htonl(call->cid); 176 whdr->callNumber = htonl(call->call_id); 177 whdr->serial = htonl(*_ack_serial); 178 whdr->seq = 0; 179 whdr->type = RXRPC_PACKET_TYPE_ACK; 180 whdr->flags = call->conn->out_clientflag | RXRPC_SLOW_START_OK; 181 whdr->userStatus = 0; 182 whdr->securityIndex = call->security_ix; 183 whdr->_rsvd = 0; 184 whdr->serviceId = htons(call->dest_srx.srx_service); 185 186 ack->bufferSpace = 0; 187 ack->maxSkew = 0; 188 ack->firstPacket = htonl(window); 189 ack->previousPacket = htonl(call->rx_highest_seq); 190 ack->serial = htonl(serial_to_ack); 191 ack->reason = ack_reason; 192 ack->nAcks = wtop - window; 193 filler[0] = 0; 194 filler[1] = 0; 195 filler[2] = 0; 196 197 if (ack_reason == RXRPC_ACK_PING) 198 whdr->flags |= RXRPC_REQUEST_ACK; 199 200 if (after(wtop, window)) { 201 kv[1].iov_len = ack->nAcks; 202 203 wrap = RXRPC_SACK_SIZE - sack; 204 to = umin(ack->nAcks, RXRPC_SACK_SIZE); 205 206 if (sack + ack->nAcks <= RXRPC_SACK_SIZE) { 207 memcpy(sackp, call->ackr_sack_table + sack, ack->nAcks); 208 } else { 209 memcpy(sackp, call->ackr_sack_table + sack, wrap); 210 memcpy(sackp + wrap, call->ackr_sack_table, to - wrap); 211 } 212 } else if (before(wtop, window)) { 213 pr_warn("ack window backward %x %x", window, wtop); 214 } else if (ack->reason == RXRPC_ACK_DELAY) { 215 ack->reason = RXRPC_ACK_IDLE; 216 } 217 218 qsize = (window - 1) - call->rx_consumed; 219 rsize = max_t(int, call->rx_winsize - qsize, 0); 220 221 if_mtu = call->peer->if_mtu - call->peer->hdrsize; 222 if (call->peer->ackr_adv_pmtud) { 223 max_mtu = umax(call->peer->max_data, rxrpc_rx_mtu); 224 } else { 225 if_mtu = umin(if_mtu, 1444); 226 max_mtu = if_mtu; 227 } 228 229 trailer->maxMTU = htonl(max_mtu); 230 trailer->ifMTU = htonl(if_mtu); 231 trailer->rwind = htonl(rsize); 232 trailer->jumbo_max = 0; /* Advertise pmtu discovery */ 233 234 if (ack_reason == RXRPC_ACK_PING) 235 rxrpc_begin_rtt_probe(call, *_ack_serial, now, rxrpc_rtt_tx_ping); 236 if (whdr->flags & RXRPC_REQUEST_ACK) 237 call->rtt_last_req = now; 238 rxrpc_set_keepalive(call, now); 239 return nr_kv; 240 } 241 242 /* 243 * Transmit an ACK packet. 244 */ 245 static void rxrpc_send_ack_packet(struct rxrpc_call *call, int nr_kv, size_t len, 246 rxrpc_serial_t serial, enum rxrpc_propose_ack_trace why) 247 { 248 struct kvec *kv = call->local->kvec; 249 struct rxrpc_wire_header *whdr = kv[0].iov_base; 250 struct rxrpc_acktrailer *trailer = kv[2].iov_base + 3; 251 struct rxrpc_connection *conn; 252 struct rxrpc_ackpacket *ack = (struct rxrpc_ackpacket *)(whdr + 1); 253 struct msghdr msg; 254 int ret; 255 256 if (test_bit(RXRPC_CALL_DISCONNECTED, &call->flags)) 257 return; 258 259 conn = call->conn; 260 261 msg.msg_name = &call->peer->srx.transport; 262 msg.msg_namelen = call->peer->srx.transport_len; 263 msg.msg_control = NULL; 264 msg.msg_controllen = 0; 265 msg.msg_flags = MSG_SPLICE_PAGES; 266 267 trace_rxrpc_tx_ack(call->debug_id, serial, 268 ntohl(ack->firstPacket), 269 ntohl(ack->serial), ack->reason, ack->nAcks, 270 ntohl(trailer->rwind), why); 271 272 rxrpc_inc_stat(call->rxnet, stat_tx_ack_send); 273 274 iov_iter_kvec(&msg.msg_iter, WRITE, kv, nr_kv, len); 275 rxrpc_local_dont_fragment(conn->local, why == rxrpc_propose_ack_ping_for_mtu_probe); 276 277 ret = do_udp_sendmsg(conn->local->socket, &msg, len); 278 rxrpc_peer_mark_tx(call->peer); 279 if (ret < 0) { 280 trace_rxrpc_tx_fail(call->debug_id, serial, ret, 281 rxrpc_tx_point_call_ack); 282 if (why == rxrpc_propose_ack_ping_for_mtu_probe && 283 ret == -EMSGSIZE) 284 rxrpc_input_probe_for_pmtud(conn, serial, true); 285 } else { 286 trace_rxrpc_tx_packet(call->debug_id, whdr, 287 rxrpc_tx_point_call_ack); 288 if (why == rxrpc_propose_ack_ping_for_mtu_probe) { 289 call->peer->pmtud_pending = false; 290 call->peer->pmtud_probing = true; 291 call->conn->pmtud_probe = serial; 292 call->conn->pmtud_call = call->debug_id; 293 trace_rxrpc_pmtud_tx(call); 294 } 295 } 296 rxrpc_tx_backoff(call, ret); 297 } 298 299 /* 300 * Queue an ACK for immediate transmission. 301 */ 302 void rxrpc_send_ACK(struct rxrpc_call *call, u8 ack_reason, 303 rxrpc_serial_t serial_to_ack, enum rxrpc_propose_ack_trace why) 304 { 305 struct kvec *kv = call->local->kvec; 306 rxrpc_serial_t ack_serial; 307 size_t len; 308 int nr_kv; 309 310 if (test_bit(RXRPC_CALL_DISCONNECTED, &call->flags)) 311 return; 312 313 rxrpc_inc_stat(call->rxnet, stat_tx_acks[ack_reason]); 314 315 nr_kv = rxrpc_alloc_ack(call, call->ackr_wtop - call->ackr_window); 316 if (nr_kv < 0) { 317 kleave(" = -ENOMEM"); 318 return; 319 } 320 321 nr_kv = rxrpc_fill_out_ack(call, nr_kv, ack_reason, serial_to_ack, &ack_serial); 322 len = kv[0].iov_len; 323 len += kv[1].iov_len; 324 len += kv[2].iov_len; 325 326 /* Extend a path MTU probe ACK. */ 327 if (why == rxrpc_propose_ack_ping_for_mtu_probe) { 328 size_t probe_mtu = call->peer->pmtud_trial + sizeof(struct rxrpc_wire_header); 329 330 if (len > probe_mtu) 331 goto skip; 332 while (len < probe_mtu) { 333 size_t part = umin(probe_mtu - len, PAGE_SIZE); 334 335 kv[nr_kv].iov_base = page_address(ZERO_PAGE(0)); 336 kv[nr_kv].iov_len = part; 337 len += part; 338 nr_kv++; 339 } 340 } 341 342 call->ackr_nr_unacked = 0; 343 atomic_set(&call->ackr_nr_consumed, 0); 344 clear_bit(RXRPC_CALL_RX_IS_IDLE, &call->flags); 345 346 trace_rxrpc_send_ack(call, why, ack_reason, ack_serial); 347 rxrpc_send_ack_packet(call, nr_kv, len, ack_serial, why); 348 skip: 349 rxrpc_free_ack(call); 350 } 351 352 /* 353 * Send an ACK probe for path MTU discovery. 354 */ 355 void rxrpc_send_probe_for_pmtud(struct rxrpc_call *call) 356 { 357 rxrpc_send_ACK(call, RXRPC_ACK_PING, 0, 358 rxrpc_propose_ack_ping_for_mtu_probe); 359 } 360 361 /* 362 * Send an ABORT call packet. 363 */ 364 int rxrpc_send_abort_packet(struct rxrpc_call *call) 365 { 366 struct rxrpc_connection *conn; 367 struct rxrpc_abort_buffer pkt; 368 struct msghdr msg; 369 struct kvec iov[1]; 370 rxrpc_serial_t serial; 371 int ret; 372 373 /* Don't bother sending aborts for a client call once the server has 374 * hard-ACK'd all of its request data. After that point, we're not 375 * going to stop the operation proceeding, and whilst we might limit 376 * the reply, it's not worth it if we can send a new call on the same 377 * channel instead, thereby closing off this call. 378 */ 379 if (rxrpc_is_client_call(call) && 380 test_bit(RXRPC_CALL_TX_ALL_ACKED, &call->flags)) 381 return 0; 382 383 if (test_bit(RXRPC_CALL_DISCONNECTED, &call->flags)) 384 return -ECONNRESET; 385 386 conn = call->conn; 387 388 msg.msg_name = &call->peer->srx.transport; 389 msg.msg_namelen = call->peer->srx.transport_len; 390 msg.msg_control = NULL; 391 msg.msg_controllen = 0; 392 msg.msg_flags = 0; 393 394 pkt.whdr.epoch = htonl(conn->proto.epoch); 395 pkt.whdr.cid = htonl(call->cid); 396 pkt.whdr.callNumber = htonl(call->call_id); 397 pkt.whdr.seq = 0; 398 pkt.whdr.type = RXRPC_PACKET_TYPE_ABORT; 399 pkt.whdr.flags = conn->out_clientflag; 400 pkt.whdr.userStatus = 0; 401 pkt.whdr.securityIndex = call->security_ix; 402 pkt.whdr._rsvd = 0; 403 pkt.whdr.serviceId = htons(call->dest_srx.srx_service); 404 pkt.abort_code = htonl(call->abort_code); 405 406 iov[0].iov_base = &pkt; 407 iov[0].iov_len = sizeof(pkt); 408 409 serial = rxrpc_get_next_serial(conn); 410 pkt.whdr.serial = htonl(serial); 411 412 iov_iter_kvec(&msg.msg_iter, WRITE, iov, 1, sizeof(pkt)); 413 ret = do_udp_sendmsg(conn->local->socket, &msg, sizeof(pkt)); 414 rxrpc_peer_mark_tx(conn->peer); 415 if (ret < 0) 416 trace_rxrpc_tx_fail(call->debug_id, serial, ret, 417 rxrpc_tx_point_call_abort); 418 else 419 trace_rxrpc_tx_packet(call->debug_id, &pkt.whdr, 420 rxrpc_tx_point_call_abort); 421 rxrpc_tx_backoff(call, ret); 422 return ret; 423 } 424 425 /* 426 * Prepare a (sub)packet for transmission. 427 */ 428 static size_t rxrpc_prepare_data_subpacket(struct rxrpc_call *call, 429 struct rxrpc_send_data_req *req, 430 struct rxrpc_txbuf *txb, 431 struct rxrpc_wire_header *whdr, 432 rxrpc_serial_t serial, int subpkt) 433 { 434 struct rxrpc_jumbo_header *jumbo = txb->data - sizeof(*jumbo); 435 enum rxrpc_req_ack_trace why; 436 struct rxrpc_connection *conn = call->conn; 437 struct kvec *kv = &call->local->kvec[1 + subpkt]; 438 size_t len = txb->pkt_len; 439 bool last; 440 u8 flags; 441 442 _enter("%x,%zd", txb->seq, len); 443 444 txb->serial = serial; 445 446 if (test_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags) && 447 txb->seq == 1) 448 whdr->userStatus = RXRPC_USERSTATUS_SERVICE_UPGRADE; 449 450 txb->flags &= ~RXRPC_REQUEST_ACK; 451 flags = txb->flags & RXRPC_TXBUF_WIRE_FLAGS; 452 last = txb->flags & RXRPC_LAST_PACKET; 453 454 if (subpkt < req->n - 1) { 455 len = RXRPC_JUMBO_DATALEN; 456 goto dont_set_request_ack; 457 } 458 459 /* If our RTT cache needs working on, request an ACK. Also request 460 * ACKs if a DATA packet appears to have been lost. 461 * 462 * However, we mustn't request an ACK on the last reply packet of a 463 * service call, lest OpenAFS incorrectly send us an ACK with some 464 * soft-ACKs in it and then never follow up with a proper hard ACK. 465 */ 466 if (last && rxrpc_sending_to_client(txb)) 467 why = rxrpc_reqack_no_srv_last; 468 else if (test_and_clear_bit(RXRPC_CALL_EV_ACK_LOST, &call->events)) 469 why = rxrpc_reqack_ack_lost; 470 else if (txb->flags & RXRPC_TXBUF_RESENT) 471 why = rxrpc_reqack_retrans; 472 else if (call->cong_ca_state == RXRPC_CA_SLOW_START && call->cong_cwnd <= RXRPC_MIN_CWND) 473 why = rxrpc_reqack_slow_start; 474 else if (call->tx_winsize <= 2) 475 why = rxrpc_reqack_small_txwin; 476 else if (call->rtt_count < 3) 477 why = rxrpc_reqack_more_rtt; 478 else if (ktime_before(ktime_add_ms(call->rtt_last_req, 1000), ktime_get_real())) 479 why = rxrpc_reqack_old_rtt; 480 else if (!last && !after(READ_ONCE(call->send_top), txb->seq)) 481 why = rxrpc_reqack_app_stall; 482 else if (call->tx_winsize <= (2 * req->n) || call->cong_cwnd <= (2 * req->n)) 483 why = rxrpc_reqack_jumbo_win; 484 else 485 goto dont_set_request_ack; 486 487 rxrpc_inc_stat(call->rxnet, stat_why_req_ack[why]); 488 trace_rxrpc_req_ack(call->debug_id, txb->seq, why); 489 if (why != rxrpc_reqack_no_srv_last) { 490 flags |= RXRPC_REQUEST_ACK; 491 trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_data, -1, serial); 492 call->rtt_last_req = req->now; 493 } 494 dont_set_request_ack: 495 496 /* There's a jumbo header prepended to the data if we need it. */ 497 if (subpkt < req->n - 1) 498 flags |= RXRPC_JUMBO_PACKET; 499 else 500 flags &= ~RXRPC_JUMBO_PACKET; 501 if (subpkt == 0) { 502 whdr->flags = flags; 503 whdr->cksum = txb->cksum; 504 kv->iov_base = txb->data; 505 } else { 506 jumbo->flags = flags; 507 jumbo->pad = 0; 508 jumbo->cksum = txb->cksum; 509 kv->iov_base = jumbo; 510 len += sizeof(*jumbo); 511 } 512 513 trace_rxrpc_tx_data(call, txb->seq, txb->serial, flags, req->trace); 514 kv->iov_len = len; 515 return len; 516 } 517 518 /* 519 * Prepare a transmission queue object for initial transmission. Returns the 520 * number of microseconds since the transmission queue base timestamp. 521 */ 522 static unsigned int rxrpc_prepare_txqueue(struct rxrpc_txqueue *tq, 523 struct rxrpc_send_data_req *req) 524 { 525 if (!tq) 526 return 0; 527 if (tq->xmit_ts_base == KTIME_MIN) { 528 tq->xmit_ts_base = req->now; 529 return 0; 530 } 531 return ktime_to_us(ktime_sub(req->now, tq->xmit_ts_base)); 532 } 533 534 /* 535 * Prepare a (jumbo) packet for transmission. 536 */ 537 static size_t rxrpc_prepare_data_packet(struct rxrpc_call *call, 538 struct rxrpc_send_data_req *req, 539 struct rxrpc_wire_header *whdr) 540 { 541 struct rxrpc_txqueue *tq = req->tq; 542 rxrpc_serial_t serial; 543 unsigned int xmit_ts; 544 rxrpc_seq_t seq = req->seq; 545 size_t len = 0; 546 bool start_tlp = false; 547 548 trace_rxrpc_tq(call, tq, seq, rxrpc_tq_transmit); 549 550 /* Each transmission of a Tx packet needs a new serial number */ 551 serial = rxrpc_get_next_serials(call->conn, req->n); 552 553 whdr->epoch = htonl(call->conn->proto.epoch); 554 whdr->cid = htonl(call->cid); 555 whdr->callNumber = htonl(call->call_id); 556 whdr->seq = htonl(seq); 557 whdr->serial = htonl(serial); 558 whdr->type = RXRPC_PACKET_TYPE_DATA; 559 whdr->flags = 0; 560 whdr->userStatus = 0; 561 whdr->securityIndex = call->security_ix; 562 whdr->_rsvd = 0; 563 whdr->serviceId = htons(call->conn->service_id); 564 565 call->tx_last_serial = serial + req->n - 1; 566 call->tx_last_sent = req->now; 567 xmit_ts = rxrpc_prepare_txqueue(tq, req); 568 prefetch(tq->next); 569 570 for (int i = 0;;) { 571 int ix = seq & RXRPC_TXQ_MASK; 572 struct rxrpc_txbuf *txb = tq->bufs[seq & RXRPC_TXQ_MASK]; 573 574 _debug("prep[%u] tq=%x q=%x", i, tq->qbase, seq); 575 576 /* Record (re-)transmission for RACK [RFC8985 6.1]. */ 577 if (__test_and_clear_bit(ix, &tq->segment_lost)) 578 call->tx_nr_lost--; 579 if (req->retrans) { 580 __set_bit(ix, &tq->ever_retransmitted); 581 __set_bit(ix, &tq->segment_retransmitted); 582 call->tx_nr_resent++; 583 } else { 584 call->tx_nr_sent++; 585 start_tlp = true; 586 } 587 tq->segment_xmit_ts[ix] = xmit_ts; 588 tq->segment_serial[ix] = serial; 589 if (i + 1 == req->n) 590 /* Only sample the last subpacket in a jumbo. */ 591 __set_bit(ix, &tq->rtt_samples); 592 len += rxrpc_prepare_data_subpacket(call, req, txb, whdr, serial, i); 593 serial++; 594 seq++; 595 i++; 596 if (i >= req->n) 597 break; 598 if (!(seq & RXRPC_TXQ_MASK)) { 599 tq = tq->next; 600 trace_rxrpc_tq(call, tq, seq, rxrpc_tq_transmit_advance); 601 xmit_ts = rxrpc_prepare_txqueue(tq, req); 602 } 603 } 604 605 /* Set timeouts */ 606 if (req->tlp_probe) { 607 /* Sending TLP loss probe [RFC8985 7.3]. */ 608 call->tlp_serial = serial - 1; 609 call->tlp_seq = seq - 1; 610 } else if (start_tlp) { 611 /* Schedule TLP loss probe [RFC8985 7.2]. */ 612 ktime_t pto; 613 614 if (!test_bit(RXRPC_CALL_BEGAN_RX_TIMER, &call->flags)) 615 /* The first packet may take longer to elicit a response. */ 616 pto = NSEC_PER_SEC; 617 else 618 pto = rxrpc_tlp_calc_pto(call, req->now); 619 620 call->rack_timer_mode = RXRPC_CALL_RACKTIMER_TLP_PTO; 621 call->rack_timo_at = ktime_add(req->now, pto); 622 trace_rxrpc_rack_timer(call, pto, false); 623 trace_rxrpc_timer_set(call, pto, rxrpc_timer_trace_rack_tlp_pto); 624 } 625 626 if (!test_and_set_bit(RXRPC_CALL_BEGAN_RX_TIMER, &call->flags)) { 627 ktime_t delay = ms_to_ktime(READ_ONCE(call->next_rx_timo)); 628 629 call->expect_rx_by = ktime_add(req->now, delay); 630 trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_expect_rx); 631 } 632 633 rxrpc_set_keepalive(call, req->now); 634 page_frag_free(whdr); 635 return len; 636 } 637 638 /* 639 * Send one or more packets through the transport endpoint 640 */ 641 void rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_send_data_req *req) 642 { 643 struct rxrpc_wire_header *whdr; 644 struct rxrpc_connection *conn = call->conn; 645 enum rxrpc_tx_point frag; 646 struct rxrpc_txqueue *tq = req->tq; 647 struct rxrpc_txbuf *txb; 648 struct msghdr msg; 649 rxrpc_seq_t seq = req->seq; 650 size_t len = sizeof(*whdr); 651 bool new_call = test_bit(RXRPC_CALL_BEGAN_RX_TIMER, &call->flags); 652 int ret, stat_ix; 653 654 _enter("%x,%x-%x", tq->qbase, seq, seq + req->n - 1); 655 656 whdr = page_frag_alloc(&call->local->tx_alloc, sizeof(*whdr), GFP_NOFS); 657 if (!whdr) 658 return; /* Drop the packet if no memory. */ 659 660 call->local->kvec[0].iov_base = whdr; 661 call->local->kvec[0].iov_len = sizeof(*whdr); 662 663 stat_ix = umin(req->n, ARRAY_SIZE(call->rxnet->stat_tx_jumbo)) - 1; 664 atomic_inc(&call->rxnet->stat_tx_jumbo[stat_ix]); 665 666 len += rxrpc_prepare_data_packet(call, req, whdr); 667 txb = tq->bufs[seq & RXRPC_TXQ_MASK]; 668 669 iov_iter_kvec(&msg.msg_iter, WRITE, call->local->kvec, 1 + req->n, len); 670 671 msg.msg_name = &call->peer->srx.transport; 672 msg.msg_namelen = call->peer->srx.transport_len; 673 msg.msg_control = NULL; 674 msg.msg_controllen = 0; 675 msg.msg_flags = MSG_SPLICE_PAGES; 676 677 /* Send the packet with the don't fragment bit set unless we think it's 678 * too big or if this is a retransmission. 679 */ 680 if (seq == call->tx_transmitted + 1 && 681 len >= sizeof(struct rxrpc_wire_header) + call->peer->max_data) { 682 rxrpc_local_dont_fragment(conn->local, false); 683 frag = rxrpc_tx_point_call_data_frag; 684 } else { 685 rxrpc_local_dont_fragment(conn->local, true); 686 frag = rxrpc_tx_point_call_data_nofrag; 687 } 688 689 /* Track what we've attempted to transmit at least once so that the 690 * retransmission algorithm doesn't try to resend what we haven't sent 691 * yet. 692 */ 693 if (seq == call->tx_transmitted + 1) 694 call->tx_transmitted = seq + req->n - 1; 695 696 if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) { 697 static int lose; 698 699 if ((lose++ & 7) == 7) { 700 ret = 0; 701 trace_rxrpc_tx_data(call, txb->seq, txb->serial, txb->flags, 702 rxrpc_txdata_inject_loss); 703 rxrpc_peer_mark_tx(conn->peer); 704 goto done; 705 } 706 } 707 708 /* send the packet by UDP 709 * - returns -EMSGSIZE if UDP would have to fragment the packet 710 * to go out of the interface 711 * - in which case, we'll have processed the ICMP error 712 * message and update the peer record 713 */ 714 rxrpc_inc_stat(call->rxnet, stat_tx_data_send); 715 ret = do_udp_sendmsg(conn->local->socket, &msg, len); 716 rxrpc_peer_mark_tx(conn->peer); 717 718 if (ret == -EMSGSIZE) { 719 rxrpc_inc_stat(call->rxnet, stat_tx_data_send_msgsize); 720 trace_rxrpc_tx_packet(call->debug_id, whdr, frag); 721 ret = 0; 722 } else if (ret < 0) { 723 rxrpc_inc_stat(call->rxnet, stat_tx_data_send_fail); 724 trace_rxrpc_tx_fail(call->debug_id, txb->serial, ret, frag); 725 } else { 726 trace_rxrpc_tx_packet(call->debug_id, whdr, frag); 727 } 728 729 rxrpc_tx_backoff(call, ret); 730 731 if (ret < 0) { 732 /* Cancel the call if the initial transmission fails or if we 733 * hit due to network routing issues that aren't going away 734 * anytime soon. The layer above can arrange the 735 * retransmission. 736 */ 737 if (new_call || 738 ret == -ENETUNREACH || 739 ret == -EHOSTUNREACH || 740 ret == -ECONNREFUSED) 741 rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, 742 RX_USER_ABORT, ret); 743 } 744 745 done: 746 _leave(" = %d [%u]", ret, call->peer->max_data); 747 } 748 749 /* 750 * Transmit a connection-level abort. 751 */ 752 void rxrpc_send_conn_abort(struct rxrpc_connection *conn) 753 { 754 struct rxrpc_wire_header whdr; 755 struct msghdr msg; 756 struct kvec iov[2]; 757 __be32 word; 758 size_t len; 759 u32 serial; 760 int ret; 761 762 msg.msg_name = &conn->peer->srx.transport; 763 msg.msg_namelen = conn->peer->srx.transport_len; 764 msg.msg_control = NULL; 765 msg.msg_controllen = 0; 766 msg.msg_flags = 0; 767 768 whdr.epoch = htonl(conn->proto.epoch); 769 whdr.cid = htonl(conn->proto.cid); 770 whdr.callNumber = 0; 771 whdr.seq = 0; 772 whdr.type = RXRPC_PACKET_TYPE_ABORT; 773 whdr.flags = conn->out_clientflag; 774 whdr.userStatus = 0; 775 whdr.securityIndex = conn->security_ix; 776 whdr._rsvd = 0; 777 whdr.serviceId = htons(conn->service_id); 778 779 word = htonl(conn->abort_code); 780 781 iov[0].iov_base = &whdr; 782 iov[0].iov_len = sizeof(whdr); 783 iov[1].iov_base = &word; 784 iov[1].iov_len = sizeof(word); 785 786 len = iov[0].iov_len + iov[1].iov_len; 787 788 serial = rxrpc_get_next_serial(conn); 789 whdr.serial = htonl(serial); 790 791 iov_iter_kvec(&msg.msg_iter, WRITE, iov, 2, len); 792 ret = do_udp_sendmsg(conn->local->socket, &msg, len); 793 if (ret < 0) { 794 trace_rxrpc_tx_fail(conn->debug_id, serial, ret, 795 rxrpc_tx_point_conn_abort); 796 _debug("sendmsg failed: %d", ret); 797 return; 798 } 799 800 trace_rxrpc_tx_packet(conn->debug_id, &whdr, rxrpc_tx_point_conn_abort); 801 802 rxrpc_peer_mark_tx(conn->peer); 803 } 804 805 /* 806 * Reject a packet through the local endpoint. 807 */ 808 void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb) 809 { 810 struct rxrpc_wire_header whdr; 811 struct sockaddr_rxrpc srx; 812 struct rxrpc_skb_priv *sp = rxrpc_skb(skb); 813 struct msghdr msg; 814 struct kvec iov[2]; 815 size_t size; 816 __be32 code; 817 int ret, ioc; 818 819 if (sp->hdr.type == RXRPC_PACKET_TYPE_ABORT) 820 return; /* Never abort an abort. */ 821 822 rxrpc_see_skb(skb, rxrpc_skb_see_reject); 823 824 iov[0].iov_base = &whdr; 825 iov[0].iov_len = sizeof(whdr); 826 iov[1].iov_base = &code; 827 iov[1].iov_len = sizeof(code); 828 829 msg.msg_name = &srx.transport; 830 msg.msg_control = NULL; 831 msg.msg_controllen = 0; 832 msg.msg_flags = 0; 833 834 whdr = (struct rxrpc_wire_header) { 835 .epoch = htonl(sp->hdr.epoch), 836 .cid = htonl(sp->hdr.cid), 837 .callNumber = htonl(sp->hdr.callNumber), 838 .serviceId = htons(sp->hdr.serviceId), 839 .flags = ~sp->hdr.flags & RXRPC_CLIENT_INITIATED, 840 }; 841 842 switch (skb->mark) { 843 case RXRPC_SKB_MARK_REJECT_BUSY: 844 whdr.type = RXRPC_PACKET_TYPE_BUSY; 845 size = sizeof(whdr); 846 ioc = 1; 847 break; 848 case RXRPC_SKB_MARK_REJECT_CONN_ABORT: 849 whdr.callNumber = 0; 850 fallthrough; 851 case RXRPC_SKB_MARK_REJECT_ABORT: 852 whdr.type = RXRPC_PACKET_TYPE_ABORT; 853 code = htonl(skb->priority); 854 size = sizeof(whdr) + sizeof(code); 855 ioc = 2; 856 break; 857 default: 858 return; 859 } 860 861 if (rxrpc_extract_addr_from_skb(&srx, skb) == 0) { 862 msg.msg_namelen = srx.transport_len; 863 864 iov_iter_kvec(&msg.msg_iter, WRITE, iov, ioc, size); 865 ret = do_udp_sendmsg(local->socket, &msg, size); 866 if (ret < 0) 867 trace_rxrpc_tx_fail(local->debug_id, 0, ret, 868 rxrpc_tx_point_reject); 869 else 870 trace_rxrpc_tx_packet(local->debug_id, &whdr, 871 rxrpc_tx_point_reject); 872 } 873 } 874 875 /* 876 * Send a VERSION reply to a peer as a keepalive. 877 */ 878 void rxrpc_send_keepalive(struct rxrpc_peer *peer) 879 { 880 struct rxrpc_wire_header whdr; 881 struct msghdr msg; 882 struct kvec iov[2]; 883 size_t len; 884 int ret; 885 886 _enter(""); 887 888 msg.msg_name = &peer->srx.transport; 889 msg.msg_namelen = peer->srx.transport_len; 890 msg.msg_control = NULL; 891 msg.msg_controllen = 0; 892 msg.msg_flags = 0; 893 894 whdr.epoch = htonl(peer->local->rxnet->epoch); 895 whdr.cid = 0; 896 whdr.callNumber = 0; 897 whdr.seq = 0; 898 whdr.serial = 0; 899 whdr.type = RXRPC_PACKET_TYPE_VERSION; /* Not client-initiated */ 900 whdr.flags = RXRPC_LAST_PACKET; 901 whdr.userStatus = 0; 902 whdr.securityIndex = 0; 903 whdr._rsvd = 0; 904 whdr.serviceId = 0; 905 906 iov[0].iov_base = &whdr; 907 iov[0].iov_len = sizeof(whdr); 908 iov[1].iov_base = (char *)rxrpc_keepalive_string; 909 iov[1].iov_len = sizeof(rxrpc_keepalive_string); 910 911 len = iov[0].iov_len + iov[1].iov_len; 912 913 iov_iter_kvec(&msg.msg_iter, WRITE, iov, 2, len); 914 ret = do_udp_sendmsg(peer->local->socket, &msg, len); 915 if (ret < 0) 916 trace_rxrpc_tx_fail(peer->debug_id, 0, ret, 917 rxrpc_tx_point_version_keepalive); 918 else 919 trace_rxrpc_tx_packet(peer->debug_id, &whdr, 920 rxrpc_tx_point_version_keepalive); 921 922 rxrpc_peer_mark_tx(peer); 923 _leave(""); 924 } 925 926 /* 927 * Send a RESPONSE message. 928 */ 929 void rxrpc_send_response(struct rxrpc_connection *conn, struct sk_buff *response) 930 { 931 struct rxrpc_skb_priv *sp = rxrpc_skb(response); 932 struct scatterlist sg[16]; 933 struct bio_vec *bvec = conn->local->bvec; 934 struct msghdr msg; 935 size_t len = sp->resp.len; 936 __be32 wserial; 937 u32 serial = 0; 938 int ret, nr_sg; 939 940 _enter("C=%x,%x", conn->debug_id, sp->resp.challenge_serial); 941 942 sg_init_table(sg, ARRAY_SIZE(sg)); 943 ret = skb_to_sgvec(response, sg, 0, len); 944 if (ret < 0) 945 goto fail; 946 nr_sg = ret; 947 ret = -EIO; 948 if (WARN_ON_ONCE(nr_sg > ARRAY_SIZE(conn->local->bvec))) 949 goto fail; 950 951 for (int i = 0; i < nr_sg; i++) 952 bvec_set_page(&bvec[i], sg_page(&sg[i]), sg[i].length, sg[i].offset); 953 954 iov_iter_bvec(&msg.msg_iter, WRITE, bvec, nr_sg, len); 955 956 msg.msg_name = &conn->peer->srx.transport; 957 msg.msg_namelen = conn->peer->srx.transport_len; 958 msg.msg_control = NULL; 959 msg.msg_controllen = 0; 960 msg.msg_flags = MSG_SPLICE_PAGES; 961 962 serial = rxrpc_get_next_serials(conn, 1); 963 wserial = htonl(serial); 964 965 trace_rxrpc_tx_response(conn, serial, sp); 966 967 ret = skb_store_bits(response, offsetof(struct rxrpc_wire_header, serial), 968 &wserial, sizeof(wserial)); 969 if (ret < 0) 970 goto fail; 971 972 rxrpc_local_dont_fragment(conn->local, false); 973 974 ret = do_udp_sendmsg(conn->local->socket, &msg, len); 975 if (ret < 0) 976 goto fail; 977 978 rxrpc_peer_mark_tx(conn->peer); 979 return; 980 981 fail: 982 trace_rxrpc_tx_fail(conn->debug_id, serial, ret, 983 rxrpc_tx_point_response); 984 kleave(" = %d", ret); 985 } 986