// SPDX-License-Identifier: GPL-2.0-or-later
/* RxRPC packet transmission
 *
 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/net.h>
#include <linux/gfp.h>
#include <linux/skbuff.h>
#include <linux/export.h>
#include <net/sock.h>
#include <net/af_rxrpc.h>
#include <net/udp.h>
#include "ar-internal.h"

extern int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len);

ssize_t do_udp_sendmsg(struct socket *socket, struct msghdr *msg, size_t len)
{
	struct sockaddr *sa = msg->msg_name;
	struct sock *sk = socket->sk;

	if (IS_ENABLED(CONFIG_AF_RXRPC_IPV6)) {
		if (sa->sa_family == AF_INET6) {
			if (sk->sk_family != AF_INET6) {
				pr_warn("AF_INET6 address on AF_INET socket\n");
				return -ENOPROTOOPT;
			}
			return udpv6_sendmsg(sk, msg, len);
		}
	}
	return udp_sendmsg(sk, msg, len);
}

struct rxrpc_abort_buffer {
	struct rxrpc_wire_header whdr;
	__be32 abort_code;
};

static const char rxrpc_keepalive_string[] = "";

/*
 * Increase Tx backoff on transmission failure and clear it on success.
 */
static void rxrpc_tx_backoff(struct rxrpc_call *call, int ret)
{
	if (ret < 0) {
		if (call->tx_backoff < 1000)
			call->tx_backoff += 100;
	} else {
		call->tx_backoff = 0;
	}
}

/*
 * Arrange for a keepalive ping a certain time after we last transmitted.  This
 * lets the far side know we're still interested in this call and helps keep
 * the route through any intervening firewall open.
 *
 * Receiving a response to the ping will prevent the ->expect_rx_by timer from
 * expiring.
 */
static void rxrpc_set_keepalive(struct rxrpc_call *call, ktime_t now)
{
	ktime_t delay = ms_to_ktime(READ_ONCE(call->next_rx_timo) / 6);

	call->keepalive_at = ktime_add(ktime_get_real(), delay);
	trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_keepalive);
}

/*
 * Allocate transmission buffers for an ACK and attach them to local->kvec[].
 */
static int rxrpc_alloc_ack(struct rxrpc_call *call, size_t sack_size)
{
	struct rxrpc_wire_header *whdr;
	struct rxrpc_acktrailer *trailer;
	struct rxrpc_ackpacket *ack;
	struct kvec *kv = call->local->kvec;
	gfp_t gfp = rcu_read_lock_held() ? GFP_ATOMIC | __GFP_NOWARN : GFP_NOFS;
	void *buf, *buf2 = NULL;
	u8 *filler;

	buf = page_frag_alloc(&call->local->tx_alloc,
			      sizeof(*whdr) + sizeof(*ack) + 1 + 3 + sizeof(*trailer), gfp);
	if (!buf)
		return -ENOMEM;

	if (sack_size) {
		buf2 = page_frag_alloc(&call->local->tx_alloc, sack_size, gfp);
		if (!buf2) {
			page_frag_free(buf);
			return -ENOMEM;
		}
	}

	whdr = buf;
	ack = buf + sizeof(*whdr);
	filler = buf + sizeof(*whdr) + sizeof(*ack) + 1;
	trailer = buf + sizeof(*whdr) + sizeof(*ack) + 1 + 3;

	kv[0].iov_base = whdr;
	kv[0].iov_len = sizeof(*whdr) + sizeof(*ack);
	kv[1].iov_base = buf2;
	kv[1].iov_len = sack_size;
	kv[2].iov_base = filler;
	kv[2].iov_len = 3 + sizeof(*trailer);
	return 3; /* Number of kvec[] used. */
}

static void rxrpc_free_ack(struct rxrpc_call *call)
{
	page_frag_free(call->local->kvec[0].iov_base);
	if (call->local->kvec[1].iov_base)
		page_frag_free(call->local->kvec[1].iov_base);
}

/*
 * Record the beginning of an RTT probe.
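 *
 * Pick the lowest free probe slot from the avail mask, record the serial
 * number and transmission time in it and then flip the slot to pending so
 * that the matching response can later be paired with it to yield an RTT
 * sample.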
 */
static void rxrpc_begin_rtt_probe(struct rxrpc_call *call, rxrpc_serial_t serial,
				  ktime_t now, enum rxrpc_rtt_tx_trace why)
{
	unsigned long avail = call->rtt_avail;
	int rtt_slot = 9;

	if (!(avail & RXRPC_CALL_RTT_AVAIL_MASK))
		goto no_slot;

	rtt_slot = __ffs(avail & RXRPC_CALL_RTT_AVAIL_MASK);
	if (!test_and_clear_bit(rtt_slot, &call->rtt_avail))
		goto no_slot;

	call->rtt_serial[rtt_slot] = serial;
	call->rtt_sent_at[rtt_slot] = now;
	smp_wmb(); /* Write data before avail bit */
	set_bit(rtt_slot + RXRPC_CALL_RTT_PEND_SHIFT, &call->rtt_avail);

	trace_rxrpc_rtt_tx(call, why, rtt_slot, serial);
	return;

no_slot:
	trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_no_slot, rtt_slot, serial);
}

/*
 * Fill out an ACK packet.
 */
static int rxrpc_fill_out_ack(struct rxrpc_call *call, int nr_kv, u8 ack_reason,
			      rxrpc_serial_t serial_to_ack, rxrpc_serial_t *_ack_serial)
{
	struct kvec *kv = call->local->kvec;
	struct rxrpc_wire_header *whdr = kv[0].iov_base;
	struct rxrpc_acktrailer *trailer = kv[2].iov_base + 3;
	struct rxrpc_ackpacket *ack = (struct rxrpc_ackpacket *)(whdr + 1);
	unsigned int qsize, sack, wrap, to, max_mtu, if_mtu;
	rxrpc_seq_t window, wtop;
	ktime_t now = ktime_get_real();
	int rsize;
	u8 *filler = kv[2].iov_base;
	u8 *sackp = kv[1].iov_base;

	rxrpc_inc_stat(call->rxnet, stat_tx_ack_fill);

	window = call->ackr_window;
	wtop = call->ackr_wtop;
	sack = call->ackr_sack_base % RXRPC_SACK_SIZE;

	*_ack_serial = rxrpc_get_next_serial(call->conn);

	whdr->epoch = htonl(call->conn->proto.epoch);
	whdr->cid = htonl(call->cid);
	whdr->callNumber = htonl(call->call_id);
	whdr->serial = htonl(*_ack_serial);
	whdr->seq = 0;
	whdr->type = RXRPC_PACKET_TYPE_ACK;
	whdr->flags = call->conn->out_clientflag | RXRPC_SLOW_START_OK;
	whdr->userStatus = 0;
	whdr->securityIndex = call->security_ix;
	whdr->_rsvd = 0;
	whdr->serviceId = htons(call->dest_srx.srx_service);

	ack->bufferSpace = 0;
	ack->maxSkew = 0;
	ack->firstPacket = htonl(window);
	ack->previousPacket = htonl(call->rx_highest_seq);
	ack->serial = htonl(serial_to_ack);
	ack->reason = ack_reason;
	ack->nAcks = wtop - window;
	filler[0] = 0;
	filler[1] = 0;
	filler[2] = 0;

	if (ack_reason == RXRPC_ACK_PING)
		whdr->flags |= RXRPC_REQUEST_ACK;

	if (after(wtop, window)) {
		kv[1].iov_len = ack->nAcks;

		wrap = RXRPC_SACK_SIZE - sack;
		to = umin(ack->nAcks, RXRPC_SACK_SIZE);

		if (sack + ack->nAcks <= RXRPC_SACK_SIZE) {
			memcpy(sackp, call->ackr_sack_table + sack, ack->nAcks);
		} else {
			memcpy(sackp, call->ackr_sack_table + sack, wrap);
			memcpy(sackp + wrap, call->ackr_sack_table, to - wrap);
		}
	} else if (before(wtop, window)) {
		pr_warn("ack window backward %x %x", window, wtop);
	} else if (ack->reason == RXRPC_ACK_DELAY) {
		ack->reason = RXRPC_ACK_IDLE;
	}

	qsize = (window - 1) - call->rx_consumed;
	rsize = max_t(int, call->rx_winsize - qsize, 0);

	if_mtu = call->peer->if_mtu - call->peer->hdrsize;
	if (call->peer->ackr_adv_pmtud) {
		max_mtu = umax(call->peer->max_data, rxrpc_rx_mtu);
	} else {
		if_mtu = umin(if_mtu, 1444);
		max_mtu = if_mtu;
	}

	trailer->maxMTU = htonl(max_mtu);
	trailer->ifMTU = htonl(if_mtu);
	trailer->rwind = htonl(rsize);
	trailer->jumbo_max = 0; /* Advertise pmtu discovery */
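
	/* A ping ACK doubles as an RTT probe: note when it went out so that
	 * the ping-response can be timed against it.
	 */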
	if (ack_reason == RXRPC_ACK_PING)
		rxrpc_begin_rtt_probe(call, *_ack_serial, now, rxrpc_rtt_tx_ping);
	if (whdr->flags & RXRPC_REQUEST_ACK)
		call->rtt_last_req = now;
	rxrpc_set_keepalive(call, now);
	return nr_kv;
}

/*
 * Transmit an ACK packet.
 */
static void rxrpc_send_ack_packet(struct rxrpc_call *call, int nr_kv, size_t len,
				  rxrpc_serial_t serial, enum rxrpc_propose_ack_trace why)
{
	struct kvec *kv = call->local->kvec;
	struct rxrpc_wire_header *whdr = kv[0].iov_base;
	struct rxrpc_acktrailer *trailer = kv[2].iov_base + 3;
	struct rxrpc_connection *conn;
	struct rxrpc_ackpacket *ack = (struct rxrpc_ackpacket *)(whdr + 1);
	struct msghdr msg;
	int ret;

	if (test_bit(RXRPC_CALL_DISCONNECTED, &call->flags))
		return;

	conn = call->conn;

	msg.msg_name = &call->peer->srx.transport;
	msg.msg_namelen = call->peer->srx.transport_len;
	msg.msg_control = NULL;
	msg.msg_controllen = 0;
	msg.msg_flags = MSG_SPLICE_PAGES;

	trace_rxrpc_tx_ack(call->debug_id, serial,
			   ntohl(ack->firstPacket),
			   ntohl(ack->serial), ack->reason, ack->nAcks,
			   ntohl(trailer->rwind), why);

	rxrpc_inc_stat(call->rxnet, stat_tx_ack_send);

	iov_iter_kvec(&msg.msg_iter, WRITE, kv, nr_kv, len);
	rxrpc_local_dont_fragment(conn->local, why == rxrpc_propose_ack_ping_for_mtu_probe);

	ret = do_udp_sendmsg(conn->local->socket, &msg, len);
	call->peer->last_tx_at = ktime_get_seconds();
	if (ret < 0) {
		trace_rxrpc_tx_fail(call->debug_id, serial, ret,
				    rxrpc_tx_point_call_ack);
		if (why == rxrpc_propose_ack_ping_for_mtu_probe &&
		    ret == -EMSGSIZE)
			rxrpc_input_probe_for_pmtud(conn, serial, true);
	} else {
		trace_rxrpc_tx_packet(call->debug_id, whdr,
				      rxrpc_tx_point_call_ack);
		if (why == rxrpc_propose_ack_ping_for_mtu_probe) {
			call->peer->pmtud_pending = false;
			call->peer->pmtud_probing = true;
			call->conn->pmtud_probe = serial;
			call->conn->pmtud_call = call->debug_id;
			trace_rxrpc_pmtud_tx(call);
		}
	}
	rxrpc_tx_backoff(call, ret);
}

/*
 * Queue an ACK for immediate transmission.
 */
void rxrpc_send_ACK(struct rxrpc_call *call, u8 ack_reason,
		    rxrpc_serial_t serial_to_ack, enum rxrpc_propose_ack_trace why)
{
	struct kvec *kv = call->local->kvec;
	rxrpc_serial_t ack_serial;
	size_t len;
	int nr_kv;

	if (test_bit(RXRPC_CALL_DISCONNECTED, &call->flags))
		return;

	rxrpc_inc_stat(call->rxnet, stat_tx_acks[ack_reason]);

	nr_kv = rxrpc_alloc_ack(call, call->ackr_wtop - call->ackr_window);
	if (nr_kv < 0) {
		kleave(" = -ENOMEM");
		return;
	}

	nr_kv = rxrpc_fill_out_ack(call, nr_kv, ack_reason, serial_to_ack, &ack_serial);
	len = kv[0].iov_len;
	len += kv[1].iov_len;
	len += kv[2].iov_len;

	/* Extend a path MTU probe ACK.
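	 *
	 * The ACK is padded out to the trial size with zero pages so that the
	 * probe tests whether a packet of pmtud_trial bytes plus the wire
	 * header can traverse the path unfragmented.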
	 */
	if (why == rxrpc_propose_ack_ping_for_mtu_probe) {
		size_t probe_mtu = call->peer->pmtud_trial + sizeof(struct rxrpc_wire_header);

		if (len > probe_mtu)
			goto skip;
		while (len < probe_mtu) {
			size_t part = umin(probe_mtu - len, PAGE_SIZE);

			kv[nr_kv].iov_base = page_address(ZERO_PAGE(0));
			kv[nr_kv].iov_len = part;
			len += part;
			nr_kv++;
		}
	}

	call->ackr_nr_unacked = 0;
	atomic_set(&call->ackr_nr_consumed, 0);
	clear_bit(RXRPC_CALL_RX_IS_IDLE, &call->flags);

	trace_rxrpc_send_ack(call, why, ack_reason, ack_serial);
	rxrpc_send_ack_packet(call, nr_kv, len, ack_serial, why);
skip:
	rxrpc_free_ack(call);
}

/*
 * Send an ACK probe for path MTU discovery.
 */
void rxrpc_send_probe_for_pmtud(struct rxrpc_call *call)
{
	rxrpc_send_ACK(call, RXRPC_ACK_PING, 0,
		       rxrpc_propose_ack_ping_for_mtu_probe);
}

/*
 * Send an ABORT call packet.
 */
int rxrpc_send_abort_packet(struct rxrpc_call *call)
{
	struct rxrpc_connection *conn;
	struct rxrpc_abort_buffer pkt;
	struct msghdr msg;
	struct kvec iov[1];
	rxrpc_serial_t serial;
	int ret;

	/* Don't bother sending aborts for a client call once the server has
	 * hard-ACK'd all of its request data.  After that point, we're not
	 * going to stop the operation proceeding, and whilst we might limit
	 * the reply, it's not worth it if we can send a new call on the same
	 * channel instead, thereby closing off this call.
	 */
	if (rxrpc_is_client_call(call) &&
	    test_bit(RXRPC_CALL_TX_ALL_ACKED, &call->flags))
		return 0;

	if (test_bit(RXRPC_CALL_DISCONNECTED, &call->flags))
		return -ECONNRESET;

	conn = call->conn;

	msg.msg_name = &call->peer->srx.transport;
	msg.msg_namelen = call->peer->srx.transport_len;
	msg.msg_control = NULL;
	msg.msg_controllen = 0;
	msg.msg_flags = 0;

	pkt.whdr.epoch = htonl(conn->proto.epoch);
	pkt.whdr.cid = htonl(call->cid);
	pkt.whdr.callNumber = htonl(call->call_id);
	pkt.whdr.seq = 0;
	pkt.whdr.type = RXRPC_PACKET_TYPE_ABORT;
	pkt.whdr.flags = conn->out_clientflag;
	pkt.whdr.userStatus = 0;
	pkt.whdr.securityIndex = call->security_ix;
	pkt.whdr._rsvd = 0;
	pkt.whdr.serviceId = htons(call->dest_srx.srx_service);
	pkt.abort_code = htonl(call->abort_code);

	iov[0].iov_base = &pkt;
	iov[0].iov_len = sizeof(pkt);

	serial = rxrpc_get_next_serial(conn);
	pkt.whdr.serial = htonl(serial);

	iov_iter_kvec(&msg.msg_iter, WRITE, iov, 1, sizeof(pkt));
	ret = do_udp_sendmsg(conn->local->socket, &msg, sizeof(pkt));
	conn->peer->last_tx_at = ktime_get_seconds();
	if (ret < 0)
		trace_rxrpc_tx_fail(call->debug_id, serial, ret,
				    rxrpc_tx_point_call_abort);
	else
		trace_rxrpc_tx_packet(call->debug_id, &pkt.whdr,
				      rxrpc_tx_point_call_abort);
	rxrpc_tx_backoff(call, ret);
	return ret;
}

/*
 * Prepare a (sub)packet for transmission.
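 *
 * The first subpacket of a jumbo reuses the wire header; each subsequent one
 * is prefixed by a jumbo header kept in the space just before the txbuf data.
 * All bar the last subpacket carry the RXRPC_JUMBO_PACKET flag and exactly
 * RXRPC_JUMBO_DATALEN bytes of data.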
 */
static size_t rxrpc_prepare_data_subpacket(struct rxrpc_call *call,
					   struct rxrpc_send_data_req *req,
					   struct rxrpc_txbuf *txb,
					   struct rxrpc_wire_header *whdr,
					   rxrpc_serial_t serial, int subpkt)
{
	struct rxrpc_jumbo_header *jumbo = txb->data - sizeof(*jumbo);
	enum rxrpc_req_ack_trace why;
	struct rxrpc_connection *conn = call->conn;
	struct kvec *kv = &call->local->kvec[1 + subpkt];
	size_t len = txb->pkt_len;
	bool last;
	u8 flags;

	_enter("%x,%zd", txb->seq, len);

	txb->serial = serial;

	if (test_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags) &&
	    txb->seq == 1)
		whdr->userStatus = RXRPC_USERSTATUS_SERVICE_UPGRADE;

	txb->flags &= ~RXRPC_REQUEST_ACK;
	flags = txb->flags & RXRPC_TXBUF_WIRE_FLAGS;
	last = txb->flags & RXRPC_LAST_PACKET;

	if (subpkt < req->n - 1) {
		len = RXRPC_JUMBO_DATALEN;
		goto dont_set_request_ack;
	}

	/* If our RTT cache needs working on, request an ACK.  Also request
	 * ACKs if a DATA packet appears to have been lost.
	 *
	 * However, we mustn't request an ACK on the last reply packet of a
	 * service call, lest OpenAFS incorrectly send us an ACK with some
	 * soft-ACKs in it and then never follow up with a proper hard ACK.
	 */
	if (last && rxrpc_sending_to_client(txb))
		why = rxrpc_reqack_no_srv_last;
	else if (test_and_clear_bit(RXRPC_CALL_EV_ACK_LOST, &call->events))
		why = rxrpc_reqack_ack_lost;
	else if (txb->flags & RXRPC_TXBUF_RESENT)
		why = rxrpc_reqack_retrans;
	else if (call->cong_ca_state == RXRPC_CA_SLOW_START && call->cong_cwnd <= RXRPC_MIN_CWND)
		why = rxrpc_reqack_slow_start;
	else if (call->tx_winsize <= 2)
		why = rxrpc_reqack_small_txwin;
	else if (call->rtt_count < 3)
		why = rxrpc_reqack_more_rtt;
	else if (ktime_before(ktime_add_ms(call->rtt_last_req, 1000), ktime_get_real()))
		why = rxrpc_reqack_old_rtt;
	else if (!last && !after(READ_ONCE(call->send_top), txb->seq))
		why = rxrpc_reqack_app_stall;
	else
		goto dont_set_request_ack;

	rxrpc_inc_stat(call->rxnet, stat_why_req_ack[why]);
	trace_rxrpc_req_ack(call->debug_id, txb->seq, why);
	if (why != rxrpc_reqack_no_srv_last) {
		flags |= RXRPC_REQUEST_ACK;
		trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_data, -1, serial);
		call->rtt_last_req = req->now;
	}
dont_set_request_ack:

	/* There's a jumbo header prepended to the data if we need it. */
	if (subpkt < req->n - 1)
		flags |= RXRPC_JUMBO_PACKET;
	else
		flags &= ~RXRPC_JUMBO_PACKET;
	if (subpkt == 0) {
		whdr->flags = flags;
		whdr->cksum = txb->cksum;
		kv->iov_base = txb->data;
	} else {
		jumbo->flags = flags;
		jumbo->pad = 0;
		jumbo->cksum = txb->cksum;
		kv->iov_base = jumbo;
		len += sizeof(*jumbo);
	}

	trace_rxrpc_tx_data(call, txb->seq, txb->serial, flags, req->trace);
	kv->iov_len = len;
	return len;
}

/*
 * Prepare a transmission queue object for initial transmission.  Returns the
 * number of microseconds since the transmission queue base timestamp.
 */
static unsigned int rxrpc_prepare_txqueue(struct rxrpc_txqueue *tq,
					  struct rxrpc_send_data_req *req)
{
	if (!tq)
		return 0;
	if (tq->xmit_ts_base == KTIME_MIN) {
		tq->xmit_ts_base = req->now;
		return 0;
	}
	return ktime_to_us(ktime_sub(req->now, tq->xmit_ts_base));
}

/*
 * Prepare a (jumbo) packet for transmission.
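 *
 * A fresh serial number is consumed for every subpacket and per-segment
 * RACK-TLP state is updated as we go, stepping to the next transmission
 * queue whenever the sequence number crosses a queue boundary.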
 */
static size_t rxrpc_prepare_data_packet(struct rxrpc_call *call,
					struct rxrpc_send_data_req *req,
					struct rxrpc_wire_header *whdr)
{
	struct rxrpc_txqueue *tq = req->tq;
	rxrpc_serial_t serial;
	unsigned int xmit_ts;
	rxrpc_seq_t seq = req->seq;
	size_t len = 0;
	bool start_tlp = false;

	trace_rxrpc_tq(call, tq, seq, rxrpc_tq_transmit);

	/* Each transmission of a Tx packet needs a new serial number */
	serial = rxrpc_get_next_serials(call->conn, req->n);

	whdr->epoch = htonl(call->conn->proto.epoch);
	whdr->cid = htonl(call->cid);
	whdr->callNumber = htonl(call->call_id);
	whdr->seq = htonl(seq);
	whdr->serial = htonl(serial);
	whdr->type = RXRPC_PACKET_TYPE_DATA;
	whdr->flags = 0;
	whdr->userStatus = 0;
	whdr->securityIndex = call->security_ix;
	whdr->_rsvd = 0;
	whdr->serviceId = htons(call->conn->service_id);

	call->tx_last_serial = serial + req->n - 1;
	call->tx_last_sent = req->now;
	xmit_ts = rxrpc_prepare_txqueue(tq, req);
	prefetch(tq->next);

	for (int i = 0;;) {
		int ix = seq & RXRPC_TXQ_MASK;
		struct rxrpc_txbuf *txb = tq->bufs[seq & RXRPC_TXQ_MASK];

		_debug("prep[%u] tq=%x q=%x", i, tq->qbase, seq);

		/* Record (re-)transmission for RACK [RFC8985 6.1]. */
		if (__test_and_clear_bit(ix, &tq->segment_lost))
			call->tx_nr_lost--;
		if (req->retrans) {
			__set_bit(ix, &tq->ever_retransmitted);
			__set_bit(ix, &tq->segment_retransmitted);
			call->tx_nr_resent++;
		} else {
			call->tx_nr_sent++;
			start_tlp = true;
		}
		tq->segment_xmit_ts[ix] = xmit_ts;
		tq->segment_serial[ix] = serial;
		if (i + 1 == req->n)
			/* Only sample the last subpacket in a jumbo. */
			__set_bit(ix, &tq->rtt_samples);
		len += rxrpc_prepare_data_subpacket(call, req, txb, whdr, serial, i);
		serial++;
		seq++;
		i++;
		if (i >= req->n)
			break;
		if (!(seq & RXRPC_TXQ_MASK)) {
			tq = tq->next;
			trace_rxrpc_tq(call, tq, seq, rxrpc_tq_transmit_advance);
			xmit_ts = rxrpc_prepare_txqueue(tq, req);
		}
	}

	/* Set timeouts */
	if (req->tlp_probe) {
		/* Sending TLP loss probe [RFC8985 7.3]. */
		call->tlp_serial = serial - 1;
		call->tlp_seq = seq - 1;
	} else if (start_tlp) {
		/* Schedule TLP loss probe [RFC8985 7.2]. */
		ktime_t pto;

		if (!test_bit(RXRPC_CALL_BEGAN_RX_TIMER, &call->flags))
			/* The first packet may take longer to elicit a response.
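			 * Use a fixed 1s probe timeout in that case.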
			 */
			pto = NSEC_PER_SEC;
		else
			pto = rxrpc_tlp_calc_pto(call, req->now);

		call->rack_timer_mode = RXRPC_CALL_RACKTIMER_TLP_PTO;
		call->rack_timo_at = ktime_add(req->now, pto);
		trace_rxrpc_rack_timer(call, pto, false);
		trace_rxrpc_timer_set(call, pto, rxrpc_timer_trace_rack_tlp_pto);
	}

	if (!test_and_set_bit(RXRPC_CALL_BEGAN_RX_TIMER, &call->flags)) {
		ktime_t delay = ms_to_ktime(READ_ONCE(call->next_rx_timo));

		call->expect_rx_by = ktime_add(req->now, delay);
		trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_expect_rx);
	}

	rxrpc_set_keepalive(call, req->now);
	page_frag_free(whdr);
	return len;
}

/*
 * Send one or more packets through the transport endpoint
 */
void rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_send_data_req *req)
{
	struct rxrpc_wire_header *whdr;
	struct rxrpc_connection *conn = call->conn;
	enum rxrpc_tx_point frag;
	struct rxrpc_txqueue *tq = req->tq;
	struct rxrpc_txbuf *txb;
	struct msghdr msg;
	rxrpc_seq_t seq = req->seq;
	size_t len = sizeof(*whdr);
	bool new_call = !test_bit(RXRPC_CALL_BEGAN_RX_TIMER, &call->flags);
	int ret, stat_ix;

	_enter("%x,%x-%x", tq->qbase, seq, seq + req->n - 1);

	whdr = page_frag_alloc(&call->local->tx_alloc, sizeof(*whdr), GFP_NOFS);
	if (!whdr)
		return; /* Drop the packet if no memory. */

	call->local->kvec[0].iov_base = whdr;
	call->local->kvec[0].iov_len = sizeof(*whdr);

	stat_ix = umin(req->n, ARRAY_SIZE(call->rxnet->stat_tx_jumbo)) - 1;
	atomic_inc(&call->rxnet->stat_tx_jumbo[stat_ix]);

	len += rxrpc_prepare_data_packet(call, req, whdr);
	txb = tq->bufs[seq & RXRPC_TXQ_MASK];

	iov_iter_kvec(&msg.msg_iter, WRITE, call->local->kvec, 1 + req->n, len);

	msg.msg_name = &call->peer->srx.transport;
	msg.msg_namelen = call->peer->srx.transport_len;
	msg.msg_control = NULL;
	msg.msg_controllen = 0;
	msg.msg_flags = MSG_SPLICE_PAGES;

	/* Send the packet with the don't fragment bit set unless we think it's
	 * too big or if this is a retransmission.
	 */
	if (seq == call->tx_transmitted + 1 &&
	    len >= sizeof(struct rxrpc_wire_header) + call->peer->max_data) {
		rxrpc_local_dont_fragment(conn->local, false);
		frag = rxrpc_tx_point_call_data_frag;
	} else {
		rxrpc_local_dont_fragment(conn->local, true);
		frag = rxrpc_tx_point_call_data_nofrag;
	}

	/* Track what we've attempted to transmit at least once so that the
	 * retransmission algorithm doesn't try to resend what we haven't sent
	 * yet.
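	 * tx_transmitted holds the highest sequence number handed to the
	 * transport at least once.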
	 */
	if (seq == call->tx_transmitted + 1)
		call->tx_transmitted = seq + req->n - 1;

	if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) {
		static int lose;

		if ((lose++ & 7) == 7) {
			ret = 0;
			trace_rxrpc_tx_data(call, txb->seq, txb->serial, txb->flags,
					    rxrpc_txdata_inject_loss);
			conn->peer->last_tx_at = ktime_get_seconds();
			goto done;
		}
	}

	/* Send the packet by UDP.
	 * - Returns -EMSGSIZE if UDP would have to fragment the packet to go
	 *   out of the interface, in which case we'll have processed the ICMP
	 *   error message and updated the peer record.
	 */
	rxrpc_inc_stat(call->rxnet, stat_tx_data_send);
	ret = do_udp_sendmsg(conn->local->socket, &msg, len);
	conn->peer->last_tx_at = ktime_get_seconds();

	if (ret == -EMSGSIZE) {
		rxrpc_inc_stat(call->rxnet, stat_tx_data_send_msgsize);
		trace_rxrpc_tx_packet(call->debug_id, whdr, frag);
		ret = 0;
	} else if (ret < 0) {
		rxrpc_inc_stat(call->rxnet, stat_tx_data_send_fail);
		trace_rxrpc_tx_fail(call->debug_id, txb->serial, ret, frag);
	} else {
		trace_rxrpc_tx_packet(call->debug_id, whdr, frag);
	}

	rxrpc_tx_backoff(call, ret);

	if (ret < 0) {
		/* Cancel the call if the initial transmission fails,
		 * particularly if that's due to network routing issues that
		 * aren't going away anytime soon.  The layer above can arrange
		 * the retransmission.
		 */
		if (new_call ||
		    ret == -ENETUNREACH ||
		    ret == -EHOSTUNREACH ||
		    ret == -ECONNREFUSED)
			rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR,
						  RX_USER_ABORT, ret);
	}

done:
	_leave(" = %d [%u]", ret, call->peer->max_data);
}

/*
 * Transmit a connection-level abort.
 */
void rxrpc_send_conn_abort(struct rxrpc_connection *conn)
{
	struct rxrpc_wire_header whdr;
	struct msghdr msg;
	struct kvec iov[2];
	__be32 word;
	size_t len;
	u32 serial;
	int ret;

	msg.msg_name = &conn->peer->srx.transport;
	msg.msg_namelen = conn->peer->srx.transport_len;
	msg.msg_control = NULL;
	msg.msg_controllen = 0;
	msg.msg_flags = 0;

	whdr.epoch = htonl(conn->proto.epoch);
	whdr.cid = htonl(conn->proto.cid);
	whdr.callNumber = 0;
	whdr.seq = 0;
	whdr.type = RXRPC_PACKET_TYPE_ABORT;
	whdr.flags = conn->out_clientflag;
	whdr.userStatus = 0;
	whdr.securityIndex = conn->security_ix;
	whdr._rsvd = 0;
	whdr.serviceId = htons(conn->service_id);

	word = htonl(conn->abort_code);

	iov[0].iov_base = &whdr;
	iov[0].iov_len = sizeof(whdr);
	iov[1].iov_base = &word;
	iov[1].iov_len = sizeof(word);

	len = iov[0].iov_len + iov[1].iov_len;

	serial = rxrpc_get_next_serial(conn);
	whdr.serial = htonl(serial);

	iov_iter_kvec(&msg.msg_iter, WRITE, iov, 2, len);
	ret = do_udp_sendmsg(conn->local->socket, &msg, len);
	if (ret < 0) {
		trace_rxrpc_tx_fail(conn->debug_id, serial, ret,
				    rxrpc_tx_point_conn_abort);
		_debug("sendmsg failed: %d", ret);
		return;
	}

	trace_rxrpc_tx_packet(conn->debug_id, &whdr, rxrpc_tx_point_conn_abort);

	conn->peer->last_tx_at = ktime_get_seconds();
}

/*
 * Reject a packet through the local endpoint.
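 *
 * Depending on the skb mark, this sends either a BUSY packet or an ABORT
 * carrying the code stashed in skb->priority, with the client-initiated flag
 * flipped so that the rejection travels in the opposite direction.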
 */
void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb)
{
	struct rxrpc_wire_header whdr;
	struct sockaddr_rxrpc srx;
	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
	struct msghdr msg;
	struct kvec iov[2];
	size_t size;
	__be32 code;
	int ret, ioc;

	rxrpc_see_skb(skb, rxrpc_skb_see_reject);

	iov[0].iov_base = &whdr;
	iov[0].iov_len = sizeof(whdr);
	iov[1].iov_base = &code;
	iov[1].iov_len = sizeof(code);

	msg.msg_name = &srx.transport;
	msg.msg_control = NULL;
	msg.msg_controllen = 0;
	msg.msg_flags = 0;

	memset(&whdr, 0, sizeof(whdr));

	switch (skb->mark) {
	case RXRPC_SKB_MARK_REJECT_BUSY:
		whdr.type = RXRPC_PACKET_TYPE_BUSY;
		size = sizeof(whdr);
		ioc = 1;
		break;
	case RXRPC_SKB_MARK_REJECT_ABORT:
		whdr.type = RXRPC_PACKET_TYPE_ABORT;
		code = htonl(skb->priority);
		size = sizeof(whdr) + sizeof(code);
		ioc = 2;
		break;
	default:
		return;
	}

	if (rxrpc_extract_addr_from_skb(&srx, skb) == 0) {
		msg.msg_namelen = srx.transport_len;

		whdr.epoch = htonl(sp->hdr.epoch);
		whdr.cid = htonl(sp->hdr.cid);
		whdr.callNumber = htonl(sp->hdr.callNumber);
		whdr.serviceId = htons(sp->hdr.serviceId);
		whdr.flags = sp->hdr.flags;
		whdr.flags ^= RXRPC_CLIENT_INITIATED;
		whdr.flags &= RXRPC_CLIENT_INITIATED;

		iov_iter_kvec(&msg.msg_iter, WRITE, iov, ioc, size);
		ret = do_udp_sendmsg(local->socket, &msg, size);
		if (ret < 0)
			trace_rxrpc_tx_fail(local->debug_id, 0, ret,
					    rxrpc_tx_point_reject);
		else
			trace_rxrpc_tx_packet(local->debug_id, &whdr,
					      rxrpc_tx_point_reject);
	}
}

/*
 * Send a VERSION reply to a peer as a keepalive.
 */
void rxrpc_send_keepalive(struct rxrpc_peer *peer)
{
	struct rxrpc_wire_header whdr;
	struct msghdr msg;
	struct kvec iov[2];
	size_t len;
	int ret;

	_enter("");

	msg.msg_name = &peer->srx.transport;
	msg.msg_namelen = peer->srx.transport_len;
	msg.msg_control = NULL;
	msg.msg_controllen = 0;
	msg.msg_flags = 0;

	whdr.epoch = htonl(peer->local->rxnet->epoch);
	whdr.cid = 0;
	whdr.callNumber = 0;
	whdr.seq = 0;
	whdr.serial = 0;
	whdr.type = RXRPC_PACKET_TYPE_VERSION; /* Not client-initiated */
	whdr.flags = RXRPC_LAST_PACKET;
	whdr.userStatus = 0;
	whdr.securityIndex = 0;
	whdr._rsvd = 0;
	whdr.serviceId = 0;

	iov[0].iov_base = &whdr;
	iov[0].iov_len = sizeof(whdr);
	iov[1].iov_base = (char *)rxrpc_keepalive_string;
	iov[1].iov_len = sizeof(rxrpc_keepalive_string);

	len = iov[0].iov_len + iov[1].iov_len;

	iov_iter_kvec(&msg.msg_iter, WRITE, iov, 2, len);
	ret = do_udp_sendmsg(peer->local->socket, &msg, len);
	if (ret < 0)
		trace_rxrpc_tx_fail(peer->debug_id, 0, ret,
				    rxrpc_tx_point_version_keepalive);
	else
		trace_rxrpc_tx_packet(peer->debug_id, &whdr,
				      rxrpc_tx_point_version_keepalive);

	peer->last_tx_at = ktime_get_seconds();
	_leave("");
}

/*
 * Send a RESPONSE message.
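 *
 * The response skb is mapped into a bio_vec array and spliced straight into
 * the transport socket; the serial number is allocated last and patched into
 * the wire header held in the skb before it is sent.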
 */
void rxrpc_send_response(struct rxrpc_connection *conn, struct sk_buff *response)
{
	struct rxrpc_skb_priv *sp = rxrpc_skb(response);
	struct scatterlist sg[16];
	struct bio_vec bvec[16];
	struct msghdr msg;
	size_t len = sp->resp.len;
	__be32 wserial;
	u32 serial = 0;
	int ret, nr_sg;

	_enter("C=%x,%x", conn->debug_id, sp->resp.challenge_serial);

	sg_init_table(sg, ARRAY_SIZE(sg));
	ret = skb_to_sgvec(response, sg, 0, len);
	if (ret < 0)
		goto fail;
	nr_sg = ret;

	for (int i = 0; i < nr_sg; i++)
		bvec_set_page(&bvec[i], sg_page(&sg[i]), sg[i].length, sg[i].offset);

	iov_iter_bvec(&msg.msg_iter, WRITE, bvec, nr_sg, len);

	msg.msg_name = &conn->peer->srx.transport;
	msg.msg_namelen = conn->peer->srx.transport_len;
	msg.msg_control = NULL;
	msg.msg_controllen = 0;
	msg.msg_flags = MSG_SPLICE_PAGES;

	serial = rxrpc_get_next_serials(conn, 1);
	wserial = htonl(serial);

	trace_rxrpc_tx_response(conn, serial, sp);

	ret = skb_store_bits(response, offsetof(struct rxrpc_wire_header, serial),
			     &wserial, sizeof(wserial));
	if (ret < 0)
		goto fail;

	rxrpc_local_dont_fragment(conn->local, false);

	ret = do_udp_sendmsg(conn->local->socket, &msg, len);
	if (ret < 0)
		goto fail;

	conn->peer->last_tx_at = ktime_get_seconds();
	return;

fail:
	trace_rxrpc_tx_fail(conn->debug_id, serial, ret,
			    rxrpc_tx_point_response);
	kleave(" = %d", ret);
}