// SPDX-License-Identifier: GPL-2.0-or-later
/* RxRPC packet transmission
 *
 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/net.h>
#include <linux/gfp.h>
#include <linux/skbuff.h>
#include <linux/export.h>
#include <net/sock.h>
#include <net/af_rxrpc.h>
#include <net/udp.h>
#include "ar-internal.h"

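/*
 * Send a packet through the underlying UDP socket, switching to the IPv6
 * transmit path if the destination address is AF_INET6.
 */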
ssize_t do_udp_sendmsg(struct socket *socket, struct msghdr *msg, size_t len)
{
	struct sockaddr *sa = msg->msg_name;
	struct sock *sk = socket->sk;

	if (IS_ENABLED(CONFIG_AF_RXRPC_IPV6)) {
		if (sa->sa_family == AF_INET6) {
			if (sk->sk_family != AF_INET6) {
				pr_warn("AF_INET6 address on AF_INET socket\n");
				return -ENOPROTOOPT;
			}
			return udpv6_sendmsg(sk, msg, len);
		}
	}
	return udp_sendmsg(sk, msg, len);
}

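/*
 * On-the-wire ABORT packet: the common wire header followed by the abort code.
 */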
struct rxrpc_abort_buffer {
	struct rxrpc_wire_header whdr;
	__be32 abort_code;
};

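/* Empty payload carried in a VERSION packet when it is sent as a keepalive. */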
static const char rxrpc_keepalive_string[] = "";

/*
 * Increase Tx backoff on transmission failure and clear it on success.
 */
static void rxrpc_tx_backoff(struct rxrpc_call *call, int ret)
{
	if (ret < 0) {
		if (call->tx_backoff < 1000)
			call->tx_backoff += 100;
	} else {
		call->tx_backoff = 0;
	}
}

/*
 * Arrange for a keepalive ping a certain time after we last transmitted. This
 * lets the far side know we're still interested in this call and helps keep
 * the route through any intervening firewall open.
 *
 * Receiving a response to the ping will prevent the ->expect_rx_by timer from
 * expiring.
 */
static void rxrpc_set_keepalive(struct rxrpc_call *call, ktime_t now)
{
	ktime_t delay = ms_to_ktime(READ_ONCE(call->next_rx_timo) / 6);

	call->keepalive_at = ktime_add(ktime_get_real(), delay);
	trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_keepalive);
}

/*
 * Allocate transmission buffers for an ACK and attach them to local->kv[].
 */
static int rxrpc_alloc_ack(struct rxrpc_call *call, size_t sack_size)
{
	struct rxrpc_wire_header *whdr;
	struct rxrpc_acktrailer *trailer;
	struct rxrpc_ackpacket *ack;
	struct kvec *kv = call->local->kvec;
	gfp_t gfp = rcu_read_lock_held() ? GFP_ATOMIC | __GFP_NOWARN : GFP_NOFS;
	void *buf, *buf2 = NULL;
	u8 *filler;

	buf = page_frag_alloc(&call->local->tx_alloc,
			      sizeof(*whdr) + sizeof(*ack) + 1 + 3 + sizeof(*trailer), gfp);
	if (!buf)
		return -ENOMEM;

	if (sack_size) {
		buf2 = page_frag_alloc(&call->local->tx_alloc, sack_size, gfp);
		if (!buf2) {
			page_frag_free(buf);
			return -ENOMEM;
		}
	}

	whdr = buf;
	ack = buf + sizeof(*whdr);
	filler = buf + sizeof(*whdr) + sizeof(*ack) + 1;
	trailer = buf + sizeof(*whdr) + sizeof(*ack) + 1 + 3;

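	/* kv[0]: header + ACK body, kv[1]: soft-ACK table, kv[2]: padding + trailer. */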
	kv[0].iov_base = whdr;
	kv[0].iov_len = sizeof(*whdr) + sizeof(*ack);
	kv[1].iov_base = buf2;
	kv[1].iov_len = sack_size;
	kv[2].iov_base = filler;
	kv[2].iov_len = 3 + sizeof(*trailer);
	return 3; /* Number of kvec[] used. */
}

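/*
 * Release the transmission buffers allocated for an ACK.
 */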
static void rxrpc_free_ack(struct rxrpc_call *call)
{
	page_frag_free(call->local->kvec[0].iov_base);
	if (call->local->kvec[1].iov_base)
		page_frag_free(call->local->kvec[1].iov_base);
}

/*
 * Record the beginning of an RTT probe.
 */
static void rxrpc_begin_rtt_probe(struct rxrpc_call *call, rxrpc_serial_t serial,
				  ktime_t now, enum rxrpc_rtt_tx_trace why)
{
	unsigned long avail = call->rtt_avail;
	int rtt_slot = 9;

	if (!(avail & RXRPC_CALL_RTT_AVAIL_MASK))
		goto no_slot;

	rtt_slot = __ffs(avail & RXRPC_CALL_RTT_AVAIL_MASK);
	if (!test_and_clear_bit(rtt_slot, &call->rtt_avail))
		goto no_slot;

	call->rtt_serial[rtt_slot] = serial;
	call->rtt_sent_at[rtt_slot] = now;
	smp_wmb(); /* Write data before avail bit */
	set_bit(rtt_slot + RXRPC_CALL_RTT_PEND_SHIFT, &call->rtt_avail);

	trace_rxrpc_rtt_tx(call, why, rtt_slot, serial);
	return;

no_slot:
	trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_no_slot, rtt_slot, serial);
}

/*
 * Fill out an ACK packet.
 */
static int rxrpc_fill_out_ack(struct rxrpc_call *call, int nr_kv, u8 ack_reason,
			      rxrpc_serial_t serial_to_ack, rxrpc_serial_t *_ack_serial)
{
	struct kvec *kv = call->local->kvec;
	struct rxrpc_wire_header *whdr = kv[0].iov_base;
	struct rxrpc_acktrailer *trailer = kv[2].iov_base + 3;
	struct rxrpc_ackpacket *ack = (struct rxrpc_ackpacket *)(whdr + 1);
	unsigned int qsize, sack, wrap, to, max_mtu, if_mtu;
	rxrpc_seq_t window, wtop;
	ktime_t now = ktime_get_real();
	int rsize;
	u8 *filler = kv[2].iov_base;
	u8 *sackp = kv[1].iov_base;

	rxrpc_inc_stat(call->rxnet, stat_tx_ack_fill);

	window = call->ackr_window;
	wtop = call->ackr_wtop;
	sack = call->ackr_sack_base % RXRPC_SACK_SIZE;

	*_ack_serial = rxrpc_get_next_serial(call->conn);

	whdr->epoch = htonl(call->conn->proto.epoch);
	whdr->cid = htonl(call->cid);
	whdr->callNumber = htonl(call->call_id);
	whdr->serial = htonl(*_ack_serial);
	whdr->seq = 0;
	whdr->type = RXRPC_PACKET_TYPE_ACK;
	whdr->flags = call->conn->out_clientflag | RXRPC_SLOW_START_OK;
	whdr->userStatus = 0;
	whdr->securityIndex = call->security_ix;
	whdr->_rsvd = 0;
	whdr->serviceId = htons(call->dest_srx.srx_service);

	ack->bufferSpace = 0;
	ack->maxSkew = 0;
	ack->firstPacket = htonl(window);
	ack->previousPacket = htonl(call->rx_highest_seq);
	ack->serial = htonl(serial_to_ack);
	ack->reason = ack_reason;
	ack->nAcks = wtop - window;
	filler[0] = 0;
	filler[1] = 0;
	filler[2] = 0;

	if (ack_reason == RXRPC_ACK_PING)
		whdr->flags |= RXRPC_REQUEST_ACK;

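	/* If there are packets to soft-ACK, copy the SACK table, allowing for
	 * wrap of the circular buffer.
	 */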
	if (after(wtop, window)) {
		kv[1].iov_len = ack->nAcks;

		wrap = RXRPC_SACK_SIZE - sack;
		to = umin(ack->nAcks, RXRPC_SACK_SIZE);

		if (sack + ack->nAcks <= RXRPC_SACK_SIZE) {
			memcpy(sackp, call->ackr_sack_table + sack, ack->nAcks);
		} else {
			memcpy(sackp, call->ackr_sack_table + sack, wrap);
			memcpy(sackp + wrap, call->ackr_sack_table, to - wrap);
		}
	} else if (before(wtop, window)) {
		pr_warn("ack window backward %x %x", window, wtop);
	} else if (ack->reason == RXRPC_ACK_DELAY) {
		ack->reason = RXRPC_ACK_IDLE;
	}

	qsize = (window - 1) - call->rx_consumed;
	rsize = max_t(int, call->rx_winsize - qsize, 0);

	if_mtu = call->peer->if_mtu - call->peer->hdrsize;
	if (call->peer->ackr_adv_pmtud) {
		max_mtu = umax(call->peer->max_data, rxrpc_rx_mtu);
	} else {
		if_mtu = umin(if_mtu, 1444);
		max_mtu = if_mtu;
	}

	trailer->maxMTU = htonl(max_mtu);
	trailer->ifMTU = htonl(if_mtu);
	trailer->rwind = htonl(rsize);
	trailer->jumbo_max = 0; /* Advertise pmtu discovery */

	if (ack_reason == RXRPC_ACK_PING)
		rxrpc_begin_rtt_probe(call, *_ack_serial, now, rxrpc_rtt_tx_ping);
	if (whdr->flags & RXRPC_REQUEST_ACK)
		call->rtt_last_req = now;
	rxrpc_set_keepalive(call, now);
	return nr_kv;
}

/*
 * Transmit an ACK packet.
 */
static void rxrpc_send_ack_packet(struct rxrpc_call *call, int nr_kv, size_t len,
				  rxrpc_serial_t serial, enum rxrpc_propose_ack_trace why)
{
	struct kvec *kv = call->local->kvec;
	struct rxrpc_wire_header *whdr = kv[0].iov_base;
	struct rxrpc_acktrailer *trailer = kv[2].iov_base + 3;
	struct rxrpc_connection *conn;
	struct rxrpc_ackpacket *ack = (struct rxrpc_ackpacket *)(whdr + 1);
	struct msghdr msg;
	int ret;

	if (test_bit(RXRPC_CALL_DISCONNECTED, &call->flags))
		return;

	conn = call->conn;

	msg.msg_name = &call->peer->srx.transport;
	msg.msg_namelen = call->peer->srx.transport_len;
	msg.msg_control = NULL;
	msg.msg_controllen = 0;
	msg.msg_flags = MSG_SPLICE_PAGES;

	trace_rxrpc_tx_ack(call->debug_id, serial,
			   ntohl(ack->firstPacket),
			   ntohl(ack->serial), ack->reason, ack->nAcks,
			   ntohl(trailer->rwind), why);

	rxrpc_inc_stat(call->rxnet, stat_tx_ack_send);

	iov_iter_kvec(&msg.msg_iter, WRITE, kv, nr_kv, len);
	rxrpc_local_dont_fragment(conn->local, why == rxrpc_propose_ack_ping_for_mtu_probe);

	ret = do_udp_sendmsg(conn->local->socket, &msg, len);
	rxrpc_peer_mark_tx(call->peer);
	if (ret < 0) {
		trace_rxrpc_tx_fail(call->debug_id, serial, ret,
				    rxrpc_tx_point_call_ack);
		if (why == rxrpc_propose_ack_ping_for_mtu_probe &&
		    ret == -EMSGSIZE)
			rxrpc_input_probe_for_pmtud(conn, serial, true);
	} else {
		trace_rxrpc_tx_packet(call->debug_id, whdr,
				      rxrpc_tx_point_call_ack);
		if (why == rxrpc_propose_ack_ping_for_mtu_probe) {
			call->peer->pmtud_pending = false;
			call->peer->pmtud_probing = true;
			call->conn->pmtud_probe = serial;
			call->conn->pmtud_call = call->debug_id;
			trace_rxrpc_pmtud_tx(call);
		}
	}
	rxrpc_tx_backoff(call, ret);
}

/*
 * Queue an ACK for immediate transmission.
 */
void rxrpc_send_ACK(struct rxrpc_call *call, u8 ack_reason,
		    rxrpc_serial_t serial_to_ack, enum rxrpc_propose_ack_trace why)
{
	struct kvec *kv = call->local->kvec;
	rxrpc_serial_t ack_serial;
	size_t len;
	int nr_kv;

	if (test_bit(RXRPC_CALL_DISCONNECTED, &call->flags))
		return;

	rxrpc_inc_stat(call->rxnet, stat_tx_acks[ack_reason]);

	nr_kv = rxrpc_alloc_ack(call, call->ackr_wtop - call->ackr_window);
	if (nr_kv < 0) {
		kleave(" = -ENOMEM");
		return;
	}

	nr_kv = rxrpc_fill_out_ack(call, nr_kv, ack_reason, serial_to_ack, &ack_serial);
	len = kv[0].iov_len;
	len += kv[1].iov_len;
	len += kv[2].iov_len;

	/* Extend a path MTU probe ACK. */
	if (why == rxrpc_propose_ack_ping_for_mtu_probe) {
		size_t probe_mtu = call->peer->pmtud_trial + sizeof(struct rxrpc_wire_header);

		if (len > probe_mtu)
			goto skip;
		while (len < probe_mtu) {
			size_t part = umin(probe_mtu - len, PAGE_SIZE);

			kv[nr_kv].iov_base = page_address(ZERO_PAGE(0));
			kv[nr_kv].iov_len = part;
			len += part;
			nr_kv++;
		}
	}

	call->ackr_nr_unacked = 0;
	atomic_set(&call->ackr_nr_consumed, 0);
	clear_bit(RXRPC_CALL_RX_IS_IDLE, &call->flags);

	trace_rxrpc_send_ack(call, why, ack_reason, ack_serial);
	rxrpc_send_ack_packet(call, nr_kv, len, ack_serial, why);
skip:
	rxrpc_free_ack(call);
}

/*
 * Send an ACK probe for path MTU discovery.
 */
void rxrpc_send_probe_for_pmtud(struct rxrpc_call *call)
{
	rxrpc_send_ACK(call, RXRPC_ACK_PING, 0,
		       rxrpc_propose_ack_ping_for_mtu_probe);
}

/*
 * Send an ABORT call packet.
 */
int rxrpc_send_abort_packet(struct rxrpc_call *call)
{
	struct rxrpc_connection *conn;
	struct rxrpc_abort_buffer pkt;
	struct msghdr msg;
	struct kvec iov[1];
	rxrpc_serial_t serial;
	int ret;

	/* Don't bother sending aborts for a client call once the server has
	 * hard-ACK'd all of its request data. After that point, we're not
	 * going to stop the operation proceeding, and whilst we might limit
	 * the reply, it's not worth it if we can send a new call on the same
	 * channel instead, thereby closing off this call.
	 */
	if (rxrpc_is_client_call(call) &&
	    test_bit(RXRPC_CALL_TX_ALL_ACKED, &call->flags))
		return 0;

	if (test_bit(RXRPC_CALL_DISCONNECTED, &call->flags))
		return -ECONNRESET;

	conn = call->conn;

	msg.msg_name = &call->peer->srx.transport;
	msg.msg_namelen = call->peer->srx.transport_len;
	msg.msg_control = NULL;
	msg.msg_controllen = 0;
	msg.msg_flags = 0;

	pkt.whdr.epoch = htonl(conn->proto.epoch);
	pkt.whdr.cid = htonl(call->cid);
	pkt.whdr.callNumber = htonl(call->call_id);
	pkt.whdr.seq = 0;
	pkt.whdr.type = RXRPC_PACKET_TYPE_ABORT;
	pkt.whdr.flags = conn->out_clientflag;
	pkt.whdr.userStatus = 0;
	pkt.whdr.securityIndex = call->security_ix;
	pkt.whdr._rsvd = 0;
	pkt.whdr.serviceId = htons(call->dest_srx.srx_service);
	pkt.abort_code = htonl(call->abort_code);

	iov[0].iov_base = &pkt;
	iov[0].iov_len = sizeof(pkt);

	serial = rxrpc_get_next_serial(conn);
	pkt.whdr.serial = htonl(serial);

	iov_iter_kvec(&msg.msg_iter, WRITE, iov, 1, sizeof(pkt));
	ret = do_udp_sendmsg(conn->local->socket, &msg, sizeof(pkt));
	rxrpc_peer_mark_tx(conn->peer);
	if (ret < 0)
		trace_rxrpc_tx_fail(call->debug_id, serial, ret,
				    rxrpc_tx_point_call_abort);
	else
		trace_rxrpc_tx_packet(call->debug_id, &pkt.whdr,
				      rxrpc_tx_point_call_abort);
	rxrpc_tx_backoff(call, ret);
	return ret;
}

/*
 * Prepare a (sub)packet for transmission.
 */
static size_t rxrpc_prepare_data_subpacket(struct rxrpc_call *call,
					   struct rxrpc_send_data_req *req,
					   struct rxrpc_txbuf *txb,
					   struct rxrpc_wire_header *whdr,
					   rxrpc_serial_t serial, int subpkt)
{
	struct rxrpc_jumbo_header *jumbo = txb->data - sizeof(*jumbo);
	enum rxrpc_req_ack_trace why;
	struct rxrpc_connection *conn = call->conn;
	struct kvec *kv = &call->local->kvec[1 + subpkt];
	size_t len = txb->pkt_len;
	bool last;
	u8 flags;

	_enter("%x,%zd", txb->seq, len);

	txb->serial = serial;

	if (test_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags) &&
	    txb->seq == 1)
		whdr->userStatus = RXRPC_USERSTATUS_SERVICE_UPGRADE;

	txb->flags &= ~RXRPC_REQUEST_ACK;
	flags = txb->flags & RXRPC_TXBUF_WIRE_FLAGS;
	last = txb->flags & RXRPC_LAST_PACKET;

	if (subpkt < req->n - 1) {
		len = RXRPC_JUMBO_DATALEN;
		goto dont_set_request_ack;
	}

	/* If our RTT cache needs working on, request an ACK. Also request
	 * ACKs if a DATA packet appears to have been lost.
	 *
	 * However, we mustn't request an ACK on the last reply packet of a
	 * service call, lest OpenAFS incorrectly send us an ACK with some
	 * soft-ACKs in it and then never follow up with a proper hard ACK.
	 */
	if (last && rxrpc_sending_to_client(txb))
		why = rxrpc_reqack_no_srv_last;
	else if (test_and_clear_bit(RXRPC_CALL_EV_ACK_LOST, &call->events))
		why = rxrpc_reqack_ack_lost;
	else if (txb->flags & RXRPC_TXBUF_RESENT)
		why = rxrpc_reqack_retrans;
	else if (call->cong_ca_state == RXRPC_CA_SLOW_START && call->cong_cwnd <= RXRPC_MIN_CWND)
		why = rxrpc_reqack_slow_start;
	else if (call->tx_winsize <= 2)
		why = rxrpc_reqack_small_txwin;
	else if (call->rtt_count < 3)
		why = rxrpc_reqack_more_rtt;
	else if (ktime_before(ktime_add_ms(call->rtt_last_req, 1000), ktime_get_real()))
		why = rxrpc_reqack_old_rtt;
	else if (!last && !after(READ_ONCE(call->send_top), txb->seq))
		why = rxrpc_reqack_app_stall;
	else if (call->tx_winsize <= (2 * req->n) || call->cong_cwnd <= (2 * req->n))
		why = rxrpc_reqack_jumbo_win;
	else
		goto dont_set_request_ack;

	rxrpc_inc_stat(call->rxnet, stat_why_req_ack[why]);
	trace_rxrpc_req_ack(call->debug_id, txb->seq, why);
	if (why != rxrpc_reqack_no_srv_last) {
		flags |= RXRPC_REQUEST_ACK;
		trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_data, -1, serial);
		call->rtt_last_req = req->now;
	}
dont_set_request_ack:

	/* There's a jumbo header prepended to the data if we need it. */
	if (subpkt < req->n - 1)
		flags |= RXRPC_JUMBO_PACKET;
	else
		flags &= ~RXRPC_JUMBO_PACKET;
	if (subpkt == 0) {
		whdr->flags = flags;
		whdr->cksum = txb->cksum;
		kv->iov_base = txb->data;
	} else {
		jumbo->flags = flags;
		jumbo->pad = 0;
		jumbo->cksum = txb->cksum;
		kv->iov_base = jumbo;
		len += sizeof(*jumbo);
	}

	trace_rxrpc_tx_data(call, txb->seq, txb->serial, flags, req->trace);
	kv->iov_len = len;
	return len;
}

/*
 * Prepare a transmission queue object for initial transmission. Returns the
 * number of microseconds since the transmission queue base timestamp.
 */
static unsigned int rxrpc_prepare_txqueue(struct rxrpc_txqueue *tq,
					  struct rxrpc_send_data_req *req)
{
	if (!tq)
		return 0;
	if (tq->xmit_ts_base == KTIME_MIN) {
		tq->xmit_ts_base = req->now;
		return 0;
	}
	return ktime_to_us(ktime_sub(req->now, tq->xmit_ts_base));
}

/*
 * Prepare a (jumbo) packet for transmission.
 */
static size_t rxrpc_prepare_data_packet(struct rxrpc_call *call,
					struct rxrpc_send_data_req *req,
					struct rxrpc_wire_header *whdr)
{
	struct rxrpc_txqueue *tq = req->tq;
	rxrpc_serial_t serial;
	unsigned int xmit_ts;
	rxrpc_seq_t seq = req->seq;
	size_t len = 0;
	bool start_tlp = false;

	trace_rxrpc_tq(call, tq, seq, rxrpc_tq_transmit);

	/* Each transmission of a Tx packet needs a new serial number */
	serial = rxrpc_get_next_serials(call->conn, req->n);

	whdr->epoch = htonl(call->conn->proto.epoch);
	whdr->cid = htonl(call->cid);
	whdr->callNumber = htonl(call->call_id);
	whdr->seq = htonl(seq);
	whdr->serial = htonl(serial);
	whdr->type = RXRPC_PACKET_TYPE_DATA;
	whdr->flags = 0;
	whdr->userStatus = 0;
	whdr->securityIndex = call->security_ix;
	whdr->_rsvd = 0;
	whdr->serviceId = htons(call->conn->service_id);

	call->tx_last_serial = serial + req->n - 1;
	call->tx_last_sent = req->now;
	xmit_ts = rxrpc_prepare_txqueue(tq, req);
	prefetch(tq->next);

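	/* Prepare each subpacket in turn, stepping to the next Tx queue when
	 * the sequence number crosses into it.
	 */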
	for (int i = 0;;) {
		int ix = seq & RXRPC_TXQ_MASK;
		struct rxrpc_txbuf *txb = tq->bufs[seq & RXRPC_TXQ_MASK];

		_debug("prep[%u] tq=%x q=%x", i, tq->qbase, seq);

		/* Record (re-)transmission for RACK [RFC8985 6.1]. */
		if (__test_and_clear_bit(ix, &tq->segment_lost))
			call->tx_nr_lost--;
		if (req->retrans) {
			__set_bit(ix, &tq->ever_retransmitted);
			__set_bit(ix, &tq->segment_retransmitted);
			call->tx_nr_resent++;
		} else {
			call->tx_nr_sent++;
			start_tlp = true;
		}
		tq->segment_xmit_ts[ix] = xmit_ts;
		tq->segment_serial[ix] = serial;
		if (i + 1 == req->n)
			/* Only sample the last subpacket in a jumbo. */
			__set_bit(ix, &tq->rtt_samples);
		len += rxrpc_prepare_data_subpacket(call, req, txb, whdr, serial, i);
		serial++;
		seq++;
		i++;
		if (i >= req->n)
			break;
		if (!(seq & RXRPC_TXQ_MASK)) {
			tq = tq->next;
			trace_rxrpc_tq(call, tq, seq, rxrpc_tq_transmit_advance);
			xmit_ts = rxrpc_prepare_txqueue(tq, req);
		}
	}

	/* Set timeouts */
	if (req->tlp_probe) {
		/* Sending TLP loss probe [RFC8985 7.3]. */
		call->tlp_serial = serial - 1;
		call->tlp_seq = seq - 1;
	} else if (start_tlp) {
		/* Schedule TLP loss probe [RFC8985 7.2]. */
		ktime_t pto;

		if (!test_bit(RXRPC_CALL_BEGAN_RX_TIMER, &call->flags))
			/* The first packet may take longer to elicit a response. */
			pto = NSEC_PER_SEC;
		else
			pto = rxrpc_tlp_calc_pto(call, req->now);

		call->rack_timer_mode = RXRPC_CALL_RACKTIMER_TLP_PTO;
		call->rack_timo_at = ktime_add(req->now, pto);
		trace_rxrpc_rack_timer(call, pto, false);
		trace_rxrpc_timer_set(call, pto, rxrpc_timer_trace_rack_tlp_pto);
	}

	if (!test_and_set_bit(RXRPC_CALL_BEGAN_RX_TIMER, &call->flags)) {
		ktime_t delay = ms_to_ktime(READ_ONCE(call->next_rx_timo));

		call->expect_rx_by = ktime_add(req->now, delay);
		trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_expect_rx);
	}

	rxrpc_set_keepalive(call, req->now);
	page_frag_free(whdr);
	return len;
}

/*
 * Send one or more packets through the transport endpoint
 */
void rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_send_data_req *req)
{
	struct rxrpc_wire_header *whdr;
	struct rxrpc_connection *conn = call->conn;
	enum rxrpc_tx_point frag;
	struct rxrpc_txqueue *tq = req->tq;
	struct rxrpc_txbuf *txb;
	struct msghdr msg;
	rxrpc_seq_t seq = req->seq;
	size_t len = sizeof(*whdr);
	bool new_call = !test_bit(RXRPC_CALL_BEGAN_RX_TIMER, &call->flags);
	int ret, stat_ix;

	_enter("%x,%x-%x", tq->qbase, seq, seq + req->n - 1);

	whdr = page_frag_alloc(&call->local->tx_alloc, sizeof(*whdr), GFP_NOFS);
	if (!whdr)
		return; /* Drop the packet if no memory. */

	call->local->kvec[0].iov_base = whdr;
	call->local->kvec[0].iov_len = sizeof(*whdr);

	stat_ix = umin(req->n, ARRAY_SIZE(call->rxnet->stat_tx_jumbo)) - 1;
	atomic_inc(&call->rxnet->stat_tx_jumbo[stat_ix]);

	len += rxrpc_prepare_data_packet(call, req, whdr);
	txb = tq->bufs[seq & RXRPC_TXQ_MASK];

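	/* kvec[0] carries the wire header; kvec[1..n] were filled with the
	 * subpacket payloads by rxrpc_prepare_data_packet().
	 */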
	iov_iter_kvec(&msg.msg_iter, WRITE, call->local->kvec, 1 + req->n, len);

	msg.msg_name = &call->peer->srx.transport;
	msg.msg_namelen = call->peer->srx.transport_len;
	msg.msg_control = NULL;
	msg.msg_controllen = 0;
	msg.msg_flags = MSG_SPLICE_PAGES;

	/* Send the packet with the don't fragment bit set unless we think it's
	 * too big or if this is a retransmission.
	 */
	if (seq == call->tx_transmitted + 1 &&
	    len >= sizeof(struct rxrpc_wire_header) + call->peer->max_data) {
		rxrpc_local_dont_fragment(conn->local, false);
		frag = rxrpc_tx_point_call_data_frag;
	} else {
		rxrpc_local_dont_fragment(conn->local, true);
		frag = rxrpc_tx_point_call_data_nofrag;
	}

	/* Track what we've attempted to transmit at least once so that the
	 * retransmission algorithm doesn't try to resend what we haven't sent
	 * yet.
	 */
	if (seq == call->tx_transmitted + 1)
		call->tx_transmitted = seq + req->n - 1;

	if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) {
		static int lose;

		if ((lose++ & 7) == 7) {
			ret = 0;
			trace_rxrpc_tx_data(call, txb->seq, txb->serial, txb->flags,
					    rxrpc_txdata_inject_loss);
			rxrpc_peer_mark_tx(conn->peer);
			goto done;
		}
	}

	/* send the packet by UDP
	 * - returns -EMSGSIZE if UDP would have to fragment the packet
	 *   to go out of the interface
	 *   - in which case, we'll have processed the ICMP error
	 *     message and update the peer record
	 */
	rxrpc_inc_stat(call->rxnet, stat_tx_data_send);
	ret = do_udp_sendmsg(conn->local->socket, &msg, len);
	rxrpc_peer_mark_tx(conn->peer);

	if (ret == -EMSGSIZE) {
		rxrpc_inc_stat(call->rxnet, stat_tx_data_send_msgsize);
		trace_rxrpc_tx_packet(call->debug_id, whdr, frag);
		ret = 0;
	} else if (ret < 0) {
		rxrpc_inc_stat(call->rxnet, stat_tx_data_send_fail);
		trace_rxrpc_tx_fail(call->debug_id, txb->serial, ret, frag);
	} else {
		trace_rxrpc_tx_packet(call->debug_id, whdr, frag);
	}

	rxrpc_tx_backoff(call, ret);

	if (ret < 0) {
		/* Cancel the call if the initial transmission fails or if we
		 * hit network routing issues that aren't going away anytime
		 * soon. The layer above can arrange the retransmission.
		 */
		if (new_call ||
		    ret == -ENETUNREACH ||
		    ret == -EHOSTUNREACH ||
		    ret == -ECONNREFUSED)
			rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR,
						  RX_USER_ABORT, ret);
	}

done:
	_leave(" = %d [%u]", ret, call->peer->max_data);
}

/*
 * Transmit a connection-level abort.
 */
void rxrpc_send_conn_abort(struct rxrpc_connection *conn)
{
	struct rxrpc_wire_header whdr;
	struct msghdr msg;
	struct kvec iov[2];
	__be32 word;
	size_t len;
	u32 serial;
	int ret;

	msg.msg_name = &conn->peer->srx.transport;
	msg.msg_namelen = conn->peer->srx.transport_len;
	msg.msg_control = NULL;
	msg.msg_controllen = 0;
	msg.msg_flags = 0;

	whdr.epoch = htonl(conn->proto.epoch);
	whdr.cid = htonl(conn->proto.cid);
	whdr.callNumber = 0;
	whdr.seq = 0;
	whdr.type = RXRPC_PACKET_TYPE_ABORT;
	whdr.flags = conn->out_clientflag;
	whdr.userStatus = 0;
	whdr.securityIndex = conn->security_ix;
	whdr._rsvd = 0;
	whdr.serviceId = htons(conn->service_id);

	word = htonl(conn->abort_code);

	iov[0].iov_base = &whdr;
	iov[0].iov_len = sizeof(whdr);
	iov[1].iov_base = &word;
	iov[1].iov_len = sizeof(word);

	len = iov[0].iov_len + iov[1].iov_len;

	serial = rxrpc_get_next_serial(conn);
	whdr.serial = htonl(serial);

	iov_iter_kvec(&msg.msg_iter, WRITE, iov, 2, len);
	ret = do_udp_sendmsg(conn->local->socket, &msg, len);
	if (ret < 0) {
		trace_rxrpc_tx_fail(conn->debug_id, serial, ret,
				    rxrpc_tx_point_conn_abort);
		_debug("sendmsg failed: %d", ret);
		return;
	}

	trace_rxrpc_tx_packet(conn->debug_id, &whdr, rxrpc_tx_point_conn_abort);

	rxrpc_peer_mark_tx(conn->peer);
}

/*
 * Reject a packet through the local endpoint.
 */
void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb)
{
	struct rxrpc_wire_header whdr;
	struct sockaddr_rxrpc srx;
	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
	struct msghdr msg;
	struct kvec iov[2];
	size_t size;
	__be32 code;
	int ret, ioc;

	if (sp->hdr.type == RXRPC_PACKET_TYPE_ABORT)
		return; /* Never abort an abort. */

	rxrpc_see_skb(skb, rxrpc_skb_see_reject);

	iov[0].iov_base = &whdr;
	iov[0].iov_len = sizeof(whdr);
	iov[1].iov_base = &code;
	iov[1].iov_len = sizeof(code);

	msg.msg_name = &srx.transport;
	msg.msg_control = NULL;
	msg.msg_controllen = 0;
	msg.msg_flags = 0;

	whdr = (struct rxrpc_wire_header) {
		.epoch		= htonl(sp->hdr.epoch),
		.cid		= htonl(sp->hdr.cid),
		.callNumber	= htonl(sp->hdr.callNumber),
		.serviceId	= htons(sp->hdr.serviceId),
		.flags		= ~sp->hdr.flags & RXRPC_CLIENT_INITIATED,
	};

	switch (skb->mark) {
	case RXRPC_SKB_MARK_REJECT_BUSY:
		whdr.type = RXRPC_PACKET_TYPE_BUSY;
		size = sizeof(whdr);
		ioc = 1;
		break;
	case RXRPC_SKB_MARK_REJECT_CONN_ABORT:
		whdr.callNumber = 0;
		fallthrough;
	case RXRPC_SKB_MARK_REJECT_ABORT:
		whdr.type = RXRPC_PACKET_TYPE_ABORT;
		code = htonl(skb->priority);
		size = sizeof(whdr) + sizeof(code);
		ioc = 2;
		break;
	default:
		return;
	}

	if (rxrpc_extract_addr_from_skb(&srx, skb) == 0) {
		msg.msg_namelen = srx.transport_len;

		iov_iter_kvec(&msg.msg_iter, WRITE, iov, ioc, size);
		ret = do_udp_sendmsg(local->socket, &msg, size);
		if (ret < 0)
			trace_rxrpc_tx_fail(local->debug_id, 0, ret,
					    rxrpc_tx_point_reject);
		else
			trace_rxrpc_tx_packet(local->debug_id, &whdr,
					      rxrpc_tx_point_reject);
	}
}

/*
 * Send a VERSION reply to a peer as a keepalive.
 */
void rxrpc_send_keepalive(struct rxrpc_peer *peer)
{
	struct rxrpc_wire_header whdr;
	struct msghdr msg;
	struct kvec iov[2];
	size_t len;
	int ret;

	_enter("");

	msg.msg_name = &peer->srx.transport;
	msg.msg_namelen = peer->srx.transport_len;
	msg.msg_control = NULL;
	msg.msg_controllen = 0;
	msg.msg_flags = 0;

	whdr.epoch = htonl(peer->local->rxnet->epoch);
	whdr.cid = 0;
	whdr.callNumber = 0;
	whdr.seq = 0;
	whdr.serial = 0;
	whdr.type = RXRPC_PACKET_TYPE_VERSION; /* Not client-initiated */
	whdr.flags = RXRPC_LAST_PACKET;
	whdr.userStatus = 0;
	whdr.securityIndex = 0;
	whdr._rsvd = 0;
	whdr.serviceId = 0;

	iov[0].iov_base = &whdr;
	iov[0].iov_len = sizeof(whdr);
	iov[1].iov_base = (char *)rxrpc_keepalive_string;
	iov[1].iov_len = sizeof(rxrpc_keepalive_string);

	len = iov[0].iov_len + iov[1].iov_len;

	iov_iter_kvec(&msg.msg_iter, WRITE, iov, 2, len);
	ret = do_udp_sendmsg(peer->local->socket, &msg, len);
	if (ret < 0)
		trace_rxrpc_tx_fail(peer->debug_id, 0, ret,
				    rxrpc_tx_point_version_keepalive);
	else
		trace_rxrpc_tx_packet(peer->debug_id, &whdr,
				      rxrpc_tx_point_version_keepalive);

	rxrpc_peer_mark_tx(peer);
	_leave("");
}

/*
 * Send a RESPONSE message.
 */
void rxrpc_send_response(struct rxrpc_connection *conn, struct sk_buff *response)
{
	struct rxrpc_skb_priv *sp = rxrpc_skb(response);
	struct scatterlist sg[16];
	struct bio_vec *bvec = conn->local->bvec;
	struct msghdr msg;
	size_t len = sp->resp.len;
	__be32 wserial;
	u32 serial = 0;
	int ret, nr_sg;

	_enter("C=%x,%x", conn->debug_id, sp->resp.challenge_serial);

	sg_init_table(sg, ARRAY_SIZE(sg));
	ret = skb_to_sgvec(response, sg, 0, len);
	if (ret < 0)
		goto fail;
	nr_sg = ret;
	ret = -EIO;
	if (WARN_ON_ONCE(nr_sg > ARRAY_SIZE(conn->local->bvec)))
		goto fail;

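	/* Convert the scatterlist into a bio_vec array so that the response
	 * skb can be spliced into the UDP socket with MSG_SPLICE_PAGES.
	 */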
	for (int i = 0; i < nr_sg; i++)
		bvec_set_page(&bvec[i], sg_page(&sg[i]), sg[i].length, sg[i].offset);

	iov_iter_bvec(&msg.msg_iter, WRITE, bvec, nr_sg, len);

	msg.msg_name = &conn->peer->srx.transport;
	msg.msg_namelen = conn->peer->srx.transport_len;
	msg.msg_control = NULL;
	msg.msg_controllen = 0;
	msg.msg_flags = MSG_SPLICE_PAGES;

	serial = rxrpc_get_next_serials(conn, 1);
	wserial = htonl(serial);

	trace_rxrpc_tx_response(conn, serial, sp);

	ret = skb_store_bits(response, offsetof(struct rxrpc_wire_header, serial),
			     &wserial, sizeof(wserial));
	if (ret < 0)
		goto fail;

	rxrpc_local_dont_fragment(conn->local, false);

	ret = do_udp_sendmsg(conn->local->socket, &msg, len);
	if (ret < 0)
		goto fail;

	rxrpc_peer_mark_tx(conn->peer);
	return;

fail:
	trace_rxrpc_tx_fail(conn->debug_id, serial, ret,
			    rxrpc_tx_point_response);
	kleave(" = %d", ret);
}