1 // SPDX-License-Identifier: GPL-2.0 2 /* Copyright (C) B.A.T.M.A.N. contributors: 3 * 4 * Edo Monticelli, Antonio Quartulli 5 */ 6 7 #include "tp_meter.h" 8 #include "main.h" 9 10 #include <linux/atomic.h> 11 #include <linux/bug.h> 12 #include <linux/build_bug.h> 13 #include <linux/byteorder/generic.h> 14 #include <linux/cache.h> 15 #include <linux/compiler.h> 16 #include <linux/completion.h> 17 #include <linux/container_of.h> 18 #include <linux/err.h> 19 #include <linux/etherdevice.h> 20 #include <linux/gfp.h> 21 #include <linux/if_ether.h> 22 #include <linux/init.h> 23 #include <linux/jiffies.h> 24 #include <linux/kref.h> 25 #include <linux/kthread.h> 26 #include <linux/limits.h> 27 #include <linux/list.h> 28 #include <linux/minmax.h> 29 #include <linux/netdevice.h> 30 #include <linux/param.h> 31 #include <linux/printk.h> 32 #include <linux/random.h> 33 #include <linux/rculist.h> 34 #include <linux/rcupdate.h> 35 #include <linux/sched.h> 36 #include <linux/skbuff.h> 37 #include <linux/slab.h> 38 #include <linux/spinlock.h> 39 #include <linux/stddef.h> 40 #include <linux/string.h> 41 #include <linux/timer.h> 42 #include <linux/wait.h> 43 #include <linux/workqueue.h> 44 #include <uapi/linux/batadv_packet.h> 45 #include <uapi/linux/batman_adv.h> 46 47 #include "hard-interface.h" 48 #include "log.h" 49 #include "netlink.h" 50 #include "originator.h" 51 #include "send.h" 52 53 /** 54 * BATADV_TP_DEF_TEST_LENGTH - Default test length if not specified by the user 55 * in milliseconds 56 */ 57 #define BATADV_TP_DEF_TEST_LENGTH 10000 58 59 /** 60 * BATADV_TP_AWND - Advertised window by the receiver (in bytes) 61 */ 62 #define BATADV_TP_AWND 0x20000000 63 64 /** 65 * BATADV_TP_RECV_TIMEOUT - Receiver activity timeout. If the receiver does not 66 * get anything for such amount of milliseconds, the connection is killed 67 */ 68 #define BATADV_TP_RECV_TIMEOUT 1000 69 70 /** 71 * BATADV_TP_MAX_RTO - Maximum sender timeout. If the sender RTO gets beyond 72 * such amount of milliseconds, the receiver is considered unreachable and the 73 * connection is killed 74 */ 75 #define BATADV_TP_MAX_RTO 30000 76 77 /** 78 * BATADV_TP_FIRST_SEQ - First seqno of each session. The number is rather high 79 * in order to immediately trigger a wrap around (test purposes) 80 */ 81 #define BATADV_TP_FIRST_SEQ ((u32)-1 - 2000) 82 83 /** 84 * BATADV_TP_PLEN - length of the payload (data after the batadv_unicast header) 85 * to simulate 86 */ 87 #define BATADV_TP_PLEN (BATADV_TP_PACKET_LEN - ETH_HLEN - \ 88 sizeof(struct batadv_unicast_packet)) 89 90 /** 91 * BATADV_TP_MAX_UNACKED - maximum number of packets a receiver didn't yet ack 92 */ 93 #define BATADV_TP_MAX_UNACKED 100 94 95 static u8 batadv_tp_prerandom[4096] __read_mostly; 96 97 /** 98 * batadv_tp_session_cookie() - generate session cookie based on session ids 99 * @session: TP session identifier 100 * @icmp_uid: icmp pseudo uid of the tp session 101 * 102 * Return: 32 bit tp_meter session cookie 103 */ 104 static u32 batadv_tp_session_cookie(const u8 session[2], u8 icmp_uid) 105 { 106 u32 cookie; 107 108 cookie = icmp_uid << 16; 109 cookie |= session[0] << 8; 110 cookie |= session[1]; 111 112 return cookie; 113 } 114 115 /** 116 * batadv_tp_cwnd() - compute the new cwnd size 117 * @base: base cwnd size value 118 * @increment: the value to add to base to get the new size 119 * @min: minimum cwnd value (usually MSS) 120 * 121 * Return the new cwnd size and ensure it does not exceed the Advertised 122 * Receiver Window size. It is wrapped around safely. 123 * For details refer to Section 3.1 of RFC5681 124 * 125 * Return: new congestion window size in bytes 126 */ 127 static u32 batadv_tp_cwnd(u32 base, u32 increment, u32 min) 128 { 129 u32 new_size = base + increment; 130 131 /* check for wrap-around */ 132 if (new_size < base) 133 new_size = (u32)ULONG_MAX; 134 135 new_size = min_t(u32, new_size, BATADV_TP_AWND); 136 137 return max_t(u32, new_size, min); 138 } 139 140 /** 141 * batadv_tp_update_cwnd() - update the Congestion Windows 142 * @tp_vars: the private data of the current TP meter session 143 * @mss: maximum segment size of transmission 144 * 145 * 1) if the session is in Slow Start, the CWND has to be increased by 1 146 * MSS every unique received ACK 147 * 2) if the session is in Congestion Avoidance, the CWND has to be 148 * increased by MSS * MSS / CWND for every unique received ACK 149 */ 150 static void batadv_tp_update_cwnd(struct batadv_tp_sender *tp_vars, u32 mss) 151 __must_hold(&tp_vars->cc_lock) 152 { 153 /* slow start... */ 154 if (tp_vars->cc.cwnd <= tp_vars->cc.ss_threshold) { 155 tp_vars->cc.dec_cwnd = 0; 156 tp_vars->cc.cwnd = batadv_tp_cwnd(tp_vars->cc.cwnd, mss, mss); 157 return; 158 } 159 160 /* prevent overflow in (mss * mss) << 3 */ 161 mss = min_t(u32, mss, (1U << 14) - 1); 162 163 /* increment CWND at least of 1 (section 3.1 of RFC5681) */ 164 tp_vars->cc.dec_cwnd += max_t(u32, 1U << 3, 165 ((mss * mss) << 3) / tp_vars->cc.cwnd); 166 if (tp_vars->cc.dec_cwnd < (mss << 3)) 167 return; 168 169 tp_vars->cc.cwnd = batadv_tp_cwnd(tp_vars->cc.cwnd, mss, mss); 170 tp_vars->cc.dec_cwnd = 0; 171 } 172 173 /** 174 * batadv_tp_update_rto() - calculate new retransmission timeout 175 * @tp_vars: the private data of the current TP meter session 176 * @new_rtt: new roundtrip time in msec 177 */ 178 static void batadv_tp_update_rto(struct batadv_tp_sender *tp_vars, 179 u32 new_rtt) 180 __must_hold(&tp_vars->cc_lock) 181 { 182 long m = new_rtt; 183 184 /* RTT update 185 * Details in Section 2.2 and 2.3 of RFC6298 186 * 187 * It's tricky to understand. Don't lose hair please. 188 * Inspired by tcp_rtt_estimator() tcp_input.c 189 */ 190 if (tp_vars->cc.srtt != 0) { 191 m -= (tp_vars->cc.srtt >> 3); /* m is now error in rtt est */ 192 tp_vars->cc.srtt += m; /* rtt = 7/8 srtt + 1/8 new */ 193 if (m < 0) 194 m = -m; 195 196 m -= (tp_vars->cc.rttvar >> 2); 197 tp_vars->cc.rttvar += m; /* mdev ~= 3/4 rttvar + 1/4 new */ 198 } else { 199 /* first measure getting in */ 200 tp_vars->cc.srtt = m << 3; /* take the measured time to be srtt */ 201 tp_vars->cc.rttvar = m << 1; /* new_rtt / 2 */ 202 } 203 204 /* rto = srtt + 4 * rttvar. 205 * rttvar is scaled by 4, therefore doesn't need to be multiplied 206 */ 207 WRITE_ONCE(tp_vars->cc.rto, (tp_vars->cc.srtt >> 3) + tp_vars->cc.rttvar); 208 } 209 210 /** 211 * batadv_tp_batctl_notify() - send client status result to client 212 * @reason: reason for tp meter session stop 213 * @dst: destination of tp_meter session 214 * @bat_priv: the bat priv with all the mesh interface information 215 * @start_time: start of transmission in jiffies 216 * @total_sent: bytes acked to the receiver 217 * @cookie: cookie of tp_meter session 218 */ 219 static void batadv_tp_batctl_notify(enum batadv_tp_meter_reason reason, 220 const u8 *dst, struct batadv_priv *bat_priv, 221 unsigned long start_time, u64 total_sent, 222 u32 cookie) 223 { 224 u32 test_time; 225 u8 result; 226 u32 total_bytes; 227 228 if (!batadv_tp_is_error(reason)) { 229 result = BATADV_TP_REASON_COMPLETE; 230 test_time = jiffies_to_msecs(jiffies - start_time); 231 total_bytes = total_sent; 232 } else { 233 result = reason; 234 test_time = 0; 235 total_bytes = 0; 236 } 237 238 batadv_netlink_tpmeter_notify(bat_priv, dst, result, test_time, 239 total_bytes, cookie); 240 } 241 242 /** 243 * batadv_tp_batctl_error_notify() - send client error result to client 244 * @reason: reason for tp meter session stop 245 * @dst: destination of tp_meter session 246 * @bat_priv: the bat priv with all the mesh interface information 247 * @cookie: cookie of tp_meter session 248 */ 249 static void batadv_tp_batctl_error_notify(enum batadv_tp_meter_reason reason, 250 const u8 *dst, 251 struct batadv_priv *bat_priv, 252 u32 cookie) 253 { 254 batadv_tp_batctl_notify(reason, dst, bat_priv, 0, 0, cookie); 255 } 256 257 /** 258 * batadv_tp_list_find_sender() - find a sender tp_vars object in the global list 259 * @bat_priv: the bat priv with all the mesh interface information 260 * @dst: the other endpoint MAC address to look for 261 * 262 * Look for a tp_vars object matching dst as end_point and return it after 263 * having increment the refcounter. Return NULL is not found 264 * 265 * Return: matching tp_vars or NULL when no tp_vars with @dst was found 266 */ 267 static struct batadv_tp_sender * 268 batadv_tp_list_find_sender(struct batadv_priv *bat_priv, const u8 *dst) 269 { 270 struct batadv_tp_sender *pos, *tp_vars = NULL; 271 272 rcu_read_lock(); 273 hlist_for_each_entry_rcu(pos, &bat_priv->tp_sender_list, common.list) { 274 if (!batadv_compare_eth(pos->common.other_end, dst)) 275 continue; 276 277 /* most of the time this function is invoked during the normal 278 * process..it makes sens to pay more when the session is 279 * finished and to speed the process up during the measurement 280 */ 281 if (unlikely(!kref_get_unless_zero(&pos->common.refcount))) 282 continue; 283 284 tp_vars = pos; 285 break; 286 } 287 rcu_read_unlock(); 288 289 return tp_vars; 290 } 291 292 /** 293 * batadv_tp_list_active() - check if session from/to destination is ongoing 294 * @bat_priv: the bat priv with all the mesh interface information 295 * @dst: the other endpoint MAC address to look for 296 * 297 * Return: true if a matching session with @dst was found, false otherwise 298 */ 299 static bool batadv_tp_list_active(struct batadv_priv *bat_priv, const u8 *dst) 300 __must_hold(&bat_priv->tp_list_lock) 301 { 302 struct batadv_tp_receiver *tp_receiver; 303 struct batadv_tp_sender *tp_sender; 304 305 hlist_for_each_entry_rcu(tp_sender, &bat_priv->tp_sender_list, common.list) { 306 if (batadv_compare_eth(tp_sender->common.other_end, dst)) 307 return true; 308 } 309 310 hlist_for_each_entry_rcu(tp_receiver, &bat_priv->tp_receiver_list, common.list) { 311 if (batadv_compare_eth(tp_receiver->common.other_end, dst)) 312 return true; 313 } 314 315 return false; 316 } 317 318 /** 319 * batadv_tp_list_find_sender_session() - find tp_vars sender session 320 * object in the global list 321 * @bat_priv: the bat priv with all the mesh interface information 322 * @dst: the other endpoint MAC address to look for 323 * @session: session identifier 324 * 325 * Look for a tp_vars object matching dst as end_point, session as tp meter 326 * session and return it after having increment the refcounter. Return NULL 327 * is not found 328 * 329 * Return: matching tp_vars or NULL when no tp_vars was found 330 */ 331 static struct batadv_tp_sender * 332 batadv_tp_list_find_sender_session(struct batadv_priv *bat_priv, const u8 *dst, 333 const u8 *session) 334 { 335 struct batadv_tp_sender *pos, *tp_vars = NULL; 336 337 rcu_read_lock(); 338 hlist_for_each_entry_rcu(pos, &bat_priv->tp_sender_list, common.list) { 339 if (!batadv_compare_eth(pos->common.other_end, dst)) 340 continue; 341 342 if (memcmp(pos->common.session, session, sizeof(pos->common.session)) != 0) 343 continue; 344 345 /* most of the time this function is invoked during the normal 346 * process..it makes sense to pay more when the session is 347 * finished and to speed the process up during the measurement 348 */ 349 if (unlikely(!kref_get_unless_zero(&pos->common.refcount))) 350 continue; 351 352 tp_vars = pos; 353 break; 354 } 355 rcu_read_unlock(); 356 357 return tp_vars; 358 } 359 360 /** 361 * batadv_tp_vars_common_release() - release batadv_tp_vars_common from lists 362 * and queue for free after rcu grace period 363 * @ref: kref pointer of the batadv_tp_vars_common 364 */ 365 static void batadv_tp_vars_common_release(struct kref *ref) 366 { 367 struct batadv_tp_vars_common *tp_vars; 368 struct batadv_tp_unacked *un, *safe; 369 370 tp_vars = container_of(ref, struct batadv_tp_vars_common, refcount); 371 372 /* lock should not be needed because this object is now out of any 373 * context! 374 */ 375 spin_lock_bh(&tp_vars->unacked_lock); 376 list_for_each_entry_safe(un, safe, &tp_vars->unacked_list, list) { 377 list_del(&un->list); 378 kfree(un); 379 } 380 spin_unlock_bh(&tp_vars->unacked_lock); 381 382 kfree_rcu(tp_vars, rcu); 383 } 384 385 /** 386 * batadv_tp_sender_put() - decrement the batadv_tp_sender 387 * refcounter and possibly release it 388 * @tp_vars: the private data of the current TP meter session to be free'd 389 */ 390 static void batadv_tp_sender_put(struct batadv_tp_sender *tp_vars) 391 { 392 if (!tp_vars) 393 return; 394 395 kref_put(&tp_vars->common.refcount, batadv_tp_vars_common_release); 396 } 397 398 /** 399 * batadv_tp_list_detach() - remove tp session from mesh session list once 400 * @tp_vars: the private data of the current TP meter session 401 * 402 * Return: whether tp_vars was detached from list and reference must be freed 403 */ 404 static bool batadv_tp_list_detach(struct batadv_tp_vars_common *tp_vars) 405 { 406 bool detached = false; 407 408 spin_lock_bh(&tp_vars->bat_priv->tp_list_lock); 409 if (!hlist_unhashed(&tp_vars->list)) { 410 hlist_del_init_rcu(&tp_vars->list); 411 detached = true; 412 } 413 spin_unlock_bh(&tp_vars->bat_priv->tp_list_lock); 414 415 if (!detached) 416 return false; 417 418 atomic_dec(&tp_vars->bat_priv->tp_num); 419 420 return true; 421 } 422 423 /** 424 * batadv_tp_sender_cleanup() - cleanup sender data and drop and timer 425 * @tp_vars: the private data of the current TP meter session to cleanup 426 */ 427 static void batadv_tp_sender_cleanup(struct batadv_tp_sender *tp_vars) 428 { 429 disable_delayed_work_sync(&tp_vars->finish_work); 430 431 if (batadv_tp_list_detach(&tp_vars->common)) 432 batadv_tp_sender_put(tp_vars); 433 434 /* kill the timer and remove its reference */ 435 timer_shutdown_sync(&tp_vars->common.timer); 436 batadv_tp_sender_put(tp_vars); 437 } 438 439 /** 440 * batadv_tp_sender_end() - print info about ended session and inform client 441 * @bat_priv: the bat priv with all the mesh interface information 442 * @tp_vars: the private data of the current TP meter session 443 */ 444 static void batadv_tp_sender_end(struct batadv_priv *bat_priv, 445 struct batadv_tp_sender *tp_vars) 446 { 447 enum batadv_tp_meter_reason reason; 448 u32 session_cookie; 449 450 reason = atomic_read(&tp_vars->send_result); 451 452 batadv_dbg(BATADV_DBG_TP_METER, bat_priv, 453 "Test towards %pM finished..shutting down (reason=%d)\n", 454 tp_vars->common.other_end, reason); 455 456 batadv_dbg(BATADV_DBG_TP_METER, bat_priv, 457 "Last timing stats: SRTT=%ums RTTVAR=%ums RTO=%ums\n", 458 tp_vars->cc.srtt >> 3, tp_vars->cc.rttvar >> 2, tp_vars->cc.rto); 459 460 batadv_dbg(BATADV_DBG_TP_METER, bat_priv, 461 "Final values: cwnd=%u ss_threshold=%u\n", 462 tp_vars->cc.cwnd, tp_vars->cc.ss_threshold); 463 464 session_cookie = batadv_tp_session_cookie(tp_vars->common.session, 465 tp_vars->icmp_uid); 466 467 batadv_tp_batctl_notify(reason, 468 tp_vars->common.other_end, 469 bat_priv, 470 tp_vars->start_time, 471 atomic64_read(&tp_vars->tot_sent), 472 session_cookie); 473 } 474 475 /** 476 * batadv_tp_sender_shutdown() - let sender thread/timer stop gracefully 477 * @tp_vars: the private data of the current TP meter session 478 * @reason: reason for tp meter session stop 479 */ 480 static void batadv_tp_sender_shutdown(struct batadv_tp_sender *tp_vars, 481 enum batadv_tp_meter_reason reason) 482 { 483 atomic_cmpxchg(&tp_vars->send_result, 0, reason); 484 } 485 486 /** 487 * batadv_tp_sender_stopped() - check if tp session was stopped with reason 488 * @tp_vars: the private data of the current TP meter session 489 * 490 * Return: whether stop reason was found 491 */ 492 static bool batadv_tp_sender_stopped(struct batadv_tp_sender *tp_vars) 493 { 494 return atomic_read(&tp_vars->send_result) != 0; 495 } 496 497 /** 498 * batadv_tp_sender_finish() - stop sender session after test_length was reached 499 * @work: delayed work reference of the related tp_vars 500 */ 501 static void batadv_tp_sender_finish(struct work_struct *work) 502 { 503 struct delayed_work *delayed_work; 504 struct batadv_tp_sender *tp_vars; 505 506 delayed_work = to_delayed_work(work); 507 tp_vars = container_of(delayed_work, struct batadv_tp_sender, 508 finish_work); 509 510 batadv_tp_sender_shutdown(tp_vars, BATADV_TP_REASON_COMPLETE); 511 } 512 513 /** 514 * batadv_tp_reset_sender_timer() - reschedule the sender timer 515 * @tp_vars: the private TP meter data for this session 516 * 517 * Reschedule the timer using tp_vars->cc.rto as delay 518 */ 519 static void batadv_tp_reset_sender_timer(struct batadv_tp_sender *tp_vars) 520 { 521 /* most of the time this function is invoked while normal packet 522 * reception... 523 */ 524 if (unlikely(batadv_tp_sender_stopped(tp_vars))) 525 /* timer ref will be dropped in batadv_tp_sender_cleanup */ 526 return; 527 528 mod_timer(&tp_vars->common.timer, 529 jiffies + msecs_to_jiffies(READ_ONCE(tp_vars->cc.rto))); 530 } 531 532 /** 533 * batadv_tp_sender_timeout() - timer that fires in case of packet loss 534 * @t: address to timer_list inside tp_vars 535 * 536 * If fired it means that there was packet loss. 537 * Switch to Slow Start, set the ss_threshold to half of the current cwnd and 538 * reset the cwnd to 3*MSS 539 */ 540 static void batadv_tp_sender_timeout(struct timer_list *t) 541 { 542 struct batadv_tp_sender *tp_vars = timer_container_of(tp_vars, t, common.timer); 543 struct batadv_priv *bat_priv = tp_vars->common.bat_priv; 544 545 if (batadv_tp_sender_stopped(tp_vars)) 546 return; 547 548 spin_lock_bh(&tp_vars->cc_lock); 549 550 /* if the user waited long enough...shutdown the test */ 551 if (unlikely(tp_vars->cc.rto >= BATADV_TP_MAX_RTO)) { 552 spin_unlock_bh(&tp_vars->cc_lock); 553 batadv_tp_sender_shutdown(tp_vars, 554 BATADV_TP_REASON_DST_UNREACHABLE); 555 return; 556 } 557 558 /* RTO exponential backoff 559 * Details in Section 5.5 of RFC6298 560 */ 561 WRITE_ONCE(tp_vars->cc.rto, tp_vars->cc.rto * 2); 562 563 tp_vars->cc.ss_threshold = tp_vars->cc.cwnd >> 1; 564 if (tp_vars->cc.ss_threshold < BATADV_TP_PLEN * 2) 565 tp_vars->cc.ss_threshold = BATADV_TP_PLEN * 2; 566 567 batadv_dbg(BATADV_DBG_TP_METER, bat_priv, 568 "Meter: RTO fired during test towards %pM! cwnd=%u new ss_thr=%u, resetting last_sent to %u\n", 569 tp_vars->common.other_end, tp_vars->cc.cwnd, tp_vars->cc.ss_threshold, 570 tp_vars->cc.last_acked); 571 572 tp_vars->cc.cwnd = BATADV_TP_PLEN * 3; 573 574 WRITE_ONCE(tp_vars->cc.last_sent, tp_vars->cc.last_acked); 575 576 spin_unlock_bh(&tp_vars->cc_lock); 577 578 /* resend the non-ACKed packets.. */ 579 wake_up(&tp_vars->more_bytes); 580 581 batadv_tp_reset_sender_timer(tp_vars); 582 } 583 584 /** 585 * batadv_tp_fill_prerandom() - Fill buffer with prefetched random bytes 586 * @tp_vars: the private TP meter data for this session 587 * @buf: Buffer to fill with bytes 588 * @nbytes: amount of pseudorandom bytes 589 */ 590 static void batadv_tp_fill_prerandom(struct batadv_tp_sender *tp_vars, 591 u8 *buf, size_t nbytes) 592 { 593 u32 local_offset; 594 size_t bytes_inbuf; 595 size_t to_copy; 596 size_t pos = 0; 597 598 spin_lock_bh(&tp_vars->prerandom_lock); 599 local_offset = tp_vars->prerandom_offset; 600 tp_vars->prerandom_offset += nbytes; 601 tp_vars->prerandom_offset %= sizeof(batadv_tp_prerandom); 602 spin_unlock_bh(&tp_vars->prerandom_lock); 603 604 while (nbytes) { 605 local_offset %= sizeof(batadv_tp_prerandom); 606 bytes_inbuf = sizeof(batadv_tp_prerandom) - local_offset; 607 to_copy = min(nbytes, bytes_inbuf); 608 609 memcpy(&buf[pos], &batadv_tp_prerandom[local_offset], to_copy); 610 pos += to_copy; 611 nbytes -= to_copy; 612 local_offset = 0; 613 } 614 } 615 616 /** 617 * batadv_tp_send_msg() - send a single message 618 * @tp_vars: the private TP meter data for this session 619 * @src: source mac address 620 * @orig_node: the originator of the destination 621 * @seqno: sequence number of this packet 622 * @len: length of the entire packet 623 * @session: session identifier 624 * @uid: local ICMP "socket" index 625 * @timestamp: timestamp in jiffies which is replied in ack 626 * 627 * Create and send a single TP Meter message. 628 * 629 * Return: 0 on success, BATADV_TP_REASON_MEMORY_ERROR if the packet couldn't 630 * be allocated, BATADV_TP_REASON_CANT_SEND if the packet could not be 631 * transmitted 632 */ 633 static int batadv_tp_send_msg(struct batadv_tp_sender *tp_vars, const u8 *src, 634 struct batadv_orig_node *orig_node, 635 u32 seqno, size_t len, const u8 *session, 636 int uid, u32 timestamp) 637 { 638 struct batadv_icmp_tp_packet *icmp; 639 struct sk_buff *skb; 640 int r; 641 u8 *data; 642 size_t data_len; 643 644 skb = netdev_alloc_skb_ip_align(NULL, len + ETH_HLEN); 645 if (unlikely(!skb)) 646 return BATADV_TP_REASON_MEMORY_ERROR; 647 648 skb_reserve(skb, ETH_HLEN); 649 icmp = skb_put(skb, sizeof(*icmp)); 650 651 /* fill the icmp header */ 652 ether_addr_copy(icmp->dst, orig_node->orig); 653 ether_addr_copy(icmp->orig, src); 654 icmp->version = BATADV_COMPAT_VERSION; 655 icmp->packet_type = BATADV_ICMP; 656 icmp->ttl = BATADV_TTL; 657 icmp->msg_type = BATADV_TP; 658 icmp->uid = uid; 659 660 icmp->subtype = BATADV_TP_MSG; 661 memcpy(icmp->session, session, sizeof(icmp->session)); 662 icmp->seqno = htonl(seqno); 663 icmp->timestamp = htonl(timestamp); 664 665 data_len = len - sizeof(*icmp); 666 data = skb_put(skb, data_len); 667 batadv_tp_fill_prerandom(tp_vars, data, data_len); 668 669 r = batadv_send_skb_to_orig(skb, orig_node, NULL); 670 if (r == NET_XMIT_SUCCESS) 671 return 0; 672 673 return BATADV_TP_REASON_CANT_SEND; 674 } 675 676 /** 677 * enum batadv_tp_ack_reaction - expected reaction to ack packet 678 */ 679 enum batadv_tp_ack_reaction { 680 /** @BATADV_TP_ACK_REACTION_OLD_ACK: ignore old ack packet */ 681 BATADV_TP_ACK_REACTION_OLD_ACK, 682 683 /** @BATADV_TP_ACK_REACTION_IGNORE: ignore duplicated ack but reset timer */ 684 BATADV_TP_ACK_REACTION_IGNORE, 685 686 /** @BATADV_TP_ACK_REACTION_RESEND_WAKEUP: resend data and wakeup "more_bytes" */ 687 BATADV_TP_ACK_REACTION_RESEND_WAKEUP, 688 689 /** @BATADV_TP_ACK_REACTION_WAKEUP: wakeup "more_bytes" */ 690 BATADV_TP_ACK_REACTION_WAKEUP, 691 }; 692 693 /** 694 * batadv_tp_handle_ack() - Calculate reaction to ACK and update congestion control 695 * @bat_priv: the bat priv with all the mesh interface information 696 * @tp_vars: the private data of the current TP meter session 697 * @recv_ack: received ACK seqno 698 * @mss: maximum segment size for transmission 699 * 700 * Return: expected reaction to this ack 701 */ 702 static enum batadv_tp_ack_reaction 703 batadv_tp_handle_ack(struct batadv_priv *bat_priv, 704 struct batadv_tp_sender *tp_vars, 705 u32 recv_ack, size_t mss) 706 __must_hold(&tp_vars->cc_lock) 707 { 708 enum batadv_tp_ack_reaction reaction; 709 710 if (batadv_seq_before(recv_ack, tp_vars->cc.last_acked)) 711 return BATADV_TP_ACK_REACTION_OLD_ACK; 712 713 /* check if this ACK is a duplicate */ 714 if (tp_vars->cc.last_acked == recv_ack) { 715 /* if this is the third duplicate ACK do Fast Retransmit */ 716 if (tp_vars->cc.dup_acks > 3) 717 return BATADV_TP_ACK_REACTION_IGNORE; 718 719 tp_vars->cc.dup_acks++; 720 if (tp_vars->cc.dup_acks != 3) 721 return BATADV_TP_ACK_REACTION_IGNORE; 722 723 if (!batadv_seq_before(tp_vars->cc.recover, recv_ack)) 724 return BATADV_TP_ACK_REACTION_IGNORE; 725 726 /* Fast Recovery */ 727 tp_vars->cc.fast_recovery = true; 728 729 /* Set recover to the last outstanding seqno when Fast Recovery 730 * is entered. RFC6582, Section 3.2, step 1 731 */ 732 tp_vars->cc.recover = tp_vars->cc.last_sent; 733 tp_vars->cc.ss_threshold = tp_vars->cc.cwnd >> 1; 734 batadv_dbg(BATADV_DBG_TP_METER, bat_priv, 735 "Meter: Fast Recovery, (cur cwnd=%u) ss_thr=%u last_sent=%u recv_ack=%u\n", 736 tp_vars->cc.cwnd, tp_vars->cc.ss_threshold, 737 tp_vars->cc.last_sent, recv_ack); 738 tp_vars->cc.cwnd = batadv_tp_cwnd(tp_vars->cc.ss_threshold, 3 * mss, 739 mss); 740 tp_vars->cc.dec_cwnd = 0; 741 WRITE_ONCE(tp_vars->cc.last_sent, recv_ack); 742 743 return BATADV_TP_ACK_REACTION_RESEND_WAKEUP; 744 } 745 746 /* count the acked data */ 747 atomic64_add(recv_ack - tp_vars->cc.last_acked, &tp_vars->tot_sent); 748 749 /* reset the duplicate ACKs counter */ 750 tp_vars->cc.dup_acks = 0; 751 752 if (tp_vars->cc.fast_recovery) { 753 /* partial ACK */ 754 if (batadv_seq_before(recv_ack, tp_vars->cc.recover)) { 755 /* this is another hole in the window. React 756 * immediately as specified by NewReno (see 757 * Section 3.2 of RFC6582 for details) 758 */ 759 reaction = BATADV_TP_ACK_REACTION_RESEND_WAKEUP; 760 tp_vars->cc.cwnd = batadv_tp_cwnd(tp_vars->cc.cwnd, 761 mss, mss); 762 } else { 763 tp_vars->cc.fast_recovery = false; 764 /* set cwnd to the value of ss_threshold at the 765 * moment that Fast Recovery was entered. 766 * RFC6582, Section 3.2, step 3 767 */ 768 tp_vars->cc.cwnd = batadv_tp_cwnd(tp_vars->cc.ss_threshold, 769 0, mss); 770 reaction = BATADV_TP_ACK_REACTION_WAKEUP; 771 } 772 } else { 773 if (recv_ack - tp_vars->cc.last_acked >= mss) 774 batadv_tp_update_cwnd(tp_vars, mss); 775 776 reaction = BATADV_TP_ACK_REACTION_WAKEUP; 777 } 778 779 /* move the Transmit Window */ 780 WRITE_ONCE(tp_vars->cc.last_acked, recv_ack); 781 782 return reaction; 783 } 784 785 /** 786 * batadv_tp_recv_ack() - ACK receiving function 787 * @bat_priv: the bat priv with all the mesh interface information 788 * @skb: the buffer containing the received packet 789 * 790 * Process a received TP ACK packet 791 */ 792 static void batadv_tp_recv_ack(struct batadv_priv *bat_priv, 793 const struct sk_buff *skb) 794 { 795 struct batadv_hard_iface *primary_if = NULL; 796 struct batadv_orig_node *orig_node = NULL; 797 const struct batadv_icmp_tp_packet *icmp; 798 enum batadv_tp_ack_reaction reaction; 799 struct batadv_tp_sender *tp_vars; 800 size_t packet_len; 801 u32 recv_ack; 802 size_t mss; 803 u32 rtt; 804 805 packet_len = BATADV_TP_PLEN; 806 mss = BATADV_TP_PLEN; 807 packet_len += sizeof(struct batadv_unicast_packet); 808 809 icmp = (struct batadv_icmp_tp_packet *)skb->data; 810 recv_ack = ntohl(icmp->seqno); 811 812 /* find the tp_vars */ 813 tp_vars = batadv_tp_list_find_sender_session(bat_priv, icmp->orig, 814 icmp->session); 815 if (unlikely(!tp_vars)) 816 return; 817 818 if (unlikely(batadv_tp_sender_stopped(tp_vars))) 819 goto out; 820 821 /* old ACK? silently drop it.. */ 822 if (batadv_seq_before(recv_ack, READ_ONCE(tp_vars->cc.last_acked))) 823 goto out; 824 825 primary_if = batadv_primary_if_get_selected(bat_priv); 826 if (unlikely(!primary_if)) 827 goto out; 828 829 orig_node = batadv_orig_hash_find(bat_priv, icmp->orig); 830 if (unlikely(!orig_node)) 831 goto out; 832 833 spin_lock_bh(&tp_vars->cc_lock); 834 /* update RTO with the new sampled RTT, if any */ 835 rtt = jiffies_to_msecs(jiffies) - ntohl(icmp->timestamp); 836 if (icmp->timestamp && rtt) 837 batadv_tp_update_rto(tp_vars, rtt); 838 839 reaction = batadv_tp_handle_ack(bat_priv, tp_vars, recv_ack, mss); 840 spin_unlock_bh(&tp_vars->cc_lock); 841 842 if (reaction == BATADV_TP_ACK_REACTION_OLD_ACK) 843 goto out; 844 845 /* ACK for new data... reset the timer */ 846 batadv_tp_reset_sender_timer(tp_vars); 847 848 switch (reaction) { 849 default: 850 case BATADV_TP_ACK_REACTION_IGNORE: 851 goto out; 852 case BATADV_TP_ACK_REACTION_RESEND_WAKEUP: 853 batadv_tp_send_msg(tp_vars, primary_if->net_dev->dev_addr, 854 orig_node, recv_ack, packet_len, 855 icmp->session, icmp->uid, 856 jiffies_to_msecs(jiffies)); 857 fallthrough; 858 case BATADV_TP_ACK_REACTION_WAKEUP: 859 wake_up(&tp_vars->more_bytes); 860 break; 861 } 862 863 out: 864 batadv_hardif_put(primary_if); 865 batadv_orig_node_put(orig_node); 866 batadv_tp_sender_put(tp_vars); 867 } 868 869 /** 870 * batadv_tp_avail() - check if congestion window is not full 871 * @tp_vars: the private data of the current TP meter session 872 * @payload_len: size of the payload of a single message 873 * 874 * Return: true when congestion window is not full, false otherwise 875 */ 876 static bool batadv_tp_avail(struct batadv_tp_sender *tp_vars, 877 size_t payload_len) 878 { 879 u32 win_left, win_limit; 880 881 spin_lock_bh(&tp_vars->cc_lock); 882 883 win_limit = tp_vars->cc.last_acked + tp_vars->cc.cwnd; 884 885 if (batadv_seq_before(tp_vars->cc.last_sent, win_limit)) 886 win_left = win_limit - tp_vars->cc.last_sent; 887 else 888 win_left = 0; 889 890 spin_unlock_bh(&tp_vars->cc_lock); 891 892 return win_left >= payload_len; 893 } 894 895 /** 896 * batadv_tp_wait_available() - wait until congestion window becomes free or 897 * timeout is reached 898 * @tp_vars: the private data of the current TP meter session 899 * @plen: size of the payload of a single message 900 * 901 * Return: 0 if the condition evaluated to false after the timeout elapsed, 902 * 1 if the condition evaluated to true after the timeout elapsed, the 903 * remaining jiffies (at least 1) if the condition evaluated to true before 904 * the timeout elapsed, or -ERESTARTSYS if it was interrupted by a signal. 905 */ 906 static int batadv_tp_wait_available(struct batadv_tp_sender *tp_vars, size_t plen) 907 { 908 int ret; 909 910 ret = wait_event_interruptible_timeout(tp_vars->more_bytes, 911 batadv_tp_avail(tp_vars, plen), 912 HZ / 10); 913 914 return ret; 915 } 916 917 /** 918 * batadv_tp_send() - main sending thread of a tp meter session 919 * @arg: address of the related tp_vars 920 * 921 * Return: 0 922 */ 923 static int batadv_tp_send(void *arg) 924 { 925 struct batadv_tp_sender *tp_vars = arg; 926 struct batadv_priv *bat_priv = tp_vars->common.bat_priv; 927 struct batadv_hard_iface *primary_if = NULL; 928 struct batadv_orig_node *orig_node = NULL; 929 size_t payload_len, packet_len; 930 u32 last_sent; 931 int err = 0; 932 933 orig_node = batadv_orig_hash_find(bat_priv, tp_vars->common.other_end); 934 if (unlikely(!orig_node)) { 935 err = BATADV_TP_REASON_DST_UNREACHABLE; 936 batadv_tp_sender_shutdown(tp_vars, err); 937 goto out; 938 } 939 940 primary_if = batadv_primary_if_get_selected(bat_priv); 941 if (unlikely(!primary_if)) { 942 err = BATADV_TP_REASON_DST_UNREACHABLE; 943 batadv_tp_sender_shutdown(tp_vars, err); 944 goto out; 945 } 946 947 /* assume that all the hard_interfaces have a correctly 948 * configured MTU, so use the mesh_iface MTU as MSS. 949 * This might not be true and in that case the fragmentation 950 * should be used. 951 * Now, try to send the packet as it is 952 */ 953 payload_len = BATADV_TP_PLEN; 954 BUILD_BUG_ON(sizeof(struct batadv_icmp_tp_packet) > BATADV_TP_PLEN); 955 956 batadv_tp_reset_sender_timer(tp_vars); 957 958 /* queue the worker in charge of terminating the test */ 959 queue_delayed_work(batadv_event_workqueue, &tp_vars->finish_work, 960 msecs_to_jiffies(tp_vars->test_length)); 961 962 while (!batadv_tp_sender_stopped(tp_vars)) { 963 if (unlikely(!batadv_tp_avail(tp_vars, payload_len))) { 964 batadv_tp_wait_available(tp_vars, payload_len); 965 continue; 966 } 967 968 /* to emulate normal unicast traffic, add to the payload len 969 * the size of the unicast header 970 */ 971 packet_len = payload_len + sizeof(struct batadv_unicast_packet); 972 last_sent = READ_ONCE(tp_vars->cc.last_sent); 973 974 err = batadv_tp_send_msg(tp_vars, primary_if->net_dev->dev_addr, 975 orig_node, last_sent, packet_len, 976 tp_vars->common.session, tp_vars->icmp_uid, 977 jiffies_to_msecs(jiffies)); 978 979 /* something went wrong during the preparation/transmission */ 980 if (unlikely(err && err != BATADV_TP_REASON_CANT_SEND)) { 981 batadv_dbg(BATADV_DBG_TP_METER, bat_priv, 982 "Meter: %s() cannot send packets (%d)\n", 983 __func__, err); 984 /* ensure nobody else tries to stop the thread now */ 985 batadv_tp_sender_shutdown(tp_vars, err); 986 break; 987 } 988 989 /* right-shift the TWND */ 990 if (!err) { 991 spin_lock_bh(&tp_vars->cc_lock); 992 if (tp_vars->cc.last_sent == last_sent) 993 WRITE_ONCE(tp_vars->cc.last_sent, last_sent + payload_len); 994 spin_unlock_bh(&tp_vars->cc_lock); 995 } 996 997 cond_resched(); 998 } 999 1000 out: 1001 batadv_hardif_put(primary_if); 1002 batadv_orig_node_put(orig_node); 1003 1004 batadv_tp_sender_end(bat_priv, tp_vars); 1005 batadv_tp_sender_cleanup(tp_vars); 1006 complete(&tp_vars->finished); 1007 1008 batadv_tp_sender_put(tp_vars); 1009 1010 return 0; 1011 } 1012 1013 /** 1014 * batadv_tp_start_kthread() - start new thread which manages the tp meter 1015 * sender 1016 * @tp_vars: the private data of the current TP meter session 1017 */ 1018 static void batadv_tp_start_kthread(struct batadv_tp_sender *tp_vars) 1019 { 1020 struct task_struct *kthread; 1021 struct batadv_priv *bat_priv = tp_vars->common.bat_priv; 1022 u32 session_cookie; 1023 1024 kref_get(&tp_vars->common.refcount); 1025 kthread = kthread_create(batadv_tp_send, tp_vars, "kbatadv_tp_meter"); 1026 if (IS_ERR(kthread)) { 1027 session_cookie = batadv_tp_session_cookie(tp_vars->common.session, 1028 tp_vars->icmp_uid); 1029 pr_err("batadv: cannot create tp meter kthread\n"); 1030 batadv_tp_batctl_error_notify(BATADV_TP_REASON_MEMORY_ERROR, 1031 tp_vars->common.other_end, 1032 bat_priv, session_cookie); 1033 1034 /* drop reserved reference for kthread */ 1035 batadv_tp_sender_put(tp_vars); 1036 1037 /* cleanup of failed tp meter variables */ 1038 batadv_tp_sender_cleanup(tp_vars); 1039 complete(&tp_vars->finished); 1040 return; 1041 } 1042 1043 wake_up_process(kthread); 1044 } 1045 1046 /** 1047 * batadv_tp_start() - start a new tp meter session 1048 * @bat_priv: the bat priv with all the mesh interface information 1049 * @dst: the receiver MAC address 1050 * @test_length: test length in milliseconds 1051 * @cookie: session cookie 1052 */ 1053 void batadv_tp_start(struct batadv_priv *bat_priv, const u8 *dst, 1054 u32 test_length, u32 *cookie) 1055 { 1056 struct batadv_tp_sender *tp_vars; 1057 u8 session_id[2]; 1058 u8 icmp_uid; 1059 u32 session_cookie; 1060 1061 get_random_bytes(session_id, sizeof(session_id)); 1062 get_random_bytes(&icmp_uid, 1); 1063 session_cookie = batadv_tp_session_cookie(session_id, icmp_uid); 1064 *cookie = session_cookie; 1065 1066 /* look for an already existing test towards this node */ 1067 spin_lock_bh(&bat_priv->tp_list_lock); 1068 if (READ_ONCE(bat_priv->mesh_state) != BATADV_MESH_ACTIVE) { 1069 spin_unlock_bh(&bat_priv->tp_list_lock); 1070 batadv_tp_batctl_error_notify(BATADV_TP_REASON_DST_UNREACHABLE, 1071 dst, bat_priv, session_cookie); 1072 return; 1073 } 1074 1075 if (batadv_tp_list_active(bat_priv, dst)) { 1076 spin_unlock_bh(&bat_priv->tp_list_lock); 1077 batadv_dbg(BATADV_DBG_TP_METER, bat_priv, 1078 "Meter: test to or from the same node already ongoing, aborting\n"); 1079 batadv_tp_batctl_error_notify(BATADV_TP_REASON_ALREADY_ONGOING, 1080 dst, bat_priv, session_cookie); 1081 return; 1082 } 1083 1084 if (!atomic_add_unless(&bat_priv->tp_num, 1, BATADV_TP_MAX_NUM)) { 1085 spin_unlock_bh(&bat_priv->tp_list_lock); 1086 batadv_dbg(BATADV_DBG_TP_METER, bat_priv, 1087 "Meter: too many ongoing sessions, aborting (SEND)\n"); 1088 batadv_tp_batctl_error_notify(BATADV_TP_REASON_TOO_MANY, dst, 1089 bat_priv, session_cookie); 1090 return; 1091 } 1092 1093 tp_vars = kmalloc_obj(*tp_vars, GFP_ATOMIC); 1094 if (!tp_vars) { 1095 atomic_dec(&bat_priv->tp_num); 1096 spin_unlock_bh(&bat_priv->tp_list_lock); 1097 batadv_dbg(BATADV_DBG_TP_METER, bat_priv, 1098 "Meter: %s cannot allocate list elements\n", 1099 __func__); 1100 batadv_tp_batctl_error_notify(BATADV_TP_REASON_MEMORY_ERROR, 1101 dst, bat_priv, session_cookie); 1102 return; 1103 } 1104 1105 /* initialize tp_vars */ 1106 ether_addr_copy(tp_vars->common.other_end, dst); 1107 kref_init(&tp_vars->common.refcount); 1108 atomic_set(&tp_vars->send_result, 0); 1109 memcpy(tp_vars->common.session, session_id, sizeof(session_id)); 1110 tp_vars->icmp_uid = icmp_uid; 1111 1112 WRITE_ONCE(tp_vars->cc.last_sent, BATADV_TP_FIRST_SEQ); 1113 WRITE_ONCE(tp_vars->cc.dup_acks, 0); 1114 WRITE_ONCE(tp_vars->cc.last_acked, BATADV_TP_FIRST_SEQ); 1115 tp_vars->cc.fast_recovery = false; 1116 tp_vars->cc.recover = BATADV_TP_FIRST_SEQ; 1117 1118 /* initialise the CWND to 3*MSS (Section 3.1 in RFC5681). 1119 * For batman-adv the MSS is the size of the payload received by the 1120 * mesh_interface, hence its MTU 1121 */ 1122 tp_vars->cc.cwnd = BATADV_TP_PLEN * 3; 1123 tp_vars->cc.dec_cwnd = 0; 1124 1125 /* at the beginning initialise the SS threshold to the biggest possible 1126 * window size, hence the AWND size 1127 */ 1128 tp_vars->cc.ss_threshold = BATADV_TP_AWND; 1129 1130 /* RTO initial value is 3 seconds. 1131 * Details in Section 2.1 of RFC6298 1132 */ 1133 WRITE_ONCE(tp_vars->cc.rto, 1000); 1134 tp_vars->cc.srtt = 0; 1135 tp_vars->cc.rttvar = 0; 1136 1137 atomic64_set(&tp_vars->tot_sent, 0); 1138 1139 kref_get(&tp_vars->common.refcount); 1140 timer_setup(&tp_vars->common.timer, batadv_tp_sender_timeout, 0); 1141 1142 tp_vars->common.bat_priv = bat_priv; 1143 tp_vars->start_time = jiffies; 1144 1145 init_waitqueue_head(&tp_vars->more_bytes); 1146 init_completion(&tp_vars->finished); 1147 1148 spin_lock_init(&tp_vars->common.unacked_lock); 1149 INIT_LIST_HEAD(&tp_vars->common.unacked_list); 1150 1151 spin_lock_init(&tp_vars->cc_lock); 1152 1153 tp_vars->prerandom_offset = 0; 1154 spin_lock_init(&tp_vars->prerandom_lock); 1155 1156 tp_vars->test_length = test_length; 1157 if (!tp_vars->test_length) 1158 tp_vars->test_length = BATADV_TP_DEF_TEST_LENGTH; 1159 1160 /* init work item for finished tp tests */ 1161 INIT_DELAYED_WORK(&tp_vars->finish_work, batadv_tp_sender_finish); 1162 1163 kref_get(&tp_vars->common.refcount); 1164 hlist_add_head_rcu(&tp_vars->common.list, &bat_priv->tp_sender_list); 1165 spin_unlock_bh(&bat_priv->tp_list_lock); 1166 1167 batadv_dbg(BATADV_DBG_TP_METER, bat_priv, 1168 "Meter: starting throughput meter towards %pM (length=%ums)\n", 1169 dst, test_length); 1170 1171 /* start tp kthread. This way the write() call issued from userspace can 1172 * happily return and avoid to block 1173 */ 1174 batadv_tp_start_kthread(tp_vars); 1175 1176 /* don't return reference to new tp_vars */ 1177 batadv_tp_sender_put(tp_vars); 1178 } 1179 1180 /** 1181 * batadv_tp_stop() - stop currently running tp meter session 1182 * @bat_priv: the bat priv with all the mesh interface information 1183 * @dst: the receiver MAC address 1184 * @return_value: reason for tp meter session stop 1185 */ 1186 void batadv_tp_stop(struct batadv_priv *bat_priv, const u8 *dst, 1187 u8 return_value) 1188 { 1189 struct batadv_orig_node *orig_node; 1190 struct batadv_tp_sender *tp_vars; 1191 1192 batadv_dbg(BATADV_DBG_TP_METER, bat_priv, 1193 "Meter: stopping test towards %pM\n", dst); 1194 1195 orig_node = batadv_orig_hash_find(bat_priv, dst); 1196 if (!orig_node) 1197 return; 1198 1199 tp_vars = batadv_tp_list_find_sender(bat_priv, orig_node->orig); 1200 if (!tp_vars) { 1201 batadv_dbg(BATADV_DBG_TP_METER, bat_priv, 1202 "Meter: trying to interrupt an already over connection\n"); 1203 goto out_put_orig_node; 1204 } 1205 1206 batadv_tp_sender_shutdown(tp_vars, return_value); 1207 batadv_tp_sender_put(tp_vars); 1208 out_put_orig_node: 1209 batadv_orig_node_put(orig_node); 1210 } 1211 1212 /** 1213 * batadv_tp_list_find_receiver_session() - find tp_vars receiver session 1214 * object in the global list 1215 * @bat_priv: the bat priv with all the mesh interface information 1216 * @dst: the other endpoint MAC address to look for 1217 * @session: session identifier 1218 * 1219 * Look for a tp_vars object matching dst as end_point, session as tp meter 1220 * session and return it after having increment the refcounter. Return NULL 1221 * is not found 1222 * 1223 * Return: matching tp_vars or NULL when no tp_vars was found 1224 */ 1225 static struct batadv_tp_receiver * 1226 batadv_tp_list_find_receiver_session(struct batadv_priv *bat_priv, const u8 *dst, 1227 const u8 *session) 1228 { 1229 struct batadv_tp_receiver *pos, *tp_vars = NULL; 1230 1231 rcu_read_lock(); 1232 hlist_for_each_entry_rcu(pos, &bat_priv->tp_receiver_list, common.list) { 1233 if (!batadv_compare_eth(pos->common.other_end, dst)) 1234 continue; 1235 1236 if (memcmp(pos->common.session, session, sizeof(pos->common.session)) != 0) 1237 continue; 1238 1239 /* most of the time this function is invoked during the normal 1240 * process..it makes sense to pay more when the session is 1241 * finished and to speed the process up during the measurement 1242 */ 1243 if (unlikely(!kref_get_unless_zero(&pos->common.refcount))) 1244 continue; 1245 1246 tp_vars = pos; 1247 break; 1248 } 1249 rcu_read_unlock(); 1250 1251 return tp_vars; 1252 } 1253 1254 /** 1255 * batadv_tp_receiver_put() - decrement the batadv_tp_receiver 1256 * refcounter and possibly release it 1257 * @tp_vars: the private data of the current TP meter session to be free'd 1258 */ 1259 static void batadv_tp_receiver_put(struct batadv_tp_receiver *tp_vars) 1260 { 1261 if (!tp_vars) 1262 return; 1263 1264 kref_put(&tp_vars->common.refcount, batadv_tp_vars_common_release); 1265 } 1266 1267 /** 1268 * batadv_tp_reset_receiver_timer() - reset the receiver shutdown timer 1269 * @tp_vars: the private data of the current TP meter session 1270 * 1271 * start the receiver shutdown timer or reset it if already started 1272 */ 1273 static void batadv_tp_reset_receiver_timer(struct batadv_tp_receiver *tp_vars) 1274 { 1275 mod_timer(&tp_vars->common.timer, 1276 jiffies + msecs_to_jiffies(BATADV_TP_RECV_TIMEOUT)); 1277 } 1278 1279 /** 1280 * batadv_tp_receiver_shutdown() - stop a tp meter receiver when timeout is 1281 * reached without received ack 1282 * @t: address to timer_list inside tp_vars 1283 */ 1284 static void batadv_tp_receiver_shutdown(struct timer_list *t) 1285 { 1286 struct batadv_tp_receiver *tp_vars = timer_container_of(tp_vars, t, common.timer); 1287 struct batadv_tp_unacked *un, *safe; 1288 struct batadv_priv *bat_priv; 1289 1290 bat_priv = tp_vars->common.bat_priv; 1291 1292 /* if there is recent activity rearm the timer */ 1293 if (!batadv_has_timed_out(READ_ONCE(tp_vars->last_recv_time), 1294 BATADV_TP_RECV_TIMEOUT)) { 1295 /* reset the receiver shutdown timer */ 1296 batadv_tp_reset_receiver_timer(tp_vars); 1297 return; 1298 } 1299 1300 batadv_dbg(BATADV_DBG_TP_METER, bat_priv, 1301 "Shutting down for inactivity (more than %dms) from %pM\n", 1302 BATADV_TP_RECV_TIMEOUT, tp_vars->common.other_end); 1303 1304 if (batadv_tp_list_detach(&tp_vars->common)) 1305 batadv_tp_receiver_put(tp_vars); 1306 1307 spin_lock_bh(&tp_vars->common.unacked_lock); 1308 list_for_each_entry_safe(un, safe, &tp_vars->common.unacked_list, list) { 1309 list_del(&un->list); 1310 kfree(un); 1311 tp_vars->common.unacked_count--; 1312 } 1313 spin_unlock_bh(&tp_vars->common.unacked_lock); 1314 1315 /* drop reference of timer */ 1316 if (WARN_ON(atomic_xchg(&tp_vars->receiving, 0) != 1)) 1317 return; 1318 1319 batadv_tp_receiver_put(tp_vars); 1320 } 1321 1322 /** 1323 * batadv_tp_send_ack() - send an ACK packet 1324 * @bat_priv: the bat priv with all the mesh interface information 1325 * @dst: the mac address of the destination originator 1326 * @seq: the sequence number to ACK 1327 * @timestamp: the timestamp to echo back in the ACK 1328 * @session: session identifier 1329 * @socket_index: local ICMP socket identifier 1330 * 1331 * Return: 0 on success, a positive integer representing the reason of the 1332 * failure otherwise 1333 */ 1334 static int batadv_tp_send_ack(struct batadv_priv *bat_priv, const u8 *dst, 1335 u32 seq, __be32 timestamp, const u8 *session, 1336 int socket_index) 1337 { 1338 struct batadv_hard_iface *primary_if = NULL; 1339 struct batadv_orig_node *orig_node; 1340 struct batadv_icmp_tp_packet *icmp; 1341 struct sk_buff *skb; 1342 int r, ret; 1343 1344 orig_node = batadv_orig_hash_find(bat_priv, dst); 1345 if (unlikely(!orig_node)) { 1346 ret = BATADV_TP_REASON_DST_UNREACHABLE; 1347 goto out; 1348 } 1349 1350 primary_if = batadv_primary_if_get_selected(bat_priv); 1351 if (unlikely(!primary_if)) { 1352 ret = BATADV_TP_REASON_DST_UNREACHABLE; 1353 goto out; 1354 } 1355 1356 skb = netdev_alloc_skb_ip_align(NULL, sizeof(*icmp) + ETH_HLEN); 1357 if (unlikely(!skb)) { 1358 ret = BATADV_TP_REASON_MEMORY_ERROR; 1359 goto out; 1360 } 1361 1362 skb_reserve(skb, ETH_HLEN); 1363 icmp = skb_put(skb, sizeof(*icmp)); 1364 icmp->packet_type = BATADV_ICMP; 1365 icmp->version = BATADV_COMPAT_VERSION; 1366 icmp->ttl = BATADV_TTL; 1367 icmp->msg_type = BATADV_TP; 1368 ether_addr_copy(icmp->dst, orig_node->orig); 1369 ether_addr_copy(icmp->orig, primary_if->net_dev->dev_addr); 1370 icmp->uid = socket_index; 1371 1372 icmp->subtype = BATADV_TP_ACK; 1373 memcpy(icmp->session, session, sizeof(icmp->session)); 1374 icmp->seqno = htonl(seq); 1375 icmp->timestamp = timestamp; 1376 1377 /* send the ack */ 1378 r = batadv_send_skb_to_orig(skb, orig_node, NULL); 1379 if (unlikely(r < 0) || r == NET_XMIT_DROP) { 1380 ret = BATADV_TP_REASON_DST_UNREACHABLE; 1381 goto out; 1382 } 1383 ret = 0; 1384 1385 out: 1386 batadv_orig_node_put(orig_node); 1387 batadv_hardif_put(primary_if); 1388 1389 return ret; 1390 } 1391 1392 /** 1393 * batadv_tp_handle_out_of_order() - store an out of order packet 1394 * @tp_vars: the private data of the current TP meter session 1395 * @seqno: sequence number of new received packet 1396 * @payload_len: length of the received packet 1397 * 1398 * Store the out of order packet in the unacked list for late processing. This 1399 * packets are kept in this list so that they can be ACKed at once as soon as 1400 * all the previous packets have been received 1401 * 1402 * Return: true if the packed has been successfully processed, false otherwise 1403 */ 1404 static bool batadv_tp_handle_out_of_order(struct batadv_tp_receiver *tp_vars, 1405 u32 seqno, u32 payload_len) 1406 __must_hold(&tp_vars->common.unacked_lock) 1407 { 1408 struct batadv_tp_unacked *un, *new; 1409 bool added = false; 1410 1411 new = kmalloc_obj(*new, GFP_ATOMIC); 1412 if (unlikely(!new)) 1413 return false; 1414 1415 new->seqno = seqno; 1416 new->len = payload_len; 1417 1418 /* if the list is empty immediately attach this new object */ 1419 if (list_empty(&tp_vars->common.unacked_list)) { 1420 list_add(&new->list, &tp_vars->common.unacked_list); 1421 tp_vars->common.unacked_count++; 1422 return true; 1423 } 1424 1425 /* otherwise loop over the list and either drop the packet because this 1426 * is a duplicate or store it at the right position. 1427 * 1428 * The iteration is done in the reverse way because it is likely that 1429 * the last received packet (the one being processed now) has a bigger 1430 * seqno than all the others already stored. 1431 */ 1432 list_for_each_entry_reverse(un, &tp_vars->common.unacked_list, list) { 1433 /* check for duplicates */ 1434 if (new->seqno == un->seqno) { 1435 if (new->len > un->len) 1436 un->len = new->len; 1437 kfree(new); 1438 added = true; 1439 break; 1440 } 1441 1442 /* look for the right position */ 1443 if (batadv_seq_before(new->seqno, un->seqno)) 1444 continue; 1445 1446 /* as soon as an entry having a bigger seqno is found, the new 1447 * one is attached _after_ it. In this way the list is kept in 1448 * ascending order 1449 */ 1450 list_add(&new->list, &un->list); 1451 added = true; 1452 tp_vars->common.unacked_count++; 1453 break; 1454 } 1455 1456 /* received packet with smallest seqno out of order; add it to front */ 1457 if (!added) { 1458 list_add(&new->list, &tp_vars->common.unacked_list); 1459 tp_vars->common.unacked_count++; 1460 } 1461 1462 /* remove the last (biggest) unacked seqno when list is too large */ 1463 if (tp_vars->common.unacked_count > BATADV_TP_MAX_UNACKED) { 1464 un = list_last_entry(&tp_vars->common.unacked_list, 1465 struct batadv_tp_unacked, list); 1466 list_del(&un->list); 1467 kfree(un); 1468 tp_vars->common.unacked_count--; 1469 } 1470 1471 return true; 1472 } 1473 1474 /** 1475 * batadv_tp_ack_unordered() - update number received bytes in current stream 1476 * without gaps 1477 * @tp_vars: the private data of the current TP meter session 1478 */ 1479 static void batadv_tp_ack_unordered(struct batadv_tp_receiver *tp_vars) 1480 __must_hold(&tp_vars->common.unacked_lock) 1481 { 1482 struct batadv_tp_unacked *un, *safe; 1483 u32 to_ack; 1484 1485 /* go through the unacked packet list and possibly ACK them as 1486 * well 1487 */ 1488 list_for_each_entry_safe(un, safe, &tp_vars->common.unacked_list, list) { 1489 /* the list is ordered, therefore it is possible to stop as soon 1490 * there is a gap between the last acked seqno and the seqno of 1491 * the packet under inspection 1492 */ 1493 if (batadv_seq_before(tp_vars->last_recv, un->seqno)) 1494 break; 1495 1496 to_ack = un->seqno + un->len - tp_vars->last_recv; 1497 1498 if (batadv_seq_before(tp_vars->last_recv, un->seqno + un->len)) 1499 tp_vars->last_recv += to_ack; 1500 1501 list_del(&un->list); 1502 kfree(un); 1503 tp_vars->common.unacked_count--; 1504 } 1505 } 1506 1507 /** 1508 * batadv_tp_init_recv() - return matching or create new receiver tp_vars 1509 * @bat_priv: the bat priv with all the mesh interface information 1510 * @icmp: received icmp tp msg 1511 * 1512 * Return: corresponding tp_vars or NULL on errors 1513 */ 1514 static struct batadv_tp_receiver * 1515 batadv_tp_init_recv(struct batadv_priv *bat_priv, 1516 const struct batadv_icmp_tp_packet *icmp) 1517 { 1518 struct batadv_tp_receiver *tp_vars = NULL; 1519 1520 spin_lock_bh(&bat_priv->tp_list_lock); 1521 if (READ_ONCE(bat_priv->mesh_state) != BATADV_MESH_ACTIVE) 1522 goto out_unlock; 1523 1524 tp_vars = batadv_tp_list_find_receiver_session(bat_priv, icmp->orig, 1525 icmp->session); 1526 if (tp_vars) { 1527 WRITE_ONCE(tp_vars->last_recv_time, jiffies); 1528 goto out_unlock; 1529 } 1530 1531 if (!atomic_add_unless(&bat_priv->tp_num, 1, BATADV_TP_MAX_NUM)) { 1532 batadv_dbg(BATADV_DBG_TP_METER, bat_priv, 1533 "Meter: too many ongoing sessions, aborting (RECV)\n"); 1534 goto out_unlock; 1535 } 1536 1537 tp_vars = kmalloc_obj(*tp_vars, GFP_ATOMIC); 1538 if (!tp_vars) { 1539 atomic_dec(&bat_priv->tp_num); 1540 goto out_unlock; 1541 } 1542 1543 ether_addr_copy(tp_vars->common.other_end, icmp->orig); 1544 atomic_set(&tp_vars->receiving, 1); 1545 memcpy(tp_vars->common.session, icmp->session, sizeof(tp_vars->common.session)); 1546 tp_vars->last_recv = BATADV_TP_FIRST_SEQ; 1547 tp_vars->common.bat_priv = bat_priv; 1548 kref_init(&tp_vars->common.refcount); 1549 1550 spin_lock_init(&tp_vars->common.unacked_lock); 1551 INIT_LIST_HEAD(&tp_vars->common.unacked_list); 1552 tp_vars->common.unacked_count = 0; 1553 1554 kref_get(&tp_vars->common.refcount); 1555 timer_setup(&tp_vars->common.timer, batadv_tp_receiver_shutdown, 0); 1556 1557 WRITE_ONCE(tp_vars->last_recv_time, jiffies); 1558 1559 kref_get(&tp_vars->common.refcount); 1560 hlist_add_head_rcu(&tp_vars->common.list, &bat_priv->tp_receiver_list); 1561 1562 batadv_tp_reset_receiver_timer(tp_vars); 1563 1564 out_unlock: 1565 spin_unlock_bh(&bat_priv->tp_list_lock); 1566 1567 return tp_vars; 1568 } 1569 1570 /** 1571 * batadv_tp_recv_msg() - process a single data message 1572 * @bat_priv: the bat priv with all the mesh interface information 1573 * @skb: the buffer containing the received packet 1574 * 1575 * Process a received TP MSG packet 1576 */ 1577 static void batadv_tp_recv_msg(struct batadv_priv *bat_priv, 1578 const struct sk_buff *skb) 1579 { 1580 const struct batadv_icmp_tp_packet *icmp; 1581 struct batadv_tp_receiver *tp_vars; 1582 u32 payload_len; 1583 u32 to_ack; 1584 u32 seqno; 1585 1586 icmp = (struct batadv_icmp_tp_packet *)skb->data; 1587 1588 seqno = ntohl(icmp->seqno); 1589 /* check if this is the first seqno. This means that if the 1590 * first packet is lost, the tp meter does not work anymore! 1591 */ 1592 if (seqno == BATADV_TP_FIRST_SEQ) { 1593 tp_vars = batadv_tp_init_recv(bat_priv, icmp); 1594 if (!tp_vars) { 1595 batadv_dbg(BATADV_DBG_TP_METER, bat_priv, 1596 "Meter: seqno != BATADV_TP_FIRST_SEQ cannot initiate connection\n"); 1597 goto out; 1598 } 1599 } else { 1600 tp_vars = batadv_tp_list_find_receiver_session(bat_priv, icmp->orig, 1601 icmp->session); 1602 if (!tp_vars) { 1603 batadv_dbg(BATADV_DBG_TP_METER, bat_priv, 1604 "Unexpected packet from %pM!\n", 1605 icmp->orig); 1606 goto out; 1607 } 1608 1609 WRITE_ONCE(tp_vars->last_recv_time, jiffies); 1610 } 1611 1612 spin_lock_bh(&tp_vars->common.unacked_lock); 1613 1614 /* if the packet is a duplicate, it may be the case that an ACK has been 1615 * lost. Resend the ACK 1616 */ 1617 payload_len = skb->len - sizeof(struct batadv_unicast_packet); 1618 to_ack = seqno + payload_len; 1619 if (batadv_seq_before(to_ack, tp_vars->last_recv)) 1620 goto send_ack; 1621 1622 /* if the packet is out of order enqueue it */ 1623 if (batadv_seq_before(tp_vars->last_recv, seqno)) { 1624 /* exit immediately (and do not send any ACK) if the packet has 1625 * not been enqueued correctly 1626 */ 1627 if (!batadv_tp_handle_out_of_order(tp_vars, seqno, payload_len)) { 1628 spin_unlock_bh(&tp_vars->common.unacked_lock); 1629 goto out; 1630 } 1631 1632 /* send a duplicate ACK */ 1633 goto send_ack; 1634 } 1635 1636 /* if everything was fine count the ACKed bytes */ 1637 tp_vars->last_recv = to_ack; 1638 1639 /* check if this ordered message filled a gap.... */ 1640 batadv_tp_ack_unordered(tp_vars); 1641 1642 send_ack: 1643 to_ack = tp_vars->last_recv; 1644 spin_unlock_bh(&tp_vars->common.unacked_lock); 1645 1646 /* send the ACK. If the received packet was out of order, the ACK that 1647 * is going to be sent is a duplicate (the sender will count them and 1648 * possibly enter Fast Retransmit as soon as it has reached 3) 1649 */ 1650 batadv_tp_send_ack(bat_priv, icmp->orig, to_ack, 1651 icmp->timestamp, icmp->session, icmp->uid); 1652 out: 1653 batadv_tp_receiver_put(tp_vars); 1654 } 1655 1656 /** 1657 * batadv_tp_meter_recv() - main TP Meter receiving function 1658 * @bat_priv: the bat priv with all the mesh interface information 1659 * @skb: the buffer containing the received packet 1660 */ 1661 void batadv_tp_meter_recv(struct batadv_priv *bat_priv, struct sk_buff *skb) 1662 { 1663 struct batadv_icmp_tp_packet *icmp; 1664 1665 if (READ_ONCE(bat_priv->mesh_state) != BATADV_MESH_ACTIVE) 1666 goto out; 1667 1668 icmp = (struct batadv_icmp_tp_packet *)skb->data; 1669 1670 switch (icmp->subtype) { 1671 case BATADV_TP_MSG: 1672 batadv_tp_recv_msg(bat_priv, skb); 1673 break; 1674 case BATADV_TP_ACK: 1675 batadv_tp_recv_ack(bat_priv, skb); 1676 break; 1677 default: 1678 batadv_dbg(BATADV_DBG_TP_METER, bat_priv, 1679 "Received unknown TP Metric packet type %u\n", 1680 icmp->subtype); 1681 } 1682 1683 out: 1684 consume_skb(skb); 1685 } 1686 1687 /** 1688 * batadv_tp_stop_all() - stop all currently running tp meter sessions 1689 * @bat_priv: the bat priv with all the mesh interface information 1690 */ 1691 void batadv_tp_stop_all(struct batadv_priv *bat_priv) 1692 { 1693 struct batadv_tp_receiver *tp_receivers[BATADV_TP_MAX_NUM]; 1694 struct batadv_tp_sender *tp_senders[BATADV_TP_MAX_NUM]; 1695 struct batadv_tp_receiver *tp_receiver; 1696 struct batadv_tp_sender *tp_sender; 1697 size_t receiver_count = 0; 1698 size_t sender_count = 0; 1699 size_t i; 1700 1701 spin_lock_bh(&bat_priv->tp_list_lock); 1702 hlist_for_each_entry(tp_receiver, &bat_priv->tp_receiver_list, common.list) { 1703 if (WARN_ON_ONCE(receiver_count >= BATADV_TP_MAX_NUM)) 1704 break; 1705 1706 if (!kref_get_unless_zero(&tp_receiver->common.refcount)) 1707 continue; 1708 1709 tp_receivers[receiver_count++] = tp_receiver; 1710 } 1711 1712 hlist_for_each_entry(tp_sender, &bat_priv->tp_sender_list, common.list) { 1713 if (WARN_ON_ONCE(sender_count >= BATADV_TP_MAX_NUM)) 1714 break; 1715 1716 if (!kref_get_unless_zero(&tp_sender->common.refcount)) 1717 continue; 1718 1719 tp_senders[sender_count++] = tp_sender; 1720 } 1721 spin_unlock_bh(&bat_priv->tp_list_lock); 1722 1723 for (i = 0; i < receiver_count; i++) { 1724 tp_receiver = tp_receivers[i]; 1725 1726 if (batadv_tp_list_detach(&tp_receiver->common)) 1727 batadv_tp_receiver_put(tp_receiver); 1728 1729 timer_shutdown_sync(&tp_receiver->common.timer); 1730 1731 if (atomic_xchg(&tp_receiver->receiving, 0) != 0) 1732 batadv_tp_receiver_put(tp_receiver); 1733 1734 batadv_tp_receiver_put(tp_receiver); 1735 } 1736 1737 for (i = 0; i < sender_count; i++) { 1738 tp_sender = tp_senders[i]; 1739 1740 batadv_tp_sender_shutdown(tp_sender, BATADV_TP_REASON_CANCEL); 1741 wake_up(&tp_sender->more_bytes); 1742 wait_for_completion(&tp_sender->finished); 1743 1744 batadv_tp_sender_put(tp_sender); 1745 } 1746 1747 synchronize_net(); 1748 } 1749 1750 /** 1751 * batadv_tp_meter_init() - initialize global tp_meter structures 1752 */ 1753 void __init batadv_tp_meter_init(void) 1754 { 1755 get_random_bytes(batadv_tp_prerandom, sizeof(batadv_tp_prerandom)); 1756 } 1757