1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2012 Chelsio Communications, Inc. 5 * All rights reserved. 6 * Written by: Navdeep Parhar <np@FreeBSD.org> 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30 #include <sys/cdefs.h> 31 #include "opt_inet.h" 32 #include "opt_inet6.h" 33 34 #include <sys/param.h> 35 #include <sys/eventhandler.h> 36 #include <sys/kernel.h> 37 #include <sys/systm.h> 38 #include <sys/malloc.h> 39 #include <sys/mbuf.h> 40 #include <sys/module.h> 41 #include <sys/types.h> 42 #include <sys/sockopt.h> 43 #include <sys/sysctl.h> 44 #include <sys/socket.h> 45 46 #include <net/ethernet.h> 47 #include <net/if.h> 48 #include <net/if_var.h> 49 #include <net/if_private.h> 50 #include <net/if_types.h> 51 #include <net/if_vlan_var.h> 52 #include <net/if_llatbl.h> 53 #include <net/route.h> 54 55 #include <netinet/if_ether.h> 56 #include <netinet/in.h> 57 #include <netinet/in_pcb.h> 58 #include <netinet/in_var.h> 59 #include <netinet6/in6_var.h> 60 #include <netinet6/in6_pcb.h> 61 #include <netinet6/nd6.h> 62 #define TCPSTATES 63 #include <netinet/tcp.h> 64 #include <netinet/tcp_fsm.h> 65 #include <netinet/tcp_timer.h> 66 #include <netinet/tcp_var.h> 67 #include <netinet/tcp_syncache.h> 68 #include <netinet/tcp_offload.h> 69 #include <netinet/toecore.h> 70 71 static struct mtx toedev_lock; 72 static TAILQ_HEAD(, toedev) toedev_list; 73 static eventhandler_tag listen_start_eh; 74 static eventhandler_tag listen_stop_eh; 75 static eventhandler_tag lle_event_eh; 76 77 static int 78 toedev_connect(struct toedev *tod __unused, struct socket *so __unused, 79 struct nhop_object *nh __unused, struct sockaddr *nam __unused) 80 { 81 82 return (ENOTSUP); 83 } 84 85 static int 86 toedev_listen_start(struct toedev *tod __unused, struct tcpcb *tp __unused) 87 { 88 89 return (ENOTSUP); 90 } 91 92 static int 93 toedev_listen_stop(struct toedev *tod __unused, struct tcpcb *tp __unused) 94 { 95 96 return (ENOTSUP); 97 } 98 99 static void 100 toedev_input(struct toedev *tod __unused, struct tcpcb *tp __unused, 101 struct mbuf *m) 102 { 103 104 m_freem(m); 105 return; 106 } 107 108 static void 109 toedev_rcvd(struct toedev *tod __unused, struct tcpcb *tp __unused) 110 { 111 112 return; 113 } 114 115 static int 116 toedev_output(struct toedev *tod __unused, struct tcpcb *tp __unused) 117 { 118 119 return (ENOTSUP); 120 } 121 122 static void 123 toedev_pcb_detach(struct toedev *tod __unused, struct tcpcb *tp __unused) 124 { 125 126 return; 127 } 128 129 static void 130 toedev_l2_update(struct toedev *tod __unused, struct ifnet *ifp __unused, 131 struct sockaddr *sa __unused, uint8_t *lladdr __unused, 132 uint16_t vtag __unused) 133 { 134 135 return; 136 } 137 138 static void 139 toedev_route_redirect(struct toedev *tod __unused, struct ifnet *ifp __unused, 140 struct nhop_object *nh0 __unused, struct nhop_object *nh1 __unused) 141 { 142 143 return; 144 } 145 146 static void 147 toedev_syncache_added(struct toedev *tod __unused, void *ctx __unused) 148 { 149 150 return; 151 } 152 153 static void 154 toedev_syncache_removed(struct toedev *tod __unused, void *ctx __unused) 155 { 156 157 return; 158 } 159 160 static int 161 toedev_syncache_respond(struct toedev *tod __unused, void *ctx __unused, 162 struct mbuf *m) 163 { 164 165 m_freem(m); 166 return (0); 167 } 168 169 static void 170 toedev_offload_socket(struct toedev *tod __unused, void *ctx __unused, 171 struct socket *so __unused) 172 { 173 174 return; 175 } 176 177 static void 178 toedev_ctloutput(struct toedev *tod __unused, struct tcpcb *tp __unused, 179 int sopt_dir __unused, int sopt_name __unused) 180 { 181 182 return; 183 } 184 185 static void 186 toedev_tcp_info(struct toedev *tod __unused, struct tcpcb *tp __unused, 187 struct tcp_info *ti __unused) 188 { 189 190 return; 191 } 192 193 static int 194 toedev_alloc_tls_session(struct toedev *tod __unused, struct tcpcb *tp __unused, 195 struct ktls_session *tls __unused, int direction __unused) 196 { 197 198 return (EINVAL); 199 } 200 201 static void 202 toedev_pmtu_update(struct toedev *tod __unused, struct tcpcb *tp __unused, 203 tcp_seq seq __unused, int mtu __unused) 204 { 205 206 return; 207 } 208 209 /* 210 * Inform one or more TOE devices about a listening socket. 211 */ 212 static void 213 toe_listen_start(struct inpcb *inp, void *arg) 214 { 215 struct toedev *t, *tod; 216 struct tcpcb *tp; 217 218 INP_WLOCK_ASSERT(inp); 219 KASSERT(inp->inp_pcbinfo == &V_tcbinfo, 220 ("%s: inp is not a TCP inp", __func__)); 221 222 if (inp->inp_flags & INP_DROPPED) 223 return; 224 225 tp = intotcpcb(inp); 226 if (tp->t_state != TCPS_LISTEN) 227 return; 228 229 t = arg; 230 mtx_lock(&toedev_lock); 231 TAILQ_FOREACH(tod, &toedev_list, link) { 232 if (t == NULL || t == tod) 233 tod->tod_listen_start(tod, tp); 234 } 235 mtx_unlock(&toedev_lock); 236 } 237 238 static void 239 toe_listen_start_event(void *arg __unused, struct tcpcb *tp) 240 { 241 struct inpcb *inp = tptoinpcb(tp); 242 243 INP_WLOCK_ASSERT(inp); 244 KASSERT(tp->t_state == TCPS_LISTEN, 245 ("%s: t_state %s", __func__, tcpstates[tp->t_state])); 246 247 toe_listen_start(inp, NULL); 248 } 249 250 static void 251 toe_listen_stop_event(void *arg __unused, struct tcpcb *tp) 252 { 253 struct toedev *tod; 254 #ifdef INVARIANTS 255 struct inpcb *inp = tptoinpcb(tp); 256 #endif 257 258 INP_WLOCK_ASSERT(inp); 259 KASSERT(tp->t_state == TCPS_LISTEN, 260 ("%s: t_state %s", __func__, tcpstates[tp->t_state])); 261 262 mtx_lock(&toedev_lock); 263 TAILQ_FOREACH(tod, &toedev_list, link) 264 tod->tod_listen_stop(tod, tp); 265 mtx_unlock(&toedev_lock); 266 } 267 268 /* 269 * Fill up a freshly allocated toedev struct with reasonable defaults. 270 */ 271 void 272 init_toedev(struct toedev *tod) 273 { 274 275 tod->tod_softc = NULL; 276 277 /* 278 * Provide no-op defaults so that the kernel can call any toedev 279 * function without having to check whether the TOE driver supplied one 280 * or not. 281 */ 282 tod->tod_connect = toedev_connect; 283 tod->tod_listen_start = toedev_listen_start; 284 tod->tod_listen_stop = toedev_listen_stop; 285 tod->tod_input = toedev_input; 286 tod->tod_rcvd = toedev_rcvd; 287 tod->tod_output = toedev_output; 288 tod->tod_send_rst = toedev_output; 289 tod->tod_send_fin = toedev_output; 290 tod->tod_pcb_detach = toedev_pcb_detach; 291 tod->tod_l2_update = toedev_l2_update; 292 tod->tod_route_redirect = toedev_route_redirect; 293 tod->tod_syncache_added = toedev_syncache_added; 294 tod->tod_syncache_removed = toedev_syncache_removed; 295 tod->tod_syncache_respond = toedev_syncache_respond; 296 tod->tod_offload_socket = toedev_offload_socket; 297 tod->tod_ctloutput = toedev_ctloutput; 298 tod->tod_tcp_info = toedev_tcp_info; 299 tod->tod_alloc_tls_session = toedev_alloc_tls_session; 300 tod->tod_pmtu_update = toedev_pmtu_update; 301 } 302 303 /* 304 * Register an active TOE device with the system. This allows it to receive 305 * notifications from the kernel. 306 */ 307 int 308 register_toedev(struct toedev *tod) 309 { 310 struct toedev *t; 311 312 mtx_lock(&toedev_lock); 313 TAILQ_FOREACH(t, &toedev_list, link) { 314 if (t == tod) { 315 mtx_unlock(&toedev_lock); 316 return (EEXIST); 317 } 318 } 319 320 TAILQ_INSERT_TAIL(&toedev_list, tod, link); 321 registered_toedevs++; 322 mtx_unlock(&toedev_lock); 323 324 inp_apply_all(&V_tcbinfo, toe_listen_start, tod); 325 326 return (0); 327 } 328 329 /* 330 * Remove the TOE device from the global list of active TOE devices. It is the 331 * caller's responsibility to ensure that the TOE device is quiesced prior to 332 * this call. 333 */ 334 int 335 unregister_toedev(struct toedev *tod) 336 { 337 struct toedev *t, *t2; 338 int rc = ENODEV; 339 340 mtx_lock(&toedev_lock); 341 TAILQ_FOREACH_SAFE(t, &toedev_list, link, t2) { 342 if (t == tod) { 343 TAILQ_REMOVE(&toedev_list, tod, link); 344 registered_toedevs--; 345 rc = 0; 346 break; 347 } 348 } 349 KASSERT(registered_toedevs >= 0, 350 ("%s: registered_toedevs (%d) < 0", __func__, registered_toedevs)); 351 mtx_unlock(&toedev_lock); 352 return (rc); 353 } 354 355 void 356 toe_syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, 357 struct inpcb *inp, void *tod, void *todctx, uint8_t iptos) 358 { 359 360 INP_RLOCK_ASSERT(inp); 361 362 (void )syncache_add(inc, to, th, inp, inp->inp_socket, NULL, tod, 363 todctx, iptos, htons(0)); 364 } 365 366 int 367 toe_syncache_expand(struct in_conninfo *inc, struct tcpopt *to, 368 struct tcphdr *th, struct socket **lsop) 369 { 370 371 NET_EPOCH_ASSERT(); 372 373 return (syncache_expand(inc, to, th, lsop, NULL, htons(0))); 374 } 375 376 /* 377 * General purpose check to see if a 4-tuple is in use by the kernel. If a TCP 378 * header (presumably for an incoming SYN) is also provided, an existing 4-tuple 379 * in TIME_WAIT may be assassinated freeing it up for re-use. 380 * 381 * Note that the TCP header must have been run through tcp_fields_to_host() or 382 * equivalent. 383 */ 384 int 385 toe_4tuple_check(struct in_conninfo *inc, struct tcphdr *th, struct ifnet *ifp) 386 { 387 struct inpcb *inp; 388 struct tcpcb *tp; 389 390 if (inc->inc_flags & INC_ISIPV6) { 391 inp = in6_pcblookup(&V_tcbinfo, &inc->inc6_faddr, 392 inc->inc_fport, &inc->inc6_laddr, inc->inc_lport, 393 INPLOOKUP_RLOCKPCB, ifp); 394 } else { 395 inp = in_pcblookup(&V_tcbinfo, inc->inc_faddr, inc->inc_fport, 396 inc->inc_laddr, inc->inc_lport, INPLOOKUP_RLOCKPCB, ifp); 397 } 398 if (inp != NULL) { 399 INP_RLOCK_ASSERT(inp); 400 401 tp = intotcpcb(inp); 402 if (tp->t_state == TCPS_TIME_WAIT && th != NULL) { 403 if (!tcp_twcheck(inp, NULL, th, NULL, 0)) 404 return (EADDRINUSE); 405 } else { 406 INP_RUNLOCK(inp); 407 return (EADDRINUSE); 408 } 409 } 410 411 return (0); 412 } 413 414 static void 415 toe_lle_event(void *arg __unused, struct llentry *lle, int evt) 416 { 417 struct toedev *tod; 418 struct ifnet *ifp; 419 struct sockaddr *sa; 420 uint8_t *lladdr; 421 uint16_t vid, pcp; 422 int family; 423 struct sockaddr_in6 sin6; 424 425 LLE_WLOCK_ASSERT(lle); 426 427 ifp = lltable_get_ifp(lle->lle_tbl); 428 family = lltable_get_af(lle->lle_tbl); 429 430 if (family != AF_INET && family != AF_INET6) 431 return; 432 /* 433 * Not interested if the interface's TOE capability is not enabled. 434 */ 435 if ((family == AF_INET && !(ifp->if_capenable & IFCAP_TOE4)) || 436 (family == AF_INET6 && !(ifp->if_capenable & IFCAP_TOE6))) 437 return; 438 439 tod = TOEDEV(ifp); 440 if (tod == NULL) 441 return; 442 443 sa = (struct sockaddr *)&sin6; 444 lltable_fill_sa_entry(lle, sa); 445 446 vid = 0xfff; 447 pcp = 0; 448 if (evt != LLENTRY_RESOLVED) { 449 /* 450 * LLENTRY_TIMEDOUT, LLENTRY_DELETED, LLENTRY_EXPIRED all mean 451 * this entry is going to be deleted. 452 */ 453 454 lladdr = NULL; 455 } else { 456 KASSERT(lle->la_flags & LLE_VALID, 457 ("%s: %p resolved but not valid?", __func__, lle)); 458 459 lladdr = (uint8_t *)lle->ll_addr; 460 VLAN_TAG(ifp, &vid); 461 VLAN_PCP(ifp, &pcp); 462 } 463 464 tod->tod_l2_update(tod, ifp, sa, lladdr, EVL_MAKETAG(vid, pcp, 0)); 465 } 466 467 /* 468 * Returns 0 or EWOULDBLOCK on success (any other value is an error). 0 means 469 * lladdr and vtag are valid on return, EWOULDBLOCK means the TOE driver's 470 * tod_l2_update will be called later, when the entry is resolved or times out. 471 */ 472 int 473 toe_l2_resolve(struct toedev *tod, struct ifnet *ifp, struct sockaddr *sa, 474 uint8_t *lladdr, uint16_t *vtag) 475 { 476 int rc; 477 uint16_t vid, pcp; 478 479 switch (sa->sa_family) { 480 #ifdef INET 481 case AF_INET: 482 rc = arpresolve(ifp, 0, NULL, sa, lladdr, NULL, NULL); 483 break; 484 #endif 485 #ifdef INET6 486 case AF_INET6: 487 rc = nd6_resolve(ifp, LLE_SF(AF_INET6, 0), NULL, sa, lladdr, 488 NULL, NULL); 489 break; 490 #endif 491 default: 492 return (EPROTONOSUPPORT); 493 } 494 495 if (rc == 0) { 496 vid = 0xfff; 497 pcp = 0; 498 if (ifp->if_type == IFT_L2VLAN) { 499 VLAN_TAG(ifp, &vid); 500 VLAN_PCP(ifp, &pcp); 501 } else if (ifp->if_pcp != IFNET_PCP_NONE) { 502 vid = 0; 503 pcp = ifp->if_pcp; 504 } 505 *vtag = EVL_MAKETAG(vid, pcp, 0); 506 } 507 508 return (rc); 509 } 510 511 void 512 toe_connect_failed(struct toedev *tod, struct inpcb *inp, int err) 513 { 514 515 NET_EPOCH_ASSERT(); 516 INP_WLOCK_ASSERT(inp); 517 518 if (!(inp->inp_flags & INP_DROPPED)) { 519 struct tcpcb *tp = intotcpcb(inp); 520 521 KASSERT(tp->t_flags & TF_TOE, 522 ("%s: tp %p not offloaded.", __func__, tp)); 523 524 if (err == EAGAIN) { 525 /* 526 * Temporary failure during offload, take this PCB back. 527 * Detach from the TOE driver and do the rest of what 528 * TCP's pru_connect would have done if the connection 529 * wasn't offloaded. 530 */ 531 532 tod->tod_pcb_detach(tod, tp); 533 KASSERT(!(tp->t_flags & TF_TOE), 534 ("%s: tp %p still offloaded.", __func__, tp)); 535 tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp)); 536 if (tcp_output(tp) < 0) 537 INP_WLOCK(inp); /* re-acquire */ 538 } else { 539 tp = tcp_drop(tp, err); 540 if (tp == NULL) 541 INP_WLOCK(inp); /* re-acquire */ 542 } 543 } 544 INP_WLOCK_ASSERT(inp); 545 } 546 547 static int 548 toecore_load(void) 549 { 550 551 mtx_init(&toedev_lock, "toedev lock", NULL, MTX_DEF); 552 TAILQ_INIT(&toedev_list); 553 554 listen_start_eh = EVENTHANDLER_REGISTER(tcp_offload_listen_start, 555 toe_listen_start_event, NULL, EVENTHANDLER_PRI_ANY); 556 listen_stop_eh = EVENTHANDLER_REGISTER(tcp_offload_listen_stop, 557 toe_listen_stop_event, NULL, EVENTHANDLER_PRI_ANY); 558 lle_event_eh = EVENTHANDLER_REGISTER(lle_event, toe_lle_event, NULL, 559 EVENTHANDLER_PRI_ANY); 560 561 return (0); 562 } 563 564 static int 565 toecore_unload(void) 566 { 567 568 mtx_lock(&toedev_lock); 569 if (!TAILQ_EMPTY(&toedev_list)) { 570 mtx_unlock(&toedev_lock); 571 return (EBUSY); 572 } 573 574 EVENTHANDLER_DEREGISTER(tcp_offload_listen_start, listen_start_eh); 575 EVENTHANDLER_DEREGISTER(tcp_offload_listen_stop, listen_stop_eh); 576 EVENTHANDLER_DEREGISTER(lle_event, lle_event_eh); 577 578 mtx_unlock(&toedev_lock); 579 mtx_destroy(&toedev_lock); 580 581 return (0); 582 } 583 584 static int 585 toecore_mod_handler(module_t mod, int cmd, void *arg) 586 { 587 588 if (cmd == MOD_LOAD) 589 return (toecore_load()); 590 591 if (cmd == MOD_UNLOAD) 592 return (toecore_unload()); 593 594 return (EOPNOTSUPP); 595 } 596 597 static moduledata_t mod_data= { 598 "toecore", 599 toecore_mod_handler, 600 0 601 }; 602 603 MODULE_VERSION(toecore, 1); 604 DECLARE_MODULE(toecore, mod_data, SI_SUB_EXEC, SI_ORDER_ANY); 605