1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2012 Chelsio Communications, Inc. 5 * All rights reserved. 6 * Written by: Navdeep Parhar <np@FreeBSD.org> 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30 #include "opt_inet.h" 31 #include "opt_inet6.h" 32 33 #include <sys/param.h> 34 #include <sys/eventhandler.h> 35 #include <sys/kernel.h> 36 #include <sys/systm.h> 37 #include <sys/malloc.h> 38 #include <sys/mbuf.h> 39 #include <sys/module.h> 40 #include <sys/types.h> 41 #include <sys/sockopt.h> 42 #include <sys/sysctl.h> 43 #include <sys/socket.h> 44 45 #include <net/ethernet.h> 46 #include <net/if.h> 47 #include <net/if_var.h> 48 #include <net/if_private.h> 49 #include <net/if_types.h> 50 #include <net/if_vlan_var.h> 51 #include <net/if_llatbl.h> 52 #include <net/route.h> 53 54 #include <netinet/if_ether.h> 55 #include <netinet/in.h> 56 #include <netinet/in_pcb.h> 57 #include <netinet/in_var.h> 58 #include <netinet6/in6_var.h> 59 #include <netinet6/in6_pcb.h> 60 #include <netinet6/nd6.h> 61 #define TCPSTATES 62 #include <netinet/tcp.h> 63 #include <netinet/tcp_fsm.h> 64 #include <netinet/tcp_timer.h> 65 #include <netinet/tcp_var.h> 66 #include <netinet/tcp_syncache.h> 67 #include <netinet/tcp_offload.h> 68 #include <netinet/toecore.h> 69 70 static struct mtx toedev_lock; 71 static TAILQ_HEAD(, toedev) toedev_list; 72 static eventhandler_tag listen_start_eh; 73 static eventhandler_tag listen_stop_eh; 74 static eventhandler_tag lle_event_eh; 75 76 static int 77 toedev_connect(struct toedev *tod __unused, struct socket *so __unused, 78 struct nhop_object *nh __unused, struct sockaddr *nam __unused) 79 { 80 81 return (ENOTSUP); 82 } 83 84 static int 85 toedev_listen_start(struct toedev *tod __unused, struct tcpcb *tp __unused) 86 { 87 88 return (ENOTSUP); 89 } 90 91 static int 92 toedev_listen_stop(struct toedev *tod __unused, struct tcpcb *tp __unused) 93 { 94 95 return (ENOTSUP); 96 } 97 98 static void 99 toedev_input(struct toedev *tod __unused, struct tcpcb *tp __unused, 100 struct mbuf *m) 101 { 102 103 m_freem(m); 104 return; 105 } 106 107 static void 108 toedev_rcvd(struct toedev *tod __unused, struct tcpcb *tp __unused) 109 { 110 111 return; 112 } 113 114 static int 115 toedev_output(struct toedev *tod __unused, struct tcpcb *tp __unused) 116 { 117 118 return (ENOTSUP); 119 } 120 121 static void 122 toedev_pcb_detach(struct toedev *tod __unused, struct tcpcb *tp __unused) 123 { 124 125 return; 126 } 127 128 static void 129 toedev_l2_update(struct toedev *tod __unused, struct ifnet *ifp __unused, 130 struct sockaddr *sa __unused, uint8_t *lladdr __unused, 131 uint16_t vtag __unused) 132 { 133 134 return; 135 } 136 137 static void 138 toedev_route_redirect(struct toedev *tod __unused, struct ifnet *ifp __unused, 139 struct nhop_object *nh0 __unused, struct nhop_object *nh1 __unused) 140 { 141 142 return; 143 } 144 145 static void 146 toedev_syncache_added(struct toedev *tod __unused, void *ctx __unused) 147 { 148 149 return; 150 } 151 152 static void 153 toedev_syncache_removed(struct toedev *tod __unused, void *ctx __unused) 154 { 155 156 return; 157 } 158 159 static int 160 toedev_syncache_respond(struct toedev *tod __unused, void *ctx __unused, 161 struct mbuf *m) 162 { 163 164 m_freem(m); 165 return (0); 166 } 167 168 static void 169 toedev_offload_socket(struct toedev *tod __unused, void *ctx __unused, 170 struct socket *so __unused) 171 { 172 173 return; 174 } 175 176 static void 177 toedev_ctloutput(struct toedev *tod __unused, struct tcpcb *tp __unused, 178 int sopt_dir __unused, int sopt_name __unused) 179 { 180 181 return; 182 } 183 184 static void 185 toedev_tcp_info(struct toedev *tod __unused, const struct tcpcb *tp __unused, 186 struct tcp_info *ti __unused) 187 { 188 189 return; 190 } 191 192 static int 193 toedev_alloc_tls_session(struct toedev *tod __unused, struct tcpcb *tp __unused, 194 struct ktls_session *tls __unused, int direction __unused) 195 { 196 197 return (EINVAL); 198 } 199 200 static void 201 toedev_pmtu_update(struct toedev *tod __unused, struct tcpcb *tp __unused, 202 tcp_seq seq __unused, int mtu __unused) 203 { 204 205 return; 206 } 207 208 /* 209 * Inform one or more TOE devices about a listening socket. 210 */ 211 static void 212 toe_listen_start(struct inpcb *inp, void *arg) 213 { 214 struct toedev *t, *tod; 215 struct tcpcb *tp = intotcpcb(inp); 216 217 INP_WLOCK_ASSERT(inp); 218 KASSERT(inp->inp_pcbinfo == &V_tcbinfo, 219 ("%s: inp is not a TCP inp", __func__)); 220 221 if (tp->t_flags & TF_DISCONNECTED) 222 return; 223 224 if (tp->t_state != TCPS_LISTEN) 225 return; 226 227 t = arg; 228 mtx_lock(&toedev_lock); 229 TAILQ_FOREACH(tod, &toedev_list, link) { 230 if (t == NULL || t == tod) 231 tod->tod_listen_start(tod, tp); 232 } 233 mtx_unlock(&toedev_lock); 234 } 235 236 static void 237 toe_listen_start_event(void *arg __unused, struct tcpcb *tp) 238 { 239 struct inpcb *inp = tptoinpcb(tp); 240 241 INP_WLOCK_ASSERT(inp); 242 KASSERT(tp->t_state == TCPS_LISTEN, 243 ("%s: t_state %s", __func__, tcpstates[tp->t_state])); 244 245 toe_listen_start(inp, NULL); 246 } 247 248 static void 249 toe_listen_stop_event(void *arg __unused, struct tcpcb *tp) 250 { 251 struct toedev *tod; 252 #ifdef INVARIANTS 253 struct inpcb *inp = tptoinpcb(tp); 254 #endif 255 256 INP_WLOCK_ASSERT(inp); 257 KASSERT(tp->t_state == TCPS_LISTEN, 258 ("%s: t_state %s", __func__, tcpstates[tp->t_state])); 259 260 mtx_lock(&toedev_lock); 261 TAILQ_FOREACH(tod, &toedev_list, link) 262 tod->tod_listen_stop(tod, tp); 263 mtx_unlock(&toedev_lock); 264 } 265 266 /* 267 * Fill up a freshly allocated toedev struct with reasonable defaults. 268 */ 269 void 270 init_toedev(struct toedev *tod) 271 { 272 273 tod->tod_softc = NULL; 274 275 /* 276 * Provide no-op defaults so that the kernel can call any toedev 277 * function without having to check whether the TOE driver supplied one 278 * or not. 279 */ 280 tod->tod_connect = toedev_connect; 281 tod->tod_listen_start = toedev_listen_start; 282 tod->tod_listen_stop = toedev_listen_stop; 283 tod->tod_input = toedev_input; 284 tod->tod_rcvd = toedev_rcvd; 285 tod->tod_output = toedev_output; 286 tod->tod_send_rst = toedev_output; 287 tod->tod_send_fin = toedev_output; 288 tod->tod_pcb_detach = toedev_pcb_detach; 289 tod->tod_l2_update = toedev_l2_update; 290 tod->tod_route_redirect = toedev_route_redirect; 291 tod->tod_syncache_added = toedev_syncache_added; 292 tod->tod_syncache_removed = toedev_syncache_removed; 293 tod->tod_syncache_respond = toedev_syncache_respond; 294 tod->tod_offload_socket = toedev_offload_socket; 295 tod->tod_ctloutput = toedev_ctloutput; 296 tod->tod_tcp_info = toedev_tcp_info; 297 tod->tod_alloc_tls_session = toedev_alloc_tls_session; 298 tod->tod_pmtu_update = toedev_pmtu_update; 299 } 300 301 /* 302 * Register an active TOE device with the system. This allows it to receive 303 * notifications from the kernel. 304 */ 305 int 306 register_toedev(struct toedev *tod) 307 { 308 struct toedev *t; 309 310 mtx_lock(&toedev_lock); 311 TAILQ_FOREACH(t, &toedev_list, link) { 312 if (t == tod) { 313 mtx_unlock(&toedev_lock); 314 return (EEXIST); 315 } 316 } 317 318 TAILQ_INSERT_TAIL(&toedev_list, tod, link); 319 registered_toedevs++; 320 mtx_unlock(&toedev_lock); 321 322 inp_apply_all(&V_tcbinfo, toe_listen_start, tod); 323 324 return (0); 325 } 326 327 /* 328 * Remove the TOE device from the global list of active TOE devices. It is the 329 * caller's responsibility to ensure that the TOE device is quiesced prior to 330 * this call. 331 */ 332 int 333 unregister_toedev(struct toedev *tod) 334 { 335 struct toedev *t, *t2; 336 int rc = ENODEV; 337 338 mtx_lock(&toedev_lock); 339 TAILQ_FOREACH_SAFE(t, &toedev_list, link, t2) { 340 if (t == tod) { 341 TAILQ_REMOVE(&toedev_list, tod, link); 342 registered_toedevs--; 343 rc = 0; 344 break; 345 } 346 } 347 KASSERT(registered_toedevs >= 0, 348 ("%s: registered_toedevs (%d) < 0", __func__, registered_toedevs)); 349 mtx_unlock(&toedev_lock); 350 return (rc); 351 } 352 353 void 354 toe_syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, 355 struct inpcb *inp, void *tod, void *todctx, uint8_t iptos) 356 { 357 358 INP_RLOCK_ASSERT(inp); 359 360 (void )syncache_add(inc, to, th, inp, inp->inp_socket, NULL, tod, 361 todctx, iptos, htons(0)); 362 } 363 364 int 365 toe_syncache_expand(struct in_conninfo *inc, struct tcpopt *to, 366 struct tcphdr *th, struct socket **lsop) 367 { 368 369 NET_EPOCH_ASSERT(); 370 371 return (syncache_expand(inc, to, th, lsop, NULL, htons(0))); 372 } 373 374 /* 375 * General purpose check to see if a 4-tuple is in use by the kernel. If a TCP 376 * header (presumably for an incoming SYN) is also provided, an existing 4-tuple 377 * in TIME_WAIT may be assassinated freeing it up for re-use. 378 * 379 * Note that the TCP header must have been run through tcp_fields_to_host() or 380 * equivalent. 381 */ 382 int 383 toe_4tuple_check(struct in_conninfo *inc, struct tcphdr *th, struct ifnet *ifp) 384 { 385 struct inpcb *inp; 386 struct tcpcb *tp; 387 388 if (inc->inc_flags & INC_ISIPV6) { 389 inp = in6_pcblookup(&V_tcbinfo, &inc->inc6_faddr, 390 inc->inc_fport, &inc->inc6_laddr, inc->inc_lport, 391 INPLOOKUP_RLOCKPCB, ifp); 392 } else { 393 inp = in_pcblookup(&V_tcbinfo, inc->inc_faddr, inc->inc_fport, 394 inc->inc_laddr, inc->inc_lport, INPLOOKUP_RLOCKPCB, ifp); 395 } 396 if (inp != NULL) { 397 INP_RLOCK_ASSERT(inp); 398 399 tp = intotcpcb(inp); 400 if (tp->t_state == TCPS_TIME_WAIT && th != NULL) { 401 if (!tcp_twcheck(inp, NULL, th, NULL, 0)) 402 return (EADDRINUSE); 403 } else { 404 INP_RUNLOCK(inp); 405 return (EADDRINUSE); 406 } 407 } 408 409 return (0); 410 } 411 412 static void 413 toe_lle_event(void *arg __unused, struct llentry *lle, int evt) 414 { 415 struct toedev *tod; 416 struct ifnet *ifp; 417 struct sockaddr *sa; 418 uint8_t *lladdr; 419 uint16_t vid, pcp; 420 int family; 421 struct sockaddr_in6 sin6; 422 423 LLE_WLOCK_ASSERT(lle); 424 425 ifp = lltable_get_ifp(lle->lle_tbl); 426 family = lltable_get_af(lle->lle_tbl); 427 428 if (family != AF_INET && family != AF_INET6) 429 return; 430 /* 431 * Not interested if the interface's TOE capability is not enabled. 432 */ 433 if ((family == AF_INET && !(ifp->if_capenable & IFCAP_TOE4)) || 434 (family == AF_INET6 && !(ifp->if_capenable & IFCAP_TOE6))) 435 return; 436 437 tod = TOEDEV(ifp); 438 if (tod == NULL) 439 return; 440 441 sa = (struct sockaddr *)&sin6; 442 lltable_fill_sa_entry(lle, sa); 443 444 vid = 0xfff; 445 pcp = 0; 446 if (evt != LLENTRY_RESOLVED) { 447 /* 448 * LLENTRY_TIMEDOUT, LLENTRY_DELETED, LLENTRY_EXPIRED all mean 449 * this entry is going to be deleted. 450 */ 451 452 lladdr = NULL; 453 } else { 454 KASSERT(lle->la_flags & LLE_VALID, 455 ("%s: %p resolved but not valid?", __func__, lle)); 456 457 lladdr = (uint8_t *)lle->ll_addr; 458 VLAN_TAG(ifp, &vid); 459 VLAN_PCP(ifp, &pcp); 460 } 461 462 tod->tod_l2_update(tod, ifp, sa, lladdr, EVL_MAKETAG(vid, pcp, 0)); 463 } 464 465 /* 466 * Returns 0 or EWOULDBLOCK on success (any other value is an error). 0 means 467 * lladdr and vtag are valid on return, EWOULDBLOCK means the TOE driver's 468 * tod_l2_update will be called later, when the entry is resolved or times out. 469 */ 470 int 471 toe_l2_resolve(struct toedev *tod, struct ifnet *ifp, struct sockaddr *sa, 472 uint8_t *lladdr, uint16_t *vtag) 473 { 474 int rc; 475 uint16_t vid, pcp; 476 477 switch (sa->sa_family) { 478 #ifdef INET 479 case AF_INET: 480 rc = arpresolve(ifp, 0, NULL, sa, lladdr, NULL, NULL); 481 break; 482 #endif 483 #ifdef INET6 484 case AF_INET6: 485 rc = nd6_resolve(ifp, LLE_SF(AF_INET6, 0), NULL, sa, lladdr, 486 NULL, NULL); 487 break; 488 #endif 489 default: 490 return (EPROTONOSUPPORT); 491 } 492 493 if (rc == 0) { 494 vid = 0xfff; 495 pcp = 0; 496 if (ifp->if_type == IFT_L2VLAN) { 497 VLAN_TAG(ifp, &vid); 498 VLAN_PCP(ifp, &pcp); 499 } else if (ifp->if_pcp != IFNET_PCP_NONE) { 500 vid = 0; 501 pcp = ifp->if_pcp; 502 } 503 *vtag = EVL_MAKETAG(vid, pcp, 0); 504 } 505 506 return (rc); 507 } 508 509 void 510 toe_connect_failed(struct toedev *tod, struct inpcb *inp, int err) 511 { 512 struct tcpcb *tp = intotcpcb(inp); 513 514 NET_EPOCH_ASSERT(); 515 INP_WLOCK_ASSERT(inp); 516 517 if (!(tp->t_flags & TF_DISCONNECTED)) { 518 KASSERT(tp->t_flags & TF_TOE, 519 ("%s: tp %p not offloaded.", __func__, tp)); 520 521 if (err == EAGAIN) { 522 /* 523 * Temporary failure during offload, take this PCB back. 524 * Detach from the TOE driver and do the rest of what 525 * TCP's pr_connect() would have done if the connection 526 * wasn't offloaded. 527 */ 528 529 tod->tod_pcb_detach(tod, tp); 530 KASSERT(!(tp->t_flags & TF_TOE), 531 ("%s: tp %p still offloaded.", __func__, tp)); 532 tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp)); 533 if (tcp_output(tp) < 0) 534 INP_WLOCK(inp); /* re-acquire */ 535 } else { 536 tp = tcp_drop(tp, err); 537 if (tp == NULL) 538 INP_WLOCK(inp); /* re-acquire */ 539 } 540 } 541 INP_WLOCK_ASSERT(inp); 542 } 543 544 static int 545 toecore_load(void) 546 { 547 548 mtx_init(&toedev_lock, "toedev lock", NULL, MTX_DEF); 549 TAILQ_INIT(&toedev_list); 550 551 listen_start_eh = EVENTHANDLER_REGISTER(tcp_offload_listen_start, 552 toe_listen_start_event, NULL, EVENTHANDLER_PRI_ANY); 553 listen_stop_eh = EVENTHANDLER_REGISTER(tcp_offload_listen_stop, 554 toe_listen_stop_event, NULL, EVENTHANDLER_PRI_ANY); 555 lle_event_eh = EVENTHANDLER_REGISTER(lle_event, toe_lle_event, NULL, 556 EVENTHANDLER_PRI_ANY); 557 558 return (0); 559 } 560 561 static int 562 toecore_unload(void) 563 { 564 565 mtx_lock(&toedev_lock); 566 if (!TAILQ_EMPTY(&toedev_list)) { 567 mtx_unlock(&toedev_lock); 568 return (EBUSY); 569 } 570 571 EVENTHANDLER_DEREGISTER(tcp_offload_listen_start, listen_start_eh); 572 EVENTHANDLER_DEREGISTER(tcp_offload_listen_stop, listen_stop_eh); 573 EVENTHANDLER_DEREGISTER(lle_event, lle_event_eh); 574 575 mtx_unlock(&toedev_lock); 576 mtx_destroy(&toedev_lock); 577 578 return (0); 579 } 580 581 static int 582 toecore_mod_handler(module_t mod, int cmd, void *arg) 583 { 584 585 if (cmd == MOD_LOAD) 586 return (toecore_load()); 587 588 if (cmd == MOD_UNLOAD) 589 return (toecore_unload()); 590 591 return (EOPNOTSUPP); 592 } 593 594 static moduledata_t mod_data= { 595 "toecore", 596 toecore_mod_handler, 597 0 598 }; 599 600 MODULE_VERSION(toecore, 1); 601 DECLARE_MODULE(toecore, mod_data, SI_SUB_EXEC, SI_ORDER_ANY); 602