1 /*- 2 * Copyright (c) 2012 Chelsio Communications, Inc. 3 * All rights reserved. 4 * Written by: Navdeep Parhar <np@FreeBSD.org> 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include <sys/cdefs.h> 29 __FBSDID("$FreeBSD$"); 30 31 #include "opt_inet.h" 32 #include "opt_inet6.h" 33 34 #include <sys/param.h> 35 #include <sys/kernel.h> 36 #include <sys/systm.h> 37 #include <sys/mbuf.h> 38 #include <sys/module.h> 39 #include <sys/types.h> 40 #include <sys/sockopt.h> 41 #include <sys/sysctl.h> 42 #include <sys/socket.h> 43 44 #include <net/ethernet.h> 45 #include <net/if.h> 46 #include <net/if_types.h> 47 #include <net/if_vlan_var.h> 48 #include <net/if_llatbl.h> 49 #include <net/route.h> 50 51 #include <netinet/if_ether.h> 52 #include <netinet/in.h> 53 #include <netinet/in_pcb.h> 54 #include <netinet/in_var.h> 55 #include <netinet6/nd6.h> 56 #define TCPSTATES 57 #include <netinet/tcp.h> 58 #include <netinet/tcp_fsm.h> 59 #include <netinet/tcp_timer.h> 60 #include <netinet/tcp_var.h> 61 #include <netinet/tcp_syncache.h> 62 #include <netinet/tcp_offload.h> 63 #include <netinet/toecore.h> 64 65 static struct mtx toedev_lock; 66 static TAILQ_HEAD(, toedev) toedev_list; 67 static eventhandler_tag listen_start_eh; 68 static eventhandler_tag listen_stop_eh; 69 static eventhandler_tag lle_event_eh; 70 static eventhandler_tag route_redirect_eh; 71 72 static int 73 toedev_connect(struct toedev *tod __unused, struct socket *so __unused, 74 struct rtentry *rt __unused, struct sockaddr *nam __unused) 75 { 76 77 return (ENOTSUP); 78 } 79 80 static int 81 toedev_listen_start(struct toedev *tod __unused, struct tcpcb *tp __unused) 82 { 83 84 return (ENOTSUP); 85 } 86 87 static int 88 toedev_listen_stop(struct toedev *tod __unused, struct tcpcb *tp __unused) 89 { 90 91 return (ENOTSUP); 92 } 93 94 static void 95 toedev_input(struct toedev *tod __unused, struct tcpcb *tp __unused, 96 struct mbuf *m) 97 { 98 99 m_freem(m); 100 return; 101 } 102 103 static void 104 toedev_rcvd(struct toedev *tod __unused, struct tcpcb *tp __unused) 105 { 106 107 return; 108 } 109 110 static int 111 toedev_output(struct toedev *tod __unused, struct tcpcb *tp __unused) 112 { 113 114 return (ENOTSUP); 115 } 116 117 static void 118 toedev_pcb_detach(struct toedev *tod __unused, struct tcpcb *tp __unused) 119 { 120 121 return; 122 } 123 124 static void 125 toedev_l2_update(struct toedev *tod __unused, struct ifnet *ifp __unused, 126 struct sockaddr *sa __unused, uint8_t *lladdr __unused, 127 uint16_t vtag __unused) 128 { 129 130 return; 131 } 132 133 static void 134 toedev_route_redirect(struct toedev *tod __unused, struct ifnet *ifp __unused, 135 struct rtentry *rt0 __unused, struct rtentry *rt1 __unused) 136 { 137 138 return; 139 } 140 141 static void 142 toedev_syncache_added(struct toedev *tod __unused, void *ctx __unused) 143 { 144 145 return; 146 } 147 148 static void 149 toedev_syncache_removed(struct toedev *tod __unused, void *ctx __unused) 150 { 151 152 return; 153 } 154 155 static int 156 toedev_syncache_respond(struct toedev *tod __unused, void *ctx __unused, 157 struct mbuf *m) 158 { 159 160 m_freem(m); 161 return (0); 162 } 163 164 static void 165 toedev_offload_socket(struct toedev *tod __unused, void *ctx __unused, 166 struct socket *so __unused) 167 { 168 169 return; 170 } 171 172 static void 173 toedev_ctloutput(struct toedev *tod __unused, struct tcpcb *tp __unused, 174 int sopt_dir __unused, int sopt_name __unused) 175 { 176 177 return; 178 } 179 180 /* 181 * Inform one or more TOE devices about a listening socket. 182 */ 183 static void 184 toe_listen_start(struct inpcb *inp, void *arg) 185 { 186 struct toedev *t, *tod; 187 struct tcpcb *tp; 188 189 INP_WLOCK_ASSERT(inp); 190 KASSERT(inp->inp_pcbinfo == &V_tcbinfo, 191 ("%s: inp is not a TCP inp", __func__)); 192 193 if (inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) 194 return; 195 196 tp = intotcpcb(inp); 197 if (tp->t_state != TCPS_LISTEN) 198 return; 199 200 t = arg; 201 mtx_lock(&toedev_lock); 202 TAILQ_FOREACH(tod, &toedev_list, link) { 203 if (t == NULL || t == tod) 204 tod->tod_listen_start(tod, tp); 205 } 206 mtx_unlock(&toedev_lock); 207 } 208 209 static void 210 toe_listen_start_event(void *arg __unused, struct tcpcb *tp) 211 { 212 struct inpcb *inp = tp->t_inpcb; 213 214 INP_WLOCK_ASSERT(inp); 215 KASSERT(tp->t_state == TCPS_LISTEN, 216 ("%s: t_state %s", __func__, tcpstates[tp->t_state])); 217 218 toe_listen_start(inp, NULL); 219 } 220 221 static void 222 toe_listen_stop_event(void *arg __unused, struct tcpcb *tp) 223 { 224 struct toedev *tod; 225 #ifdef INVARIANTS 226 struct inpcb *inp = tp->t_inpcb; 227 #endif 228 229 INP_WLOCK_ASSERT(inp); 230 KASSERT(tp->t_state == TCPS_LISTEN, 231 ("%s: t_state %s", __func__, tcpstates[tp->t_state])); 232 233 mtx_lock(&toedev_lock); 234 TAILQ_FOREACH(tod, &toedev_list, link) 235 tod->tod_listen_stop(tod, tp); 236 mtx_unlock(&toedev_lock); 237 } 238 239 /* 240 * Fill up a freshly allocated toedev struct with reasonable defaults. 241 */ 242 void 243 init_toedev(struct toedev *tod) 244 { 245 246 tod->tod_softc = NULL; 247 248 /* 249 * Provide no-op defaults so that the kernel can call any toedev 250 * function without having to check whether the TOE driver supplied one 251 * or not. 252 */ 253 tod->tod_connect = toedev_connect; 254 tod->tod_listen_start = toedev_listen_start; 255 tod->tod_listen_stop = toedev_listen_stop; 256 tod->tod_input = toedev_input; 257 tod->tod_rcvd = toedev_rcvd; 258 tod->tod_output = toedev_output; 259 tod->tod_send_rst = toedev_output; 260 tod->tod_send_fin = toedev_output; 261 tod->tod_pcb_detach = toedev_pcb_detach; 262 tod->tod_l2_update = toedev_l2_update; 263 tod->tod_route_redirect = toedev_route_redirect; 264 tod->tod_syncache_added = toedev_syncache_added; 265 tod->tod_syncache_removed = toedev_syncache_removed; 266 tod->tod_syncache_respond = toedev_syncache_respond; 267 tod->tod_offload_socket = toedev_offload_socket; 268 tod->tod_ctloutput = toedev_ctloutput; 269 } 270 271 /* 272 * Register an active TOE device with the system. This allows it to receive 273 * notifications from the kernel. 274 */ 275 int 276 register_toedev(struct toedev *tod) 277 { 278 struct toedev *t; 279 280 mtx_lock(&toedev_lock); 281 TAILQ_FOREACH(t, &toedev_list, link) { 282 if (t == tod) { 283 mtx_unlock(&toedev_lock); 284 return (EEXIST); 285 } 286 } 287 288 TAILQ_INSERT_TAIL(&toedev_list, tod, link); 289 registered_toedevs++; 290 mtx_unlock(&toedev_lock); 291 292 inp_apply_all(toe_listen_start, tod); 293 294 return (0); 295 } 296 297 /* 298 * Remove the TOE device from the global list of active TOE devices. It is the 299 * caller's responsibility to ensure that the TOE device is quiesced prior to 300 * this call. 301 */ 302 int 303 unregister_toedev(struct toedev *tod) 304 { 305 struct toedev *t, *t2; 306 int rc = ENODEV; 307 308 mtx_lock(&toedev_lock); 309 TAILQ_FOREACH_SAFE(t, &toedev_list, link, t2) { 310 if (t == tod) { 311 TAILQ_REMOVE(&toedev_list, tod, link); 312 registered_toedevs--; 313 rc = 0; 314 break; 315 } 316 } 317 KASSERT(registered_toedevs >= 0, 318 ("%s: registered_toedevs (%d) < 0", __func__, registered_toedevs)); 319 mtx_unlock(&toedev_lock); 320 return (rc); 321 } 322 323 void 324 toe_syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, 325 struct inpcb *inp, void *tod, void *todctx) 326 { 327 struct socket *lso = inp->inp_socket; 328 329 INP_INFO_WLOCK_ASSERT(&V_tcbinfo); 330 INP_WLOCK_ASSERT(inp); 331 332 syncache_add(inc, to, th, inp, &lso, NULL, tod, todctx); 333 } 334 335 int 336 toe_syncache_expand(struct in_conninfo *inc, struct tcpopt *to, 337 struct tcphdr *th, struct socket **lsop) 338 { 339 340 INP_INFO_WLOCK_ASSERT(&V_tcbinfo); 341 342 return (syncache_expand(inc, to, th, lsop, NULL)); 343 } 344 345 /* 346 * General purpose check to see if a 4-tuple is in use by the kernel. If a TCP 347 * header (presumably for an incoming SYN) is also provided, an existing 4-tuple 348 * in TIME_WAIT may be assassinated freeing it up for re-use. 349 * 350 * Note that the TCP header must have been run through tcp_fields_to_host() or 351 * equivalent. 352 */ 353 int 354 toe_4tuple_check(struct in_conninfo *inc, struct tcphdr *th, struct ifnet *ifp) 355 { 356 struct inpcb *inp; 357 358 if (inc->inc_flags & INC_ISIPV6) 359 return (ENOSYS); /* XXX: implement */ 360 361 inp = in_pcblookup(&V_tcbinfo, inc->inc_faddr, inc->inc_fport, 362 inc->inc_laddr, inc->inc_lport, INPLOOKUP_WLOCKPCB, ifp); 363 if (inp != NULL) { 364 INP_WLOCK_ASSERT(inp); 365 366 if ((inp->inp_flags & INP_TIMEWAIT) && th != NULL) { 367 368 INP_INFO_WLOCK_ASSERT(&V_tcbinfo); /* for twcheck */ 369 if (!tcp_twcheck(inp, NULL, th, NULL, 0)) 370 return (EADDRINUSE); 371 } else { 372 INP_WUNLOCK(inp); 373 return (EADDRINUSE); 374 } 375 } 376 377 return (0); 378 } 379 380 static void 381 toe_lle_event(void *arg __unused, struct llentry *lle, int evt) 382 { 383 struct toedev *tod; 384 struct ifnet *ifp; 385 struct sockaddr *sa; 386 uint8_t *lladdr; 387 uint16_t vtag; 388 389 LLE_WLOCK_ASSERT(lle); 390 391 ifp = lle->lle_tbl->llt_ifp; 392 sa = L3_ADDR(lle); 393 394 KASSERT(sa->sa_family == AF_INET || sa->sa_family == AF_INET6, 395 ("%s: lle_event %d for lle %p but sa %p !INET && !INET6", 396 __func__, evt, lle, sa)); 397 398 /* 399 * Not interested if the interface's TOE capability is not enabled. 400 */ 401 if ((sa->sa_family == AF_INET && !(ifp->if_capenable & IFCAP_TOE4)) || 402 (sa->sa_family == AF_INET6 && !(ifp->if_capenable & IFCAP_TOE6))) 403 return; 404 405 tod = TOEDEV(ifp); 406 if (tod == NULL) 407 return; 408 409 vtag = 0xfff; 410 if (evt != LLENTRY_RESOLVED) { 411 412 /* 413 * LLENTRY_TIMEDOUT, LLENTRY_DELETED, LLENTRY_EXPIRED all mean 414 * this entry is going to be deleted. 415 */ 416 417 lladdr = NULL; 418 } else { 419 420 KASSERT(lle->la_flags & LLE_VALID, 421 ("%s: %p resolved but not valid?", __func__, lle)); 422 423 lladdr = (uint8_t *)&lle->ll_addr; 424 #ifdef VLAN_TAG 425 VLAN_TAG(ifp, &vtag); 426 #endif 427 } 428 429 tod->tod_l2_update(tod, ifp, sa, lladdr, vtag); 430 } 431 432 /* 433 * XXX: implement. 434 */ 435 static void 436 toe_route_redirect_event(void *arg __unused, struct rtentry *rt0, 437 struct rtentry *rt1, struct sockaddr *sa) 438 { 439 440 return; 441 } 442 443 /* 444 * Returns 0 or EWOULDBLOCK on success (any other value is an error). 0 means 445 * lladdr and vtag are valid on return, EWOULDBLOCK means the TOE driver's 446 * tod_l2_update will be called later, when the entry is resolved or times out. 447 */ 448 int 449 toe_l2_resolve(struct toedev *tod, struct ifnet *ifp, struct sockaddr *sa, 450 uint8_t *lladdr, uint16_t *vtag) 451 { 452 struct llentry *lle; 453 int rc; 454 455 switch (sa->sa_family) { 456 #ifdef INET 457 case AF_INET: 458 rc = arpresolve(ifp, NULL, NULL, sa, lladdr, &lle); 459 break; 460 #endif 461 #ifdef INET6 462 case AF_INET6: 463 rc = nd6_storelladdr(ifp, NULL, sa, lladdr, &lle); 464 break; 465 #endif 466 default: 467 return (EPROTONOSUPPORT); 468 } 469 470 if (rc == 0) { 471 #ifdef VLAN_TAG 472 if (VLAN_TAG(ifp, vtag) != 0) 473 #endif 474 *vtag = 0xfff; 475 } 476 477 return (rc); 478 } 479 480 void 481 toe_connect_failed(struct toedev *tod, struct inpcb *inp, int err) 482 { 483 484 INP_WLOCK_ASSERT(inp); 485 486 if (!(inp->inp_flags & INP_DROPPED)) { 487 struct tcpcb *tp = intotcpcb(inp); 488 489 KASSERT(tp->t_flags & TF_TOE, 490 ("%s: tp %p not offloaded.", __func__, tp)); 491 492 if (err == EAGAIN) { 493 494 /* 495 * Temporary failure during offload, take this PCB back. 496 * Detach from the TOE driver and do the rest of what 497 * TCP's pru_connect would have done if the connection 498 * wasn't offloaded. 499 */ 500 501 tod->tod_pcb_detach(tod, tp); 502 KASSERT(!(tp->t_flags & TF_TOE), 503 ("%s: tp %p still offloaded.", __func__, tp)); 504 tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp)); 505 (void) tcp_output(tp); 506 } else { 507 508 INP_INFO_WLOCK_ASSERT(&V_tcbinfo); 509 tp = tcp_drop(tp, err); 510 if (tp == NULL) 511 INP_WLOCK(inp); /* re-acquire */ 512 } 513 } 514 INP_WLOCK_ASSERT(inp); 515 } 516 517 static int 518 toecore_load(void) 519 { 520 521 mtx_init(&toedev_lock, "toedev lock", NULL, MTX_DEF); 522 TAILQ_INIT(&toedev_list); 523 524 listen_start_eh = EVENTHANDLER_REGISTER(tcp_offload_listen_start, 525 toe_listen_start_event, NULL, EVENTHANDLER_PRI_ANY); 526 listen_stop_eh = EVENTHANDLER_REGISTER(tcp_offload_listen_stop, 527 toe_listen_stop_event, NULL, EVENTHANDLER_PRI_ANY); 528 lle_event_eh = EVENTHANDLER_REGISTER(lle_event, toe_lle_event, NULL, 529 EVENTHANDLER_PRI_ANY); 530 route_redirect_eh = EVENTHANDLER_REGISTER(route_redirect_event, 531 toe_route_redirect_event, NULL, EVENTHANDLER_PRI_ANY); 532 533 return (0); 534 } 535 536 static int 537 toecore_unload(void) 538 { 539 540 mtx_lock(&toedev_lock); 541 if (!TAILQ_EMPTY(&toedev_list)) { 542 mtx_unlock(&toedev_lock); 543 return (EBUSY); 544 } 545 546 EVENTHANDLER_DEREGISTER(tcp_offload_listen_start, listen_start_eh); 547 EVENTHANDLER_DEREGISTER(tcp_offload_listen_stop, listen_stop_eh); 548 EVENTHANDLER_DEREGISTER(lle_event, lle_event_eh); 549 EVENTHANDLER_DEREGISTER(route_redirect_event, route_redirect_eh); 550 551 mtx_unlock(&toedev_lock); 552 mtx_destroy(&toedev_lock); 553 554 return (0); 555 } 556 557 static int 558 toecore_mod_handler(module_t mod, int cmd, void *arg) 559 { 560 561 if (cmd == MOD_LOAD) 562 return (toecore_load()); 563 564 if (cmd == MOD_UNLOAD) 565 return (toecore_unload()); 566 567 return (EOPNOTSUPP); 568 } 569 570 static moduledata_t mod_data= { 571 "toecore", 572 toecore_mod_handler, 573 0 574 }; 575 576 MODULE_VERSION(toecore, 1); 577 DECLARE_MODULE(toecore, mod_data, SI_SUB_EXEC, SI_ORDER_ANY); 578