1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2012 Chelsio Communications, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 #include <sys/cdefs.h> 29 #include "opt_inet.h" 30 #include "opt_inet6.h" 31 32 #ifdef TCP_OFFLOAD 33 #include <sys/param.h> 34 #include <sys/systm.h> 35 #include <sys/kernel.h> 36 #include <sys/module.h> 37 #include <sys/bus.h> 38 #include <sys/fnv_hash.h> 39 #include <sys/lock.h> 40 #include <sys/mutex.h> 41 #include <sys/rwlock.h> 42 #include <sys/socket.h> 43 #include <sys/socketvar.h> 44 #include <sys/sbuf.h> 45 #include <sys/taskqueue.h> 46 #include <net/if.h> 47 #include <net/if_types.h> 48 #include <net/ethernet.h> 49 #include <net/if_vlan_var.h> 50 #include <net/route.h> 51 #include <netinet/in.h> 52 #include <netinet/in_pcb.h> 53 #include <netinet/tcp_var.h> 54 #include <netinet/toecore.h> 55 56 #include "common/common.h" 57 #include "common/t4_msg.h" 58 #include "tom/t4_tom_l2t.h" 59 #include "tom/t4_tom.h" 60 61 #define VLAN_NONE 0xfff 62 63 static inline void 64 l2t_hold(struct l2t_data *d, struct l2t_entry *e) 65 { 66 67 if (atomic_fetchadd_int(&e->refcnt, 1) == 0) /* 0 -> 1 transition */ 68 atomic_subtract_int(&d->nfree, 1); 69 } 70 71 static inline u_int 72 l2_hash(struct l2t_data *d, const struct sockaddr *sa, int ifindex) 73 { 74 u_int hash, half = d->l2t_size / 2, start = 0; 75 const void *key; 76 size_t len; 77 78 KASSERT(sa->sa_family == AF_INET || sa->sa_family == AF_INET6, 79 ("%s: sa %p has unexpected sa_family %d", __func__, sa, 80 sa->sa_family)); 81 82 if (sa->sa_family == AF_INET) { 83 const struct sockaddr_in *sin = (const void *)sa; 84 85 key = &sin->sin_addr; 86 len = sizeof(sin->sin_addr); 87 } else { 88 const struct sockaddr_in6 *sin6 = (const void *)sa; 89 90 key = &sin6->sin6_addr; 91 len = sizeof(sin6->sin6_addr); 92 start = half; 93 } 94 95 hash = fnv_32_buf(key, len, FNV1_32_INIT); 96 hash = fnv_32_buf(&ifindex, sizeof(ifindex), hash); 97 hash %= half; 98 99 return (hash + start); 100 } 101 102 static inline int 103 l2_cmp(const struct sockaddr *sa, struct l2t_entry *e) 104 { 105 106 KASSERT(sa->sa_family == AF_INET || sa->sa_family == AF_INET6, 107 ("%s: sa %p has unexpected sa_family %d", __func__, sa, 108 sa->sa_family)); 109 110 if (sa->sa_family == AF_INET) { 111 const struct sockaddr_in *sin = (const void *)sa; 112 113 return (e->addr[0] != sin->sin_addr.s_addr); 114 } else { 115 const struct sockaddr_in6 *sin6 = (const void *)sa; 116 117 return (memcmp(&e->addr[0], &sin6->sin6_addr, sizeof(e->addr))); 118 } 119 } 120 121 static inline void 122 l2_store(const struct sockaddr *sa, struct l2t_entry *e) 123 { 124 125 KASSERT(sa->sa_family == AF_INET || sa->sa_family == AF_INET6, 126 ("%s: sa %p has unexpected sa_family %d", __func__, sa, 127 sa->sa_family)); 128 129 if (sa->sa_family == AF_INET) { 130 const struct sockaddr_in *sin = (const void *)sa; 131 132 e->addr[0] = sin->sin_addr.s_addr; 133 e->ipv6 = 0; 134 } else { 135 const struct sockaddr_in6 *sin6 = (const void *)sa; 136 137 memcpy(&e->addr[0], &sin6->sin6_addr, sizeof(e->addr)); 138 e->ipv6 = 1; 139 } 140 } 141 142 /* 143 * Add a WR to an L2T entry's queue of work requests awaiting resolution. 144 * Must be called with the entry's lock held. 145 */ 146 static inline void 147 arpq_enqueue(struct l2t_entry *e, struct wrqe *wr) 148 { 149 mtx_assert(&e->lock, MA_OWNED); 150 151 STAILQ_INSERT_TAIL(&e->wr_list, wr, link); 152 } 153 154 static inline void 155 send_pending(struct adapter *sc, struct l2t_entry *e) 156 { 157 struct wrqe *wr; 158 159 mtx_assert(&e->lock, MA_OWNED); 160 161 while ((wr = STAILQ_FIRST(&e->wr_list)) != NULL) { 162 STAILQ_REMOVE_HEAD(&e->wr_list, link); 163 t4_wrq_tx(sc, wr); 164 } 165 } 166 167 static void 168 resolution_failed(struct adapter *sc, struct l2t_entry *e) 169 { 170 struct tom_data *td = sc->tom_softc; 171 172 mtx_assert(&e->lock, MA_OWNED); 173 174 mtx_lock(&td->unsent_wr_lock); 175 STAILQ_CONCAT(&td->unsent_wr_list, &e->wr_list); 176 mtx_unlock(&td->unsent_wr_lock); 177 178 taskqueue_enqueue(taskqueue_thread, &td->reclaim_wr_resources); 179 } 180 181 static void 182 update_entry(struct adapter *sc, struct l2t_entry *e, uint8_t *lladdr, 183 uint16_t vtag) 184 { 185 186 mtx_assert(&e->lock, MA_OWNED); 187 188 /* 189 * The entry may be in active use (e->refcount > 0) or not. We update 190 * it even when it's not as this simplifies the case where we decide to 191 * reuse the entry later. 192 */ 193 194 if (lladdr == NULL && 195 (e->state == L2T_STATE_RESOLVING || e->state == L2T_STATE_FAILED)) { 196 /* 197 * Never got a valid L2 address for this one. Just mark it as 198 * failed instead of removing it from the hash (for which we'd 199 * need to wlock the table). 200 */ 201 e->state = L2T_STATE_FAILED; 202 resolution_failed(sc, e); 203 return; 204 205 } else if (lladdr == NULL) { 206 207 /* Valid or already-stale entry was deleted (or expired) */ 208 209 KASSERT(e->state == L2T_STATE_VALID || 210 e->state == L2T_STATE_STALE, 211 ("%s: lladdr NULL, state %d", __func__, e->state)); 212 213 e->state = L2T_STATE_STALE; 214 215 } else { 216 217 if (e->state == L2T_STATE_RESOLVING || 218 e->state == L2T_STATE_FAILED || 219 memcmp(e->dmac, lladdr, ETHER_ADDR_LEN)) { 220 221 /* unresolved -> resolved; or dmac changed */ 222 223 memcpy(e->dmac, lladdr, ETHER_ADDR_LEN); 224 e->vlan = vtag; 225 t4_write_l2e(e, 1); 226 } 227 e->state = L2T_STATE_VALID; 228 } 229 } 230 231 static int 232 resolve_entry(struct adapter *sc, struct l2t_entry *e) 233 { 234 struct tom_data *td = sc->tom_softc; 235 struct toedev *tod = &td->tod; 236 struct sockaddr_in sin = {0}; 237 struct sockaddr_in6 sin6 = {0}; 238 struct sockaddr *sa; 239 uint8_t dmac[ETHER_HDR_LEN]; 240 uint16_t vtag; 241 int rc; 242 243 if (e->ipv6 == 0) { 244 sin.sin_family = AF_INET; 245 sin.sin_len = sizeof(struct sockaddr_in); 246 sin.sin_addr.s_addr = e->addr[0]; 247 sa = (void *)&sin; 248 } else { 249 sin6.sin6_family = AF_INET6; 250 sin6.sin6_len = sizeof(struct sockaddr_in6); 251 memcpy(&sin6.sin6_addr, &e->addr[0], sizeof(e->addr)); 252 sa = (void *)&sin6; 253 } 254 255 vtag = EVL_MAKETAG(VLAN_NONE, 0, 0); 256 rc = toe_l2_resolve(tod, e->ifp, sa, dmac, &vtag); 257 if (rc == EWOULDBLOCK) 258 return (rc); 259 260 mtx_lock(&e->lock); 261 update_entry(sc, e, rc == 0 ? dmac : NULL, vtag); 262 mtx_unlock(&e->lock); 263 264 return (rc); 265 } 266 267 int 268 t4_l2t_send_slow(struct adapter *sc, struct wrqe *wr, struct l2t_entry *e) 269 { 270 271 again: 272 switch (e->state) { 273 case L2T_STATE_STALE: /* entry is stale, kick off revalidation */ 274 275 resolve_entry(sc, e); 276 277 /* Fall through */ 278 279 case L2T_STATE_VALID: /* fast-path, send the packet on */ 280 281 t4_wrq_tx(sc, wr); 282 return (0); 283 284 case L2T_STATE_RESOLVING: 285 case L2T_STATE_SYNC_WRITE: 286 287 mtx_lock(&e->lock); 288 if (e->state != L2T_STATE_SYNC_WRITE && 289 e->state != L2T_STATE_RESOLVING) { 290 /* state changed by the time we got here */ 291 mtx_unlock(&e->lock); 292 goto again; 293 } 294 arpq_enqueue(e, wr); 295 mtx_unlock(&e->lock); 296 297 if (resolve_entry(sc, e) == EWOULDBLOCK) 298 break; 299 300 mtx_lock(&e->lock); 301 if (e->state == L2T_STATE_VALID && !STAILQ_EMPTY(&e->wr_list)) 302 send_pending(sc, e); 303 if (e->state == L2T_STATE_FAILED) 304 resolution_failed(sc, e); 305 mtx_unlock(&e->lock); 306 break; 307 308 case L2T_STATE_FAILED: 309 return (EHOSTUNREACH); 310 } 311 312 return (0); 313 } 314 315 int 316 do_l2t_write_rpl2(struct sge_iq *iq, const struct rss_header *rss, 317 struct mbuf *m) 318 { 319 struct adapter *sc = iq->adapter; 320 const struct cpl_l2t_write_rpl *rpl = (const void *)(rss + 1); 321 const u_int hwidx = GET_TID(rpl) & ~(F_SYNC_WR | V_TID_QID(M_TID_QID)); 322 const bool sync = GET_TID(rpl) & F_SYNC_WR; 323 324 MPASS(iq->abs_id == G_TID_QID(GET_TID(rpl))); 325 326 if (__predict_false(hwidx < sc->vres.l2t.start) || 327 __predict_false(hwidx >= sc->vres.l2t.start + sc->vres.l2t.size) || 328 __predict_false(rpl->status != CPL_ERR_NONE)) { 329 CH_ERR(sc, "%s: hwidx %u, rpl %u, sync %u; L2T st %u, sz %u\n", 330 __func__, hwidx, rpl->status, sync, sc->vres.l2t.start, 331 sc->vres.l2t.size); 332 return (EINVAL); 333 } 334 335 if (sync) { 336 const u_int idx = hwidx - sc->vres.l2t.start; 337 struct l2t_entry *e = &sc->l2t->l2tab[idx]; 338 339 mtx_lock(&e->lock); 340 if (e->state != L2T_STATE_SWITCHING) { 341 send_pending(sc, e); 342 e->state = L2T_STATE_VALID; 343 } 344 mtx_unlock(&e->lock); 345 } 346 347 return (0); 348 } 349 350 /* 351 * The TOE wants an L2 table entry that it can use to reach the next hop over 352 * the specified port. Produce such an entry - create one if needed. 353 * 354 * Note that the ifnet could be a pseudo-device like if_vlan, if_lagg, etc. on 355 * top of the real cxgbe interface. 356 */ 357 struct l2t_entry * 358 t4_l2t_get(struct port_info *pi, if_t ifp, struct sockaddr *sa) 359 { 360 struct l2t_entry *e; 361 struct adapter *sc = pi->adapter; 362 struct l2t_data *d = sc->l2t; 363 u_int hash, smt_idx = pi->port_id; 364 uint16_t vid, pcp, vtag; 365 366 KASSERT(sa->sa_family == AF_INET || sa->sa_family == AF_INET6, 367 ("%s: sa %p has unexpected sa_family %d", __func__, sa, 368 sa->sa_family)); 369 370 vid = VLAN_NONE; 371 pcp = 0; 372 if (if_gettype(ifp) == IFT_L2VLAN) { 373 VLAN_TAG(ifp, &vid); 374 VLAN_PCP(ifp, &pcp); 375 } else if ((pcp = if_getpcp(ifp)) != IFNET_PCP_NONE) 376 vid = 0; 377 else 378 pcp = 0; 379 vtag = EVL_MAKETAG(vid, pcp, 0); 380 381 hash = l2_hash(d, sa, if_getindex(ifp)); 382 rw_wlock(&d->lock); 383 if (__predict_false(d->l2t_stopped)) { 384 e = NULL; 385 goto done; 386 } 387 for (e = d->l2tab[hash].first; e; e = e->next) { 388 if (l2_cmp(sa, e) == 0 && e->ifp == ifp && e->vlan == vtag && 389 e->smt_idx == smt_idx) { 390 l2t_hold(d, e); 391 goto done; 392 } 393 } 394 395 /* Need to allocate a new entry */ 396 e = t4_alloc_l2e(d); 397 if (e) { 398 mtx_lock(&e->lock); /* avoid race with t4_l2t_free */ 399 e->next = d->l2tab[hash].first; 400 d->l2tab[hash].first = e; 401 402 e->state = L2T_STATE_RESOLVING; 403 l2_store(sa, e); 404 e->ifp = ifp; 405 e->smt_idx = smt_idx; 406 e->hash = hash; 407 e->lport = pi->lport; 408 e->wrq = &sc->sge.ctrlq[pi->port_id]; 409 e->iqid = sc->sge.ofld_rxq[pi->vi[0].first_ofld_rxq].iq.abs_id; 410 atomic_store_rel_int(&e->refcnt, 1); 411 e->vlan = vtag; 412 mtx_unlock(&e->lock); 413 } 414 done: 415 rw_wunlock(&d->lock); 416 return e; 417 } 418 419 /* 420 * Called when the host's ARP layer makes a change to some entry that is loaded 421 * into the HW L2 table. 422 */ 423 void 424 t4_l2_update(struct toedev *tod, if_t ifp, struct sockaddr *sa, 425 uint8_t *lladdr, uint16_t vtag) 426 { 427 struct adapter *sc = tod->tod_softc; 428 struct l2t_entry *e; 429 struct l2t_data *d = sc->l2t; 430 u_int hash; 431 432 KASSERT(d != NULL, ("%s: no L2 table", __func__)); 433 434 hash = l2_hash(d, sa, if_getindex(ifp)); 435 rw_rlock(&d->lock); 436 if (__predict_false(d->l2t_stopped)) 437 goto done; 438 for (e = d->l2tab[hash].first; e; e = e->next) { 439 if (l2_cmp(sa, e) == 0 && e->ifp == ifp) { 440 mtx_lock(&e->lock); 441 if (atomic_load_acq_int(&e->refcnt)) 442 goto found; 443 e->state = L2T_STATE_STALE; 444 mtx_unlock(&e->lock); 445 break; 446 } 447 } 448 done: 449 rw_runlock(&d->lock); 450 451 /* 452 * This is of no interest to us. We've never had an offloaded 453 * connection to this destination, and we aren't attempting one right 454 * now. 455 */ 456 return; 457 458 found: 459 rw_runlock(&d->lock); 460 461 KASSERT(e->state != L2T_STATE_UNUSED, 462 ("%s: unused entry in the hash.", __func__)); 463 464 update_entry(sc, e, lladdr, vtag); 465 mtx_unlock(&e->lock); 466 } 467 #endif 468