/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2012 Chelsio Communications, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * TOE (TCP offload) side of the T4+ hardware L2 (next-hop) table: lookup,
 * allocation, and maintenance of L2T entries used by offloaded connections,
 * plus synchronization with the host's ARP/ND layer via toecore.
 */
#include <sys/cdefs.h>
#include "opt_inet.h"
#include "opt_inet6.h"

#ifdef TCP_OFFLOAD
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/bus.h>
#include <sys/fnv_hash.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/rwlock.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sbuf.h>
#include <sys/taskqueue.h>
#include <net/if.h>
#include <net/if_types.h>
#include <net/ethernet.h>
#include <net/if_vlan_var.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/tcp_var.h>
#include <netinet/toecore.h>

#include "common/common.h"
#include "common/t4_msg.h"
#include "tom/t4_tom_l2t.h"
#include "tom/t4_tom.h"

/* 12-bit VLAN id that means "no VLAN" in the vtag stored in an entry. */
#define VLAN_NONE 0xfff

/*
 * Take a reference on an L2T entry.  A 0 -> 1 refcount transition means the
 * entry leaves the free pool, so the table's free count is decremented.
 */
static inline void
l2t_hold(struct l2t_data *d, struct l2t_entry *e)
{

	if (atomic_fetchadd_int(&e->refcnt, 1) == 0)  /* 0 -> 1 transition */
		atomic_subtract_int(&d->nfree, 1);
}

/*
 * Hash a destination address + interface index into an L2T bucket index.
 * IPv4 destinations hash into the first half of the table and IPv6
 * destinations into the second half, so the two families never collide.
 */
static inline u_int
l2_hash(struct l2t_data *d, const struct sockaddr *sa, int ifindex)
{
	u_int hash, half = d->l2t_size / 2, start = 0;
	const void *key;
	size_t len;

	KASSERT(sa->sa_family == AF_INET || sa->sa_family == AF_INET6,
	    ("%s: sa %p has unexpected sa_family %d", __func__, sa,
	    sa->sa_family));

	if (sa->sa_family == AF_INET) {
		const struct sockaddr_in *sin = (const void *)sa;

		key = &sin->sin_addr;
		len = sizeof(sin->sin_addr);
	} else {
		const struct sockaddr_in6 *sin6 = (const void *)sa;

		key = &sin6->sin6_addr;
		len = sizeof(sin6->sin6_addr);
		start = half;	/* IPv6 buckets live in the upper half */
	}

	hash = fnv_32_buf(key, len, FNV1_32_INIT);
	hash = fnv_32_buf(&ifindex, sizeof(ifindex), hash);
	hash %= half;

	return (hash + start);
}

/*
 * Compare the destination in 'sa' with the address stored in entry 'e'.
 * Returns 0 on a match and non-zero otherwise (boolean-style, not a
 * three-way comparator).  For IPv6 the full e->addr array is compared;
 * e->addr is presumably sized to hold a complete IPv6 address -- see
 * l2_store(), which fills it the same way.
 */
static inline int
l2_cmp(const struct sockaddr *sa, struct l2t_entry *e)
{

	KASSERT(sa->sa_family == AF_INET || sa->sa_family == AF_INET6,
	    ("%s: sa %p has unexpected sa_family %d", __func__, sa,
	    sa->sa_family));

	if (sa->sa_family == AF_INET) {
		const struct sockaddr_in *sin = (const void *)sa;

		return (e->addr[0] != sin->sin_addr.s_addr);
	} else {
		const struct sockaddr_in6 *sin6 = (const void *)sa;

		return (memcmp(&e->addr[0], &sin6->sin6_addr, sizeof(e->addr)));
	}
}

/*
 * Record the destination address (and its family, via e->ipv6) in entry 'e'.
 * This is the counterpart of l2_cmp() above.
 */
static inline void
l2_store(const struct sockaddr *sa, struct l2t_entry *e)
{

	KASSERT(sa->sa_family == AF_INET || sa->sa_family == AF_INET6,
	    ("%s: sa %p has unexpected sa_family %d", __func__, sa,
	    sa->sa_family));

	if (sa->sa_family == AF_INET) {
		const struct sockaddr_in *sin = (const void *)sa;

		e->addr[0] = sin->sin_addr.s_addr;
		e->ipv6 = 0;
	} else {
		const struct sockaddr_in6 *sin6 = (const void *)sa;

		memcpy(&e->addr[0], &sin6->sin6_addr, sizeof(e->addr));
		e->ipv6 = 1;
	}
}

/*
 * Add a WR to an L2T entry's queue of work requests awaiting resolution.
 * Must be called with the entry's lock held.
 */
static inline void
arpq_enqueue(struct l2t_entry *e, struct wrqe *wr)
{
	mtx_assert(&e->lock, MA_OWNED);

	STAILQ_INSERT_TAIL(&e->wr_list, wr, link);
}

/*
 * Transmit every work request queued on the entry (in FIFO order) and empty
 * the queue.  Must be called with the entry's lock held.
 */
static inline void
send_pending(struct adapter *sc, struct l2t_entry *e)
{
	struct wrqe *wr;

	mtx_assert(&e->lock, MA_OWNED);

	while ((wr = STAILQ_FIRST(&e->wr_list)) != NULL) {
		STAILQ_REMOVE_HEAD(&e->wr_list, link);
		t4_wrq_tx(sc, wr);
	}
}

/*
 * L2 resolution failed for this entry: move its pending work requests to the
 * TOM's unsent list and schedule the reclaim task to release their resources.
 * Must be called with the entry's lock held.
 */
static void
resolution_failed(struct adapter *sc, struct l2t_entry *e)
{
	struct tom_data *td = sc->tom_softc;

	mtx_assert(&e->lock, MA_OWNED);

	mtx_lock(&td->unsent_wr_lock);
	STAILQ_CONCAT(&td->unsent_wr_list, &e->wr_list);
	mtx_unlock(&td->unsent_wr_lock);

	taskqueue_enqueue(taskqueue_thread, &td->reclaim_wr_resources);
}

/*
 * Apply new L2 information to an entry.  lladdr is the link-layer address of
 * the next hop (NULL if no valid address is known) and vtag the VLAN tag to
 * use.  On a new/changed dmac the entry is written to the hardware L2 table
 * (t4_write_l2e with sync=1; the completion is handled by
 * do_l2t_write_rpl2()).  Must be called with the entry's lock held.
 */
static void
update_entry(struct adapter *sc, struct l2t_entry *e, uint8_t *lladdr,
    uint16_t vtag)
{

	mtx_assert(&e->lock, MA_OWNED);

	/*
	 * The entry may be in active use (e->refcount > 0) or not.  We update
	 * it even when it's not as this simplifies the case where we decide to
	 * reuse the entry later.
	 */

	if (lladdr == NULL &&
	    (e->state == L2T_STATE_RESOLVING || e->state == L2T_STATE_FAILED)) {
		/*
		 * Never got a valid L2 address for this one.  Just mark it as
		 * failed instead of removing it from the hash (for which we'd
		 * need to wlock the table).
		 */
		e->state = L2T_STATE_FAILED;
		resolution_failed(sc, e);
		return;

	} else if (lladdr == NULL) {

		/* Valid or already-stale entry was deleted (or expired) */

		KASSERT(e->state == L2T_STATE_VALID ||
		    e->state == L2T_STATE_STALE,
		    ("%s: lladdr NULL, state %d", __func__, e->state));

		e->state = L2T_STATE_STALE;

	} else if (e->state == L2T_STATE_RESOLVING ||
	    e->state == L2T_STATE_FAILED ||
	    memcmp(e->dmac, lladdr, ETHER_ADDR_LEN)) {

		/* unresolved -> resolved; or dmac changed */

		memcpy(e->dmac, lladdr, ETHER_ADDR_LEN);
		e->vlan = vtag;
		if (t4_write_l2e(e, 1) == 0)
			e->state = L2T_STATE_VALID;
	} else
		e->state = L2T_STATE_VALID;
}

/*
 * Ask the host's ARP/ND layer (via toecore) for the L2 address of the entry's
 * destination.  Returns 0 if resolution completed synchronously (the entry is
 * updated under its lock), EWOULDBLOCK if resolution is in progress and
 * t4_l2_update() will be called later, or another errno on failure (the entry
 * is then updated with lladdr == NULL).
 *
 * NOTE(review): dmac is sized ETHER_HDR_LEN (14) although only ETHER_ADDR_LEN
 * (6) bytes appear to be needed; harmless over-allocation, but confirm against
 * toe_l2_resolve()'s contract before shrinking.
 */
static int
resolve_entry(struct adapter *sc, struct l2t_entry *e)
{
	struct tom_data *td = sc->tom_softc;
	struct toedev *tod = &td->tod;
	struct sockaddr_in sin = {0};
	struct sockaddr_in6 sin6 = {0};
	struct sockaddr *sa;
	uint8_t dmac[ETHER_HDR_LEN];
	uint16_t vtag;
	int rc;

	/* Rebuild a sockaddr for the destination stored by l2_store(). */
	if (e->ipv6 == 0) {
		sin.sin_family = AF_INET;
		sin.sin_len = sizeof(struct sockaddr_in);
		sin.sin_addr.s_addr = e->addr[0];
		sa = (void *)&sin;
	} else {
		sin6.sin6_family = AF_INET6;
		sin6.sin6_len = sizeof(struct sockaddr_in6);
		memcpy(&sin6.sin6_addr, &e->addr[0], sizeof(e->addr));
		sa = (void *)&sin6;
	}

	vtag = EVL_MAKETAG(VLAN_NONE, 0, 0);
	rc = toe_l2_resolve(tod, e->ifp, sa, dmac, &vtag);
	if (rc == EWOULDBLOCK)
		return (rc);

	mtx_lock(&e->lock);
	update_entry(sc, e, rc == 0 ? dmac : NULL, vtag);
	mtx_unlock(&e->lock);

	return (rc);
}

/*
 * Slow path for sending a work request through an L2T entry that is not
 * known-valid.  Depending on the entry's state the WR is either sent
 * immediately (VALID, or STALE after kicking off revalidation), queued until
 * resolution completes (RESOLVING/SYNC_WRITE), or rejected (FAILED, returns
 * EHOSTUNREACH).  Queued WRs are flushed by do_l2t_write_rpl2() or here once
 * the entry becomes valid; on failure they are handed to resolution_failed().
 */
int
t4_l2t_send_slow(struct adapter *sc, struct wrqe *wr, struct l2t_entry *e)
{

again:
	switch (e->state) {
	case L2T_STATE_STALE:	/* entry is stale, kick off revalidation */

		resolve_entry(sc, e);

		/* Fall through */

	case L2T_STATE_VALID:	/* fast-path, send the packet on */

		t4_wrq_tx(sc, wr);
		return (0);

	case L2T_STATE_RESOLVING:
	case L2T_STATE_SYNC_WRITE:

		mtx_lock(&e->lock);
		if (e->state != L2T_STATE_SYNC_WRITE &&
		    e->state != L2T_STATE_RESOLVING) {
			/* state changed by the time we got here */
			mtx_unlock(&e->lock);
			goto again;
		}
		if (adapter_stopped(sc))
			free(wr, M_CXGBE);
		else
			arpq_enqueue(e, wr);
		mtx_unlock(&e->lock);

		if (resolve_entry(sc, e) == EWOULDBLOCK)
			break;

		/* Resolution finished synchronously; act on the outcome. */
		mtx_lock(&e->lock);
		if (e->state == L2T_STATE_VALID && !STAILQ_EMPTY(&e->wr_list))
			send_pending(sc, e);
		if (e->state == L2T_STATE_FAILED)
			resolution_failed(sc, e);
		mtx_unlock(&e->lock);
		break;

	case L2T_STATE_FAILED:
		return (EHOSTUNREACH);
	}

	return (0);
}

/*
 * Handler for CPL_L2T_WRITE_RPL, the firmware's reply to an L2T write issued
 * by t4_write_l2e().  The TID field encodes the hardware L2T index, a sync
 * flag, and the id of the queue the reply was steered to.  For a sync write
 * on a non-switching entry, any tx that was waiting on the write is sent and
 * the entry is marked valid.  Returns 0, or EINVAL for a reply that is out of
 * range or carries an error status.
 */
int
do_l2t_write_rpl2(struct sge_iq *iq, const struct rss_header *rss,
    struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	const struct cpl_l2t_write_rpl *rpl = (const void *)(rss + 1);
	const u_int hwidx = GET_TID(rpl) & ~(F_SYNC_WR | V_TID_QID(M_TID_QID));
	const bool sync = GET_TID(rpl) & F_SYNC_WR;

	MPASS(iq->abs_id == G_TID_QID(GET_TID(rpl)));

	if (__predict_false(hwidx < sc->vres.l2t.start) ||
	    __predict_false(hwidx >= sc->vres.l2t.start + sc->vres.l2t.size) ||
	    __predict_false(rpl->status != CPL_ERR_NONE)) {
		CH_ERR(sc, "%s: hwidx %u, rpl %u, sync %u; L2T st %u, sz %u\n",
		    __func__, hwidx, rpl->status, sync, sc->vres.l2t.start,
		    sc->vres.l2t.size);
		return (EINVAL);
	}

	if (sync) {
		/* Convert the hardware index to an index into l2tab[]. */
		const u_int idx = hwidx - sc->vres.l2t.start;
		struct l2t_entry *e = &sc->l2t->l2tab[idx];

		mtx_lock(&e->lock);
		if (e->state != L2T_STATE_SWITCHING) {
			send_pending(sc, e);
			e->state = L2T_STATE_VALID;
		}
		mtx_unlock(&e->lock);
	}

	return (0);
}

/*
 * The TOE wants an L2 table entry that it can use to reach the next hop over
 * the specified port.  Produce such an entry - create one if needed.
 *
 * Note that the ifnet could be a pseudo-device like if_vlan, if_lagg, etc. on
 * top of the real cxgbe interface.
 *
 * An existing entry is reused only if it matches the destination, ifnet,
 * VLAN tag, and SMT index.  A new entry starts out in L2T_STATE_RESOLVING.
 * Returns NULL if the table is stopped or no entry could be allocated; the
 * returned entry holds a reference that the caller must eventually release.
 */
struct l2t_entry *
t4_l2t_get(struct port_info *pi, if_t ifp, struct sockaddr *sa)
{
	struct l2t_entry *e;
	struct adapter *sc = pi->adapter;
	struct l2t_data *d = sc->l2t;
	u_int hash, smt_idx = pi->port_id;
	uint16_t vid, pcp, vtag;

	KASSERT(sa->sa_family == AF_INET || sa->sa_family == AF_INET6,
	    ("%s: sa %p has unexpected sa_family %d", __func__, sa,
	    sa->sa_family));

	/*
	 * Determine the VLAN id and priority to use: taken from the vlan
	 * pseudo-interface if there is one, else from the ifnet's PCP
	 * setting (with vid 0), else no VLAN at all.
	 */
	vid = VLAN_NONE;
	pcp = 0;
	if (if_gettype(ifp) == IFT_L2VLAN) {
		VLAN_TAG(ifp, &vid);
		VLAN_PCP(ifp, &pcp);
	} else if ((pcp = if_getpcp(ifp)) != IFNET_PCP_NONE)
		vid = 0;
	else
		pcp = 0;
	vtag = EVL_MAKETAG(vid, pcp, 0);

	hash = l2_hash(d, sa, if_getindex(ifp));
	rw_wlock(&d->lock);
	if (__predict_false(d->l2t_stopped)) {
		e = NULL;
		goto done;
	}
	for (e = d->l2tab[hash].first; e; e = e->next) {
		if (l2_cmp(sa, e) == 0 && e->ifp == ifp && e->vlan == vtag &&
		    e->smt_idx == smt_idx) {
			l2t_hold(d, e);
			goto done;
		}
	}

	/* Need to allocate a new entry */
	e = t4_alloc_l2e(d);
	if (e) {
		mtx_lock(&e->lock);	/* avoid race with t4_l2t_free */
		e->next = d->l2tab[hash].first;
		d->l2tab[hash].first = e;

		e->state = L2T_STATE_RESOLVING;
		l2_store(sa, e);
		e->ifp = ifp;
		e->smt_idx = smt_idx;
		e->hash = hash;
		e->lport = pi->lport;
		e->wrq = &sc->sge.ctrlq[pi->port_id];
		e->iqid = sc->sge.ofld_rxq[pi->vi[0].first_ofld_rxq].iq.abs_id;
		atomic_store_rel_int(&e->refcnt, 1);
		e->vlan = vtag;
		mtx_unlock(&e->lock);
	}
done:
	rw_wunlock(&d->lock);
	return e;
}

/*
 * Called when the host's ARP layer makes a change to some entry that is loaded
 * into the HW L2 table.
 *
 * Looks up the matching entry (by destination and ifnet) under the table's
 * read lock.  An in-use entry (refcnt > 0) is updated via update_entry()
 * with its lock held; an unused valid entry is simply marked stale.  If no
 * matching entry exists the update is of no interest and is ignored.
 */
void
t4_l2_update(struct toedev *tod, if_t ifp, struct sockaddr *sa,
    uint8_t *lladdr, uint16_t vtag)
{
	struct adapter *sc = tod->tod_softc;
	struct l2t_entry *e;
	struct l2t_data *d = sc->l2t;
	u_int hash;

	KASSERT(d != NULL, ("%s: no L2 table", __func__));

	hash = l2_hash(d, sa, if_getindex(ifp));
	rw_rlock(&d->lock);
	if (__predict_false(d->l2t_stopped))
		goto done;
	for (e = d->l2tab[hash].first; e; e = e->next) {
		if (l2_cmp(sa, e) == 0 && e->ifp == ifp) {
			mtx_lock(&e->lock);
			if (atomic_load_acq_int(&e->refcnt))
				goto found;	/* entry lock stays held */
			if (e->state == L2T_STATE_VALID)
				e->state = L2T_STATE_STALE;
			mtx_unlock(&e->lock);
			break;
		}
	}
done:
	rw_runlock(&d->lock);

	/*
	 * This is of no interest to us.  We've never had an offloaded
	 * connection to this destination, and we aren't attempting one right
	 * now.
	 */
	return;

found:
	rw_runlock(&d->lock);

	KASSERT(e->state != L2T_STATE_UNUSED,
	    ("%s: unused entry in the hash.", __func__));

	update_entry(sc, e, lladdr, vtag);
	mtx_unlock(&e->lock);
}
#endif