1 /*- 2 * Copyright (c) 2012 Chelsio Communications, Inc. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_inet.h"

#ifdef TCP_OFFLOAD
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/bus.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/rwlock.h>
#include <sys/socket.h>
#include <sys/sbuf.h>
#include <net/if.h>
#include <net/if_types.h>
#include <net/ethernet.h>
#include <net/if_vlan_var.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/toecore.h>

#include "common/common.h"
#include "common/jhash.h"
#include "common/t4_msg.h"
#include "tom/t4_tom_l2t.h"
#include "tom/t4_tom.h"

/* 12-bit VLAN field value used to mean "no VLAN tag". */
#define VLAN_NONE 0xfff

/* Shorthand casts for accessing an IPv4 address inside a sockaddr. */
#define SA(x) ((struct sockaddr *)(x))
#define SIN(x) ((struct sockaddr_in *)(x))
#define SINADDR(x) (SIN(x)->sin_addr.s_addr)

/*
 * Take a reference on an L2T entry.  A 0 -> 1 refcount transition means the
 * entry is leaving the free pool, so the table's free-entry count is
 * decremented to match.
 */
static inline void
l2t_hold(struct l2t_data *d, struct l2t_entry *e)
{
	if (atomic_fetchadd_int(&e->refcnt, 1) == 0) /* 0 -> 1 transition */
		atomic_subtract_int(&d->nfree, 1);
}

/*
 * Hash an (IPv4 address, ifnet index) pair into an L2 table bucket index.
 * L2T_SIZE is assumed to be a power of two (the mask relies on it).
 */
static inline unsigned int
arp_hash(const uint32_t key, int ifindex)
{
	return jhash_2words(key, ifindex, 0) & (L2T_SIZE - 1);
}

/*
 * Add a WR to an L2T entry's queue of work requests awaiting resolution.
 * Must be called with the entry's lock held.
78 */ 79 static inline void 80 arpq_enqueue(struct l2t_entry *e, struct wrqe *wr) 81 { 82 mtx_assert(&e->lock, MA_OWNED); 83 84 STAILQ_INSERT_TAIL(&e->wr_list, wr, link); 85 } 86 87 static inline void 88 send_pending(struct adapter *sc, struct l2t_entry *e) 89 { 90 struct wrqe *wr; 91 92 mtx_assert(&e->lock, MA_OWNED); 93 94 while ((wr = STAILQ_FIRST(&e->wr_list)) != NULL) { 95 STAILQ_REMOVE_HEAD(&e->wr_list, link); 96 t4_wrq_tx(sc, wr); 97 } 98 } 99 100 static void 101 resolution_failed_for_wr(struct wrqe *wr) 102 { 103 log(LOG_ERR, "%s: leaked work request %p, wr_len %d", __func__, wr, 104 wr->wr_len); 105 106 /* free(wr, M_CXGBE); */ 107 } 108 109 static void 110 resolution_failed(struct l2t_entry *e) 111 { 112 struct wrqe *wr; 113 114 mtx_assert(&e->lock, MA_OWNED); 115 116 while ((wr = STAILQ_FIRST(&e->wr_list)) != NULL) { 117 STAILQ_REMOVE_HEAD(&e->wr_list, link); 118 resolution_failed_for_wr(wr); 119 } 120 } 121 122 static void 123 update_entry(struct adapter *sc, struct l2t_entry *e, uint8_t *lladdr, 124 uint16_t vtag) 125 { 126 127 mtx_assert(&e->lock, MA_OWNED); 128 129 /* 130 * The entry may be in active use (e->refcount > 0) or not. We update 131 * it even when it's not as this simplifies the case where we decide to 132 * reuse the entry later. 133 */ 134 135 if (lladdr == NULL && 136 (e->state == L2T_STATE_RESOLVING || e->state == L2T_STATE_FAILED)) { 137 /* 138 * Never got a valid L2 address for this one. Just mark it as 139 * failed instead of removing it from the hash (for which we'd 140 * need to wlock the table). 
 */
		e->state = L2T_STATE_FAILED;
		resolution_failed(e);
		return;

	} else if (lladdr == NULL) {

		/* Valid or already-stale entry was deleted (or expired) */

		KASSERT(e->state == L2T_STATE_VALID ||
		    e->state == L2T_STATE_STALE,
		    ("%s: lladdr NULL, state %d", __func__, e->state));

		e->state = L2T_STATE_STALE;

	} else {

		if (e->state == L2T_STATE_RESOLVING ||
		    e->state == L2T_STATE_FAILED ||
		    memcmp(e->dmac, lladdr, ETHER_ADDR_LEN)) {

			/* unresolved -> resolved; or dmac changed */

			/* Program the new dmac/vlan into the HW L2 table. */
			memcpy(e->dmac, lladdr, ETHER_ADDR_LEN);
			e->vlan = vtag;
			t4_write_l2e(sc, e, 1);
		}
		e->state = L2T_STATE_VALID;
	}
}

/*
 * Ask the TOE core to resolve this entry's IPv4 address to a link-layer
 * address, and apply the result to the entry.  Returns 0 on success,
 * EWOULDBLOCK if resolution is still in progress (the entry is left
 * untouched), or another errno on failure (the entry is updated via
 * update_entry with lladdr == NULL).
 */
static int
resolve_entry(struct adapter *sc, struct l2t_entry *e)
{
	struct tom_data *td = sc->tom_softc;
	struct toedev *tod = &td->tod;
	struct sockaddr_in sin = {0};
	uint8_t dmac[ETHER_ADDR_LEN];
	uint16_t vtag = VLAN_NONE;
	int rc;

	sin.sin_family = AF_INET;
	sin.sin_len = sizeof(struct sockaddr_in);
	SINADDR(&sin) = e->addr;

	rc = toe_l2_resolve(tod, e->ifp, SA(&sin), dmac, &vtag);
	if (rc == EWOULDBLOCK)
		return (rc);

	mtx_lock(&e->lock);
	update_entry(sc, e, rc == 0 ? dmac : NULL, vtag);
	mtx_unlock(&e->lock);

	return (rc);
}

/*
 * Slow path for sending a work request through an L2T entry: revalidate or
 * resolve the entry as needed, queueing the WR while resolution is pending.
 * Returns 0 if the WR was sent or queued, EHOSTUNREACH if the entry has
 * failed resolution (the WR is reported via resolution_failed_for_wr).
 */
int
t4_l2t_send_slow(struct adapter *sc, struct wrqe *wr, struct l2t_entry *e)
{

again:
	switch (e->state) {
	case L2T_STATE_STALE:	/* entry is stale, kick off revalidation */

		if (resolve_entry(sc, e) != EWOULDBLOCK)
			goto again;	/* entry updated, re-examine state */

		/* Fall through */

	case L2T_STATE_VALID:	/* fast-path, send the packet on */

		t4_wrq_tx(sc, wr);
		return (0);

	case L2T_STATE_RESOLVING:
	case L2T_STATE_SYNC_WRITE:

		/* Recheck the state under the lock before queueing. */
		mtx_lock(&e->lock);
		if (e->state != L2T_STATE_SYNC_WRITE &&
		    e->state != L2T_STATE_RESOLVING) {
			/* state changed by the time we got here */
			mtx_unlock(&e->lock);
			goto again;
		}
		arpq_enqueue(e, wr);
		mtx_unlock(&e->lock);

		if (resolve_entry(sc, e) == EWOULDBLOCK)
			break;

		/*
		 * Resolution completed synchronously (success or failure):
		 * flush or fail the queued WRs, including the one just added.
		 */
		mtx_lock(&e->lock);
		if (e->state == L2T_STATE_VALID && !STAILQ_EMPTY(&e->wr_list))
			send_pending(sc, e);
		if (e->state == L2T_STATE_FAILED)
			resolution_failed(e);
		mtx_unlock(&e->lock);
		break;

	case L2T_STATE_FAILED:
		resolution_failed_for_wr(wr);
		return (EHOSTUNREACH);

	/*
	 * NOTE(review): no default case -- a WR arriving while the entry is
	 * in any other state (e.g. L2T_STATE_SWITCHING) falls out of the
	 * switch and is neither sent nor queued; confirm callers never hit
	 * that.
	 */
	}

	return (0);
}

/*
 * NOTE(review): the text below appears to describe releasing an L2T entry
 * (t4_l2t_release semantics), not the CPL handler that follows it -- the
 * comment looks misplaced; verify against the l2t release code.
 *
 * Called when an L2T entry has no more users.  The entry is left in the hash
 * table since it is likely to be reused but we also bump nfree to indicate
 * that the entry can be reallocated for a different neighbor.  We also drop
 * the existing neighbor reference in case the neighbor is going away and is
 * waiting on our reference.
 *
 * Because entries can be reallocated to other neighbors once their ref count
 * drops to 0 we need to take the entry's lock to avoid races with a new
 * incarnation.
257 */ 258 259 static int 260 do_l2t_write_rpl2(struct sge_iq *iq, const struct rss_header *rss, 261 struct mbuf *m) 262 { 263 struct adapter *sc = iq->adapter; 264 const struct cpl_l2t_write_rpl *rpl = (const void *)(rss + 1); 265 unsigned int tid = GET_TID(rpl); 266 unsigned int idx = tid & (L2T_SIZE - 1); 267 int rc; 268 269 rc = do_l2t_write_rpl(iq, rss, m); 270 if (rc != 0) 271 return (rc); 272 273 if (tid & F_SYNC_WR) { 274 struct l2t_entry *e = &sc->l2t->l2tab[idx]; 275 276 mtx_lock(&e->lock); 277 if (e->state != L2T_STATE_SWITCHING) { 278 send_pending(sc, e); 279 e->state = L2T_STATE_VALID; 280 } 281 mtx_unlock(&e->lock); 282 } 283 284 return (0); 285 } 286 287 void 288 t4_init_l2t_cpl_handlers(struct adapter *sc) 289 { 290 291 t4_register_cpl_handler(sc, CPL_L2T_WRITE_RPL, do_l2t_write_rpl2); 292 } 293 294 void 295 t4_uninit_l2t_cpl_handlers(struct adapter *sc) 296 { 297 298 t4_register_cpl_handler(sc, CPL_L2T_WRITE_RPL, do_l2t_write_rpl); 299 } 300 301 /* 302 * The TOE wants an L2 table entry that it can use to reach the next hop over 303 * the specified port. Produce such an entry - create one if needed. 304 * 305 * Note that the ifnet could be a pseudo-device like if_vlan, if_lagg, etc. on 306 * top of the real cxgbe interface. 
307 */ 308 struct l2t_entry * 309 t4_l2t_get(struct port_info *pi, struct ifnet *ifp, struct sockaddr *sa) 310 { 311 struct l2t_entry *e; 312 struct l2t_data *d = pi->adapter->l2t; 313 uint32_t addr = SINADDR(sa); 314 int hash = arp_hash(addr, ifp->if_index); 315 unsigned int smt_idx = pi->port_id; 316 317 if (sa->sa_family != AF_INET) 318 return (NULL); /* XXX: no IPv6 support right now */ 319 320 #ifndef VLAN_TAG 321 if (ifp->if_type == IFT_L2VLAN) 322 return (NULL); 323 #endif 324 325 rw_wlock(&d->lock); 326 for (e = d->l2tab[hash].first; e; e = e->next) { 327 if (e->addr == addr && e->ifp == ifp && e->smt_idx == smt_idx) { 328 l2t_hold(d, e); 329 goto done; 330 } 331 } 332 333 /* Need to allocate a new entry */ 334 e = t4_alloc_l2e(d); 335 if (e) { 336 mtx_lock(&e->lock); /* avoid race with t4_l2t_free */ 337 e->next = d->l2tab[hash].first; 338 d->l2tab[hash].first = e; 339 340 e->state = L2T_STATE_RESOLVING; 341 e->addr = addr; 342 e->ifp = ifp; 343 e->smt_idx = smt_idx; 344 e->hash = hash; 345 e->lport = pi->lport; 346 atomic_store_rel_int(&e->refcnt, 1); 347 #ifdef VLAN_TAG 348 if (ifp->if_type == IFT_L2VLAN) 349 VLAN_TAG(ifp, &e->vlan); 350 else 351 e->vlan = VLAN_NONE; 352 #endif 353 mtx_unlock(&e->lock); 354 } 355 done: 356 rw_wunlock(&d->lock); 357 return e; 358 } 359 360 /* 361 * Called when the host's ARP layer makes a change to some entry that is loaded 362 * into the HW L2 table. 
 */
void
t4_l2_update(struct toedev *tod, struct ifnet *ifp, struct sockaddr *sa,
    uint8_t *lladdr, uint16_t vtag)
{
	struct adapter *sc = tod->tod_softc;
	struct l2t_entry *e;
	struct l2t_data *d = sc->l2t;
	/* NOTE(review): assumes sa is AF_INET -- confirm toedev callers. */
	uint32_t addr = SINADDR(sa);
	int hash = arp_hash(addr, ifp->if_index);

	KASSERT(d != NULL, ("%s: no L2 table", __func__));

	rw_rlock(&d->lock);
	for (e = d->l2tab[hash].first; e; e = e->next) {
		if (e->addr == addr && e->ifp == ifp) {
			mtx_lock(&e->lock);
			if (atomic_load_acq_int(&e->refcnt))
				goto found;	/* e->lock stays held */
			/*
			 * Entry is unreferenced: just mark it stale so the
			 * next user re-resolves; no HW update needed now.
			 */
			e->state = L2T_STATE_STALE;
			mtx_unlock(&e->lock);
			break;
		}
	}
	rw_runlock(&d->lock);

	/*
	 * This is of no interest to us.  We've never had an offloaded
	 * connection to this destination, and we aren't attempting one right
	 * now.
	 */
	return;

found:
	/*
	 * Drop the table lock but keep the entry lock across update_entry so
	 * the entry's state can't change underneath us.
	 */
	rw_runlock(&d->lock);

	KASSERT(e->state != L2T_STATE_UNUSED,
	    ("%s: unused entry in the hash.", __func__));

	update_entry(sc, e, lladdr, vtag);
	mtx_unlock(&e->lock);
}
#endif