xref: /freebsd/sys/dev/cxgbe/t4_l2t.c (revision 6486b015fc84e96725fef22b0e3363351399ae83)
1 /*-
2  * Copyright (c) 2011 Chelsio Communications, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 #include <sys/cdefs.h>
27 __FBSDID("$FreeBSD$");
28 
29 #include "opt_inet.h"
30 
31 #include <sys/param.h>
32 #include <sys/systm.h>
33 #include <sys/kernel.h>
34 #include <sys/module.h>
35 #include <sys/bus.h>
36 #include <sys/lock.h>
37 #include <sys/mutex.h>
38 #include <sys/rwlock.h>
39 #include <sys/socket.h>
40 #include <sys/sbuf.h>
41 #include <net/if.h>
42 #include <net/if_types.h>
43 #include <net/ethernet.h>
44 #include <net/if_vlan_var.h>
45 #include <net/if_dl.h>
46 #include <net/if_llatbl.h>
47 #include <net/route.h>
48 #include <netinet/in.h>
49 #include <netinet/in_var.h>
50 #include <netinet/if_ether.h>
51 
52 #include "common/common.h"
53 #include "common/jhash.h"
54 #include "common/t4_msg.h"
55 #include "t4_l2t.h"
56 
/*
 * Module locking notes:  There is a RW lock protecting the L2 table as a
 * whole plus a spinlock per L2T entry.  Entry lookups and allocations happen
 * under the protection of the table lock, individual entry changes happen
 * while holding that entry's spinlock.  The table lock nests outside the
 * entry locks.  Allocations of new entries take the table lock as writers so
 * no other lookups can happen while allocating new entries.  Entry updates
 * take the table lock as readers so multiple entries can be updated in
 * parallel.  An L2T entry can be dropped by decrementing its reference count
 * and therefore can happen in parallel with entry allocation but no entry
 * can change state or increment its ref count during allocation as both of
 * these perform lookups.
 *
 * Note: We do not take references to ifnets in this module because both
 * the TOE and the sockets already hold references to the interfaces and the
 * lifetime of an L2T entry is fully contained in the lifetime of the TOE.
 */
74 
/*
 * Flag bit folded into the tid of a CPL_L2T_WRITE_REQ so that the reply
 * handler can tell a synchronous write from an asynchronous one.
 */
#define S_SYNC_WR	12
#define V_SYNC_WR(x)	((x) << S_SYNC_WR)
#define F_SYNC_WR	V_SYNC_WR(1)
79 
/*
 * States of an L2T entry.  The numeric order matters: alloc_l2e treats every
 * state below L2T_STATE_SWITCHING as "may be on a hash chain".
 */
enum {
	L2T_STATE_VALID = 0,		/* entry is up to date */
	L2T_STATE_STALE = 1,		/* usable, but needs revalidation */
	L2T_STATE_RESOLVING = 2,	/* address resolution still needed */
	L2T_STATE_SYNC_WRITE = 3,	/* synchronous write in progress */

	/* Entries in the states below are not hashed. */
	L2T_STATE_SWITCHING = 4,	/* in use by a switching filter */
	L2T_STATE_UNUSED = 5		/* not in use */
};
90 
91 struct l2t_data {
92 	struct rwlock lock;
93 	volatile int nfree;	/* number of free entries */
94 	struct l2t_entry *rover;/* starting point for next allocation */
95 	struct l2t_entry l2tab[L2T_SIZE];
96 };
97 
/* CPL_L2T_WRITE_RPL handler, registered by t4_init_l2t. */
static int do_l2t_write_rpl(struct sge_iq *iq, const struct rss_header *rss,
    struct mbuf *m);
100 
/* VLAN value stored in an entry whose interface is not a VLAN. */
#define VLAN_NONE	0xfff

/* Pointer-cast shorthands for socket addresses. */
#define SA(x)		((struct sockaddr *)(x))
#define SIN(x)		((struct sockaddr_in *)(x))
/* The IPv4 address word inside a sockaddr_in. */
#define SINADDR(x)	(SIN(x)->sin_addr.s_addr)
105 
106 /*
107  * Allocate a free L2T entry.  Must be called with l2t_data.lock held.
108  */
109 static struct l2t_entry *
110 alloc_l2e(struct l2t_data *d)
111 {
112 	struct l2t_entry *end, *e, **p;
113 
114 	rw_assert(&d->lock, RA_WLOCKED);
115 
116 	if (!atomic_load_acq_int(&d->nfree))
117 		return (NULL);
118 
119 	/* there's definitely a free entry */
120 	for (e = d->rover, end = &d->l2tab[L2T_SIZE]; e != end; ++e)
121 		if (atomic_load_acq_int(&e->refcnt) == 0)
122 			goto found;
123 
124 	for (e = d->l2tab; atomic_load_acq_int(&e->refcnt); ++e) ;
125 found:
126 	d->rover = e + 1;
127 	atomic_subtract_int(&d->nfree, 1);
128 
129 	/*
130 	 * The entry we found may be an inactive entry that is
131 	 * presently in the hash table.  We need to remove it.
132 	 */
133 	if (e->state < L2T_STATE_SWITCHING) {
134 		for (p = &d->l2tab[e->hash].first; *p; p = &(*p)->next) {
135 			if (*p == e) {
136 				*p = e->next;
137 				e->next = NULL;
138 				break;
139 			}
140 		}
141 	}
142 
143 	e->state = L2T_STATE_UNUSED;
144 	return (e);
145 }
146 
147 /*
148  * Write an L2T entry.  Must be called with the entry locked.
149  * The write may be synchronous or asynchronous.
150  */
151 static int
152 write_l2e(struct adapter *sc, struct l2t_entry *e, int sync)
153 {
154 	struct mbuf *m;
155 	struct cpl_l2t_write_req *req;
156 
157 	mtx_assert(&e->lock, MA_OWNED);
158 
159 	if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
160 		return (ENOMEM);
161 
162 	req = mtod(m, struct cpl_l2t_write_req *);
163 	m->m_pkthdr.len = m->m_len = sizeof(*req);
164 
165 	INIT_TP_WR(req, 0);
166 	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, e->idx |
167 	    V_SYNC_WR(sync) | V_TID_QID(sc->sge.fwq.abs_id)));
168 	req->params = htons(V_L2T_W_PORT(e->lport) | V_L2T_W_NOREPLY(!sync));
169 	req->l2t_idx = htons(e->idx);
170 	req->vlan = htons(e->vlan);
171 	memcpy(req->dst_mac, e->dmac, sizeof(req->dst_mac));
172 
173 	t4_mgmt_tx(sc, m);
174 
175 	if (sync && e->state != L2T_STATE_SWITCHING)
176 		e->state = L2T_STATE_SYNC_WRITE;
177 
178 	return (0);
179 }
180 
181 /*
182  * Allocate an L2T entry for use by a switching rule.  Such need to be
183  * explicitly freed and while busy they are not on any hash chain, so normal
184  * address resolution updates do not see them.
185  */
186 struct l2t_entry *
187 t4_l2t_alloc_switching(struct l2t_data *d)
188 {
189 	struct l2t_entry *e;
190 
191 	rw_rlock(&d->lock);
192 	e = alloc_l2e(d);
193 	if (e) {
194 		mtx_lock(&e->lock);          /* avoid race with t4_l2t_free */
195 		e->state = L2T_STATE_SWITCHING;
196 		atomic_store_rel_int(&e->refcnt, 1);
197 		mtx_unlock(&e->lock);
198 	}
199 	rw_runlock(&d->lock);
200 	return e;
201 }
202 
203 /*
204  * Sets/updates the contents of a switching L2T entry that has been allocated
205  * with an earlier call to @t4_l2t_alloc_switching.
206  */
207 int
208 t4_l2t_set_switching(struct adapter *sc, struct l2t_entry *e, uint16_t vlan,
209     uint8_t port, uint8_t *eth_addr)
210 {
211 	int rc;
212 
213 	e->vlan = vlan;
214 	e->lport = port;
215 	memcpy(e->dmac, eth_addr, ETHER_ADDR_LEN);
216 	mtx_lock(&e->lock);
217 	rc = write_l2e(sc, e, 0);
218 	mtx_unlock(&e->lock);
219 	return (rc);
220 }
221 
222 int
223 t4_init_l2t(struct adapter *sc, int flags)
224 {
225 	int i;
226 	struct l2t_data *d;
227 
228 	d = malloc(sizeof(*d), M_CXGBE, M_ZERO | flags);
229 	if (!d)
230 		return (ENOMEM);
231 
232 	d->rover = d->l2tab;
233 	atomic_store_rel_int(&d->nfree, L2T_SIZE);
234 	rw_init(&d->lock, "L2T");
235 
236 	for (i = 0; i < L2T_SIZE; i++) {
237 		d->l2tab[i].idx = i;
238 		d->l2tab[i].state = L2T_STATE_UNUSED;
239 		mtx_init(&d->l2tab[i].lock, "L2T_E", NULL, MTX_DEF);
240 		atomic_store_rel_int(&d->l2tab[i].refcnt, 0);
241 	}
242 
243 	sc->l2t = d;
244 	t4_register_cpl_handler(sc, CPL_L2T_WRITE_RPL, do_l2t_write_rpl);
245 
246 	return (0);
247 }
248 
249 int
250 t4_free_l2t(struct l2t_data *d)
251 {
252 	int i;
253 
254 	for (i = 0; i < L2T_SIZE; i++)
255 		mtx_destroy(&d->l2tab[i].lock);
256 	rw_destroy(&d->lock);
257 	free(d, M_CXGBE);
258 
259 	return (0);
260 }
261 
262 #ifdef SBUF_DRAIN
263 static inline unsigned int
264 vlan_prio(const struct l2t_entry *e)
265 {
266 	return e->vlan >> 13;
267 }
268 
269 static char
270 l2e_state(const struct l2t_entry *e)
271 {
272 	switch (e->state) {
273 	case L2T_STATE_VALID: return 'V';  /* valid, fast-path entry */
274 	case L2T_STATE_STALE: return 'S';  /* needs revalidation, but usable */
275 	case L2T_STATE_SYNC_WRITE: return 'W';
276 	case L2T_STATE_RESOLVING: return e->arpq_head ? 'A' : 'R';
277 	case L2T_STATE_SWITCHING: return 'X';
278 	default: return 'U';
279 	}
280 }
281 
282 int
283 sysctl_l2t(SYSCTL_HANDLER_ARGS)
284 {
285 	struct adapter *sc = arg1;
286 	struct l2t_data *l2t = sc->l2t;
287 	struct l2t_entry *e;
288 	struct sbuf *sb;
289 	int rc, i, header = 0;
290 	char ip[60];
291 
292 	if (l2t == NULL)
293 		return (ENXIO);
294 
295 	rc = sysctl_wire_old_buffer(req, 0);
296 	if (rc != 0)
297 		return (rc);
298 
299 	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
300 	if (sb == NULL)
301 		return (ENOMEM);
302 
303 	e = &l2t->l2tab[0];
304 	for (i = 0; i < L2T_SIZE; i++, e++) {
305 		mtx_lock(&e->lock);
306 		if (e->state == L2T_STATE_UNUSED)
307 			goto skip;
308 
309 		if (header == 0) {
310 			sbuf_printf(sb, " Idx IP address      "
311 			    "Ethernet address  VLAN/P LP State Users Port");
312 			header = 1;
313 		}
314 		if (e->state == L2T_STATE_SWITCHING || e->v6)
315 			ip[0] = 0;
316 		else
317 			snprintf(ip, sizeof(ip), "%s",
318 			    inet_ntoa(*(struct in_addr *)&e->addr[0]));
319 
320 		/* XXX: accessing lle probably not safe? */
321 		sbuf_printf(sb, "\n%4u %-15s %02x:%02x:%02x:%02x:%02x:%02x %4d"
322 			   " %u %2u   %c   %5u %s",
323 			   e->idx, ip, e->dmac[0], e->dmac[1], e->dmac[2],
324 			   e->dmac[3], e->dmac[4], e->dmac[5],
325 			   e->vlan & 0xfff, vlan_prio(e), e->lport,
326 			   l2e_state(e), atomic_load_acq_int(&e->refcnt),
327 			   e->lle ? e->lle->lle_tbl->llt_ifp->if_xname : "");
328 skip:
329 		mtx_unlock(&e->lock);
330 	}
331 
332 	rc = sbuf_finish(sb);
333 	sbuf_delete(sb);
334 
335 	return (rc);
336 }
337 #endif
338 
339 #ifndef TCP_OFFLOAD_DISABLE
340 static inline void
341 l2t_hold(struct l2t_data *d, struct l2t_entry *e)
342 {
343 	if (atomic_fetchadd_int(&e->refcnt, 1) == 0)  /* 0 -> 1 transition */
344 		atomic_subtract_int(&d->nfree, 1);
345 }
346 
347 /*
348  * To avoid having to check address families we do not allow v4 and v6
349  * neighbors to be on the same hash chain.  We keep v4 entries in the first
350  * half of available hash buckets and v6 in the second.
351  */
352 enum {
353 	L2T_SZ_HALF = L2T_SIZE / 2,
354 	L2T_HASH_MASK = L2T_SZ_HALF - 1
355 };
356 
357 static inline unsigned int
358 arp_hash(const uint32_t *key, int ifindex)
359 {
360 	return jhash_2words(*key, ifindex, 0) & L2T_HASH_MASK;
361 }
362 
363 static inline unsigned int
364 ipv6_hash(const uint32_t *key, int ifindex)
365 {
366 	uint32_t xor = key[0] ^ key[1] ^ key[2] ^ key[3];
367 
368 	return L2T_SZ_HALF + (jhash_2words(xor, ifindex, 0) & L2T_HASH_MASK);
369 }
370 
/*
 * Hash bucket for an address: a 4-byte address is hashed as IPv4, any other
 * length as IPv6.
 */
static inline unsigned int
addr_hash(const uint32_t *addr, int addr_len, int ifindex)
{

	if (addr_len == 4)
		return (arp_hash(addr, ifindex));
	return (ipv6_hash(addr, ifindex));
}
377 
378 /*
379  * Checks if an L2T entry is for the given IP/IPv6 address.  It does not check
380  * whether the L2T entry and the address are of the same address family.
381  * Callers ensure an address is only checked against L2T entries of the same
382  * family, something made trivial by the separation of IP and IPv6 hash chains
383  * mentioned above.  Returns 0 if there's a match,
384  */
385 static inline int
386 addreq(const struct l2t_entry *e, const uint32_t *addr)
387 {
388 	if (e->v6)
389 		return (e->addr[0] ^ addr[0]) | (e->addr[1] ^ addr[1]) |
390 		       (e->addr[2] ^ addr[2]) | (e->addr[3] ^ addr[3]);
391 	return e->addr[0] ^ addr[0];
392 }
393 
394 /*
395  * Add a packet to an L2T entry's queue of packets awaiting resolution.
396  * Must be called with the entry's lock held.
397  */
398 static inline void
399 arpq_enqueue(struct l2t_entry *e, struct mbuf *m)
400 {
401 	mtx_assert(&e->lock, MA_OWNED);
402 
403 	KASSERT(m->m_nextpkt == NULL, ("%s: m_nextpkt not NULL", __func__));
404 	if (e->arpq_head)
405 		e->arpq_tail->m_nextpkt = m;
406 	else
407 		e->arpq_head = m;
408 	e->arpq_tail = m;
409 }
410 
411 static inline void
412 send_pending(struct adapter *sc, struct l2t_entry *e)
413 {
414 	struct mbuf *m, *next;
415 
416 	mtx_assert(&e->lock, MA_OWNED);
417 
418 	for (m = e->arpq_head; m; m = next) {
419 		next = m->m_nextpkt;
420 		m->m_nextpkt = NULL;
421 		t4_wrq_tx(sc, MBUF_EQ(m), m);
422 	}
423 	e->arpq_head = e->arpq_tail = NULL;
424 }
425 
#ifdef INET
/*
 * Looks up the llentry for an L2T entry's IPv4 address and, if the L2T
 * entry is still resolving, attaches the llentry to it, copies the link
 * layer address into the entry and starts a synchronous write.  We grab all
 * the locks that we need ourselves; the caller must not hold the entry's
 * lock.
 *
 * The lle passed in comes from arpresolve and is ignored as it does not
 * appear to be of much use.
 *
 * Returns 0 on success, ENOMEM if the lookup yields no usable llentry or
 * the write could not be queued, and EINVAL if the llentry found is not
 * marked LLE_VALID.  Panics for IPv6 entries.
 */
static int
l2t_fill_lle(struct adapter *sc, struct l2t_entry *e, struct llentry *unused)
{
	int rc = 0;
	struct sockaddr_in sin;
	struct ifnet *ifp = e->ifp;
	struct llentry *lle;

	if (e->v6)
		panic("%s: IPv6 L2 resolution not supported yet.", __func__);

	mtx_assert(&e->lock, MA_NOTOWNED);
	KASSERT(ifp != NULL, ("%s: bad prep before call", __func__));

	bzero(&sin, sizeof(sin));
	sin.sin_family = AF_INET;
	sin.sin_len = sizeof(sin);
	/*
	 * Copy exactly one IPv4 address.  Using the size of the whole
	 * sockaddr_in here would write past the end of sin.
	 */
	memcpy(&sin.sin_addr, e->addr, sizeof(sin.sin_addr));

	IF_AFDATA_LOCK(ifp);
	lle = lla_lookup(LLTABLE(ifp), LLE_EXCLUSIVE, SA(&sin));
	IF_AFDATA_UNLOCK(ifp);
	if (!LLE_IS_VALID(lle))
		return (ENOMEM);
	if (!(lle->la_flags & LLE_VALID)) {
		rc = EINVAL;
		goto done;
	}

	/* Reference that the L2T entry will own if the lle is attached. */
	LLE_ADDREF(lle);

	mtx_lock(&e->lock);
	if (e->state == L2T_STATE_RESOLVING) {
		KASSERT(e->lle == NULL, ("%s: lle already valid", __func__));
		memcpy(e->dmac, &lle->ll_addr, ETHER_ADDR_LEN);
		rc = write_l2e(sc, e, 1);
		if (rc == 0) {
			e->lle = lle;
		} else {
			/*
			 * The write was not queued, so the entry is still
			 * resolving.  Leave e->lle clear and give the
			 * reference back so that a later attempt starts
			 * from a clean state.
			 */
			LLE_REMREF(lle);
		}
	} else {
		/* Someone else already attached the lle; drop our reference. */
		KASSERT(e->lle == lle, ("%s: lle changed", __func__));
		LLE_REMREF(lle);
	}
	mtx_unlock(&e->lock);
done:
	LLE_WUNLOCK(lle);
	return (rc);
}
#endif
481 
482 int
483 t4_l2t_send(struct adapter *sc, struct mbuf *m, struct l2t_entry *e)
484 {
485 #ifndef INET
486 	return (EINVAL);
487 #else
488 	struct llentry *lle = NULL;
489 	struct sockaddr_in sin;
490 	struct ifnet *ifp = e->ifp;
491 
492 	if (e->v6)
493 		panic("%s: IPv6 L2 resolution not supported yet.", __func__);
494 
495         bzero(&sin, sizeof(struct sockaddr_in));
496 	sin.sin_family = AF_INET;
497 	sin.sin_len = sizeof(struct sockaddr_in);
498 	memcpy(&sin.sin_addr, e->addr, sizeof(struct sockaddr_in));
499 
500 again:
501 	switch (e->state) {
502 	case L2T_STATE_STALE:     /* entry is stale, kick off revalidation */
503 		if (arpresolve(ifp, NULL, NULL, SA(&sin), e->dmac, &lle) == 0)
504 			l2t_fill_lle(sc, e, lle);
505 
506 		/* Fall through */
507 
508 	case L2T_STATE_VALID:     /* fast-path, send the packet on */
509 		return t4_wrq_tx(sc, MBUF_EQ(m), m);
510 
511 	case L2T_STATE_RESOLVING:
512 	case L2T_STATE_SYNC_WRITE:
513 		mtx_lock(&e->lock);
514 		if (e->state != L2T_STATE_SYNC_WRITE &&
515 		    e->state != L2T_STATE_RESOLVING) {
516 			/* state changed by the time we got here */
517 			mtx_unlock(&e->lock);
518 			goto again;
519 		}
520 		arpq_enqueue(e, m);
521 		mtx_unlock(&e->lock);
522 
523 		if (e->state == L2T_STATE_RESOLVING &&
524 		    arpresolve(ifp, NULL, NULL, SA(&sin), e->dmac, &lle) == 0)
525 			l2t_fill_lle(sc, e, lle);
526 	}
527 
528 	return (0);
529 #endif
530 }
531 
532 /*
533  * Called when an L2T entry has no more users.  The entry is left in the hash
534  * table since it is likely to be reused but we also bump nfree to indicate
535  * that the entry can be reallocated for a different neighbor.  We also drop
536  * the existing neighbor reference in case the neighbor is going away and is
537  * waiting on our reference.
538  *
539  * Because entries can be reallocated to other neighbors once their ref count
540  * drops to 0 we need to take the entry's lock to avoid races with a new
541  * incarnation.
542  */
543 static void
544 t4_l2e_free(struct l2t_entry *e)
545 {
546 	struct llentry *lle = NULL;
547 	struct l2t_data *d;
548 
549 	mtx_lock(&e->lock);
550 	if (atomic_load_acq_int(&e->refcnt) == 0) {  /* hasn't been recycled */
551 		lle = e->lle;
552 		e->lle = NULL;
553 		/*
554 		 * Don't need to worry about the arpq, an L2T entry can't be
555 		 * released if any packets are waiting for resolution as we
556 		 * need to be able to communicate with the device to close a
557 		 * connection.
558 		 */
559 	}
560 	mtx_unlock(&e->lock);
561 
562 	d = container_of(e, struct l2t_data, l2tab[e->idx]);
563 	atomic_add_int(&d->nfree, 1);
564 
565 	if (lle)
566 		LLE_FREE(lle);
567 }
568 
569 void
570 t4_l2t_release(struct l2t_entry *e)
571 {
572 	if (atomic_fetchadd_int(&e->refcnt, -1) == 1)
573 		t4_l2e_free(e);
574 }
575 
576 static int
577 do_l2t_write_rpl(struct sge_iq *iq, const struct rss_header *rss,
578     struct mbuf *m)
579 {
580 	struct adapter *sc = iq->adapter;
581 	const struct cpl_l2t_write_rpl *rpl = (const void *)(rss + 1);
582 	unsigned int tid = GET_TID(rpl);
583 	unsigned int idx = tid & (L2T_SIZE - 1);
584 
585 	if (__predict_false(rpl->status != CPL_ERR_NONE)) {
586 		log(LOG_ERR,
587 		    "Unexpected L2T_WRITE_RPL status %u for entry %u\n",
588 		    rpl->status, idx);
589 		return (EINVAL);
590 	}
591 
592 	if (tid & F_SYNC_WR) {
593 		struct l2t_entry *e = &sc->l2t->l2tab[idx];
594 
595 		mtx_lock(&e->lock);
596 		if (e->state != L2T_STATE_SWITCHING) {
597 			send_pending(sc, e);
598 			e->state = L2T_STATE_VALID;
599 		}
600 		mtx_unlock(&e->lock);
601 	}
602 
603 	return (0);
604 }
605 
606 /*
607  * Reuse an L2T entry that was previously used for the same next hop.
608  */
609 static void
610 reuse_entry(struct l2t_entry *e)
611 {
612 	struct llentry *lle;
613 
614 	mtx_lock(&e->lock);                /* avoid race with t4_l2t_free */
615 	lle = e->lle;
616 	if (lle) {
617 		KASSERT(lle->la_flags & LLE_VALID,
618 			("%s: invalid lle stored in l2t_entry", __func__));
619 
620 		if (lle->la_expire >= time_uptime)
621 			e->state = L2T_STATE_STALE;
622 		else
623 			e->state = L2T_STATE_VALID;
624 	} else
625 		e->state = L2T_STATE_RESOLVING;
626 	mtx_unlock(&e->lock);
627 }
628 
629 /*
630  * The TOE wants an L2 table entry that it can use to reach the next hop over
631  * the specified port.  Produce such an entry - create one if needed.
632  *
633  * Note that the ifnet could be a pseudo-device like if_vlan, if_lagg, etc. on
634  * top of the real cxgbe interface.
635  */
636 struct l2t_entry *
637 t4_l2t_get(struct port_info *pi, struct ifnet *ifp, struct sockaddr *sa)
638 {
639 	struct l2t_entry *e;
640 	struct l2t_data *d = pi->adapter->l2t;
641 	int addr_len;
642 	uint32_t *addr;
643 	int hash;
644 	struct sockaddr_in6 *sin6;
645 	unsigned int smt_idx = pi->port_id;
646 
647 	if (sa->sa_family == AF_INET) {
648 		addr = (uint32_t *)&SINADDR(sa);
649 		addr_len = sizeof(SINADDR(sa));
650 	} else if (sa->sa_family == AF_INET6) {
651 		sin6 = (struct sockaddr_in6 *)sa;
652 		addr = (uint32_t *)&sin6->sin6_addr.s6_addr;
653 		addr_len = sizeof(sin6->sin6_addr.s6_addr);
654 	} else
655 		return (NULL);
656 
657 #ifndef VLAN_TAG
658 	if (ifp->if_type == IFT_L2VLAN)
659 		return (NULL);
660 #endif
661 
662 	hash = addr_hash(addr, addr_len, ifp->if_index);
663 
664 	rw_wlock(&d->lock);
665 	for (e = d->l2tab[hash].first; e; e = e->next) {
666 		if (!addreq(e, addr) && e->ifp == ifp && e->smt_idx == smt_idx){
667 			l2t_hold(d, e);
668 			if (atomic_load_acq_int(&e->refcnt) == 1)
669 				reuse_entry(e);
670 			goto done;
671 		}
672 	}
673 
674 	/* Need to allocate a new entry */
675 	e = alloc_l2e(d);
676 	if (e) {
677 		mtx_lock(&e->lock);          /* avoid race with t4_l2t_free */
678 		e->state = L2T_STATE_RESOLVING;
679 		memcpy(e->addr, addr, addr_len);
680 		e->ifindex = ifp->if_index;
681 		e->smt_idx = smt_idx;
682 		e->ifp = ifp;
683 		e->hash = hash;
684 		e->lport = pi->lport;
685 		e->v6 = (addr_len == 16);
686 		e->lle = NULL;
687 		atomic_store_rel_int(&e->refcnt, 1);
688 #ifdef VLAN_TAG
689 		if (ifp->if_type == IFT_L2VLAN)
690 			VLAN_TAG(ifp, &e->vlan);
691 		else
692 			e->vlan = VLAN_NONE;
693 #endif
694 		e->next = d->l2tab[hash].first;
695 		d->l2tab[hash].first = e;
696 		mtx_unlock(&e->lock);
697 	}
698 done:
699 	rw_wunlock(&d->lock);
700 	return e;
701 }
702 
703 /*
704  * Called when the host's neighbor layer makes a change to some entry that is
705  * loaded into the HW L2 table.
706  */
707 void
708 t4_l2t_update(struct adapter *sc, struct llentry *lle)
709 {
710 	struct l2t_entry *e;
711 	struct l2t_data *d = sc->l2t;
712 	struct sockaddr *sa = L3_ADDR(lle);
713 	struct llentry *old_lle = NULL;
714 	uint32_t *addr = (uint32_t *)&SINADDR(sa);
715 	struct ifnet *ifp = lle->lle_tbl->llt_ifp;
716 	int hash = addr_hash(addr, sizeof(*addr), ifp->if_index);
717 
718 	KASSERT(d != NULL, ("%s: no L2 table", __func__));
719 	LLE_WLOCK_ASSERT(lle);
720 	KASSERT(lle->la_flags & LLE_VALID || lle->la_flags & LLE_DELETED,
721 	    ("%s: entry neither valid nor deleted.", __func__));
722 
723 	rw_rlock(&d->lock);
724 	for (e = d->l2tab[hash].first; e; e = e->next) {
725 		if (!addreq(e, addr) && e->ifp == ifp) {
726 			mtx_lock(&e->lock);
727 			if (atomic_load_acq_int(&e->refcnt))
728 				goto found;
729 			e->state = L2T_STATE_STALE;
730 			mtx_unlock(&e->lock);
731 			break;
732 		}
733 	}
734 	rw_runlock(&d->lock);
735 
736 	/* The TOE has no interest in this LLE */
737 	return;
738 
739  found:
740 	rw_runlock(&d->lock);
741 
742         if (atomic_load_acq_int(&e->refcnt)) {
743 
744                 /* Entry is referenced by at least 1 offloaded connection. */
745 
746                 /* Handle deletes first */
747                 if (lle->la_flags & LLE_DELETED) {
748                         if (lle == e->lle) {
749                                 e->lle = NULL;
750                                 e->state = L2T_STATE_RESOLVING;
751                                 LLE_REMREF(lle);
752                         }
753                         goto done;
754                 }
755 
756                 if (lle != e->lle) {
757                         old_lle = e->lle;
758                         LLE_ADDREF(lle);
759                         e->lle = lle;
760                 }
761 
762                 if (e->state == L2T_STATE_RESOLVING ||
763                     memcmp(e->dmac, &lle->ll_addr, ETHER_ADDR_LEN)) {
764 
765                         /* unresolved -> resolved; or dmac changed */
766 
767                         memcpy(e->dmac, &lle->ll_addr, ETHER_ADDR_LEN);
768 			write_l2e(sc, e, 1);
769                 } else {
770 
771                         /* +ve reinforcement of a valid or stale entry */
772 
773                 }
774 
775                 e->state = L2T_STATE_VALID;
776 
777         } else {
778                 /*
779                  * Entry was used previously but is unreferenced right now.
780                  * e->lle has been released and NULL'd out by t4_l2t_free, or
781                  * l2t_release is about to call t4_l2t_free and do that.
782                  *
783                  * Either way this is of no interest to us.
784                  */
785         }
786 
787 done:
788         mtx_unlock(&e->lock);
789         if (old_lle)
790                 LLE_FREE(old_lle);
791 }
792 
793 #endif
794