xref: /freebsd/sys/dev/cxgbe/tom/t4_tom_l2t.c (revision c243e4902be8df1e643c76b5f18b68bb77cc5268)
/*-
 * Copyright (c) 2012 Chelsio Communications, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_inet.h"

#ifdef TCP_OFFLOAD
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/bus.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/rwlock.h>
#include <sys/socket.h>
#include <sys/sbuf.h>
#include <net/if.h>
#include <net/if_types.h>
#include <net/ethernet.h>
#include <net/if_vlan_var.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/toecore.h>

#include "common/common.h"
#include "common/jhash.h"
#include "common/t4_msg.h"
#include "tom/t4_tom_l2t.h"
#include "tom/t4_tom.h"

#define VLAN_NONE	0xfff

#define SA(x)           ((struct sockaddr *)(x))
#define SIN(x)          ((struct sockaddr_in *)(x))
#define SINADDR(x)      (SIN(x)->sin_addr.s_addr)

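/*
 * Take a reference on an L2T entry.  On the 0 -> 1 transition the table's
 * count of free entries is decremented to reflect that this entry is now in
 * use.
 */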
static inline void
l2t_hold(struct l2t_data *d, struct l2t_entry *e)
{
	if (atomic_fetchadd_int(&e->refcnt, 1) == 0)  /* 0 -> 1 transition */
		atomic_subtract_int(&d->nfree, 1);
}

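/*
 * Hash an IPv4 address and an interface index into an L2T bucket index.  The
 * mask assumes L2T_SIZE is a power of 2.
 */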
static inline unsigned int
arp_hash(const uint32_t key, int ifindex)
{
	return (jhash_2words(key, ifindex, 0) & (L2T_SIZE - 1));
}

/*
 * Add a WR to an L2T entry's queue of work requests awaiting resolution.
 * Must be called with the entry's lock held.
 */
static inline void
arpq_enqueue(struct l2t_entry *e, struct wrqe *wr)
{
	mtx_assert(&e->lock, MA_OWNED);

	STAILQ_INSERT_TAIL(&e->wr_list, wr, link);
}

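/*
 * Transmit every work request that was queued on the entry while its L2
 * address was being resolved.  Must be called with the entry's lock held.
 */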
static inline void
send_pending(struct adapter *sc, struct l2t_entry *e)
{
	struct wrqe *wr;

	mtx_assert(&e->lock, MA_OWNED);

	while ((wr = STAILQ_FIRST(&e->wr_list)) != NULL) {
		STAILQ_REMOVE_HEAD(&e->wr_list, link);
		t4_wrq_tx(sc, wr);
	}
}

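/*
 * A queued work request can never be sent because the L2 address it was
 * waiting on could not be resolved.  At this revision the request is logged
 * and leaked rather than freed; note the commented-out free below.
 */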
static void
resolution_failed_for_wr(struct wrqe *wr)
{
	log(LOG_ERR, "%s: leaked work request %p, wr_len %d\n", __func__, wr,
	    wr->wr_len);

	/* free(wr, M_CXGBE); */
}

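/*
 * Address resolution failed for this entry: drain its queue of pending work
 * requests, reporting each one as failed.  Must be called with the entry's
 * lock held.
 */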
static void
resolution_failed(struct l2t_entry *e)
{
	struct wrqe *wr;

	mtx_assert(&e->lock, MA_OWNED);

	while ((wr = STAILQ_FIRST(&e->wr_list)) != NULL) {
		STAILQ_REMOVE_HEAD(&e->wr_list, link);
		resolution_failed_for_wr(wr);
	}
}

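/*
 * Apply the outcome of an address resolution to the entry (lock held): no
 * lladdr while still RESOLVING (or already FAILED) marks the entry FAILED and
 * fails its queued work requests; no lladdr for a VALID or STALE entry
 * downgrades it to STALE; a valid lladdr makes the entry VALID, rewriting the
 * hardware copy only if the entry was unresolved or the destination MAC
 * changed.
 */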
static void
update_entry(struct adapter *sc, struct l2t_entry *e, uint8_t *lladdr,
    uint16_t vtag)
{

	mtx_assert(&e->lock, MA_OWNED);

	/*
	 * The entry may be in active use (e->refcnt > 0) or not.  We update
	 * it even when it's not as this simplifies the case where we decide to
	 * reuse the entry later.
	 */

	if (lladdr == NULL &&
	    (e->state == L2T_STATE_RESOLVING || e->state == L2T_STATE_FAILED)) {
		/*
		 * Never got a valid L2 address for this one.  Just mark it as
		 * failed instead of removing it from the hash (for which we'd
		 * need to wlock the table).
		 */
		e->state = L2T_STATE_FAILED;
		resolution_failed(e);
		return;

	} else if (lladdr == NULL) {

		/* Valid or already-stale entry was deleted (or expired) */

		KASSERT(e->state == L2T_STATE_VALID ||
		    e->state == L2T_STATE_STALE,
		    ("%s: lladdr NULL, state %d", __func__, e->state));

		e->state = L2T_STATE_STALE;

	} else {

		if (e->state == L2T_STATE_RESOLVING ||
		    e->state == L2T_STATE_FAILED ||
		    memcmp(e->dmac, lladdr, ETHER_ADDR_LEN)) {

			/* unresolved -> resolved; or dmac changed */

			memcpy(e->dmac, lladdr, ETHER_ADDR_LEN);
			e->vlan = vtag;
			t4_write_l2e(sc, e, 1);
		}
		e->state = L2T_STATE_VALID;
	}
}

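/*
 * Ask the TOE core to resolve the entry's IPv4 address on its ifnet.
 * EWOULDBLOCK means resolution is in progress and the result should arrive
 * later via the t4_l2_update callback; any other outcome (success or failure)
 * is applied to the entry right away.
 */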
static int
resolve_entry(struct adapter *sc, struct l2t_entry *e)
{
	struct tom_data *td = sc->tom_softc;
	struct toedev *tod = &td->tod;
	struct sockaddr_in sin = {0};
	uint8_t dmac[ETHER_ADDR_LEN];
	uint16_t vtag = VLAN_NONE;
	int rc;

	sin.sin_family = AF_INET;
	sin.sin_len = sizeof(struct sockaddr_in);
	SINADDR(&sin) = e->addr;

	rc = toe_l2_resolve(tod, e->ifp, SA(&sin), dmac, &vtag);
	if (rc == EWOULDBLOCK)
		return (rc);

	mtx_lock(&e->lock);
	update_entry(sc, e, rc == 0 ? dmac : NULL, vtag);
	mtx_unlock(&e->lock);

	return (rc);
}

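/*
 * Slow path for sending a work request that depends on an L2T entry.  The
 * entry's state decides what happens: a VALID entry transmits immediately; a
 * STALE entry kicks off revalidation and, if that would block, transmits
 * anyway; a RESOLVING or SYNC_WRITE entry queues the work request until
 * resolution and the hardware write complete; a FAILED entry rejects the work
 * request with EHOSTUNREACH.
 */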
int
t4_l2t_send_slow(struct adapter *sc, struct wrqe *wr, struct l2t_entry *e)
{

again:
	switch (e->state) {
	case L2T_STATE_STALE:     /* entry is stale, kick off revalidation */

		if (resolve_entry(sc, e) != EWOULDBLOCK)
			goto again;	/* entry updated, re-examine state */

		/* Fall through */

	case L2T_STATE_VALID:     /* fast-path, send the packet on */

		t4_wrq_tx(sc, wr);
		return (0);

	case L2T_STATE_RESOLVING:
	case L2T_STATE_SYNC_WRITE:

		mtx_lock(&e->lock);
		if (e->state != L2T_STATE_SYNC_WRITE &&
		    e->state != L2T_STATE_RESOLVING) {
			/* state changed by the time we got here */
			mtx_unlock(&e->lock);
			goto again;
		}
		arpq_enqueue(e, wr);
		mtx_unlock(&e->lock);

		if (resolve_entry(sc, e) == EWOULDBLOCK)
			break;

		mtx_lock(&e->lock);
		if (e->state == L2T_STATE_VALID && !STAILQ_EMPTY(&e->wr_list))
			send_pending(sc, e);
		if (e->state == L2T_STATE_FAILED)
			resolution_failed(e);
		mtx_unlock(&e->lock);
		break;

	case L2T_STATE_FAILED:
		resolution_failed_for_wr(wr);
		return (EHOSTUNREACH);
	}

	return (0);
}

/*
 * CPL_L2T_WRITE_RPL handler used while the TOM is active.  It runs the base
 * driver's handler first and then, if the reply is for a synchronous L2T
 * write (F_SYNC_WR set in the tid), marks the entry valid and transmits the
 * work requests that were queued on it while the hardware write was in
 * flight.
 */
static int
do_l2t_write_rpl2(struct sge_iq *iq, const struct rss_header *rss,
    struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	const struct cpl_l2t_write_rpl *rpl = (const void *)(rss + 1);
	unsigned int tid = GET_TID(rpl);
	unsigned int idx = tid & (L2T_SIZE - 1);
	int rc;

	rc = do_l2t_write_rpl(iq, rss, m);
	if (rc != 0)
		return (rc);

	if (tid & F_SYNC_WR) {
		struct l2t_entry *e = &sc->l2t->l2tab[idx];

		mtx_lock(&e->lock);
		if (e->state != L2T_STATE_SWITCHING) {
			send_pending(sc, e);
			e->state = L2T_STATE_VALID;
		}
		mtx_unlock(&e->lock);
	}

	return (0);
}

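/*
 * While the TOM is active, CPL_L2T_WRITE_RPL is handled by do_l2t_write_rpl2
 * above so that work requests held back by a synchronous L2T write get sent
 * once the write completes.  On uninit the base driver's do_l2t_write_rpl is
 * restored.
 */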
void
t4_init_l2t_cpl_handlers(struct adapter *sc)
{

	t4_register_cpl_handler(sc, CPL_L2T_WRITE_RPL, do_l2t_write_rpl2);
}

void
t4_uninit_l2t_cpl_handlers(struct adapter *sc)
{

	t4_register_cpl_handler(sc, CPL_L2T_WRITE_RPL, do_l2t_write_rpl);
}

/*
 * The TOE wants an L2 table entry that it can use to reach the next hop over
 * the specified port.  Produce such an entry - create one if needed.
 *
 * Note that the ifnet could be a pseudo-device like if_vlan, if_lagg, etc. on
 * top of the real cxgbe interface.
 */
struct l2t_entry *
t4_l2t_get(struct port_info *pi, struct ifnet *ifp, struct sockaddr *sa)
{
	struct l2t_entry *e;
	struct l2t_data *d = pi->adapter->l2t;
	uint32_t addr = SINADDR(sa);
	int hash = arp_hash(addr, ifp->if_index);
	unsigned int smt_idx = pi->port_id;

	if (sa->sa_family != AF_INET)
		return (NULL);	/* XXX: no IPv6 support right now */

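	/*
	 * Without the VLAN_TAG macro there is no way to recover the tag from
	 * a vlan ifnet, so such interfaces are rejected here; the hardware
	 * entry could not be set up with the proper tag.
	 */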
#ifndef VLAN_TAG
	if (ifp->if_type == IFT_L2VLAN)
		return (NULL);
#endif

	rw_wlock(&d->lock);
	for (e = d->l2tab[hash].first; e; e = e->next) {
		if (e->addr == addr && e->ifp == ifp && e->smt_idx == smt_idx) {
			l2t_hold(d, e);
			goto done;
		}
	}

	/* Need to allocate a new entry */
	e = t4_alloc_l2e(d);
	if (e) {
		mtx_lock(&e->lock);          /* avoid race with t4_l2t_free */
		e->next = d->l2tab[hash].first;
		d->l2tab[hash].first = e;

		e->state = L2T_STATE_RESOLVING;
		e->addr = addr;
		e->ifp = ifp;
		e->smt_idx = smt_idx;
		e->hash = hash;
		e->lport = pi->lport;
		atomic_store_rel_int(&e->refcnt, 1);
#ifdef VLAN_TAG
		if (ifp->if_type == IFT_L2VLAN)
			VLAN_TAG(ifp, &e->vlan);
		else
			e->vlan = VLAN_NONE;
#endif
		mtx_unlock(&e->lock);
	}
done:
	rw_wunlock(&d->lock);
	return (e);
}

/*
 * Called when the host's ARP layer makes a change to some entry that is loaded
 * into the HW L2 table.
 */
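/*
 * Only entries that are currently referenced by an offloaded connection are
 * updated in hardware here; an unreferenced entry is merely marked stale and
 * will be revalidated if it is ever handed out again.
 */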
void
t4_l2_update(struct toedev *tod, struct ifnet *ifp, struct sockaddr *sa,
    uint8_t *lladdr, uint16_t vtag)
{
	struct adapter *sc = tod->tod_softc;
	struct l2t_entry *e;
	struct l2t_data *d = sc->l2t;
	uint32_t addr = SINADDR(sa);
	int hash = arp_hash(addr, ifp->if_index);

	KASSERT(d != NULL, ("%s: no L2 table", __func__));

	rw_rlock(&d->lock);
	for (e = d->l2tab[hash].first; e; e = e->next) {
		if (e->addr == addr && e->ifp == ifp) {
			mtx_lock(&e->lock);
			if (atomic_load_acq_int(&e->refcnt))
				goto found;
			e->state = L2T_STATE_STALE;
			mtx_unlock(&e->lock);
			break;
		}
	}
	rw_runlock(&d->lock);

	/*
	 * This is of no interest to us.  We've never had an offloaded
	 * connection to this destination, and we aren't attempting one right
	 * now.
	 */
	return;

found:
	rw_runlock(&d->lock);

	KASSERT(e->state != L2T_STATE_UNUSED,
	    ("%s: unused entry in the hash.", __func__));

	update_entry(sc, e, lladdr, vtag);
	mtx_unlock(&e->lock);
}
#endif