1 /* 2 * Copyright (c) 2003-2008 Chelsio, Inc. All rights reserved. 3 * 4 * This software is available to you under a choice of one of two 5 * licenses. You may choose to be licensed under the terms of the GNU 6 * General Public License (GPL) Version 2, available from the file 7 * COPYING in the main directory of this source tree, or the 8 * OpenIB.org BSD license below: 9 * 10 * Redistribution and use in source and binary forms, with or 11 * without modification, are permitted provided that the following 12 * conditions are met: 13 * 14 * - Redistributions of source code must retain the above 15 * copyright notice, this list of conditions and the following 16 * disclaimer. 17 * 18 * - Redistributions in binary form must reproduce the above 19 * copyright notice, this list of conditions and the following 20 * disclaimer in the documentation and/or other materials 21 * provided with the distribution. 22 * 23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 * SOFTWARE. 31 */ 32 #include <linux/skbuff.h> 33 #include <linux/netdevice.h> 34 #include <linux/if.h> 35 #include <linux/if_vlan.h> 36 #include <linux/jhash.h> 37 #include <linux/slab.h> 38 #include <linux/export.h> 39 #include <net/neighbour.h> 40 #include "common.h" 41 #include "t3cdev.h" 42 #include "cxgb3_defs.h" 43 #include "l2t.h" 44 #include "t3_cpl.h" 45 #include "firmware_exports.h" 46 47 #define VLAN_NONE 0xfff 48 49 /* 50 * Module locking notes: There is a RW lock protecting the L2 table as a 51 * whole plus a spinlock per L2T entry. Entry lookups and allocations happen 52 * under the protection of the table lock, individual entry changes happen 53 * while holding that entry's spinlock. The table lock nests outside the 54 * entry locks. Allocations of new entries take the table lock as writers so 55 * no other lookups can happen while allocating new entries. Entry updates 56 * take the table lock as readers so multiple entries can be updated in 57 * parallel. An L2T entry can be dropped by decrementing its reference count 58 * and therefore can happen in parallel with entry allocation but no entry 59 * can change state or increment its ref count during allocation as both of 60 * these perform lookups. 61 */ 62 63 static inline unsigned int vlan_prio(const struct l2t_entry *e) 64 { 65 return e->vlan >> 13; 66 } 67 68 static inline unsigned int arp_hash(u32 key, int ifindex, 69 const struct l2t_data *d) 70 { 71 return jhash_2words(key, ifindex, 0) & (d->nentries - 1); 72 } 73 74 static inline void neigh_replace(struct l2t_entry *e, struct neighbour *n) 75 { 76 neigh_hold(n); 77 if (e->neigh) 78 neigh_release(e->neigh); 79 e->neigh = n; 80 } 81 82 /* 83 * Set up an L2T entry and send any packets waiting in the arp queue. The 84 * supplied skb is used for the CPL_L2T_WRITE_REQ. Must be called with the 85 * entry locked. 86 */ 87 static int setup_l2e_send_pending(struct t3cdev *dev, struct sk_buff *skb, 88 struct l2t_entry *e) 89 { 90 struct cpl_l2t_write_req *req; 91 struct sk_buff *tmp; 92 93 if (!skb) { 94 skb = alloc_skb(sizeof(*req), GFP_ATOMIC); 95 if (!skb) 96 return -ENOMEM; 97 } 98 99 req = __skb_put(skb, sizeof(*req)); 100 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); 101 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, e->idx)); 102 req->params = htonl(V_L2T_W_IDX(e->idx) | V_L2T_W_IFF(e->smt_idx) | 103 V_L2T_W_VLAN(e->vlan & VLAN_VID_MASK) | 104 V_L2T_W_PRIO(vlan_prio(e))); 105 memcpy(e->dmac, e->neigh->ha, sizeof(e->dmac)); 106 memcpy(req->dst_mac, e->dmac, sizeof(req->dst_mac)); 107 skb->priority = CPL_PRIORITY_CONTROL; 108 cxgb3_ofld_send(dev, skb); 109 110 skb_queue_walk_safe(&e->arpq, skb, tmp) { 111 __skb_unlink(skb, &e->arpq); 112 cxgb3_ofld_send(dev, skb); 113 } 114 e->state = L2T_STATE_VALID; 115 116 return 0; 117 } 118 119 /* 120 * Add a packet to the an L2T entry's queue of packets awaiting resolution. 121 * Must be called with the entry's lock held. 122 */ 123 static inline void arpq_enqueue(struct l2t_entry *e, struct sk_buff *skb) 124 { 125 __skb_queue_tail(&e->arpq, skb); 126 } 127 128 int t3_l2t_send_slow(struct t3cdev *dev, struct sk_buff *skb, 129 struct l2t_entry *e) 130 { 131 again: 132 switch (e->state) { 133 case L2T_STATE_STALE: /* entry is stale, kick off revalidation */ 134 neigh_event_send(e->neigh, NULL); 135 spin_lock_bh(&e->lock); 136 if (e->state == L2T_STATE_STALE) 137 e->state = L2T_STATE_VALID; 138 spin_unlock_bh(&e->lock); 139 fallthrough; 140 case L2T_STATE_VALID: /* fast-path, send the packet on */ 141 return cxgb3_ofld_send(dev, skb); 142 case L2T_STATE_RESOLVING: 143 spin_lock_bh(&e->lock); 144 if (e->state != L2T_STATE_RESOLVING) { 145 /* ARP already completed */ 146 spin_unlock_bh(&e->lock); 147 goto again; 148 } 149 arpq_enqueue(e, skb); 150 spin_unlock_bh(&e->lock); 151 152 /* 153 * Only the first packet added to the arpq should kick off 154 * resolution. However, because the alloc_skb below can fail, 155 * we allow each packet added to the arpq to retry resolution 156 * as a way of recovering from transient memory exhaustion. 157 * A better way would be to use a work request to retry L2T 158 * entries when there's no memory. 159 */ 160 if (!neigh_event_send(e->neigh, NULL)) { 161 skb = alloc_skb(sizeof(struct cpl_l2t_write_req), 162 GFP_ATOMIC); 163 if (!skb) 164 break; 165 166 spin_lock_bh(&e->lock); 167 if (!skb_queue_empty(&e->arpq)) 168 setup_l2e_send_pending(dev, skb, e); 169 else /* we lost the race */ 170 __kfree_skb(skb); 171 spin_unlock_bh(&e->lock); 172 } 173 } 174 return 0; 175 } 176 177 EXPORT_SYMBOL(t3_l2t_send_slow); 178 179 /* 180 * Allocate a free L2T entry. Must be called with l2t_data.lock held. 181 */ 182 static struct l2t_entry *alloc_l2e(struct l2t_data *d) 183 { 184 struct l2t_entry *end, *e, **p; 185 186 if (!atomic_read(&d->nfree)) 187 return NULL; 188 189 /* there's definitely a free entry */ 190 for (e = d->rover, end = &d->l2tab[d->nentries]; e != end; ++e) 191 if (atomic_read(&e->refcnt) == 0) 192 goto found; 193 194 for (e = &d->l2tab[1]; atomic_read(&e->refcnt); ++e) ; 195 found: 196 d->rover = e + 1; 197 atomic_dec(&d->nfree); 198 199 /* 200 * The entry we found may be an inactive entry that is 201 * presently in the hash table. We need to remove it. 202 */ 203 if (e->state != L2T_STATE_UNUSED) { 204 int hash = arp_hash(e->addr, e->ifindex, d); 205 206 for (p = &d->l2tab[hash].first; *p; p = &(*p)->next) 207 if (*p == e) { 208 *p = e->next; 209 break; 210 } 211 e->state = L2T_STATE_UNUSED; 212 } 213 return e; 214 } 215 216 /* 217 * Called when an L2T entry has no more users. The entry is left in the hash 218 * table since it is likely to be reused but we also bump nfree to indicate 219 * that the entry can be reallocated for a different neighbor. We also drop 220 * the existing neighbor reference in case the neighbor is going away and is 221 * waiting on our reference. 222 * 223 * Because entries can be reallocated to other neighbors once their ref count 224 * drops to 0 we need to take the entry's lock to avoid races with a new 225 * incarnation. 226 */ 227 void t3_l2e_free(struct l2t_data *d, struct l2t_entry *e) 228 { 229 spin_lock_bh(&e->lock); 230 if (atomic_read(&e->refcnt) == 0) { /* hasn't been recycled */ 231 if (e->neigh) { 232 neigh_release(e->neigh); 233 e->neigh = NULL; 234 } 235 } 236 spin_unlock_bh(&e->lock); 237 atomic_inc(&d->nfree); 238 } 239 240 EXPORT_SYMBOL(t3_l2e_free); 241 242 /* 243 * Update an L2T entry that was previously used for the same next hop as neigh. 244 * Must be called with softirqs disabled. 245 */ 246 static inline void reuse_entry(struct l2t_entry *e, struct neighbour *neigh) 247 { 248 unsigned int nud_state; 249 250 spin_lock(&e->lock); /* avoid race with t3_l2t_free */ 251 252 if (neigh != e->neigh) 253 neigh_replace(e, neigh); 254 nud_state = neigh->nud_state; 255 if (memcmp(e->dmac, neigh->ha, sizeof(e->dmac)) || 256 !(nud_state & NUD_VALID)) 257 e->state = L2T_STATE_RESOLVING; 258 else if (nud_state & NUD_CONNECTED) 259 e->state = L2T_STATE_VALID; 260 else 261 e->state = L2T_STATE_STALE; 262 spin_unlock(&e->lock); 263 } 264 265 struct l2t_entry *t3_l2t_get(struct t3cdev *cdev, struct dst_entry *dst, 266 struct net_device *dev, const void *daddr) 267 { 268 struct l2t_entry *e = NULL; 269 struct neighbour *neigh; 270 struct port_info *p; 271 struct l2t_data *d; 272 int hash; 273 u32 addr; 274 int ifidx; 275 int smt_idx; 276 277 rcu_read_lock(); 278 neigh = dst_neigh_lookup(dst, daddr); 279 if (!neigh) 280 goto done_rcu; 281 282 addr = *(u32 *) neigh->primary_key; 283 ifidx = neigh->dev->ifindex; 284 285 if (!dev) 286 dev = neigh->dev; 287 p = netdev_priv(dev); 288 smt_idx = p->port_id; 289 290 d = L2DATA(cdev); 291 if (!d) 292 goto done_rcu; 293 294 hash = arp_hash(addr, ifidx, d); 295 296 write_lock_bh(&d->lock); 297 for (e = d->l2tab[hash].first; e; e = e->next) 298 if (e->addr == addr && e->ifindex == ifidx && 299 e->smt_idx == smt_idx) { 300 l2t_hold(d, e); 301 if (atomic_read(&e->refcnt) == 1) 302 reuse_entry(e, neigh); 303 goto done_unlock; 304 } 305 306 /* Need to allocate a new entry */ 307 e = alloc_l2e(d); 308 if (e) { 309 spin_lock(&e->lock); /* avoid race with t3_l2t_free */ 310 e->next = d->l2tab[hash].first; 311 d->l2tab[hash].first = e; 312 e->state = L2T_STATE_RESOLVING; 313 e->addr = addr; 314 e->ifindex = ifidx; 315 e->smt_idx = smt_idx; 316 atomic_set(&e->refcnt, 1); 317 neigh_replace(e, neigh); 318 if (is_vlan_dev(neigh->dev)) 319 e->vlan = vlan_dev_vlan_id(neigh->dev); 320 else 321 e->vlan = VLAN_NONE; 322 spin_unlock(&e->lock); 323 } 324 done_unlock: 325 write_unlock_bh(&d->lock); 326 done_rcu: 327 if (neigh) 328 neigh_release(neigh); 329 rcu_read_unlock(); 330 return e; 331 } 332 333 EXPORT_SYMBOL(t3_l2t_get); 334 335 /* 336 * Called when address resolution fails for an L2T entry to handle packets 337 * on the arpq head. If a packet specifies a failure handler it is invoked, 338 * otherwise the packets is sent to the offload device. 339 * 340 * XXX: maybe we should abandon the latter behavior and just require a failure 341 * handler. 342 */ 343 static void handle_failed_resolution(struct t3cdev *dev, struct sk_buff_head *arpq) 344 { 345 struct sk_buff *skb, *tmp; 346 347 skb_queue_walk_safe(arpq, skb, tmp) { 348 struct l2t_skb_cb *cb = L2T_SKB_CB(skb); 349 350 __skb_unlink(skb, arpq); 351 if (cb->arp_failure_handler) 352 cb->arp_failure_handler(dev, skb); 353 else 354 cxgb3_ofld_send(dev, skb); 355 } 356 } 357 358 /* 359 * Called when the host's ARP layer makes a change to some entry that is 360 * loaded into the HW L2 table. 361 */ 362 void t3_l2t_update(struct t3cdev *dev, struct neighbour *neigh) 363 { 364 struct sk_buff_head arpq; 365 struct l2t_entry *e; 366 struct l2t_data *d = L2DATA(dev); 367 u32 addr = *(u32 *) neigh->primary_key; 368 int ifidx = neigh->dev->ifindex; 369 int hash = arp_hash(addr, ifidx, d); 370 371 read_lock_bh(&d->lock); 372 for (e = d->l2tab[hash].first; e; e = e->next) 373 if (e->addr == addr && e->ifindex == ifidx) { 374 spin_lock(&e->lock); 375 goto found; 376 } 377 read_unlock_bh(&d->lock); 378 return; 379 380 found: 381 __skb_queue_head_init(&arpq); 382 383 read_unlock(&d->lock); 384 if (atomic_read(&e->refcnt)) { 385 if (neigh != e->neigh) 386 neigh_replace(e, neigh); 387 388 if (e->state == L2T_STATE_RESOLVING) { 389 if (neigh->nud_state & NUD_FAILED) { 390 skb_queue_splice_init(&e->arpq, &arpq); 391 } else if (neigh->nud_state & (NUD_CONNECTED|NUD_STALE)) 392 setup_l2e_send_pending(dev, NULL, e); 393 } else { 394 e->state = neigh->nud_state & NUD_CONNECTED ? 395 L2T_STATE_VALID : L2T_STATE_STALE; 396 if (!ether_addr_equal(e->dmac, neigh->ha)) 397 setup_l2e_send_pending(dev, NULL, e); 398 } 399 } 400 spin_unlock_bh(&e->lock); 401 402 if (!skb_queue_empty(&arpq)) 403 handle_failed_resolution(dev, &arpq); 404 } 405 406 struct l2t_data *t3_init_l2t(unsigned int l2t_capacity) 407 { 408 struct l2t_data *d; 409 int i; 410 411 d = kvzalloc(struct_size(d, l2tab, l2t_capacity), GFP_KERNEL); 412 if (!d) 413 return NULL; 414 415 d->nentries = l2t_capacity; 416 d->rover = &d->l2tab[1]; /* entry 0 is not used */ 417 atomic_set(&d->nfree, l2t_capacity - 1); 418 rwlock_init(&d->lock); 419 420 for (i = 0; i < l2t_capacity; ++i) { 421 d->l2tab[i].idx = i; 422 d->l2tab[i].state = L2T_STATE_UNUSED; 423 __skb_queue_head_init(&d->l2tab[i].arpq); 424 spin_lock_init(&d->l2tab[i].lock); 425 atomic_set(&d->l2tab[i].refcnt, 0); 426 } 427 return d; 428 } 429