1 /* 2 * Copyright(c) 2017 Intel Corporation. 3 * 4 * This file is provided under a dual BSD/GPLv2 license. When using or 5 * redistributing this file, you may do so under either license. 6 * 7 * GPL LICENSE SUMMARY 8 * 9 * This program is free software; you can redistribute it and/or modify 10 * it under the terms of version 2 of the GNU General Public License as 11 * published by the Free Software Foundation. 12 * 13 * This program is distributed in the hope that it will be useful, but 14 * WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 16 * General Public License for more details. 17 * 18 * BSD LICENSE 19 * 20 * Redistribution and use in source and binary forms, with or without 21 * modification, are permitted provided that the following conditions 22 * are met: 23 * 24 * - Redistributions of source code must retain the above copyright 25 * notice, this list of conditions and the following disclaimer. 26 * - Redistributions in binary form must reproduce the above copyright 27 * notice, this list of conditions and the following disclaimer in 28 * the documentation and/or other materials provided with the 29 * distribution. 30 * - Neither the name of Intel Corporation nor the names of its 31 * contributors may be used to endorse or promote products derived 32 * from this software without specific prior written permission. 33 * 34 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 35 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 36 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 37 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 38 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 39 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 40 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 41 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 42 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 43 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 44 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 45 * 46 */ 47 48 /* 49 * This file contains OPA VNIC encapsulation/decapsulation function. 50 */ 51 52 #include <linux/if_ether.h> 53 #include <linux/if_vlan.h> 54 55 #include "opa_vnic_internal.h" 56 57 /* OPA 16B Header fields */ 58 #define OPA_16B_LID_MASK 0xFFFFFull 59 #define OPA_16B_SLID_HIGH_SHFT 8 60 #define OPA_16B_SLID_MASK 0xF00ull 61 #define OPA_16B_DLID_MASK 0xF000ull 62 #define OPA_16B_DLID_HIGH_SHFT 12 63 #define OPA_16B_LEN_SHFT 20 64 #define OPA_16B_SC_SHFT 20 65 #define OPA_16B_RC_SHFT 25 66 #define OPA_16B_PKEY_SHFT 16 67 68 #define OPA_VNIC_L4_HDR_SHFT 16 69 70 /* L2+L4 hdr len is 20 bytes (5 quad words) */ 71 #define OPA_VNIC_HDR_QW_LEN 5 72 73 static inline void opa_vnic_make_header(u8 *hdr, u32 slid, u32 dlid, u16 len, 74 u16 pkey, u16 entropy, u8 sc, u8 rc, 75 u8 l4_type, u16 l4_hdr) 76 { 77 /* h[1]: LT=1, 16B L2=10 */ 78 u32 h[OPA_VNIC_HDR_QW_LEN] = {0, 0xc0000000, 0, 0, 0}; 79 80 h[2] = l4_type; 81 h[3] = entropy; 82 h[4] = l4_hdr << OPA_VNIC_L4_HDR_SHFT; 83 84 /* Extract and set 4 upper bits and 20 lower bits of the lids */ 85 h[0] |= (slid & OPA_16B_LID_MASK); 86 h[2] |= ((slid >> (20 - OPA_16B_SLID_HIGH_SHFT)) & OPA_16B_SLID_MASK); 87 88 h[1] |= (dlid & OPA_16B_LID_MASK); 89 h[2] |= ((dlid >> (20 - OPA_16B_DLID_HIGH_SHFT)) & OPA_16B_DLID_MASK); 90 91 h[0] |= (len << OPA_16B_LEN_SHFT); 92 h[1] |= (rc << OPA_16B_RC_SHFT); 93 h[1] |= (sc << OPA_16B_SC_SHFT); 94 h[2] |= ((u32)pkey << OPA_16B_PKEY_SHFT); 95 96 memcpy(hdr, h, OPA_VNIC_HDR_LEN); 97 } 98 99 /* 100 * Using a simple hash table for mac table implementation with the last octet 101 * of mac address as a key. 102 */ 103 static void opa_vnic_free_mac_tbl(struct hlist_head *mactbl) 104 { 105 struct opa_vnic_mac_tbl_node *node; 106 struct hlist_node *tmp; 107 int bkt; 108 109 if (!mactbl) 110 return; 111 112 vnic_hash_for_each_safe(mactbl, bkt, tmp, node, hlist) { 113 hash_del(&node->hlist); 114 kfree(node); 115 } 116 kfree(mactbl); 117 } 118 119 static struct hlist_head *opa_vnic_alloc_mac_tbl(void) 120 { 121 u32 size = sizeof(struct hlist_head) * OPA_VNIC_MAC_TBL_SIZE; 122 struct hlist_head *mactbl; 123 124 mactbl = kzalloc(size, GFP_KERNEL); 125 if (!mactbl) 126 return ERR_PTR(-ENOMEM); 127 128 vnic_hash_init(mactbl); 129 return mactbl; 130 } 131 132 /* opa_vnic_release_mac_tbl - empty and free the mac table */ 133 void opa_vnic_release_mac_tbl(struct opa_vnic_adapter *adapter) 134 { 135 struct hlist_head *mactbl; 136 137 mutex_lock(&adapter->mactbl_lock); 138 mactbl = rcu_access_pointer(adapter->mactbl); 139 rcu_assign_pointer(adapter->mactbl, NULL); 140 synchronize_rcu(); 141 opa_vnic_free_mac_tbl(mactbl); 142 adapter->info.vport.mac_tbl_digest = 0; 143 mutex_unlock(&adapter->mactbl_lock); 144 } 145 146 /* 147 * opa_vnic_query_mac_tbl - query the mac table for a section 148 * 149 * This function implements query of specific function of the mac table. 150 * The function also expects the requested range to be valid. 151 */ 152 void opa_vnic_query_mac_tbl(struct opa_vnic_adapter *adapter, 153 struct opa_veswport_mactable *tbl) 154 { 155 struct opa_vnic_mac_tbl_node *node; 156 struct hlist_head *mactbl; 157 int bkt; 158 u16 loffset, lnum_entries; 159 160 rcu_read_lock(); 161 mactbl = rcu_dereference(adapter->mactbl); 162 if (!mactbl) 163 goto get_mac_done; 164 165 loffset = be16_to_cpu(tbl->offset); 166 lnum_entries = be16_to_cpu(tbl->num_entries); 167 168 vnic_hash_for_each(mactbl, bkt, node, hlist) { 169 struct __opa_vnic_mactable_entry *nentry = &node->entry; 170 struct opa_veswport_mactable_entry *entry; 171 172 if ((node->index < loffset) || 173 (node->index >= (loffset + lnum_entries))) 174 continue; 175 176 /* populate entry in the tbl corresponding to the index */ 177 entry = &tbl->tbl_entries[node->index - loffset]; 178 memcpy(entry->mac_addr, nentry->mac_addr, 179 ARRAY_SIZE(entry->mac_addr)); 180 memcpy(entry->mac_addr_mask, nentry->mac_addr_mask, 181 ARRAY_SIZE(entry->mac_addr_mask)); 182 entry->dlid_sd = cpu_to_be32(nentry->dlid_sd); 183 } 184 tbl->mac_tbl_digest = cpu_to_be32(adapter->info.vport.mac_tbl_digest); 185 get_mac_done: 186 rcu_read_unlock(); 187 } 188 189 /* 190 * opa_vnic_update_mac_tbl - update mac table section 191 * 192 * This function updates the specified section of the mac table. 193 * The procedure includes following steps. 194 * - Allocate a new mac (hash) table. 195 * - Add the specified entries to the new table. 196 * (except the ones that are requested to be deleted). 197 * - Add all the other entries from the old mac table. 198 * - If there is a failure, free the new table and return. 199 * - Switch to the new table. 200 * - Free the old table and return. 201 * 202 * The function also expects the requested range to be valid. 203 */ 204 int opa_vnic_update_mac_tbl(struct opa_vnic_adapter *adapter, 205 struct opa_veswport_mactable *tbl) 206 { 207 struct opa_vnic_mac_tbl_node *node, *new_node; 208 struct hlist_head *new_mactbl, *old_mactbl; 209 int i, bkt, rc = 0; 210 u8 key; 211 u16 loffset, lnum_entries; 212 213 mutex_lock(&adapter->mactbl_lock); 214 /* allocate new mac table */ 215 new_mactbl = opa_vnic_alloc_mac_tbl(); 216 if (IS_ERR(new_mactbl)) { 217 mutex_unlock(&adapter->mactbl_lock); 218 return PTR_ERR(new_mactbl); 219 } 220 221 loffset = be16_to_cpu(tbl->offset); 222 lnum_entries = be16_to_cpu(tbl->num_entries); 223 224 /* add updated entries to the new mac table */ 225 for (i = 0; i < lnum_entries; i++) { 226 struct __opa_vnic_mactable_entry *nentry; 227 struct opa_veswport_mactable_entry *entry = 228 &tbl->tbl_entries[i]; 229 u8 *mac_addr = entry->mac_addr; 230 u8 empty_mac[ETH_ALEN] = { 0 }; 231 232 v_dbg("new mac entry %4d: %02x:%02x:%02x:%02x:%02x:%02x %x\n", 233 loffset + i, mac_addr[0], mac_addr[1], mac_addr[2], 234 mac_addr[3], mac_addr[4], mac_addr[5], 235 entry->dlid_sd); 236 237 /* if the entry is being removed, do not add it */ 238 if (!memcmp(mac_addr, empty_mac, ARRAY_SIZE(empty_mac))) 239 continue; 240 241 node = kzalloc(sizeof(*node), GFP_KERNEL); 242 if (!node) { 243 rc = -ENOMEM; 244 goto updt_done; 245 } 246 247 node->index = loffset + i; 248 nentry = &node->entry; 249 memcpy(nentry->mac_addr, entry->mac_addr, 250 ARRAY_SIZE(nentry->mac_addr)); 251 memcpy(nentry->mac_addr_mask, entry->mac_addr_mask, 252 ARRAY_SIZE(nentry->mac_addr_mask)); 253 nentry->dlid_sd = be32_to_cpu(entry->dlid_sd); 254 key = node->entry.mac_addr[OPA_VNIC_MAC_HASH_IDX]; 255 vnic_hash_add(new_mactbl, &node->hlist, key); 256 } 257 258 /* add other entries from current mac table to new mac table */ 259 old_mactbl = rcu_access_pointer(adapter->mactbl); 260 if (!old_mactbl) 261 goto switch_tbl; 262 263 vnic_hash_for_each(old_mactbl, bkt, node, hlist) { 264 if ((node->index >= loffset) && 265 (node->index < (loffset + lnum_entries))) 266 continue; 267 268 new_node = kzalloc(sizeof(*new_node), GFP_KERNEL); 269 if (!new_node) { 270 rc = -ENOMEM; 271 goto updt_done; 272 } 273 274 new_node->index = node->index; 275 memcpy(&new_node->entry, &node->entry, sizeof(node->entry)); 276 key = new_node->entry.mac_addr[OPA_VNIC_MAC_HASH_IDX]; 277 vnic_hash_add(new_mactbl, &new_node->hlist, key); 278 } 279 280 switch_tbl: 281 /* switch to new table */ 282 rcu_assign_pointer(adapter->mactbl, new_mactbl); 283 synchronize_rcu(); 284 285 adapter->info.vport.mac_tbl_digest = be32_to_cpu(tbl->mac_tbl_digest); 286 updt_done: 287 /* upon failure, free the new table; otherwise, free the old table */ 288 if (rc) 289 opa_vnic_free_mac_tbl(new_mactbl); 290 else 291 opa_vnic_free_mac_tbl(old_mactbl); 292 293 mutex_unlock(&adapter->mactbl_lock); 294 return rc; 295 } 296 297 /* opa_vnic_chk_mac_tbl - check mac table for dlid */ 298 static uint32_t opa_vnic_chk_mac_tbl(struct opa_vnic_adapter *adapter, 299 struct ethhdr *mac_hdr) 300 { 301 struct opa_vnic_mac_tbl_node *node; 302 struct hlist_head *mactbl; 303 u32 dlid = 0; 304 u8 key; 305 306 rcu_read_lock(); 307 mactbl = rcu_dereference(adapter->mactbl); 308 if (unlikely(!mactbl)) 309 goto chk_done; 310 311 key = mac_hdr->h_dest[OPA_VNIC_MAC_HASH_IDX]; 312 vnic_hash_for_each_possible(mactbl, node, hlist, key) { 313 struct __opa_vnic_mactable_entry *entry = &node->entry; 314 315 /* if related to source mac, skip */ 316 if (unlikely(OPA_VNIC_DLID_SD_IS_SRC_MAC(entry->dlid_sd))) 317 continue; 318 319 if (!memcmp(node->entry.mac_addr, mac_hdr->h_dest, 320 ARRAY_SIZE(node->entry.mac_addr))) { 321 /* mac address found */ 322 dlid = OPA_VNIC_DLID_SD_GET_DLID(node->entry.dlid_sd); 323 break; 324 } 325 } 326 327 chk_done: 328 rcu_read_unlock(); 329 return dlid; 330 } 331 332 /* opa_vnic_get_dlid - find and return the DLID */ 333 static uint32_t opa_vnic_get_dlid(struct opa_vnic_adapter *adapter, 334 struct sk_buff *skb, u8 def_port) 335 { 336 struct __opa_veswport_info *info = &adapter->info; 337 struct ethhdr *mac_hdr = (struct ethhdr *)skb_mac_header(skb); 338 u32 dlid; 339 340 dlid = opa_vnic_chk_mac_tbl(adapter, mac_hdr); 341 if (dlid) 342 return dlid; 343 344 if (is_multicast_ether_addr(mac_hdr->h_dest)) { 345 dlid = info->vesw.u_mcast_dlid; 346 } else { 347 if (is_local_ether_addr(mac_hdr->h_dest)) { 348 dlid = ((uint32_t)mac_hdr->h_dest[5] << 16) | 349 ((uint32_t)mac_hdr->h_dest[4] << 8) | 350 mac_hdr->h_dest[3]; 351 if (unlikely(!dlid)) 352 v_warn("Null dlid in MAC address\n"); 353 } else if (def_port != OPA_VNIC_INVALID_PORT) { 354 dlid = info->vesw.u_ucast_dlid[def_port]; 355 } 356 } 357 358 return dlid; 359 } 360 361 /* opa_vnic_get_sc - return the service class */ 362 static u8 opa_vnic_get_sc(struct __opa_veswport_info *info, 363 struct sk_buff *skb) 364 { 365 struct ethhdr *mac_hdr = (struct ethhdr *)skb_mac_header(skb); 366 u16 vlan_tci; 367 u8 sc; 368 369 if (!__vlan_get_tag(skb, &vlan_tci)) { 370 u8 pcp = OPA_VNIC_VLAN_PCP(vlan_tci); 371 372 if (is_multicast_ether_addr(mac_hdr->h_dest)) 373 sc = info->vport.pcp_to_sc_mc[pcp]; 374 else 375 sc = info->vport.pcp_to_sc_uc[pcp]; 376 } else { 377 if (is_multicast_ether_addr(mac_hdr->h_dest)) 378 sc = info->vport.non_vlan_sc_mc; 379 else 380 sc = info->vport.non_vlan_sc_uc; 381 } 382 383 return sc; 384 } 385 386 u8 opa_vnic_get_vl(struct opa_vnic_adapter *adapter, struct sk_buff *skb) 387 { 388 struct ethhdr *mac_hdr = (struct ethhdr *)skb_mac_header(skb); 389 struct __opa_veswport_info *info = &adapter->info; 390 u8 vl; 391 392 if (skb_vlan_tag_present(skb)) { 393 u8 pcp = skb_vlan_tag_get(skb) >> VLAN_PRIO_SHIFT; 394 395 if (is_multicast_ether_addr(mac_hdr->h_dest)) 396 vl = info->vport.pcp_to_vl_mc[pcp]; 397 else 398 vl = info->vport.pcp_to_vl_uc[pcp]; 399 } else { 400 if (is_multicast_ether_addr(mac_hdr->h_dest)) 401 vl = info->vport.non_vlan_vl_mc; 402 else 403 vl = info->vport.non_vlan_vl_uc; 404 } 405 406 return vl; 407 } 408 409 /* opa_vnic_get_rc - return the routing control */ 410 static u8 opa_vnic_get_rc(struct __opa_veswport_info *info, 411 struct sk_buff *skb) 412 { 413 u8 proto, rout_ctrl; 414 415 switch (vlan_get_protocol(skb)) { 416 case htons(ETH_P_IPV6): 417 proto = ipv6_hdr(skb)->nexthdr; 418 if (proto == IPPROTO_TCP) 419 rout_ctrl = OPA_VNIC_ENCAP_RC_EXT(info->vesw.rc, 420 IPV6_TCP); 421 else if (proto == IPPROTO_UDP) 422 rout_ctrl = OPA_VNIC_ENCAP_RC_EXT(info->vesw.rc, 423 IPV6_UDP); 424 else 425 rout_ctrl = OPA_VNIC_ENCAP_RC_EXT(info->vesw.rc, IPV6); 426 break; 427 case htons(ETH_P_IP): 428 proto = ip_hdr(skb)->protocol; 429 if (proto == IPPROTO_TCP) 430 rout_ctrl = OPA_VNIC_ENCAP_RC_EXT(info->vesw.rc, 431 IPV4_TCP); 432 else if (proto == IPPROTO_UDP) 433 rout_ctrl = OPA_VNIC_ENCAP_RC_EXT(info->vesw.rc, 434 IPV4_UDP); 435 else 436 rout_ctrl = OPA_VNIC_ENCAP_RC_EXT(info->vesw.rc, IPV4); 437 break; 438 default: 439 rout_ctrl = OPA_VNIC_ENCAP_RC_EXT(info->vesw.rc, DEFAULT); 440 } 441 442 return rout_ctrl; 443 } 444 445 /* opa_vnic_calc_entropy - calculate the packet entropy */ 446 u8 opa_vnic_calc_entropy(struct sk_buff *skb) 447 { 448 u32 hash = skb_get_hash(skb); 449 450 /* store XOR of all bytes in lower 8 bits */ 451 hash ^= hash >> 8; 452 hash ^= hash >> 16; 453 454 /* return lower 8 bits as entropy */ 455 return (u8)(hash & 0xFF); 456 } 457 458 /* opa_vnic_get_def_port - get default port based on entropy */ 459 static inline u8 opa_vnic_get_def_port(struct opa_vnic_adapter *adapter, 460 u8 entropy) 461 { 462 u8 flow_id; 463 464 /* Add the upper and lower 4-bits of entropy to get the flow id */ 465 flow_id = ((entropy & 0xf) + (entropy >> 4)); 466 return adapter->flow_tbl[flow_id & (OPA_VNIC_FLOW_TBL_SIZE - 1)]; 467 } 468 469 /* Calculate packet length including OPA header, crc and padding */ 470 static inline int opa_vnic_wire_length(struct sk_buff *skb) 471 { 472 u32 pad_len; 473 474 /* padding for 8 bytes size alignment */ 475 pad_len = -(skb->len + OPA_VNIC_ICRC_TAIL_LEN) & 0x7; 476 pad_len += OPA_VNIC_ICRC_TAIL_LEN; 477 478 return (skb->len + pad_len) >> 3; 479 } 480 481 /* opa_vnic_encap_skb - encapsulate skb packet with OPA header and meta data */ 482 void opa_vnic_encap_skb(struct opa_vnic_adapter *adapter, struct sk_buff *skb) 483 { 484 struct __opa_veswport_info *info = &adapter->info; 485 struct opa_vnic_skb_mdata *mdata; 486 u8 def_port, sc, rc, entropy, *hdr; 487 u16 len, l4_hdr; 488 u32 dlid; 489 490 hdr = skb_push(skb, OPA_VNIC_HDR_LEN); 491 492 entropy = opa_vnic_calc_entropy(skb); 493 def_port = opa_vnic_get_def_port(adapter, entropy); 494 len = opa_vnic_wire_length(skb); 495 dlid = opa_vnic_get_dlid(adapter, skb, def_port); 496 sc = opa_vnic_get_sc(info, skb); 497 rc = opa_vnic_get_rc(info, skb); 498 l4_hdr = info->vesw.vesw_id; 499 500 mdata = skb_push(skb, sizeof(*mdata)); 501 mdata->vl = opa_vnic_get_vl(adapter, skb); 502 mdata->entropy = entropy; 503 mdata->flags = 0; 504 if (unlikely(!dlid)) { 505 mdata->flags = OPA_VNIC_SKB_MDATA_ENCAP_ERR; 506 return; 507 } 508 509 opa_vnic_make_header(hdr, info->vport.encap_slid, dlid, len, 510 info->vesw.pkey, entropy, sc, rc, 511 OPA_VNIC_L4_ETHR, l4_hdr); 512 } 513