1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright 2015, Joyent, Inc. 24 */ 25 26 /* 27 * Common routines usable by any part of the networking stack for hashing 28 * packets. The hashing logic originally was part of MAC, but it has more 29 * utility being usable by the rest of the broader system. 30 */ 31 32 #include <sys/types.h> 33 #include <sys/mac.h> 34 #include <sys/strsubr.h> 35 #include <sys/strsun.h> 36 #include <sys/vlan.h> 37 #include <inet/ip.h> 38 #include <inet/ip_impl.h> 39 #include <inet/ip6.h> 40 #include <sys/dlpi.h> 41 #include <sys/sunndi.h> 42 #include <inet/ipsec_impl.h> 43 #include <inet/sadb.h> 44 #include <inet/ipsecesp.h> 45 #include <inet/ipsecah.h> 46 #include <inet/inet_hash.h> 47 48 /* 49 * Determines the IPv6 header length accounting for all the optional IPv6 50 * headers (hop-by-hop, destination, routing and fragment). The header length 51 * and next header value (a transport header) is captured. 52 * 53 * Returns B_FALSE if all the IP headers are not in the same mblk otherwise 54 * returns B_TRUE. 55 */ 56 static boolean_t 57 inet_pkthash_ip_hdr_length_v6(ip6_t *ip6h, uint8_t *endptr, 58 uint16_t *hdr_length, uint8_t *next_hdr, ip6_frag_t **fragp) 59 { 60 uint16_t length; 61 uint_t ehdrlen; 62 uint8_t *whereptr; 63 uint8_t *nexthdrp; 64 ip6_dest_t *desthdr; 65 ip6_rthdr_t *rthdr; 66 ip6_frag_t *fraghdr; 67 68 if (((uchar_t *)ip6h + IPV6_HDR_LEN) > endptr) 69 return (B_FALSE); 70 ASSERT(IPH_HDR_VERSION(ip6h) == IPV6_VERSION); 71 length = IPV6_HDR_LEN; 72 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 73 74 if (fragp != NULL) 75 *fragp = NULL; 76 77 nexthdrp = &ip6h->ip6_nxt; 78 while (whereptr < endptr) { 79 /* Is there enough left for len + nexthdr? */ 80 if (whereptr + MIN_EHDR_LEN > endptr) 81 break; 82 83 switch (*nexthdrp) { 84 case IPPROTO_HOPOPTS: 85 case IPPROTO_DSTOPTS: 86 /* Assumes the headers are identical for hbh and dst */ 87 desthdr = (ip6_dest_t *)whereptr; 88 ehdrlen = 8 * (desthdr->ip6d_len + 1); 89 if ((uchar_t *)desthdr + ehdrlen > endptr) 90 return (B_FALSE); 91 nexthdrp = &desthdr->ip6d_nxt; 92 break; 93 case IPPROTO_ROUTING: 94 rthdr = (ip6_rthdr_t *)whereptr; 95 ehdrlen = 8 * (rthdr->ip6r_len + 1); 96 if ((uchar_t *)rthdr + ehdrlen > endptr) 97 return (B_FALSE); 98 nexthdrp = &rthdr->ip6r_nxt; 99 break; 100 case IPPROTO_FRAGMENT: 101 fraghdr = (ip6_frag_t *)whereptr; 102 ehdrlen = sizeof (ip6_frag_t); 103 if ((uchar_t *)&fraghdr[1] > endptr) 104 return (B_FALSE); 105 nexthdrp = &fraghdr->ip6f_nxt; 106 if (fragp != NULL) 107 *fragp = fraghdr; 108 break; 109 case IPPROTO_NONE: 110 /* No next header means we're finished */ 111 default: 112 *hdr_length = length; 113 *next_hdr = *nexthdrp; 114 return (B_TRUE); 115 } 116 length += ehdrlen; 117 whereptr += ehdrlen; 118 *hdr_length = length; 119 *next_hdr = *nexthdrp; 120 } 121 switch (*nexthdrp) { 122 case IPPROTO_HOPOPTS: 123 case IPPROTO_DSTOPTS: 124 case IPPROTO_ROUTING: 125 case IPPROTO_FRAGMENT: 126 /* 127 * If any known extension headers are still to be processed, 128 * the packet's malformed (or at least all the IP header(s) are 129 * not in the same mblk - and that should never happen. 130 */ 131 return (B_FALSE); 132 133 default: 134 /* 135 * If we get here, we know that all of the IP headers were in 136 * the same mblk, even if the ULP header is in the next mblk. 137 */ 138 *hdr_length = length; 139 *next_hdr = *nexthdrp; 140 return (B_TRUE); 141 } 142 } 143 144 #define PKT_HASH_2BYTES(x) ((x)[0] ^ (x)[1]) 145 #define PKT_HASH_4BYTES(x) ((x)[0] ^ (x)[1] ^ (x)[2] ^ (x)[3]) 146 #define PKT_HASH_MAC(x) ((x)[0] ^ (x)[1] ^ (x)[2] ^ (x)[3] ^ (x)[4] ^ (x)[5]) 147 uint64_t 148 inet_pkt_hash(uint_t media, mblk_t *mp, uint8_t policy) 149 { 150 struct ether_header *ehp; 151 uint64_t hash = 0; 152 uint16_t sap; 153 uint_t skip_len; 154 uint8_t proto; 155 boolean_t ip_fragmented; 156 157 /* 158 * We may want to have one of these per MAC type plugin in the 159 * future. For now supports only ethernet. 160 */ 161 if (media != DL_ETHER) 162 return (0L); 163 164 /* for now we support only outbound packets */ 165 ASSERT(IS_P2ALIGNED(mp->b_rptr, sizeof (uint16_t))); 166 ASSERT(MBLKL(mp) >= sizeof (struct ether_header)); 167 168 /* compute L2 hash */ 169 170 ehp = (struct ether_header *)mp->b_rptr; 171 172 if ((policy & INET_PKT_HASH_L2) != 0) { 173 uchar_t *mac_src = ehp->ether_shost.ether_addr_octet; 174 uchar_t *mac_dst = ehp->ether_dhost.ether_addr_octet; 175 hash = PKT_HASH_MAC(mac_src) ^ PKT_HASH_MAC(mac_dst); 176 policy &= ~INET_PKT_HASH_L2; 177 } 178 179 if (policy == 0) 180 goto done; 181 182 /* skip ethernet header */ 183 184 sap = ntohs(ehp->ether_type); 185 if (sap == ETHERTYPE_VLAN) { 186 struct ether_vlan_header *evhp; 187 mblk_t *newmp = NULL; 188 189 skip_len = sizeof (struct ether_vlan_header); 190 if (MBLKL(mp) < skip_len) { 191 /* the vlan tag is the payload, pull up first */ 192 newmp = msgpullup(mp, -1); 193 if ((newmp == NULL) || (MBLKL(newmp) < skip_len)) { 194 goto done; 195 } 196 evhp = (struct ether_vlan_header *)newmp->b_rptr; 197 } else { 198 evhp = (struct ether_vlan_header *)mp->b_rptr; 199 } 200 201 sap = ntohs(evhp->ether_type); 202 freemsg(newmp); 203 } else { 204 skip_len = sizeof (struct ether_header); 205 } 206 207 /* if ethernet header is in its own mblk, skip it */ 208 if (MBLKL(mp) <= skip_len) { 209 skip_len -= MBLKL(mp); 210 mp = mp->b_cont; 211 if (mp == NULL) 212 goto done; 213 } 214 215 sap = (sap < ETHERTYPE_802_MIN) ? 0 : sap; 216 217 /* compute IP src/dst addresses hash and skip IPv{4,6} header */ 218 219 switch (sap) { 220 case ETHERTYPE_IP: { 221 ipha_t *iphp; 222 223 /* 224 * If the header is not aligned or the header doesn't fit 225 * in the mblk, bail now. Note that this may cause packet 226 * reordering. 227 */ 228 iphp = (ipha_t *)(mp->b_rptr + skip_len); 229 if (((unsigned char *)iphp + sizeof (ipha_t) > mp->b_wptr) || 230 !OK_32PTR((char *)iphp)) 231 goto done; 232 233 proto = iphp->ipha_protocol; 234 skip_len += IPH_HDR_LENGTH(iphp); 235 236 /* Check if the packet is fragmented. */ 237 ip_fragmented = ntohs(iphp->ipha_fragment_offset_and_flags) & 238 IPH_OFFSET; 239 240 /* 241 * For fragmented packets, use addresses in addition to 242 * the frag_id to generate the hash inorder to get 243 * better distribution. 244 */ 245 if (ip_fragmented || (policy & INET_PKT_HASH_L3) != 0) { 246 uint8_t *ip_src = (uint8_t *)&(iphp->ipha_src); 247 uint8_t *ip_dst = (uint8_t *)&(iphp->ipha_dst); 248 249 hash ^= (PKT_HASH_4BYTES(ip_src) ^ 250 PKT_HASH_4BYTES(ip_dst)); 251 policy &= ~INET_PKT_HASH_L3; 252 } 253 254 if (ip_fragmented) { 255 uint8_t *identp = (uint8_t *)&iphp->ipha_ident; 256 hash ^= PKT_HASH_2BYTES(identp); 257 goto done; 258 } 259 break; 260 } 261 case ETHERTYPE_IPV6: { 262 ip6_t *ip6hp; 263 ip6_frag_t *frag = NULL; 264 uint16_t hdr_length; 265 266 /* 267 * If the header is not aligned or the header doesn't fit 268 * in the mblk, bail now. Note that this may cause packets 269 * reordering. 270 */ 271 272 ip6hp = (ip6_t *)(mp->b_rptr + skip_len); 273 if (((unsigned char *)ip6hp + IPV6_HDR_LEN > mp->b_wptr) || 274 !OK_32PTR((char *)ip6hp)) 275 goto done; 276 277 if (!inet_pkthash_ip_hdr_length_v6(ip6hp, mp->b_wptr, 278 &hdr_length, &proto, &frag)) 279 goto done; 280 skip_len += hdr_length; 281 282 /* 283 * For fragmented packets, use addresses in addition to 284 * the frag_id to generate the hash inorder to get 285 * better distribution. 286 */ 287 if (frag != NULL || (policy & INET_PKT_HASH_L3) != 0) { 288 uint8_t *ip_src = &(ip6hp->ip6_src.s6_addr8[12]); 289 uint8_t *ip_dst = &(ip6hp->ip6_dst.s6_addr8[12]); 290 291 hash ^= (PKT_HASH_4BYTES(ip_src) ^ 292 PKT_HASH_4BYTES(ip_dst)); 293 policy &= ~INET_PKT_HASH_L3; 294 } 295 296 if (frag != NULL) { 297 uint8_t *identp = (uint8_t *)&frag->ip6f_ident; 298 hash ^= PKT_HASH_4BYTES(identp); 299 goto done; 300 } 301 break; 302 } 303 default: 304 goto done; 305 } 306 307 if (policy == 0) 308 goto done; 309 310 /* if ip header is in its own mblk, skip it */ 311 if (MBLKL(mp) <= skip_len) { 312 skip_len -= MBLKL(mp); 313 mp = mp->b_cont; 314 if (mp == NULL) 315 goto done; 316 } 317 318 /* parse ULP header */ 319 again: 320 switch (proto) { 321 case IPPROTO_TCP: 322 case IPPROTO_UDP: 323 case IPPROTO_ESP: 324 case IPPROTO_SCTP: 325 /* 326 * These Internet Protocols are intentionally designed 327 * for hashing from the git-go. Port numbers are in the first 328 * word for transports, SPI is first for ESP. 329 */ 330 if (mp->b_rptr + skip_len + 4 > mp->b_wptr) 331 goto done; 332 hash ^= PKT_HASH_4BYTES((mp->b_rptr + skip_len)); 333 break; 334 335 case IPPROTO_AH: { 336 ah_t *ah = (ah_t *)(mp->b_rptr + skip_len); 337 uint_t ah_length = AH_TOTAL_LEN(ah); 338 339 if ((unsigned char *)ah + sizeof (ah_t) > mp->b_wptr) 340 goto done; 341 342 proto = ah->ah_nexthdr; 343 skip_len += ah_length; 344 345 /* if AH header is in its own mblk, skip it */ 346 if (MBLKL(mp) <= skip_len) { 347 skip_len -= MBLKL(mp); 348 mp = mp->b_cont; 349 if (mp == NULL) 350 goto done; 351 } 352 353 goto again; 354 } 355 } 356 357 done: 358 return (hash); 359 } 360