1 /* 2 * Copyright (c) 2007-2014 Nicira, Inc. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of version 2 of the GNU General Public 6 * License as published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful, but 9 * WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 * General Public License for more details. 12 * 13 * You should have received a copy of the GNU General Public License 14 * along with this program; if not, write to the Free Software 15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 16 * 02110-1301, USA 17 */ 18 19 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 20 21 #include "flow.h" 22 #include "datapath.h" 23 #include <linux/uaccess.h> 24 #include <linux/netdevice.h> 25 #include <linux/etherdevice.h> 26 #include <linux/if_ether.h> 27 #include <linux/if_vlan.h> 28 #include <net/llc_pdu.h> 29 #include <linux/kernel.h> 30 #include <linux/jhash.h> 31 #include <linux/jiffies.h> 32 #include <linux/llc.h> 33 #include <linux/module.h> 34 #include <linux/in.h> 35 #include <linux/rcupdate.h> 36 #include <linux/if_arp.h> 37 #include <linux/ip.h> 38 #include <linux/ipv6.h> 39 #include <linux/sctp.h> 40 #include <linux/tcp.h> 41 #include <linux/udp.h> 42 #include <linux/icmp.h> 43 #include <linux/icmpv6.h> 44 #include <linux/rculist.h> 45 #include <net/geneve.h> 46 #include <net/ip.h> 47 #include <net/ipv6.h> 48 #include <net/ndisc.h> 49 #include <net/mpls.h> 50 #include <net/vxlan.h> 51 52 #include "flow_netlink.h" 53 54 struct ovs_len_tbl { 55 int len; 56 const struct ovs_len_tbl *next; 57 }; 58 59 #define OVS_ATTR_NESTED -1 60 #define OVS_ATTR_VARIABLE -2 61 62 static void update_range(struct sw_flow_match *match, 63 size_t offset, size_t size, bool is_mask) 64 { 65 struct sw_flow_key_range *range; 66 size_t start = rounddown(offset, sizeof(long)); 67 size_t end = roundup(offset + size, sizeof(long)); 68 69 if (!is_mask) 70 range = &match->range; 71 else 72 range = &match->mask->range; 73 74 if (range->start == range->end) { 75 range->start = start; 76 range->end = end; 77 return; 78 } 79 80 if (range->start > start) 81 range->start = start; 82 83 if (range->end < end) 84 range->end = end; 85 } 86 87 #define SW_FLOW_KEY_PUT(match, field, value, is_mask) \ 88 do { \ 89 update_range(match, offsetof(struct sw_flow_key, field), \ 90 sizeof((match)->key->field), is_mask); \ 91 if (is_mask) \ 92 (match)->mask->key.field = value; \ 93 else \ 94 (match)->key->field = value; \ 95 } while (0) 96 97 #define SW_FLOW_KEY_MEMCPY_OFFSET(match, offset, value_p, len, is_mask) \ 98 do { \ 99 update_range(match, offset, len, is_mask); \ 100 if (is_mask) \ 101 memcpy((u8 *)&(match)->mask->key + offset, value_p, \ 102 len); \ 103 else \ 104 memcpy((u8 *)(match)->key + offset, value_p, len); \ 105 } while (0) 106 107 #define SW_FLOW_KEY_MEMCPY(match, field, value_p, len, is_mask) \ 108 SW_FLOW_KEY_MEMCPY_OFFSET(match, offsetof(struct sw_flow_key, field), \ 109 value_p, len, is_mask) 110 111 #define SW_FLOW_KEY_MEMSET_FIELD(match, field, value, is_mask) \ 112 do { \ 113 update_range(match, offsetof(struct sw_flow_key, field), \ 114 sizeof((match)->key->field), is_mask); \ 115 if (is_mask) \ 116 memset((u8 *)&(match)->mask->key.field, value, \ 117 sizeof((match)->mask->key.field)); \ 118 else \ 119 memset((u8 *)&(match)->key->field, value, \ 120 sizeof((match)->key->field)); \ 121 } while (0) 122 123 static bool match_validate(const struct sw_flow_match *match, 124 u64 key_attrs, u64 mask_attrs, bool log) 125 { 126 u64 key_expected = 0; 127 u64 mask_allowed = key_attrs; /* At most allow all key attributes */ 128 129 /* The following mask attributes allowed only if they 130 * pass the validation tests. */ 131 mask_allowed &= ~((1 << OVS_KEY_ATTR_IPV4) 132 | (1 << OVS_KEY_ATTR_IPV6) 133 | (1 << OVS_KEY_ATTR_TCP) 134 | (1 << OVS_KEY_ATTR_TCP_FLAGS) 135 | (1 << OVS_KEY_ATTR_UDP) 136 | (1 << OVS_KEY_ATTR_SCTP) 137 | (1 << OVS_KEY_ATTR_ICMP) 138 | (1 << OVS_KEY_ATTR_ICMPV6) 139 | (1 << OVS_KEY_ATTR_ARP) 140 | (1 << OVS_KEY_ATTR_ND) 141 | (1 << OVS_KEY_ATTR_MPLS)); 142 143 /* Always allowed mask fields. */ 144 mask_allowed |= ((1 << OVS_KEY_ATTR_TUNNEL) 145 | (1 << OVS_KEY_ATTR_IN_PORT) 146 | (1 << OVS_KEY_ATTR_ETHERTYPE)); 147 148 /* Check key attributes. */ 149 if (match->key->eth.type == htons(ETH_P_ARP) 150 || match->key->eth.type == htons(ETH_P_RARP)) { 151 key_expected |= 1 << OVS_KEY_ATTR_ARP; 152 if (match->mask && (match->mask->key.eth.type == htons(0xffff))) 153 mask_allowed |= 1 << OVS_KEY_ATTR_ARP; 154 } 155 156 if (eth_p_mpls(match->key->eth.type)) { 157 key_expected |= 1 << OVS_KEY_ATTR_MPLS; 158 if (match->mask && (match->mask->key.eth.type == htons(0xffff))) 159 mask_allowed |= 1 << OVS_KEY_ATTR_MPLS; 160 } 161 162 if (match->key->eth.type == htons(ETH_P_IP)) { 163 key_expected |= 1 << OVS_KEY_ATTR_IPV4; 164 if (match->mask && (match->mask->key.eth.type == htons(0xffff))) 165 mask_allowed |= 1 << OVS_KEY_ATTR_IPV4; 166 167 if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) { 168 if (match->key->ip.proto == IPPROTO_UDP) { 169 key_expected |= 1 << OVS_KEY_ATTR_UDP; 170 if (match->mask && (match->mask->key.ip.proto == 0xff)) 171 mask_allowed |= 1 << OVS_KEY_ATTR_UDP; 172 } 173 174 if (match->key->ip.proto == IPPROTO_SCTP) { 175 key_expected |= 1 << OVS_KEY_ATTR_SCTP; 176 if (match->mask && (match->mask->key.ip.proto == 0xff)) 177 mask_allowed |= 1 << OVS_KEY_ATTR_SCTP; 178 } 179 180 if (match->key->ip.proto == IPPROTO_TCP) { 181 key_expected |= 1 << OVS_KEY_ATTR_TCP; 182 key_expected |= 1 << OVS_KEY_ATTR_TCP_FLAGS; 183 if (match->mask && (match->mask->key.ip.proto == 0xff)) { 184 mask_allowed |= 1 << OVS_KEY_ATTR_TCP; 185 mask_allowed |= 1 << OVS_KEY_ATTR_TCP_FLAGS; 186 } 187 } 188 189 if (match->key->ip.proto == IPPROTO_ICMP) { 190 key_expected |= 1 << OVS_KEY_ATTR_ICMP; 191 if (match->mask && (match->mask->key.ip.proto == 0xff)) 192 mask_allowed |= 1 << OVS_KEY_ATTR_ICMP; 193 } 194 } 195 } 196 197 if (match->key->eth.type == htons(ETH_P_IPV6)) { 198 key_expected |= 1 << OVS_KEY_ATTR_IPV6; 199 if (match->mask && (match->mask->key.eth.type == htons(0xffff))) 200 mask_allowed |= 1 << OVS_KEY_ATTR_IPV6; 201 202 if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) { 203 if (match->key->ip.proto == IPPROTO_UDP) { 204 key_expected |= 1 << OVS_KEY_ATTR_UDP; 205 if (match->mask && (match->mask->key.ip.proto == 0xff)) 206 mask_allowed |= 1 << OVS_KEY_ATTR_UDP; 207 } 208 209 if (match->key->ip.proto == IPPROTO_SCTP) { 210 key_expected |= 1 << OVS_KEY_ATTR_SCTP; 211 if (match->mask && (match->mask->key.ip.proto == 0xff)) 212 mask_allowed |= 1 << OVS_KEY_ATTR_SCTP; 213 } 214 215 if (match->key->ip.proto == IPPROTO_TCP) { 216 key_expected |= 1 << OVS_KEY_ATTR_TCP; 217 key_expected |= 1 << OVS_KEY_ATTR_TCP_FLAGS; 218 if (match->mask && (match->mask->key.ip.proto == 0xff)) { 219 mask_allowed |= 1 << OVS_KEY_ATTR_TCP; 220 mask_allowed |= 1 << OVS_KEY_ATTR_TCP_FLAGS; 221 } 222 } 223 224 if (match->key->ip.proto == IPPROTO_ICMPV6) { 225 key_expected |= 1 << OVS_KEY_ATTR_ICMPV6; 226 if (match->mask && (match->mask->key.ip.proto == 0xff)) 227 mask_allowed |= 1 << OVS_KEY_ATTR_ICMPV6; 228 229 if (match->key->tp.src == 230 htons(NDISC_NEIGHBOUR_SOLICITATION) || 231 match->key->tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) { 232 key_expected |= 1 << OVS_KEY_ATTR_ND; 233 if (match->mask && (match->mask->key.tp.src == htons(0xff))) 234 mask_allowed |= 1 << OVS_KEY_ATTR_ND; 235 } 236 } 237 } 238 } 239 240 if ((key_attrs & key_expected) != key_expected) { 241 /* Key attributes check failed. */ 242 OVS_NLERR(log, "Missing key (keys=%llx, expected=%llx)", 243 (unsigned long long)key_attrs, 244 (unsigned long long)key_expected); 245 return false; 246 } 247 248 if ((mask_attrs & mask_allowed) != mask_attrs) { 249 /* Mask attributes check failed. */ 250 OVS_NLERR(log, "Unexpected mask (mask=%llx, allowed=%llx)", 251 (unsigned long long)mask_attrs, 252 (unsigned long long)mask_allowed); 253 return false; 254 } 255 256 return true; 257 } 258 259 size_t ovs_tun_key_attr_size(void) 260 { 261 /* Whenever adding new OVS_TUNNEL_KEY_ FIELDS, we should consider 262 * updating this function. 263 */ 264 return nla_total_size_64bit(8) /* OVS_TUNNEL_KEY_ATTR_ID */ 265 + nla_total_size(16) /* OVS_TUNNEL_KEY_ATTR_IPV[46]_SRC */ 266 + nla_total_size(16) /* OVS_TUNNEL_KEY_ATTR_IPV[46]_DST */ 267 + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TOS */ 268 + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TTL */ 269 + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */ 270 + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_CSUM */ 271 + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_OAM */ 272 + nla_total_size(256) /* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS */ 273 /* OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS is mutually exclusive with 274 * OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS and covered by it. 275 */ 276 + nla_total_size(2) /* OVS_TUNNEL_KEY_ATTR_TP_SRC */ 277 + nla_total_size(2); /* OVS_TUNNEL_KEY_ATTR_TP_DST */ 278 } 279 280 size_t ovs_key_attr_size(void) 281 { 282 /* Whenever adding new OVS_KEY_ FIELDS, we should consider 283 * updating this function. 284 */ 285 BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 26); 286 287 return nla_total_size(4) /* OVS_KEY_ATTR_PRIORITY */ 288 + nla_total_size(0) /* OVS_KEY_ATTR_TUNNEL */ 289 + ovs_tun_key_attr_size() 290 + nla_total_size(4) /* OVS_KEY_ATTR_IN_PORT */ 291 + nla_total_size(4) /* OVS_KEY_ATTR_SKB_MARK */ 292 + nla_total_size(4) /* OVS_KEY_ATTR_DP_HASH */ 293 + nla_total_size(4) /* OVS_KEY_ATTR_RECIRC_ID */ 294 + nla_total_size(4) /* OVS_KEY_ATTR_CT_STATE */ 295 + nla_total_size(2) /* OVS_KEY_ATTR_CT_ZONE */ 296 + nla_total_size(4) /* OVS_KEY_ATTR_CT_MARK */ 297 + nla_total_size(16) /* OVS_KEY_ATTR_CT_LABELS */ 298 + nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */ 299 + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */ 300 + nla_total_size(4) /* OVS_KEY_ATTR_VLAN */ 301 + nla_total_size(0) /* OVS_KEY_ATTR_ENCAP */ 302 + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */ 303 + nla_total_size(40) /* OVS_KEY_ATTR_IPV6 */ 304 + nla_total_size(2) /* OVS_KEY_ATTR_ICMPV6 */ 305 + nla_total_size(28); /* OVS_KEY_ATTR_ND */ 306 } 307 308 static const struct ovs_len_tbl ovs_vxlan_ext_key_lens[OVS_VXLAN_EXT_MAX + 1] = { 309 [OVS_VXLAN_EXT_GBP] = { .len = sizeof(u32) }, 310 }; 311 312 static const struct ovs_len_tbl ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = { 313 [OVS_TUNNEL_KEY_ATTR_ID] = { .len = sizeof(u64) }, 314 [OVS_TUNNEL_KEY_ATTR_IPV4_SRC] = { .len = sizeof(u32) }, 315 [OVS_TUNNEL_KEY_ATTR_IPV4_DST] = { .len = sizeof(u32) }, 316 [OVS_TUNNEL_KEY_ATTR_TOS] = { .len = 1 }, 317 [OVS_TUNNEL_KEY_ATTR_TTL] = { .len = 1 }, 318 [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = { .len = 0 }, 319 [OVS_TUNNEL_KEY_ATTR_CSUM] = { .len = 0 }, 320 [OVS_TUNNEL_KEY_ATTR_TP_SRC] = { .len = sizeof(u16) }, 321 [OVS_TUNNEL_KEY_ATTR_TP_DST] = { .len = sizeof(u16) }, 322 [OVS_TUNNEL_KEY_ATTR_OAM] = { .len = 0 }, 323 [OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS] = { .len = OVS_ATTR_VARIABLE }, 324 [OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS] = { .len = OVS_ATTR_NESTED, 325 .next = ovs_vxlan_ext_key_lens }, 326 [OVS_TUNNEL_KEY_ATTR_IPV6_SRC] = { .len = sizeof(struct in6_addr) }, 327 [OVS_TUNNEL_KEY_ATTR_IPV6_DST] = { .len = sizeof(struct in6_addr) }, 328 }; 329 330 /* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. */ 331 static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { 332 [OVS_KEY_ATTR_ENCAP] = { .len = OVS_ATTR_NESTED }, 333 [OVS_KEY_ATTR_PRIORITY] = { .len = sizeof(u32) }, 334 [OVS_KEY_ATTR_IN_PORT] = { .len = sizeof(u32) }, 335 [OVS_KEY_ATTR_SKB_MARK] = { .len = sizeof(u32) }, 336 [OVS_KEY_ATTR_ETHERNET] = { .len = sizeof(struct ovs_key_ethernet) }, 337 [OVS_KEY_ATTR_VLAN] = { .len = sizeof(__be16) }, 338 [OVS_KEY_ATTR_ETHERTYPE] = { .len = sizeof(__be16) }, 339 [OVS_KEY_ATTR_IPV4] = { .len = sizeof(struct ovs_key_ipv4) }, 340 [OVS_KEY_ATTR_IPV6] = { .len = sizeof(struct ovs_key_ipv6) }, 341 [OVS_KEY_ATTR_TCP] = { .len = sizeof(struct ovs_key_tcp) }, 342 [OVS_KEY_ATTR_TCP_FLAGS] = { .len = sizeof(__be16) }, 343 [OVS_KEY_ATTR_UDP] = { .len = sizeof(struct ovs_key_udp) }, 344 [OVS_KEY_ATTR_SCTP] = { .len = sizeof(struct ovs_key_sctp) }, 345 [OVS_KEY_ATTR_ICMP] = { .len = sizeof(struct ovs_key_icmp) }, 346 [OVS_KEY_ATTR_ICMPV6] = { .len = sizeof(struct ovs_key_icmpv6) }, 347 [OVS_KEY_ATTR_ARP] = { .len = sizeof(struct ovs_key_arp) }, 348 [OVS_KEY_ATTR_ND] = { .len = sizeof(struct ovs_key_nd) }, 349 [OVS_KEY_ATTR_RECIRC_ID] = { .len = sizeof(u32) }, 350 [OVS_KEY_ATTR_DP_HASH] = { .len = sizeof(u32) }, 351 [OVS_KEY_ATTR_TUNNEL] = { .len = OVS_ATTR_NESTED, 352 .next = ovs_tunnel_key_lens, }, 353 [OVS_KEY_ATTR_MPLS] = { .len = sizeof(struct ovs_key_mpls) }, 354 [OVS_KEY_ATTR_CT_STATE] = { .len = sizeof(u32) }, 355 [OVS_KEY_ATTR_CT_ZONE] = { .len = sizeof(u16) }, 356 [OVS_KEY_ATTR_CT_MARK] = { .len = sizeof(u32) }, 357 [OVS_KEY_ATTR_CT_LABELS] = { .len = sizeof(struct ovs_key_ct_labels) }, 358 }; 359 360 static bool check_attr_len(unsigned int attr_len, unsigned int expected_len) 361 { 362 return expected_len == attr_len || 363 expected_len == OVS_ATTR_NESTED || 364 expected_len == OVS_ATTR_VARIABLE; 365 } 366 367 static bool is_all_zero(const u8 *fp, size_t size) 368 { 369 int i; 370 371 if (!fp) 372 return false; 373 374 for (i = 0; i < size; i++) 375 if (fp[i]) 376 return false; 377 378 return true; 379 } 380 381 static int __parse_flow_nlattrs(const struct nlattr *attr, 382 const struct nlattr *a[], 383 u64 *attrsp, bool log, bool nz) 384 { 385 const struct nlattr *nla; 386 u64 attrs; 387 int rem; 388 389 attrs = *attrsp; 390 nla_for_each_nested(nla, attr, rem) { 391 u16 type = nla_type(nla); 392 int expected_len; 393 394 if (type > OVS_KEY_ATTR_MAX) { 395 OVS_NLERR(log, "Key type %d is out of range max %d", 396 type, OVS_KEY_ATTR_MAX); 397 return -EINVAL; 398 } 399 400 if (attrs & (1 << type)) { 401 OVS_NLERR(log, "Duplicate key (type %d).", type); 402 return -EINVAL; 403 } 404 405 expected_len = ovs_key_lens[type].len; 406 if (!check_attr_len(nla_len(nla), expected_len)) { 407 OVS_NLERR(log, "Key %d has unexpected len %d expected %d", 408 type, nla_len(nla), expected_len); 409 return -EINVAL; 410 } 411 412 if (!nz || !is_all_zero(nla_data(nla), expected_len)) { 413 attrs |= 1 << type; 414 a[type] = nla; 415 } 416 } 417 if (rem) { 418 OVS_NLERR(log, "Message has %d unknown bytes.", rem); 419 return -EINVAL; 420 } 421 422 *attrsp = attrs; 423 return 0; 424 } 425 426 static int parse_flow_mask_nlattrs(const struct nlattr *attr, 427 const struct nlattr *a[], u64 *attrsp, 428 bool log) 429 { 430 return __parse_flow_nlattrs(attr, a, attrsp, log, true); 431 } 432 433 static int parse_flow_nlattrs(const struct nlattr *attr, 434 const struct nlattr *a[], u64 *attrsp, 435 bool log) 436 { 437 return __parse_flow_nlattrs(attr, a, attrsp, log, false); 438 } 439 440 static int genev_tun_opt_from_nlattr(const struct nlattr *a, 441 struct sw_flow_match *match, bool is_mask, 442 bool log) 443 { 444 unsigned long opt_key_offset; 445 446 if (nla_len(a) > sizeof(match->key->tun_opts)) { 447 OVS_NLERR(log, "Geneve option length err (len %d, max %zu).", 448 nla_len(a), sizeof(match->key->tun_opts)); 449 return -EINVAL; 450 } 451 452 if (nla_len(a) % 4 != 0) { 453 OVS_NLERR(log, "Geneve opt len %d is not a multiple of 4.", 454 nla_len(a)); 455 return -EINVAL; 456 } 457 458 /* We need to record the length of the options passed 459 * down, otherwise packets with the same format but 460 * additional options will be silently matched. 461 */ 462 if (!is_mask) { 463 SW_FLOW_KEY_PUT(match, tun_opts_len, nla_len(a), 464 false); 465 } else { 466 /* This is somewhat unusual because it looks at 467 * both the key and mask while parsing the 468 * attributes (and by extension assumes the key 469 * is parsed first). Normally, we would verify 470 * that each is the correct length and that the 471 * attributes line up in the validate function. 472 * However, that is difficult because this is 473 * variable length and we won't have the 474 * information later. 475 */ 476 if (match->key->tun_opts_len != nla_len(a)) { 477 OVS_NLERR(log, "Geneve option len %d != mask len %d", 478 match->key->tun_opts_len, nla_len(a)); 479 return -EINVAL; 480 } 481 482 SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true); 483 } 484 485 opt_key_offset = TUN_METADATA_OFFSET(nla_len(a)); 486 SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, nla_data(a), 487 nla_len(a), is_mask); 488 return 0; 489 } 490 491 static int vxlan_tun_opt_from_nlattr(const struct nlattr *attr, 492 struct sw_flow_match *match, bool is_mask, 493 bool log) 494 { 495 struct nlattr *a; 496 int rem; 497 unsigned long opt_key_offset; 498 struct vxlan_metadata opts; 499 500 BUILD_BUG_ON(sizeof(opts) > sizeof(match->key->tun_opts)); 501 502 memset(&opts, 0, sizeof(opts)); 503 nla_for_each_nested(a, attr, rem) { 504 int type = nla_type(a); 505 506 if (type > OVS_VXLAN_EXT_MAX) { 507 OVS_NLERR(log, "VXLAN extension %d out of range max %d", 508 type, OVS_VXLAN_EXT_MAX); 509 return -EINVAL; 510 } 511 512 if (!check_attr_len(nla_len(a), 513 ovs_vxlan_ext_key_lens[type].len)) { 514 OVS_NLERR(log, "VXLAN extension %d has unexpected len %d expected %d", 515 type, nla_len(a), 516 ovs_vxlan_ext_key_lens[type].len); 517 return -EINVAL; 518 } 519 520 switch (type) { 521 case OVS_VXLAN_EXT_GBP: 522 opts.gbp = nla_get_u32(a); 523 break; 524 default: 525 OVS_NLERR(log, "Unknown VXLAN extension attribute %d", 526 type); 527 return -EINVAL; 528 } 529 } 530 if (rem) { 531 OVS_NLERR(log, "VXLAN extension message has %d unknown bytes.", 532 rem); 533 return -EINVAL; 534 } 535 536 if (!is_mask) 537 SW_FLOW_KEY_PUT(match, tun_opts_len, sizeof(opts), false); 538 else 539 SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true); 540 541 opt_key_offset = TUN_METADATA_OFFSET(sizeof(opts)); 542 SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, &opts, sizeof(opts), 543 is_mask); 544 return 0; 545 } 546 547 static int ip_tun_from_nlattr(const struct nlattr *attr, 548 struct sw_flow_match *match, bool is_mask, 549 bool log) 550 { 551 bool ttl = false, ipv4 = false, ipv6 = false; 552 __be16 tun_flags = 0; 553 int opts_type = 0; 554 struct nlattr *a; 555 int rem; 556 557 nla_for_each_nested(a, attr, rem) { 558 int type = nla_type(a); 559 int err; 560 561 if (type > OVS_TUNNEL_KEY_ATTR_MAX) { 562 OVS_NLERR(log, "Tunnel attr %d out of range max %d", 563 type, OVS_TUNNEL_KEY_ATTR_MAX); 564 return -EINVAL; 565 } 566 567 if (!check_attr_len(nla_len(a), 568 ovs_tunnel_key_lens[type].len)) { 569 OVS_NLERR(log, "Tunnel attr %d has unexpected len %d expected %d", 570 type, nla_len(a), ovs_tunnel_key_lens[type].len); 571 return -EINVAL; 572 } 573 574 switch (type) { 575 case OVS_TUNNEL_KEY_ATTR_ID: 576 SW_FLOW_KEY_PUT(match, tun_key.tun_id, 577 nla_get_be64(a), is_mask); 578 tun_flags |= TUNNEL_KEY; 579 break; 580 case OVS_TUNNEL_KEY_ATTR_IPV4_SRC: 581 SW_FLOW_KEY_PUT(match, tun_key.u.ipv4.src, 582 nla_get_in_addr(a), is_mask); 583 ipv4 = true; 584 break; 585 case OVS_TUNNEL_KEY_ATTR_IPV4_DST: 586 SW_FLOW_KEY_PUT(match, tun_key.u.ipv4.dst, 587 nla_get_in_addr(a), is_mask); 588 ipv4 = true; 589 break; 590 case OVS_TUNNEL_KEY_ATTR_IPV6_SRC: 591 SW_FLOW_KEY_PUT(match, tun_key.u.ipv6.dst, 592 nla_get_in6_addr(a), is_mask); 593 ipv6 = true; 594 break; 595 case OVS_TUNNEL_KEY_ATTR_IPV6_DST: 596 SW_FLOW_KEY_PUT(match, tun_key.u.ipv6.dst, 597 nla_get_in6_addr(a), is_mask); 598 ipv6 = true; 599 break; 600 case OVS_TUNNEL_KEY_ATTR_TOS: 601 SW_FLOW_KEY_PUT(match, tun_key.tos, 602 nla_get_u8(a), is_mask); 603 break; 604 case OVS_TUNNEL_KEY_ATTR_TTL: 605 SW_FLOW_KEY_PUT(match, tun_key.ttl, 606 nla_get_u8(a), is_mask); 607 ttl = true; 608 break; 609 case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT: 610 tun_flags |= TUNNEL_DONT_FRAGMENT; 611 break; 612 case OVS_TUNNEL_KEY_ATTR_CSUM: 613 tun_flags |= TUNNEL_CSUM; 614 break; 615 case OVS_TUNNEL_KEY_ATTR_TP_SRC: 616 SW_FLOW_KEY_PUT(match, tun_key.tp_src, 617 nla_get_be16(a), is_mask); 618 break; 619 case OVS_TUNNEL_KEY_ATTR_TP_DST: 620 SW_FLOW_KEY_PUT(match, tun_key.tp_dst, 621 nla_get_be16(a), is_mask); 622 break; 623 case OVS_TUNNEL_KEY_ATTR_OAM: 624 tun_flags |= TUNNEL_OAM; 625 break; 626 case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS: 627 if (opts_type) { 628 OVS_NLERR(log, "Multiple metadata blocks provided"); 629 return -EINVAL; 630 } 631 632 err = genev_tun_opt_from_nlattr(a, match, is_mask, log); 633 if (err) 634 return err; 635 636 tun_flags |= TUNNEL_GENEVE_OPT; 637 opts_type = type; 638 break; 639 case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS: 640 if (opts_type) { 641 OVS_NLERR(log, "Multiple metadata blocks provided"); 642 return -EINVAL; 643 } 644 645 err = vxlan_tun_opt_from_nlattr(a, match, is_mask, log); 646 if (err) 647 return err; 648 649 tun_flags |= TUNNEL_VXLAN_OPT; 650 opts_type = type; 651 break; 652 default: 653 OVS_NLERR(log, "Unknown IP tunnel attribute %d", 654 type); 655 return -EINVAL; 656 } 657 } 658 659 SW_FLOW_KEY_PUT(match, tun_key.tun_flags, tun_flags, is_mask); 660 if (is_mask) 661 SW_FLOW_KEY_MEMSET_FIELD(match, tun_proto, 0xff, true); 662 else 663 SW_FLOW_KEY_PUT(match, tun_proto, ipv6 ? AF_INET6 : AF_INET, 664 false); 665 666 if (rem > 0) { 667 OVS_NLERR(log, "IP tunnel attribute has %d unknown bytes.", 668 rem); 669 return -EINVAL; 670 } 671 672 if (ipv4 && ipv6) { 673 OVS_NLERR(log, "Mixed IPv4 and IPv6 tunnel attributes"); 674 return -EINVAL; 675 } 676 677 if (!is_mask) { 678 if (!ipv4 && !ipv6) { 679 OVS_NLERR(log, "IP tunnel dst address not specified"); 680 return -EINVAL; 681 } 682 if (ipv4 && !match->key->tun_key.u.ipv4.dst) { 683 OVS_NLERR(log, "IPv4 tunnel dst address is zero"); 684 return -EINVAL; 685 } 686 if (ipv6 && ipv6_addr_any(&match->key->tun_key.u.ipv6.dst)) { 687 OVS_NLERR(log, "IPv6 tunnel dst address is zero"); 688 return -EINVAL; 689 } 690 691 if (!ttl) { 692 OVS_NLERR(log, "IP tunnel TTL not specified."); 693 return -EINVAL; 694 } 695 } 696 697 return opts_type; 698 } 699 700 static int vxlan_opt_to_nlattr(struct sk_buff *skb, 701 const void *tun_opts, int swkey_tun_opts_len) 702 { 703 const struct vxlan_metadata *opts = tun_opts; 704 struct nlattr *nla; 705 706 nla = nla_nest_start(skb, OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS); 707 if (!nla) 708 return -EMSGSIZE; 709 710 if (nla_put_u32(skb, OVS_VXLAN_EXT_GBP, opts->gbp) < 0) 711 return -EMSGSIZE; 712 713 nla_nest_end(skb, nla); 714 return 0; 715 } 716 717 static int __ip_tun_to_nlattr(struct sk_buff *skb, 718 const struct ip_tunnel_key *output, 719 const void *tun_opts, int swkey_tun_opts_len, 720 unsigned short tun_proto) 721 { 722 if (output->tun_flags & TUNNEL_KEY && 723 nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id, 724 OVS_TUNNEL_KEY_ATTR_PAD)) 725 return -EMSGSIZE; 726 switch (tun_proto) { 727 case AF_INET: 728 if (output->u.ipv4.src && 729 nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, 730 output->u.ipv4.src)) 731 return -EMSGSIZE; 732 if (output->u.ipv4.dst && 733 nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST, 734 output->u.ipv4.dst)) 735 return -EMSGSIZE; 736 break; 737 case AF_INET6: 738 if (!ipv6_addr_any(&output->u.ipv6.src) && 739 nla_put_in6_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV6_SRC, 740 &output->u.ipv6.src)) 741 return -EMSGSIZE; 742 if (!ipv6_addr_any(&output->u.ipv6.dst) && 743 nla_put_in6_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV6_DST, 744 &output->u.ipv6.dst)) 745 return -EMSGSIZE; 746 break; 747 } 748 if (output->tos && 749 nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, output->tos)) 750 return -EMSGSIZE; 751 if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, output->ttl)) 752 return -EMSGSIZE; 753 if ((output->tun_flags & TUNNEL_DONT_FRAGMENT) && 754 nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT)) 755 return -EMSGSIZE; 756 if ((output->tun_flags & TUNNEL_CSUM) && 757 nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM)) 758 return -EMSGSIZE; 759 if (output->tp_src && 760 nla_put_be16(skb, OVS_TUNNEL_KEY_ATTR_TP_SRC, output->tp_src)) 761 return -EMSGSIZE; 762 if (output->tp_dst && 763 nla_put_be16(skb, OVS_TUNNEL_KEY_ATTR_TP_DST, output->tp_dst)) 764 return -EMSGSIZE; 765 if ((output->tun_flags & TUNNEL_OAM) && 766 nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM)) 767 return -EMSGSIZE; 768 if (swkey_tun_opts_len) { 769 if (output->tun_flags & TUNNEL_GENEVE_OPT && 770 nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS, 771 swkey_tun_opts_len, tun_opts)) 772 return -EMSGSIZE; 773 else if (output->tun_flags & TUNNEL_VXLAN_OPT && 774 vxlan_opt_to_nlattr(skb, tun_opts, swkey_tun_opts_len)) 775 return -EMSGSIZE; 776 } 777 778 return 0; 779 } 780 781 static int ip_tun_to_nlattr(struct sk_buff *skb, 782 const struct ip_tunnel_key *output, 783 const void *tun_opts, int swkey_tun_opts_len, 784 unsigned short tun_proto) 785 { 786 struct nlattr *nla; 787 int err; 788 789 nla = nla_nest_start(skb, OVS_KEY_ATTR_TUNNEL); 790 if (!nla) 791 return -EMSGSIZE; 792 793 err = __ip_tun_to_nlattr(skb, output, tun_opts, swkey_tun_opts_len, 794 tun_proto); 795 if (err) 796 return err; 797 798 nla_nest_end(skb, nla); 799 return 0; 800 } 801 802 int ovs_nla_put_tunnel_info(struct sk_buff *skb, 803 struct ip_tunnel_info *tun_info) 804 { 805 return __ip_tun_to_nlattr(skb, &tun_info->key, 806 ip_tunnel_info_opts(tun_info), 807 tun_info->options_len, 808 ip_tunnel_info_af(tun_info)); 809 } 810 811 static int encode_vlan_from_nlattrs(struct sw_flow_match *match, 812 const struct nlattr *a[], 813 bool is_mask, bool inner) 814 { 815 __be16 tci = 0; 816 __be16 tpid = 0; 817 818 if (a[OVS_KEY_ATTR_VLAN]) 819 tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); 820 821 if (a[OVS_KEY_ATTR_ETHERTYPE]) 822 tpid = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]); 823 824 if (likely(!inner)) { 825 SW_FLOW_KEY_PUT(match, eth.vlan.tpid, tpid, is_mask); 826 SW_FLOW_KEY_PUT(match, eth.vlan.tci, tci, is_mask); 827 } else { 828 SW_FLOW_KEY_PUT(match, eth.cvlan.tpid, tpid, is_mask); 829 SW_FLOW_KEY_PUT(match, eth.cvlan.tci, tci, is_mask); 830 } 831 return 0; 832 } 833 834 static int validate_vlan_from_nlattrs(const struct sw_flow_match *match, 835 u64 key_attrs, bool inner, 836 const struct nlattr **a, bool log) 837 { 838 __be16 tci = 0; 839 840 if (!((key_attrs & (1 << OVS_KEY_ATTR_ETHERNET)) && 841 (key_attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) && 842 eth_type_vlan(nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE])))) { 843 /* Not a VLAN. */ 844 return 0; 845 } 846 847 if (!((key_attrs & (1 << OVS_KEY_ATTR_VLAN)) && 848 (key_attrs & (1 << OVS_KEY_ATTR_ENCAP)))) { 849 OVS_NLERR(log, "Invalid %s frame", (inner) ? "C-VLAN" : "VLAN"); 850 return -EINVAL; 851 } 852 853 if (a[OVS_KEY_ATTR_VLAN]) 854 tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); 855 856 if (!(tci & htons(VLAN_TAG_PRESENT))) { 857 if (tci) { 858 OVS_NLERR(log, "%s TCI does not have VLAN_TAG_PRESENT bit set.", 859 (inner) ? "C-VLAN" : "VLAN"); 860 return -EINVAL; 861 } else if (nla_len(a[OVS_KEY_ATTR_ENCAP])) { 862 /* Corner case for truncated VLAN header. */ 863 OVS_NLERR(log, "Truncated %s header has non-zero encap attribute.", 864 (inner) ? "C-VLAN" : "VLAN"); 865 return -EINVAL; 866 } 867 } 868 869 return 1; 870 } 871 872 static int validate_vlan_mask_from_nlattrs(const struct sw_flow_match *match, 873 u64 key_attrs, bool inner, 874 const struct nlattr **a, bool log) 875 { 876 __be16 tci = 0; 877 __be16 tpid = 0; 878 bool encap_valid = !!(match->key->eth.vlan.tci & 879 htons(VLAN_TAG_PRESENT)); 880 bool i_encap_valid = !!(match->key->eth.cvlan.tci & 881 htons(VLAN_TAG_PRESENT)); 882 883 if (!(key_attrs & (1 << OVS_KEY_ATTR_ENCAP))) { 884 /* Not a VLAN. */ 885 return 0; 886 } 887 888 if ((!inner && !encap_valid) || (inner && !i_encap_valid)) { 889 OVS_NLERR(log, "Encap mask attribute is set for non-%s frame.", 890 (inner) ? "C-VLAN" : "VLAN"); 891 return -EINVAL; 892 } 893 894 if (a[OVS_KEY_ATTR_VLAN]) 895 tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); 896 897 if (a[OVS_KEY_ATTR_ETHERTYPE]) 898 tpid = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]); 899 900 if (tpid != htons(0xffff)) { 901 OVS_NLERR(log, "Must have an exact match on %s TPID (mask=%x).", 902 (inner) ? "C-VLAN" : "VLAN", ntohs(tpid)); 903 return -EINVAL; 904 } 905 if (!(tci & htons(VLAN_TAG_PRESENT))) { 906 OVS_NLERR(log, "%s TCI mask does not have exact match for VLAN_TAG_PRESENT bit.", 907 (inner) ? "C-VLAN" : "VLAN"); 908 return -EINVAL; 909 } 910 911 return 1; 912 } 913 914 static int __parse_vlan_from_nlattrs(struct sw_flow_match *match, 915 u64 *key_attrs, bool inner, 916 const struct nlattr **a, bool is_mask, 917 bool log) 918 { 919 int err; 920 const struct nlattr *encap; 921 922 if (!is_mask) 923 err = validate_vlan_from_nlattrs(match, *key_attrs, inner, 924 a, log); 925 else 926 err = validate_vlan_mask_from_nlattrs(match, *key_attrs, inner, 927 a, log); 928 if (err <= 0) 929 return err; 930 931 err = encode_vlan_from_nlattrs(match, a, is_mask, inner); 932 if (err) 933 return err; 934 935 *key_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP); 936 *key_attrs &= ~(1 << OVS_KEY_ATTR_VLAN); 937 *key_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE); 938 939 encap = a[OVS_KEY_ATTR_ENCAP]; 940 941 if (!is_mask) 942 err = parse_flow_nlattrs(encap, a, key_attrs, log); 943 else 944 err = parse_flow_mask_nlattrs(encap, a, key_attrs, log); 945 946 return err; 947 } 948 949 static int parse_vlan_from_nlattrs(struct sw_flow_match *match, 950 u64 *key_attrs, const struct nlattr **a, 951 bool is_mask, bool log) 952 { 953 int err; 954 bool encap_valid = false; 955 956 err = __parse_vlan_from_nlattrs(match, key_attrs, false, a, 957 is_mask, log); 958 if (err) 959 return err; 960 961 encap_valid = !!(match->key->eth.vlan.tci & htons(VLAN_TAG_PRESENT)); 962 if (encap_valid) { 963 err = __parse_vlan_from_nlattrs(match, key_attrs, true, a, 964 is_mask, log); 965 if (err) 966 return err; 967 } 968 969 return 0; 970 } 971 972 static int parse_eth_type_from_nlattrs(struct sw_flow_match *match, 973 u64 *attrs, const struct nlattr **a, 974 bool is_mask, bool log) 975 { 976 __be16 eth_type; 977 978 eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]); 979 if (is_mask) { 980 /* Always exact match EtherType. */ 981 eth_type = htons(0xffff); 982 } else if (!eth_proto_is_802_3(eth_type)) { 983 OVS_NLERR(log, "EtherType %x is less than min %x", 984 ntohs(eth_type), ETH_P_802_3_MIN); 985 return -EINVAL; 986 } 987 988 SW_FLOW_KEY_PUT(match, eth.type, eth_type, is_mask); 989 *attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE); 990 return 0; 991 } 992 993 static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match, 994 u64 *attrs, const struct nlattr **a, 995 bool is_mask, bool log) 996 { 997 u8 mac_proto = MAC_PROTO_ETHERNET; 998 999 if (*attrs & (1 << OVS_KEY_ATTR_DP_HASH)) { 1000 u32 hash_val = nla_get_u32(a[OVS_KEY_ATTR_DP_HASH]); 1001 1002 SW_FLOW_KEY_PUT(match, ovs_flow_hash, hash_val, is_mask); 1003 *attrs &= ~(1 << OVS_KEY_ATTR_DP_HASH); 1004 } 1005 1006 if (*attrs & (1 << OVS_KEY_ATTR_RECIRC_ID)) { 1007 u32 recirc_id = nla_get_u32(a[OVS_KEY_ATTR_RECIRC_ID]); 1008 1009 SW_FLOW_KEY_PUT(match, recirc_id, recirc_id, is_mask); 1010 *attrs &= ~(1 << OVS_KEY_ATTR_RECIRC_ID); 1011 } 1012 1013 if (*attrs & (1 << OVS_KEY_ATTR_PRIORITY)) { 1014 SW_FLOW_KEY_PUT(match, phy.priority, 1015 nla_get_u32(a[OVS_KEY_ATTR_PRIORITY]), is_mask); 1016 *attrs &= ~(1 << OVS_KEY_ATTR_PRIORITY); 1017 } 1018 1019 if (*attrs & (1 << OVS_KEY_ATTR_IN_PORT)) { 1020 u32 in_port = nla_get_u32(a[OVS_KEY_ATTR_IN_PORT]); 1021 1022 if (is_mask) { 1023 in_port = 0xffffffff; /* Always exact match in_port. */ 1024 } else if (in_port >= DP_MAX_PORTS) { 1025 OVS_NLERR(log, "Port %d exceeds max allowable %d", 1026 in_port, DP_MAX_PORTS); 1027 return -EINVAL; 1028 } 1029 1030 SW_FLOW_KEY_PUT(match, phy.in_port, in_port, is_mask); 1031 *attrs &= ~(1 << OVS_KEY_ATTR_IN_PORT); 1032 } else if (!is_mask) { 1033 SW_FLOW_KEY_PUT(match, phy.in_port, DP_MAX_PORTS, is_mask); 1034 } 1035 1036 if (*attrs & (1 << OVS_KEY_ATTR_SKB_MARK)) { 1037 uint32_t mark = nla_get_u32(a[OVS_KEY_ATTR_SKB_MARK]); 1038 1039 SW_FLOW_KEY_PUT(match, phy.skb_mark, mark, is_mask); 1040 *attrs &= ~(1 << OVS_KEY_ATTR_SKB_MARK); 1041 } 1042 if (*attrs & (1 << OVS_KEY_ATTR_TUNNEL)) { 1043 if (ip_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match, 1044 is_mask, log) < 0) 1045 return -EINVAL; 1046 *attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL); 1047 } 1048 1049 if (*attrs & (1 << OVS_KEY_ATTR_CT_STATE) && 1050 ovs_ct_verify(net, OVS_KEY_ATTR_CT_STATE)) { 1051 u32 ct_state = nla_get_u32(a[OVS_KEY_ATTR_CT_STATE]); 1052 1053 if (ct_state & ~CT_SUPPORTED_MASK) { 1054 OVS_NLERR(log, "ct_state flags %08x unsupported", 1055 ct_state); 1056 return -EINVAL; 1057 } 1058 1059 SW_FLOW_KEY_PUT(match, ct.state, ct_state, is_mask); 1060 *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_STATE); 1061 } 1062 if (*attrs & (1 << OVS_KEY_ATTR_CT_ZONE) && 1063 ovs_ct_verify(net, OVS_KEY_ATTR_CT_ZONE)) { 1064 u16 ct_zone = nla_get_u16(a[OVS_KEY_ATTR_CT_ZONE]); 1065 1066 SW_FLOW_KEY_PUT(match, ct.zone, ct_zone, is_mask); 1067 *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_ZONE); 1068 } 1069 if (*attrs & (1 << OVS_KEY_ATTR_CT_MARK) && 1070 ovs_ct_verify(net, OVS_KEY_ATTR_CT_MARK)) { 1071 u32 mark = nla_get_u32(a[OVS_KEY_ATTR_CT_MARK]); 1072 1073 SW_FLOW_KEY_PUT(match, ct.mark, mark, is_mask); 1074 *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_MARK); 1075 } 1076 if (*attrs & (1 << OVS_KEY_ATTR_CT_LABELS) && 1077 ovs_ct_verify(net, OVS_KEY_ATTR_CT_LABELS)) { 1078 const struct ovs_key_ct_labels *cl; 1079 1080 cl = nla_data(a[OVS_KEY_ATTR_CT_LABELS]); 1081 SW_FLOW_KEY_MEMCPY(match, ct.labels, cl->ct_labels, 1082 sizeof(*cl), is_mask); 1083 *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_LABELS); 1084 } 1085 1086 /* For layer 3 packets the Ethernet type is provided 1087 * and treated as metadata but no MAC addresses are provided. 1088 */ 1089 if (!(*attrs & (1ULL << OVS_KEY_ATTR_ETHERNET)) && 1090 (*attrs & (1ULL << OVS_KEY_ATTR_ETHERTYPE))) 1091 mac_proto = MAC_PROTO_NONE; 1092 1093 /* Always exact match mac_proto */ 1094 SW_FLOW_KEY_PUT(match, mac_proto, is_mask ? 0xff : mac_proto, is_mask); 1095 1096 if (mac_proto == MAC_PROTO_NONE) 1097 return parse_eth_type_from_nlattrs(match, attrs, a, is_mask, 1098 log); 1099 1100 return 0; 1101 } 1102 1103 static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match, 1104 u64 attrs, const struct nlattr **a, 1105 bool is_mask, bool log) 1106 { 1107 int err; 1108 1109 err = metadata_from_nlattrs(net, match, &attrs, a, is_mask, log); 1110 if (err) 1111 return err; 1112 1113 if (attrs & (1 << OVS_KEY_ATTR_ETHERNET)) { 1114 const struct ovs_key_ethernet *eth_key; 1115 1116 eth_key = nla_data(a[OVS_KEY_ATTR_ETHERNET]); 1117 SW_FLOW_KEY_MEMCPY(match, eth.src, 1118 eth_key->eth_src, ETH_ALEN, is_mask); 1119 SW_FLOW_KEY_MEMCPY(match, eth.dst, 1120 eth_key->eth_dst, ETH_ALEN, is_mask); 1121 attrs &= ~(1 << OVS_KEY_ATTR_ETHERNET); 1122 1123 if (attrs & (1 << OVS_KEY_ATTR_VLAN)) { 1124 /* VLAN attribute is always parsed before getting here since it 1125 * may occur multiple times. 1126 */ 1127 OVS_NLERR(log, "VLAN attribute unexpected."); 1128 return -EINVAL; 1129 } 1130 1131 if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) { 1132 err = parse_eth_type_from_nlattrs(match, &attrs, a, is_mask, 1133 log); 1134 if (err) 1135 return err; 1136 } else if (!is_mask) { 1137 SW_FLOW_KEY_PUT(match, eth.type, htons(ETH_P_802_2), is_mask); 1138 } 1139 } else if (!match->key->eth.type) { 1140 OVS_NLERR(log, "Either Ethernet header or EtherType is required."); 1141 return -EINVAL; 1142 } 1143 1144 if (attrs & (1 << OVS_KEY_ATTR_IPV4)) { 1145 const struct ovs_key_ipv4 *ipv4_key; 1146 1147 ipv4_key = nla_data(a[OVS_KEY_ATTR_IPV4]); 1148 if (!is_mask && ipv4_key->ipv4_frag > OVS_FRAG_TYPE_MAX) { 1149 OVS_NLERR(log, "IPv4 frag type %d is out of range max %d", 1150 ipv4_key->ipv4_frag, OVS_FRAG_TYPE_MAX); 1151 return -EINVAL; 1152 } 1153 SW_FLOW_KEY_PUT(match, ip.proto, 1154 ipv4_key->ipv4_proto, is_mask); 1155 SW_FLOW_KEY_PUT(match, ip.tos, 1156 ipv4_key->ipv4_tos, is_mask); 1157 SW_FLOW_KEY_PUT(match, ip.ttl, 1158 ipv4_key->ipv4_ttl, is_mask); 1159 SW_FLOW_KEY_PUT(match, ip.frag, 1160 ipv4_key->ipv4_frag, is_mask); 1161 SW_FLOW_KEY_PUT(match, ipv4.addr.src, 1162 ipv4_key->ipv4_src, is_mask); 1163 SW_FLOW_KEY_PUT(match, ipv4.addr.dst, 1164 ipv4_key->ipv4_dst, is_mask); 1165 attrs &= ~(1 << OVS_KEY_ATTR_IPV4); 1166 } 1167 1168 if (attrs & (1 << OVS_KEY_ATTR_IPV6)) { 1169 const struct ovs_key_ipv6 *ipv6_key; 1170 1171 ipv6_key = nla_data(a[OVS_KEY_ATTR_IPV6]); 1172 if (!is_mask && ipv6_key->ipv6_frag > OVS_FRAG_TYPE_MAX) { 1173 OVS_NLERR(log, "IPv6 frag type %d is out of range max %d", 1174 ipv6_key->ipv6_frag, OVS_FRAG_TYPE_MAX); 1175 return -EINVAL; 1176 } 1177 1178 if (!is_mask && ipv6_key->ipv6_label & htonl(0xFFF00000)) { 1179 OVS_NLERR(log, "IPv6 flow label %x is out of range (max=%x).\n", 1180 ntohl(ipv6_key->ipv6_label), (1 << 20) - 1); 1181 return -EINVAL; 1182 } 1183 1184 SW_FLOW_KEY_PUT(match, ipv6.label, 1185 ipv6_key->ipv6_label, is_mask); 1186 SW_FLOW_KEY_PUT(match, ip.proto, 1187 ipv6_key->ipv6_proto, is_mask); 1188 SW_FLOW_KEY_PUT(match, ip.tos, 1189 ipv6_key->ipv6_tclass, is_mask); 1190 SW_FLOW_KEY_PUT(match, ip.ttl, 1191 ipv6_key->ipv6_hlimit, is_mask); 1192 SW_FLOW_KEY_PUT(match, ip.frag, 1193 ipv6_key->ipv6_frag, is_mask); 1194 SW_FLOW_KEY_MEMCPY(match, ipv6.addr.src, 1195 ipv6_key->ipv6_src, 1196 sizeof(match->key->ipv6.addr.src), 1197 is_mask); 1198 SW_FLOW_KEY_MEMCPY(match, ipv6.addr.dst, 1199 ipv6_key->ipv6_dst, 1200 sizeof(match->key->ipv6.addr.dst), 1201 is_mask); 1202 1203 attrs &= ~(1 << OVS_KEY_ATTR_IPV6); 1204 } 1205 1206 if (attrs & (1 << OVS_KEY_ATTR_ARP)) { 1207 const struct ovs_key_arp *arp_key; 1208 1209 arp_key = nla_data(a[OVS_KEY_ATTR_ARP]); 1210 if (!is_mask && (arp_key->arp_op & htons(0xff00))) { 1211 OVS_NLERR(log, "Unknown ARP opcode (opcode=%d).", 1212 arp_key->arp_op); 1213 return -EINVAL; 1214 } 1215 1216 SW_FLOW_KEY_PUT(match, ipv4.addr.src, 1217 arp_key->arp_sip, is_mask); 1218 SW_FLOW_KEY_PUT(match, ipv4.addr.dst, 1219 arp_key->arp_tip, is_mask); 1220 SW_FLOW_KEY_PUT(match, ip.proto, 1221 ntohs(arp_key->arp_op), is_mask); 1222 SW_FLOW_KEY_MEMCPY(match, ipv4.arp.sha, 1223 arp_key->arp_sha, ETH_ALEN, is_mask); 1224 SW_FLOW_KEY_MEMCPY(match, ipv4.arp.tha, 1225 arp_key->arp_tha, ETH_ALEN, is_mask); 1226 1227 attrs &= ~(1 << OVS_KEY_ATTR_ARP); 1228 } 1229 1230 if (attrs & (1 << OVS_KEY_ATTR_MPLS)) { 1231 const struct ovs_key_mpls *mpls_key; 1232 1233 mpls_key = nla_data(a[OVS_KEY_ATTR_MPLS]); 1234 SW_FLOW_KEY_PUT(match, mpls.top_lse, 1235 mpls_key->mpls_lse, is_mask); 1236 1237 attrs &= ~(1 << OVS_KEY_ATTR_MPLS); 1238 } 1239 1240 if (attrs & (1 << OVS_KEY_ATTR_TCP)) { 1241 const struct ovs_key_tcp *tcp_key; 1242 1243 tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]); 1244 SW_FLOW_KEY_PUT(match, tp.src, tcp_key->tcp_src, is_mask); 1245 SW_FLOW_KEY_PUT(match, tp.dst, tcp_key->tcp_dst, is_mask); 1246 attrs &= ~(1 << OVS_KEY_ATTR_TCP); 1247 } 1248 1249 if (attrs & (1 << OVS_KEY_ATTR_TCP_FLAGS)) { 1250 SW_FLOW_KEY_PUT(match, tp.flags, 1251 nla_get_be16(a[OVS_KEY_ATTR_TCP_FLAGS]), 1252 is_mask); 1253 attrs &= ~(1 << OVS_KEY_ATTR_TCP_FLAGS); 1254 } 1255 1256 if (attrs & (1 << OVS_KEY_ATTR_UDP)) { 1257 const struct ovs_key_udp *udp_key; 1258 1259 udp_key = nla_data(a[OVS_KEY_ATTR_UDP]); 1260 SW_FLOW_KEY_PUT(match, tp.src, udp_key->udp_src, is_mask); 1261 SW_FLOW_KEY_PUT(match, tp.dst, udp_key->udp_dst, is_mask); 1262 attrs &= ~(1 << OVS_KEY_ATTR_UDP); 1263 } 1264 1265 if (attrs & (1 << OVS_KEY_ATTR_SCTP)) { 1266 const struct ovs_key_sctp *sctp_key; 1267 1268 sctp_key = nla_data(a[OVS_KEY_ATTR_SCTP]); 1269 SW_FLOW_KEY_PUT(match, tp.src, sctp_key->sctp_src, is_mask); 1270 SW_FLOW_KEY_PUT(match, tp.dst, sctp_key->sctp_dst, is_mask); 1271 attrs &= ~(1 << OVS_KEY_ATTR_SCTP); 1272 } 1273 1274 if (attrs & (1 << OVS_KEY_ATTR_ICMP)) { 1275 const struct ovs_key_icmp *icmp_key; 1276 1277 icmp_key = nla_data(a[OVS_KEY_ATTR_ICMP]); 1278 SW_FLOW_KEY_PUT(match, tp.src, 1279 htons(icmp_key->icmp_type), is_mask); 1280 SW_FLOW_KEY_PUT(match, tp.dst, 1281 htons(icmp_key->icmp_code), is_mask); 1282 attrs &= ~(1 << OVS_KEY_ATTR_ICMP); 1283 } 1284 1285 if (attrs & (1 << OVS_KEY_ATTR_ICMPV6)) { 1286 const struct ovs_key_icmpv6 *icmpv6_key; 1287 1288 icmpv6_key = nla_data(a[OVS_KEY_ATTR_ICMPV6]); 1289 SW_FLOW_KEY_PUT(match, tp.src, 1290 htons(icmpv6_key->icmpv6_type), is_mask); 1291 SW_FLOW_KEY_PUT(match, tp.dst, 1292 htons(icmpv6_key->icmpv6_code), is_mask); 1293 attrs &= ~(1 << OVS_KEY_ATTR_ICMPV6); 1294 } 1295 1296 if (attrs & (1 << OVS_KEY_ATTR_ND)) { 1297 const struct ovs_key_nd *nd_key; 1298 1299 nd_key = nla_data(a[OVS_KEY_ATTR_ND]); 1300 SW_FLOW_KEY_MEMCPY(match, ipv6.nd.target, 1301 nd_key->nd_target, 1302 sizeof(match->key->ipv6.nd.target), 1303 is_mask); 1304 SW_FLOW_KEY_MEMCPY(match, ipv6.nd.sll, 1305 nd_key->nd_sll, ETH_ALEN, is_mask); 1306 SW_FLOW_KEY_MEMCPY(match, ipv6.nd.tll, 1307 nd_key->nd_tll, ETH_ALEN, is_mask); 1308 attrs &= ~(1 << OVS_KEY_ATTR_ND); 1309 } 1310 1311 if (attrs != 0) { 1312 OVS_NLERR(log, "Unknown key attributes %llx", 1313 (unsigned long long)attrs); 1314 return -EINVAL; 1315 } 1316 1317 return 0; 1318 } 1319 1320 static void nlattr_set(struct nlattr *attr, u8 val, 1321 const struct ovs_len_tbl *tbl) 1322 { 1323 struct nlattr *nla; 1324 int rem; 1325 1326 /* The nlattr stream should already have been validated */ 1327 nla_for_each_nested(nla, attr, rem) { 1328 if (tbl[nla_type(nla)].len == OVS_ATTR_NESTED) { 1329 if (tbl[nla_type(nla)].next) 1330 tbl = tbl[nla_type(nla)].next; 1331 nlattr_set(nla, val, tbl); 1332 } else { 1333 memset(nla_data(nla), val, nla_len(nla)); 1334 } 1335 1336 if (nla_type(nla) == OVS_KEY_ATTR_CT_STATE) 1337 *(u32 *)nla_data(nla) &= CT_SUPPORTED_MASK; 1338 } 1339 } 1340 1341 static void mask_set_nlattr(struct nlattr *attr, u8 val) 1342 { 1343 nlattr_set(attr, val, ovs_key_lens); 1344 } 1345 1346 /** 1347 * ovs_nla_get_match - parses Netlink attributes into a flow key and 1348 * mask. In case the 'mask' is NULL, the flow is treated as exact match 1349 * flow. Otherwise, it is treated as a wildcarded flow, except the mask 1350 * does not include any don't care bit. 1351 * @net: Used to determine per-namespace field support. 1352 * @match: receives the extracted flow match information. 1353 * @key: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute 1354 * sequence. The fields should of the packet that triggered the creation 1355 * of this flow. 1356 * @mask: Optional. Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink 1357 * attribute specifies the mask field of the wildcarded flow. 1358 * @log: Boolean to allow kernel error logging. Normally true, but when 1359 * probing for feature compatibility this should be passed in as false to 1360 * suppress unnecessary error logging. 1361 */ 1362 int ovs_nla_get_match(struct net *net, struct sw_flow_match *match, 1363 const struct nlattr *nla_key, 1364 const struct nlattr *nla_mask, 1365 bool log) 1366 { 1367 const struct nlattr *a[OVS_KEY_ATTR_MAX + 1]; 1368 struct nlattr *newmask = NULL; 1369 u64 key_attrs = 0; 1370 u64 mask_attrs = 0; 1371 int err; 1372 1373 err = parse_flow_nlattrs(nla_key, a, &key_attrs, log); 1374 if (err) 1375 return err; 1376 1377 err = parse_vlan_from_nlattrs(match, &key_attrs, a, false, log); 1378 if (err) 1379 return err; 1380 1381 err = ovs_key_from_nlattrs(net, match, key_attrs, a, false, log); 1382 if (err) 1383 return err; 1384 1385 if (match->mask) { 1386 if (!nla_mask) { 1387 /* Create an exact match mask. We need to set to 0xff 1388 * all the 'match->mask' fields that have been touched 1389 * in 'match->key'. We cannot simply memset 1390 * 'match->mask', because padding bytes and fields not 1391 * specified in 'match->key' should be left to 0. 1392 * Instead, we use a stream of netlink attributes, 1393 * copied from 'key' and set to 0xff. 1394 * ovs_key_from_nlattrs() will take care of filling 1395 * 'match->mask' appropriately. 1396 */ 1397 newmask = kmemdup(nla_key, 1398 nla_total_size(nla_len(nla_key)), 1399 GFP_KERNEL); 1400 if (!newmask) 1401 return -ENOMEM; 1402 1403 mask_set_nlattr(newmask, 0xff); 1404 1405 /* The userspace does not send tunnel attributes that 1406 * are 0, but we should not wildcard them nonetheless. 1407 */ 1408 if (match->key->tun_proto) 1409 SW_FLOW_KEY_MEMSET_FIELD(match, tun_key, 1410 0xff, true); 1411 1412 nla_mask = newmask; 1413 } 1414 1415 err = parse_flow_mask_nlattrs(nla_mask, a, &mask_attrs, log); 1416 if (err) 1417 goto free_newmask; 1418 1419 /* Always match on tci. */ 1420 SW_FLOW_KEY_PUT(match, eth.vlan.tci, htons(0xffff), true); 1421 SW_FLOW_KEY_PUT(match, eth.cvlan.tci, htons(0xffff), true); 1422 1423 err = parse_vlan_from_nlattrs(match, &mask_attrs, a, true, log); 1424 if (err) 1425 goto free_newmask; 1426 1427 err = ovs_key_from_nlattrs(net, match, mask_attrs, a, true, 1428 log); 1429 if (err) 1430 goto free_newmask; 1431 } 1432 1433 if (!match_validate(match, key_attrs, mask_attrs, log)) 1434 err = -EINVAL; 1435 1436 free_newmask: 1437 kfree(newmask); 1438 return err; 1439 } 1440 1441 static size_t get_ufid_len(const struct nlattr *attr, bool log) 1442 { 1443 size_t len; 1444 1445 if (!attr) 1446 return 0; 1447 1448 len = nla_len(attr); 1449 if (len < 1 || len > MAX_UFID_LENGTH) { 1450 OVS_NLERR(log, "ufid size %u bytes exceeds the range (1, %d)", 1451 nla_len(attr), MAX_UFID_LENGTH); 1452 return 0; 1453 } 1454 1455 return len; 1456 } 1457 1458 /* Initializes 'flow->ufid', returning true if 'attr' contains a valid UFID, 1459 * or false otherwise. 1460 */ 1461 bool ovs_nla_get_ufid(struct sw_flow_id *sfid, const struct nlattr *attr, 1462 bool log) 1463 { 1464 sfid->ufid_len = get_ufid_len(attr, log); 1465 if (sfid->ufid_len) 1466 memcpy(sfid->ufid, nla_data(attr), sfid->ufid_len); 1467 1468 return sfid->ufid_len; 1469 } 1470 1471 int ovs_nla_get_identifier(struct sw_flow_id *sfid, const struct nlattr *ufid, 1472 const struct sw_flow_key *key, bool log) 1473 { 1474 struct sw_flow_key *new_key; 1475 1476 if (ovs_nla_get_ufid(sfid, ufid, log)) 1477 return 0; 1478 1479 /* If UFID was not provided, use unmasked key. */ 1480 new_key = kmalloc(sizeof(*new_key), GFP_KERNEL); 1481 if (!new_key) 1482 return -ENOMEM; 1483 memcpy(new_key, key, sizeof(*key)); 1484 sfid->unmasked_key = new_key; 1485 1486 return 0; 1487 } 1488 1489 u32 ovs_nla_get_ufid_flags(const struct nlattr *attr) 1490 { 1491 return attr ? nla_get_u32(attr) : 0; 1492 } 1493 1494 /** 1495 * ovs_nla_get_flow_metadata - parses Netlink attributes into a flow key. 1496 * @key: Receives extracted in_port, priority, tun_key and skb_mark. 1497 * @attr: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute 1498 * sequence. 1499 * @log: Boolean to allow kernel error logging. Normally true, but when 1500 * probing for feature compatibility this should be passed in as false to 1501 * suppress unnecessary error logging. 1502 * 1503 * This parses a series of Netlink attributes that form a flow key, which must 1504 * take the same form accepted by flow_from_nlattrs(), but only enough of it to 1505 * get the metadata, that is, the parts of the flow key that cannot be 1506 * extracted from the packet itself. 1507 */ 1508 1509 int ovs_nla_get_flow_metadata(struct net *net, const struct nlattr *attr, 1510 struct sw_flow_key *key, 1511 bool log) 1512 { 1513 const struct nlattr *a[OVS_KEY_ATTR_MAX + 1]; 1514 struct sw_flow_match match; 1515 u64 attrs = 0; 1516 int err; 1517 1518 err = parse_flow_nlattrs(attr, a, &attrs, log); 1519 if (err) 1520 return -EINVAL; 1521 1522 memset(&match, 0, sizeof(match)); 1523 match.key = key; 1524 1525 memset(&key->ct, 0, sizeof(key->ct)); 1526 key->phy.in_port = DP_MAX_PORTS; 1527 1528 return metadata_from_nlattrs(net, &match, &attrs, a, false, log); 1529 } 1530 1531 static int ovs_nla_put_vlan(struct sk_buff *skb, const struct vlan_head *vh, 1532 bool is_mask) 1533 { 1534 __be16 eth_type = !is_mask ? vh->tpid : htons(0xffff); 1535 1536 if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, eth_type) || 1537 nla_put_be16(skb, OVS_KEY_ATTR_VLAN, vh->tci)) 1538 return -EMSGSIZE; 1539 return 0; 1540 } 1541 1542 static int __ovs_nla_put_key(const struct sw_flow_key *swkey, 1543 const struct sw_flow_key *output, bool is_mask, 1544 struct sk_buff *skb) 1545 { 1546 struct ovs_key_ethernet *eth_key; 1547 struct nlattr *nla; 1548 struct nlattr *encap = NULL; 1549 struct nlattr *in_encap = NULL; 1550 1551 if (nla_put_u32(skb, OVS_KEY_ATTR_RECIRC_ID, output->recirc_id)) 1552 goto nla_put_failure; 1553 1554 if (nla_put_u32(skb, OVS_KEY_ATTR_DP_HASH, output->ovs_flow_hash)) 1555 goto nla_put_failure; 1556 1557 if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority)) 1558 goto nla_put_failure; 1559 1560 if ((swkey->tun_proto || is_mask)) { 1561 const void *opts = NULL; 1562 1563 if (output->tun_key.tun_flags & TUNNEL_OPTIONS_PRESENT) 1564 opts = TUN_METADATA_OPTS(output, swkey->tun_opts_len); 1565 1566 if (ip_tun_to_nlattr(skb, &output->tun_key, opts, 1567 swkey->tun_opts_len, swkey->tun_proto)) 1568 goto nla_put_failure; 1569 } 1570 1571 if (swkey->phy.in_port == DP_MAX_PORTS) { 1572 if (is_mask && (output->phy.in_port == 0xffff)) 1573 if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, 0xffffffff)) 1574 goto nla_put_failure; 1575 } else { 1576 u16 upper_u16; 1577 upper_u16 = !is_mask ? 0 : 0xffff; 1578 1579 if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, 1580 (upper_u16 << 16) | output->phy.in_port)) 1581 goto nla_put_failure; 1582 } 1583 1584 if (nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, output->phy.skb_mark)) 1585 goto nla_put_failure; 1586 1587 if (ovs_ct_put_key(output, skb)) 1588 goto nla_put_failure; 1589 1590 if (ovs_key_mac_proto(swkey) == MAC_PROTO_ETHERNET) { 1591 nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key)); 1592 if (!nla) 1593 goto nla_put_failure; 1594 1595 eth_key = nla_data(nla); 1596 ether_addr_copy(eth_key->eth_src, output->eth.src); 1597 ether_addr_copy(eth_key->eth_dst, output->eth.dst); 1598 1599 if (swkey->eth.vlan.tci || eth_type_vlan(swkey->eth.type)) { 1600 if (ovs_nla_put_vlan(skb, &output->eth.vlan, is_mask)) 1601 goto nla_put_failure; 1602 encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP); 1603 if (!swkey->eth.vlan.tci) 1604 goto unencap; 1605 1606 if (swkey->eth.cvlan.tci || eth_type_vlan(swkey->eth.type)) { 1607 if (ovs_nla_put_vlan(skb, &output->eth.cvlan, is_mask)) 1608 goto nla_put_failure; 1609 in_encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP); 1610 if (!swkey->eth.cvlan.tci) 1611 goto unencap; 1612 } 1613 } 1614 1615 if (swkey->eth.type == htons(ETH_P_802_2)) { 1616 /* 1617 * Ethertype 802.2 is represented in the netlink with omitted 1618 * OVS_KEY_ATTR_ETHERTYPE in the flow key attribute, and 1619 * 0xffff in the mask attribute. Ethertype can also 1620 * be wildcarded. 1621 */ 1622 if (is_mask && output->eth.type) 1623 if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, 1624 output->eth.type)) 1625 goto nla_put_failure; 1626 goto unencap; 1627 } 1628 } 1629 1630 if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, output->eth.type)) 1631 goto nla_put_failure; 1632 1633 if (eth_type_vlan(swkey->eth.type)) { 1634 /* There are 3 VLAN tags, we don't know anything about the rest 1635 * of the packet, so truncate here. 1636 */ 1637 WARN_ON_ONCE(!(encap && in_encap)); 1638 goto unencap; 1639 } 1640 1641 if (swkey->eth.type == htons(ETH_P_IP)) { 1642 struct ovs_key_ipv4 *ipv4_key; 1643 1644 nla = nla_reserve(skb, OVS_KEY_ATTR_IPV4, sizeof(*ipv4_key)); 1645 if (!nla) 1646 goto nla_put_failure; 1647 ipv4_key = nla_data(nla); 1648 ipv4_key->ipv4_src = output->ipv4.addr.src; 1649 ipv4_key->ipv4_dst = output->ipv4.addr.dst; 1650 ipv4_key->ipv4_proto = output->ip.proto; 1651 ipv4_key->ipv4_tos = output->ip.tos; 1652 ipv4_key->ipv4_ttl = output->ip.ttl; 1653 ipv4_key->ipv4_frag = output->ip.frag; 1654 } else if (swkey->eth.type == htons(ETH_P_IPV6)) { 1655 struct ovs_key_ipv6 *ipv6_key; 1656 1657 nla = nla_reserve(skb, OVS_KEY_ATTR_IPV6, sizeof(*ipv6_key)); 1658 if (!nla) 1659 goto nla_put_failure; 1660 ipv6_key = nla_data(nla); 1661 memcpy(ipv6_key->ipv6_src, &output->ipv6.addr.src, 1662 sizeof(ipv6_key->ipv6_src)); 1663 memcpy(ipv6_key->ipv6_dst, &output->ipv6.addr.dst, 1664 sizeof(ipv6_key->ipv6_dst)); 1665 ipv6_key->ipv6_label = output->ipv6.label; 1666 ipv6_key->ipv6_proto = output->ip.proto; 1667 ipv6_key->ipv6_tclass = output->ip.tos; 1668 ipv6_key->ipv6_hlimit = output->ip.ttl; 1669 ipv6_key->ipv6_frag = output->ip.frag; 1670 } else if (swkey->eth.type == htons(ETH_P_ARP) || 1671 swkey->eth.type == htons(ETH_P_RARP)) { 1672 struct ovs_key_arp *arp_key; 1673 1674 nla = nla_reserve(skb, OVS_KEY_ATTR_ARP, sizeof(*arp_key)); 1675 if (!nla) 1676 goto nla_put_failure; 1677 arp_key = nla_data(nla); 1678 memset(arp_key, 0, sizeof(struct ovs_key_arp)); 1679 arp_key->arp_sip = output->ipv4.addr.src; 1680 arp_key->arp_tip = output->ipv4.addr.dst; 1681 arp_key->arp_op = htons(output->ip.proto); 1682 ether_addr_copy(arp_key->arp_sha, output->ipv4.arp.sha); 1683 ether_addr_copy(arp_key->arp_tha, output->ipv4.arp.tha); 1684 } else if (eth_p_mpls(swkey->eth.type)) { 1685 struct ovs_key_mpls *mpls_key; 1686 1687 nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS, sizeof(*mpls_key)); 1688 if (!nla) 1689 goto nla_put_failure; 1690 mpls_key = nla_data(nla); 1691 mpls_key->mpls_lse = output->mpls.top_lse; 1692 } 1693 1694 if ((swkey->eth.type == htons(ETH_P_IP) || 1695 swkey->eth.type == htons(ETH_P_IPV6)) && 1696 swkey->ip.frag != OVS_FRAG_TYPE_LATER) { 1697 1698 if (swkey->ip.proto == IPPROTO_TCP) { 1699 struct ovs_key_tcp *tcp_key; 1700 1701 nla = nla_reserve(skb, OVS_KEY_ATTR_TCP, sizeof(*tcp_key)); 1702 if (!nla) 1703 goto nla_put_failure; 1704 tcp_key = nla_data(nla); 1705 tcp_key->tcp_src = output->tp.src; 1706 tcp_key->tcp_dst = output->tp.dst; 1707 if (nla_put_be16(skb, OVS_KEY_ATTR_TCP_FLAGS, 1708 output->tp.flags)) 1709 goto nla_put_failure; 1710 } else if (swkey->ip.proto == IPPROTO_UDP) { 1711 struct ovs_key_udp *udp_key; 1712 1713 nla = nla_reserve(skb, OVS_KEY_ATTR_UDP, sizeof(*udp_key)); 1714 if (!nla) 1715 goto nla_put_failure; 1716 udp_key = nla_data(nla); 1717 udp_key->udp_src = output->tp.src; 1718 udp_key->udp_dst = output->tp.dst; 1719 } else if (swkey->ip.proto == IPPROTO_SCTP) { 1720 struct ovs_key_sctp *sctp_key; 1721 1722 nla = nla_reserve(skb, OVS_KEY_ATTR_SCTP, sizeof(*sctp_key)); 1723 if (!nla) 1724 goto nla_put_failure; 1725 sctp_key = nla_data(nla); 1726 sctp_key->sctp_src = output->tp.src; 1727 sctp_key->sctp_dst = output->tp.dst; 1728 } else if (swkey->eth.type == htons(ETH_P_IP) && 1729 swkey->ip.proto == IPPROTO_ICMP) { 1730 struct ovs_key_icmp *icmp_key; 1731 1732 nla = nla_reserve(skb, OVS_KEY_ATTR_ICMP, sizeof(*icmp_key)); 1733 if (!nla) 1734 goto nla_put_failure; 1735 icmp_key = nla_data(nla); 1736 icmp_key->icmp_type = ntohs(output->tp.src); 1737 icmp_key->icmp_code = ntohs(output->tp.dst); 1738 } else if (swkey->eth.type == htons(ETH_P_IPV6) && 1739 swkey->ip.proto == IPPROTO_ICMPV6) { 1740 struct ovs_key_icmpv6 *icmpv6_key; 1741 1742 nla = nla_reserve(skb, OVS_KEY_ATTR_ICMPV6, 1743 sizeof(*icmpv6_key)); 1744 if (!nla) 1745 goto nla_put_failure; 1746 icmpv6_key = nla_data(nla); 1747 icmpv6_key->icmpv6_type = ntohs(output->tp.src); 1748 icmpv6_key->icmpv6_code = ntohs(output->tp.dst); 1749 1750 if (icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_SOLICITATION || 1751 icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_ADVERTISEMENT) { 1752 struct ovs_key_nd *nd_key; 1753 1754 nla = nla_reserve(skb, OVS_KEY_ATTR_ND, sizeof(*nd_key)); 1755 if (!nla) 1756 goto nla_put_failure; 1757 nd_key = nla_data(nla); 1758 memcpy(nd_key->nd_target, &output->ipv6.nd.target, 1759 sizeof(nd_key->nd_target)); 1760 ether_addr_copy(nd_key->nd_sll, output->ipv6.nd.sll); 1761 ether_addr_copy(nd_key->nd_tll, output->ipv6.nd.tll); 1762 } 1763 } 1764 } 1765 1766 unencap: 1767 if (in_encap) 1768 nla_nest_end(skb, in_encap); 1769 if (encap) 1770 nla_nest_end(skb, encap); 1771 1772 return 0; 1773 1774 nla_put_failure: 1775 return -EMSGSIZE; 1776 } 1777 1778 int ovs_nla_put_key(const struct sw_flow_key *swkey, 1779 const struct sw_flow_key *output, int attr, bool is_mask, 1780 struct sk_buff *skb) 1781 { 1782 int err; 1783 struct nlattr *nla; 1784 1785 nla = nla_nest_start(skb, attr); 1786 if (!nla) 1787 return -EMSGSIZE; 1788 err = __ovs_nla_put_key(swkey, output, is_mask, skb); 1789 if (err) 1790 return err; 1791 nla_nest_end(skb, nla); 1792 1793 return 0; 1794 } 1795 1796 /* Called with ovs_mutex or RCU read lock. */ 1797 int ovs_nla_put_identifier(const struct sw_flow *flow, struct sk_buff *skb) 1798 { 1799 if (ovs_identifier_is_ufid(&flow->id)) 1800 return nla_put(skb, OVS_FLOW_ATTR_UFID, flow->id.ufid_len, 1801 flow->id.ufid); 1802 1803 return ovs_nla_put_key(flow->id.unmasked_key, flow->id.unmasked_key, 1804 OVS_FLOW_ATTR_KEY, false, skb); 1805 } 1806 1807 /* Called with ovs_mutex or RCU read lock. */ 1808 int ovs_nla_put_masked_key(const struct sw_flow *flow, struct sk_buff *skb) 1809 { 1810 return ovs_nla_put_key(&flow->key, &flow->key, 1811 OVS_FLOW_ATTR_KEY, false, skb); 1812 } 1813 1814 /* Called with ovs_mutex or RCU read lock. */ 1815 int ovs_nla_put_mask(const struct sw_flow *flow, struct sk_buff *skb) 1816 { 1817 return ovs_nla_put_key(&flow->key, &flow->mask->key, 1818 OVS_FLOW_ATTR_MASK, true, skb); 1819 } 1820 1821 #define MAX_ACTIONS_BUFSIZE (32 * 1024) 1822 1823 static struct sw_flow_actions *nla_alloc_flow_actions(int size, bool log) 1824 { 1825 struct sw_flow_actions *sfa; 1826 1827 if (size > MAX_ACTIONS_BUFSIZE) { 1828 OVS_NLERR(log, "Flow action size %u bytes exceeds max", size); 1829 return ERR_PTR(-EINVAL); 1830 } 1831 1832 sfa = kmalloc(sizeof(*sfa) + size, GFP_KERNEL); 1833 if (!sfa) 1834 return ERR_PTR(-ENOMEM); 1835 1836 sfa->actions_len = 0; 1837 return sfa; 1838 } 1839 1840 static void ovs_nla_free_set_action(const struct nlattr *a) 1841 { 1842 const struct nlattr *ovs_key = nla_data(a); 1843 struct ovs_tunnel_info *ovs_tun; 1844 1845 switch (nla_type(ovs_key)) { 1846 case OVS_KEY_ATTR_TUNNEL_INFO: 1847 ovs_tun = nla_data(ovs_key); 1848 dst_release((struct dst_entry *)ovs_tun->tun_dst); 1849 break; 1850 } 1851 } 1852 1853 void ovs_nla_free_flow_actions(struct sw_flow_actions *sf_acts) 1854 { 1855 const struct nlattr *a; 1856 int rem; 1857 1858 if (!sf_acts) 1859 return; 1860 1861 nla_for_each_attr(a, sf_acts->actions, sf_acts->actions_len, rem) { 1862 switch (nla_type(a)) { 1863 case OVS_ACTION_ATTR_SET: 1864 ovs_nla_free_set_action(a); 1865 break; 1866 case OVS_ACTION_ATTR_CT: 1867 ovs_ct_free_action(a); 1868 break; 1869 } 1870 } 1871 1872 kfree(sf_acts); 1873 } 1874 1875 static void __ovs_nla_free_flow_actions(struct rcu_head *head) 1876 { 1877 ovs_nla_free_flow_actions(container_of(head, struct sw_flow_actions, rcu)); 1878 } 1879 1880 /* Schedules 'sf_acts' to be freed after the next RCU grace period. 1881 * The caller must hold rcu_read_lock for this to be sensible. */ 1882 void ovs_nla_free_flow_actions_rcu(struct sw_flow_actions *sf_acts) 1883 { 1884 call_rcu(&sf_acts->rcu, __ovs_nla_free_flow_actions); 1885 } 1886 1887 static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa, 1888 int attr_len, bool log) 1889 { 1890 1891 struct sw_flow_actions *acts; 1892 int new_acts_size; 1893 int req_size = NLA_ALIGN(attr_len); 1894 int next_offset = offsetof(struct sw_flow_actions, actions) + 1895 (*sfa)->actions_len; 1896 1897 if (req_size <= (ksize(*sfa) - next_offset)) 1898 goto out; 1899 1900 new_acts_size = ksize(*sfa) * 2; 1901 1902 if (new_acts_size > MAX_ACTIONS_BUFSIZE) { 1903 if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size) 1904 return ERR_PTR(-EMSGSIZE); 1905 new_acts_size = MAX_ACTIONS_BUFSIZE; 1906 } 1907 1908 acts = nla_alloc_flow_actions(new_acts_size, log); 1909 if (IS_ERR(acts)) 1910 return (void *)acts; 1911 1912 memcpy(acts->actions, (*sfa)->actions, (*sfa)->actions_len); 1913 acts->actions_len = (*sfa)->actions_len; 1914 acts->orig_len = (*sfa)->orig_len; 1915 kfree(*sfa); 1916 *sfa = acts; 1917 1918 out: 1919 (*sfa)->actions_len += req_size; 1920 return (struct nlattr *) ((unsigned char *)(*sfa) + next_offset); 1921 } 1922 1923 static struct nlattr *__add_action(struct sw_flow_actions **sfa, 1924 int attrtype, void *data, int len, bool log) 1925 { 1926 struct nlattr *a; 1927 1928 a = reserve_sfa_size(sfa, nla_attr_size(len), log); 1929 if (IS_ERR(a)) 1930 return a; 1931 1932 a->nla_type = attrtype; 1933 a->nla_len = nla_attr_size(len); 1934 1935 if (data) 1936 memcpy(nla_data(a), data, len); 1937 memset((unsigned char *) a + a->nla_len, 0, nla_padlen(len)); 1938 1939 return a; 1940 } 1941 1942 int ovs_nla_add_action(struct sw_flow_actions **sfa, int attrtype, void *data, 1943 int len, bool log) 1944 { 1945 struct nlattr *a; 1946 1947 a = __add_action(sfa, attrtype, data, len, log); 1948 1949 return PTR_ERR_OR_ZERO(a); 1950 } 1951 1952 static inline int add_nested_action_start(struct sw_flow_actions **sfa, 1953 int attrtype, bool log) 1954 { 1955 int used = (*sfa)->actions_len; 1956 int err; 1957 1958 err = ovs_nla_add_action(sfa, attrtype, NULL, 0, log); 1959 if (err) 1960 return err; 1961 1962 return used; 1963 } 1964 1965 static inline void add_nested_action_end(struct sw_flow_actions *sfa, 1966 int st_offset) 1967 { 1968 struct nlattr *a = (struct nlattr *) ((unsigned char *)sfa->actions + 1969 st_offset); 1970 1971 a->nla_len = sfa->actions_len - st_offset; 1972 } 1973 1974 static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, 1975 const struct sw_flow_key *key, 1976 int depth, struct sw_flow_actions **sfa, 1977 __be16 eth_type, __be16 vlan_tci, bool log); 1978 1979 static int validate_and_copy_sample(struct net *net, const struct nlattr *attr, 1980 const struct sw_flow_key *key, int depth, 1981 struct sw_flow_actions **sfa, 1982 __be16 eth_type, __be16 vlan_tci, bool log) 1983 { 1984 const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1]; 1985 const struct nlattr *probability, *actions; 1986 const struct nlattr *a; 1987 int rem, start, err, st_acts; 1988 1989 memset(attrs, 0, sizeof(attrs)); 1990 nla_for_each_nested(a, attr, rem) { 1991 int type = nla_type(a); 1992 if (!type || type > OVS_SAMPLE_ATTR_MAX || attrs[type]) 1993 return -EINVAL; 1994 attrs[type] = a; 1995 } 1996 if (rem) 1997 return -EINVAL; 1998 1999 probability = attrs[OVS_SAMPLE_ATTR_PROBABILITY]; 2000 if (!probability || nla_len(probability) != sizeof(u32)) 2001 return -EINVAL; 2002 2003 actions = attrs[OVS_SAMPLE_ATTR_ACTIONS]; 2004 if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN)) 2005 return -EINVAL; 2006 2007 /* validation done, copy sample action. */ 2008 start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE, log); 2009 if (start < 0) 2010 return start; 2011 err = ovs_nla_add_action(sfa, OVS_SAMPLE_ATTR_PROBABILITY, 2012 nla_data(probability), sizeof(u32), log); 2013 if (err) 2014 return err; 2015 st_acts = add_nested_action_start(sfa, OVS_SAMPLE_ATTR_ACTIONS, log); 2016 if (st_acts < 0) 2017 return st_acts; 2018 2019 err = __ovs_nla_copy_actions(net, actions, key, depth + 1, sfa, 2020 eth_type, vlan_tci, log); 2021 if (err) 2022 return err; 2023 2024 add_nested_action_end(*sfa, st_acts); 2025 add_nested_action_end(*sfa, start); 2026 2027 return 0; 2028 } 2029 2030 void ovs_match_init(struct sw_flow_match *match, 2031 struct sw_flow_key *key, 2032 bool reset_key, 2033 struct sw_flow_mask *mask) 2034 { 2035 memset(match, 0, sizeof(*match)); 2036 match->key = key; 2037 match->mask = mask; 2038 2039 if (reset_key) 2040 memset(key, 0, sizeof(*key)); 2041 2042 if (mask) { 2043 memset(&mask->key, 0, sizeof(mask->key)); 2044 mask->range.start = mask->range.end = 0; 2045 } 2046 } 2047 2048 static int validate_geneve_opts(struct sw_flow_key *key) 2049 { 2050 struct geneve_opt *option; 2051 int opts_len = key->tun_opts_len; 2052 bool crit_opt = false; 2053 2054 option = (struct geneve_opt *)TUN_METADATA_OPTS(key, key->tun_opts_len); 2055 while (opts_len > 0) { 2056 int len; 2057 2058 if (opts_len < sizeof(*option)) 2059 return -EINVAL; 2060 2061 len = sizeof(*option) + option->length * 4; 2062 if (len > opts_len) 2063 return -EINVAL; 2064 2065 crit_opt |= !!(option->type & GENEVE_CRIT_OPT_TYPE); 2066 2067 option = (struct geneve_opt *)((u8 *)option + len); 2068 opts_len -= len; 2069 }; 2070 2071 key->tun_key.tun_flags |= crit_opt ? TUNNEL_CRIT_OPT : 0; 2072 2073 return 0; 2074 } 2075 2076 static int validate_and_copy_set_tun(const struct nlattr *attr, 2077 struct sw_flow_actions **sfa, bool log) 2078 { 2079 struct sw_flow_match match; 2080 struct sw_flow_key key; 2081 struct metadata_dst *tun_dst; 2082 struct ip_tunnel_info *tun_info; 2083 struct ovs_tunnel_info *ovs_tun; 2084 struct nlattr *a; 2085 int err = 0, start, opts_type; 2086 2087 ovs_match_init(&match, &key, true, NULL); 2088 opts_type = ip_tun_from_nlattr(nla_data(attr), &match, false, log); 2089 if (opts_type < 0) 2090 return opts_type; 2091 2092 if (key.tun_opts_len) { 2093 switch (opts_type) { 2094 case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS: 2095 err = validate_geneve_opts(&key); 2096 if (err < 0) 2097 return err; 2098 break; 2099 case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS: 2100 break; 2101 } 2102 }; 2103 2104 start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET, log); 2105 if (start < 0) 2106 return start; 2107 2108 tun_dst = metadata_dst_alloc(key.tun_opts_len, GFP_KERNEL); 2109 if (!tun_dst) 2110 return -ENOMEM; 2111 2112 err = dst_cache_init(&tun_dst->u.tun_info.dst_cache, GFP_KERNEL); 2113 if (err) { 2114 dst_release((struct dst_entry *)tun_dst); 2115 return err; 2116 } 2117 2118 a = __add_action(sfa, OVS_KEY_ATTR_TUNNEL_INFO, NULL, 2119 sizeof(*ovs_tun), log); 2120 if (IS_ERR(a)) { 2121 dst_release((struct dst_entry *)tun_dst); 2122 return PTR_ERR(a); 2123 } 2124 2125 ovs_tun = nla_data(a); 2126 ovs_tun->tun_dst = tun_dst; 2127 2128 tun_info = &tun_dst->u.tun_info; 2129 tun_info->mode = IP_TUNNEL_INFO_TX; 2130 if (key.tun_proto == AF_INET6) 2131 tun_info->mode |= IP_TUNNEL_INFO_IPV6; 2132 tun_info->key = key.tun_key; 2133 2134 /* We need to store the options in the action itself since 2135 * everything else will go away after flow setup. We can append 2136 * it to tun_info and then point there. 2137 */ 2138 ip_tunnel_info_opts_set(tun_info, 2139 TUN_METADATA_OPTS(&key, key.tun_opts_len), 2140 key.tun_opts_len); 2141 add_nested_action_end(*sfa, start); 2142 2143 return err; 2144 } 2145 2146 /* Return false if there are any non-masked bits set. 2147 * Mask follows data immediately, before any netlink padding. 2148 */ 2149 static bool validate_masked(u8 *data, int len) 2150 { 2151 u8 *mask = data + len; 2152 2153 while (len--) 2154 if (*data++ & ~*mask++) 2155 return false; 2156 2157 return true; 2158 } 2159 2160 static int validate_set(const struct nlattr *a, 2161 const struct sw_flow_key *flow_key, 2162 struct sw_flow_actions **sfa, bool *skip_copy, 2163 u8 mac_proto, __be16 eth_type, bool masked, bool log) 2164 { 2165 const struct nlattr *ovs_key = nla_data(a); 2166 int key_type = nla_type(ovs_key); 2167 size_t key_len; 2168 2169 /* There can be only one key in a action */ 2170 if (nla_total_size(nla_len(ovs_key)) != nla_len(a)) 2171 return -EINVAL; 2172 2173 key_len = nla_len(ovs_key); 2174 if (masked) 2175 key_len /= 2; 2176 2177 if (key_type > OVS_KEY_ATTR_MAX || 2178 !check_attr_len(key_len, ovs_key_lens[key_type].len)) 2179 return -EINVAL; 2180 2181 if (masked && !validate_masked(nla_data(ovs_key), key_len)) 2182 return -EINVAL; 2183 2184 switch (key_type) { 2185 const struct ovs_key_ipv4 *ipv4_key; 2186 const struct ovs_key_ipv6 *ipv6_key; 2187 int err; 2188 2189 case OVS_KEY_ATTR_PRIORITY: 2190 case OVS_KEY_ATTR_SKB_MARK: 2191 case OVS_KEY_ATTR_CT_MARK: 2192 case OVS_KEY_ATTR_CT_LABELS: 2193 break; 2194 2195 case OVS_KEY_ATTR_ETHERNET: 2196 if (mac_proto != MAC_PROTO_ETHERNET) 2197 return -EINVAL; 2198 2199 case OVS_KEY_ATTR_TUNNEL: 2200 if (masked) 2201 return -EINVAL; /* Masked tunnel set not supported. */ 2202 2203 *skip_copy = true; 2204 err = validate_and_copy_set_tun(a, sfa, log); 2205 if (err) 2206 return err; 2207 break; 2208 2209 case OVS_KEY_ATTR_IPV4: 2210 if (eth_type != htons(ETH_P_IP)) 2211 return -EINVAL; 2212 2213 ipv4_key = nla_data(ovs_key); 2214 2215 if (masked) { 2216 const struct ovs_key_ipv4 *mask = ipv4_key + 1; 2217 2218 /* Non-writeable fields. */ 2219 if (mask->ipv4_proto || mask->ipv4_frag) 2220 return -EINVAL; 2221 } else { 2222 if (ipv4_key->ipv4_proto != flow_key->ip.proto) 2223 return -EINVAL; 2224 2225 if (ipv4_key->ipv4_frag != flow_key->ip.frag) 2226 return -EINVAL; 2227 } 2228 break; 2229 2230 case OVS_KEY_ATTR_IPV6: 2231 if (eth_type != htons(ETH_P_IPV6)) 2232 return -EINVAL; 2233 2234 ipv6_key = nla_data(ovs_key); 2235 2236 if (masked) { 2237 const struct ovs_key_ipv6 *mask = ipv6_key + 1; 2238 2239 /* Non-writeable fields. */ 2240 if (mask->ipv6_proto || mask->ipv6_frag) 2241 return -EINVAL; 2242 2243 /* Invalid bits in the flow label mask? */ 2244 if (ntohl(mask->ipv6_label) & 0xFFF00000) 2245 return -EINVAL; 2246 } else { 2247 if (ipv6_key->ipv6_proto != flow_key->ip.proto) 2248 return -EINVAL; 2249 2250 if (ipv6_key->ipv6_frag != flow_key->ip.frag) 2251 return -EINVAL; 2252 } 2253 if (ntohl(ipv6_key->ipv6_label) & 0xFFF00000) 2254 return -EINVAL; 2255 2256 break; 2257 2258 case OVS_KEY_ATTR_TCP: 2259 if ((eth_type != htons(ETH_P_IP) && 2260 eth_type != htons(ETH_P_IPV6)) || 2261 flow_key->ip.proto != IPPROTO_TCP) 2262 return -EINVAL; 2263 2264 break; 2265 2266 case OVS_KEY_ATTR_UDP: 2267 if ((eth_type != htons(ETH_P_IP) && 2268 eth_type != htons(ETH_P_IPV6)) || 2269 flow_key->ip.proto != IPPROTO_UDP) 2270 return -EINVAL; 2271 2272 break; 2273 2274 case OVS_KEY_ATTR_MPLS: 2275 if (!eth_p_mpls(eth_type)) 2276 return -EINVAL; 2277 break; 2278 2279 case OVS_KEY_ATTR_SCTP: 2280 if ((eth_type != htons(ETH_P_IP) && 2281 eth_type != htons(ETH_P_IPV6)) || 2282 flow_key->ip.proto != IPPROTO_SCTP) 2283 return -EINVAL; 2284 2285 break; 2286 2287 default: 2288 return -EINVAL; 2289 } 2290 2291 /* Convert non-masked non-tunnel set actions to masked set actions. */ 2292 if (!masked && key_type != OVS_KEY_ATTR_TUNNEL) { 2293 int start, len = key_len * 2; 2294 struct nlattr *at; 2295 2296 *skip_copy = true; 2297 2298 start = add_nested_action_start(sfa, 2299 OVS_ACTION_ATTR_SET_TO_MASKED, 2300 log); 2301 if (start < 0) 2302 return start; 2303 2304 at = __add_action(sfa, key_type, NULL, len, log); 2305 if (IS_ERR(at)) 2306 return PTR_ERR(at); 2307 2308 memcpy(nla_data(at), nla_data(ovs_key), key_len); /* Key. */ 2309 memset(nla_data(at) + key_len, 0xff, key_len); /* Mask. */ 2310 /* Clear non-writeable bits from otherwise writeable fields. */ 2311 if (key_type == OVS_KEY_ATTR_IPV6) { 2312 struct ovs_key_ipv6 *mask = nla_data(at) + key_len; 2313 2314 mask->ipv6_label &= htonl(0x000FFFFF); 2315 } 2316 add_nested_action_end(*sfa, start); 2317 } 2318 2319 return 0; 2320 } 2321 2322 static int validate_userspace(const struct nlattr *attr) 2323 { 2324 static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] = { 2325 [OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 }, 2326 [OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_UNSPEC }, 2327 [OVS_USERSPACE_ATTR_EGRESS_TUN_PORT] = {.type = NLA_U32 }, 2328 }; 2329 struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1]; 2330 int error; 2331 2332 error = nla_parse_nested(a, OVS_USERSPACE_ATTR_MAX, 2333 attr, userspace_policy); 2334 if (error) 2335 return error; 2336 2337 if (!a[OVS_USERSPACE_ATTR_PID] || 2338 !nla_get_u32(a[OVS_USERSPACE_ATTR_PID])) 2339 return -EINVAL; 2340 2341 return 0; 2342 } 2343 2344 static int copy_action(const struct nlattr *from, 2345 struct sw_flow_actions **sfa, bool log) 2346 { 2347 int totlen = NLA_ALIGN(from->nla_len); 2348 struct nlattr *to; 2349 2350 to = reserve_sfa_size(sfa, from->nla_len, log); 2351 if (IS_ERR(to)) 2352 return PTR_ERR(to); 2353 2354 memcpy(to, from, totlen); 2355 return 0; 2356 } 2357 2358 static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, 2359 const struct sw_flow_key *key, 2360 int depth, struct sw_flow_actions **sfa, 2361 __be16 eth_type, __be16 vlan_tci, bool log) 2362 { 2363 u8 mac_proto = ovs_key_mac_proto(key); 2364 const struct nlattr *a; 2365 int rem, err; 2366 2367 if (depth >= SAMPLE_ACTION_DEPTH) 2368 return -EOVERFLOW; 2369 2370 nla_for_each_nested(a, attr, rem) { 2371 /* Expected argument lengths, (u32)-1 for variable length. */ 2372 static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = { 2373 [OVS_ACTION_ATTR_OUTPUT] = sizeof(u32), 2374 [OVS_ACTION_ATTR_RECIRC] = sizeof(u32), 2375 [OVS_ACTION_ATTR_USERSPACE] = (u32)-1, 2376 [OVS_ACTION_ATTR_PUSH_MPLS] = sizeof(struct ovs_action_push_mpls), 2377 [OVS_ACTION_ATTR_POP_MPLS] = sizeof(__be16), 2378 [OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan), 2379 [OVS_ACTION_ATTR_POP_VLAN] = 0, 2380 [OVS_ACTION_ATTR_SET] = (u32)-1, 2381 [OVS_ACTION_ATTR_SET_MASKED] = (u32)-1, 2382 [OVS_ACTION_ATTR_SAMPLE] = (u32)-1, 2383 [OVS_ACTION_ATTR_HASH] = sizeof(struct ovs_action_hash), 2384 [OVS_ACTION_ATTR_CT] = (u32)-1, 2385 [OVS_ACTION_ATTR_TRUNC] = sizeof(struct ovs_action_trunc), 2386 [OVS_ACTION_ATTR_PUSH_ETH] = sizeof(struct ovs_action_push_eth), 2387 [OVS_ACTION_ATTR_POP_ETH] = 0, 2388 }; 2389 const struct ovs_action_push_vlan *vlan; 2390 int type = nla_type(a); 2391 bool skip_copy; 2392 2393 if (type > OVS_ACTION_ATTR_MAX || 2394 (action_lens[type] != nla_len(a) && 2395 action_lens[type] != (u32)-1)) 2396 return -EINVAL; 2397 2398 skip_copy = false; 2399 switch (type) { 2400 case OVS_ACTION_ATTR_UNSPEC: 2401 return -EINVAL; 2402 2403 case OVS_ACTION_ATTR_USERSPACE: 2404 err = validate_userspace(a); 2405 if (err) 2406 return err; 2407 break; 2408 2409 case OVS_ACTION_ATTR_OUTPUT: 2410 if (nla_get_u32(a) >= DP_MAX_PORTS) 2411 return -EINVAL; 2412 break; 2413 2414 case OVS_ACTION_ATTR_TRUNC: { 2415 const struct ovs_action_trunc *trunc = nla_data(a); 2416 2417 if (trunc->max_len < ETH_HLEN) 2418 return -EINVAL; 2419 break; 2420 } 2421 2422 case OVS_ACTION_ATTR_HASH: { 2423 const struct ovs_action_hash *act_hash = nla_data(a); 2424 2425 switch (act_hash->hash_alg) { 2426 case OVS_HASH_ALG_L4: 2427 break; 2428 default: 2429 return -EINVAL; 2430 } 2431 2432 break; 2433 } 2434 2435 case OVS_ACTION_ATTR_POP_VLAN: 2436 if (mac_proto != MAC_PROTO_ETHERNET) 2437 return -EINVAL; 2438 vlan_tci = htons(0); 2439 break; 2440 2441 case OVS_ACTION_ATTR_PUSH_VLAN: 2442 if (mac_proto != MAC_PROTO_ETHERNET) 2443 return -EINVAL; 2444 vlan = nla_data(a); 2445 if (!eth_type_vlan(vlan->vlan_tpid)) 2446 return -EINVAL; 2447 if (!(vlan->vlan_tci & htons(VLAN_TAG_PRESENT))) 2448 return -EINVAL; 2449 vlan_tci = vlan->vlan_tci; 2450 break; 2451 2452 case OVS_ACTION_ATTR_RECIRC: 2453 break; 2454 2455 case OVS_ACTION_ATTR_PUSH_MPLS: { 2456 const struct ovs_action_push_mpls *mpls = nla_data(a); 2457 2458 if (!eth_p_mpls(mpls->mpls_ethertype)) 2459 return -EINVAL; 2460 /* Prohibit push MPLS other than to a white list 2461 * for packets that have a known tag order. 2462 */ 2463 if (vlan_tci & htons(VLAN_TAG_PRESENT) || 2464 (eth_type != htons(ETH_P_IP) && 2465 eth_type != htons(ETH_P_IPV6) && 2466 eth_type != htons(ETH_P_ARP) && 2467 eth_type != htons(ETH_P_RARP) && 2468 !eth_p_mpls(eth_type))) 2469 return -EINVAL; 2470 eth_type = mpls->mpls_ethertype; 2471 break; 2472 } 2473 2474 case OVS_ACTION_ATTR_POP_MPLS: 2475 if (vlan_tci & htons(VLAN_TAG_PRESENT) || 2476 !eth_p_mpls(eth_type)) 2477 return -EINVAL; 2478 2479 /* Disallow subsequent L2.5+ set and mpls_pop actions 2480 * as there is no check here to ensure that the new 2481 * eth_type is valid and thus set actions could 2482 * write off the end of the packet or otherwise 2483 * corrupt it. 2484 * 2485 * Support for these actions is planned using packet 2486 * recirculation. 2487 */ 2488 eth_type = htons(0); 2489 break; 2490 2491 case OVS_ACTION_ATTR_SET: 2492 err = validate_set(a, key, sfa, 2493 &skip_copy, mac_proto, eth_type, 2494 false, log); 2495 if (err) 2496 return err; 2497 break; 2498 2499 case OVS_ACTION_ATTR_SET_MASKED: 2500 err = validate_set(a, key, sfa, 2501 &skip_copy, mac_proto, eth_type, 2502 true, log); 2503 if (err) 2504 return err; 2505 break; 2506 2507 case OVS_ACTION_ATTR_SAMPLE: 2508 err = validate_and_copy_sample(net, a, key, depth, sfa, 2509 eth_type, vlan_tci, log); 2510 if (err) 2511 return err; 2512 skip_copy = true; 2513 break; 2514 2515 case OVS_ACTION_ATTR_CT: 2516 err = ovs_ct_copy_action(net, a, key, sfa, log); 2517 if (err) 2518 return err; 2519 skip_copy = true; 2520 break; 2521 2522 case OVS_ACTION_ATTR_PUSH_ETH: 2523 /* Disallow pushing an Ethernet header if one 2524 * is already present */ 2525 if (mac_proto != MAC_PROTO_NONE) 2526 return -EINVAL; 2527 mac_proto = MAC_PROTO_NONE; 2528 break; 2529 2530 case OVS_ACTION_ATTR_POP_ETH: 2531 if (mac_proto != MAC_PROTO_ETHERNET) 2532 return -EINVAL; 2533 if (vlan_tci & htons(VLAN_TAG_PRESENT)) 2534 return -EINVAL; 2535 mac_proto = MAC_PROTO_ETHERNET; 2536 break; 2537 2538 default: 2539 OVS_NLERR(log, "Unknown Action type %d", type); 2540 return -EINVAL; 2541 } 2542 if (!skip_copy) { 2543 err = copy_action(a, sfa, log); 2544 if (err) 2545 return err; 2546 } 2547 } 2548 2549 if (rem > 0) 2550 return -EINVAL; 2551 2552 return 0; 2553 } 2554 2555 /* 'key' must be the masked key. */ 2556 int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, 2557 const struct sw_flow_key *key, 2558 struct sw_flow_actions **sfa, bool log) 2559 { 2560 int err; 2561 2562 *sfa = nla_alloc_flow_actions(nla_len(attr), log); 2563 if (IS_ERR(*sfa)) 2564 return PTR_ERR(*sfa); 2565 2566 (*sfa)->orig_len = nla_len(attr); 2567 err = __ovs_nla_copy_actions(net, attr, key, 0, sfa, key->eth.type, 2568 key->eth.vlan.tci, log); 2569 if (err) 2570 ovs_nla_free_flow_actions(*sfa); 2571 2572 return err; 2573 } 2574 2575 static int sample_action_to_attr(const struct nlattr *attr, struct sk_buff *skb) 2576 { 2577 const struct nlattr *a; 2578 struct nlattr *start; 2579 int err = 0, rem; 2580 2581 start = nla_nest_start(skb, OVS_ACTION_ATTR_SAMPLE); 2582 if (!start) 2583 return -EMSGSIZE; 2584 2585 nla_for_each_nested(a, attr, rem) { 2586 int type = nla_type(a); 2587 struct nlattr *st_sample; 2588 2589 switch (type) { 2590 case OVS_SAMPLE_ATTR_PROBABILITY: 2591 if (nla_put(skb, OVS_SAMPLE_ATTR_PROBABILITY, 2592 sizeof(u32), nla_data(a))) 2593 return -EMSGSIZE; 2594 break; 2595 case OVS_SAMPLE_ATTR_ACTIONS: 2596 st_sample = nla_nest_start(skb, OVS_SAMPLE_ATTR_ACTIONS); 2597 if (!st_sample) 2598 return -EMSGSIZE; 2599 err = ovs_nla_put_actions(nla_data(a), nla_len(a), skb); 2600 if (err) 2601 return err; 2602 nla_nest_end(skb, st_sample); 2603 break; 2604 } 2605 } 2606 2607 nla_nest_end(skb, start); 2608 return err; 2609 } 2610 2611 static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb) 2612 { 2613 const struct nlattr *ovs_key = nla_data(a); 2614 int key_type = nla_type(ovs_key); 2615 struct nlattr *start; 2616 int err; 2617 2618 switch (key_type) { 2619 case OVS_KEY_ATTR_TUNNEL_INFO: { 2620 struct ovs_tunnel_info *ovs_tun = nla_data(ovs_key); 2621 struct ip_tunnel_info *tun_info = &ovs_tun->tun_dst->u.tun_info; 2622 2623 start = nla_nest_start(skb, OVS_ACTION_ATTR_SET); 2624 if (!start) 2625 return -EMSGSIZE; 2626 2627 err = ip_tun_to_nlattr(skb, &tun_info->key, 2628 ip_tunnel_info_opts(tun_info), 2629 tun_info->options_len, 2630 ip_tunnel_info_af(tun_info)); 2631 if (err) 2632 return err; 2633 nla_nest_end(skb, start); 2634 break; 2635 } 2636 default: 2637 if (nla_put(skb, OVS_ACTION_ATTR_SET, nla_len(a), ovs_key)) 2638 return -EMSGSIZE; 2639 break; 2640 } 2641 2642 return 0; 2643 } 2644 2645 static int masked_set_action_to_set_action_attr(const struct nlattr *a, 2646 struct sk_buff *skb) 2647 { 2648 const struct nlattr *ovs_key = nla_data(a); 2649 struct nlattr *nla; 2650 size_t key_len = nla_len(ovs_key) / 2; 2651 2652 /* Revert the conversion we did from a non-masked set action to 2653 * masked set action. 2654 */ 2655 nla = nla_nest_start(skb, OVS_ACTION_ATTR_SET); 2656 if (!nla) 2657 return -EMSGSIZE; 2658 2659 if (nla_put(skb, nla_type(ovs_key), key_len, nla_data(ovs_key))) 2660 return -EMSGSIZE; 2661 2662 nla_nest_end(skb, nla); 2663 return 0; 2664 } 2665 2666 int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb) 2667 { 2668 const struct nlattr *a; 2669 int rem, err; 2670 2671 nla_for_each_attr(a, attr, len, rem) { 2672 int type = nla_type(a); 2673 2674 switch (type) { 2675 case OVS_ACTION_ATTR_SET: 2676 err = set_action_to_attr(a, skb); 2677 if (err) 2678 return err; 2679 break; 2680 2681 case OVS_ACTION_ATTR_SET_TO_MASKED: 2682 err = masked_set_action_to_set_action_attr(a, skb); 2683 if (err) 2684 return err; 2685 break; 2686 2687 case OVS_ACTION_ATTR_SAMPLE: 2688 err = sample_action_to_attr(a, skb); 2689 if (err) 2690 return err; 2691 break; 2692 2693 case OVS_ACTION_ATTR_CT: 2694 err = ovs_ct_action_to_attr(nla_data(a), skb); 2695 if (err) 2696 return err; 2697 break; 2698 2699 default: 2700 if (nla_put(skb, type, nla_len(a), nla_data(a))) 2701 return -EMSGSIZE; 2702 break; 2703 } 2704 } 2705 2706 return 0; 2707 } 2708