1 /* 2 * Copyright (c) 2007-2012 Nicira, Inc. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of version 2 of the GNU General Public 6 * License as published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful, but 9 * WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 * General Public License for more details. 12 * 13 * You should have received a copy of the GNU General Public License 14 * along with this program; if not, write to the Free Software 15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 16 * 02110-1301, USA 17 */ 18 19 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 20 21 #include <linux/if_arp.h> 22 #include <linux/if_bridge.h> 23 #include <linux/if_vlan.h> 24 #include <linux/kernel.h> 25 #include <linux/llc.h> 26 #include <linux/rtnetlink.h> 27 #include <linux/skbuff.h> 28 #include <linux/openvswitch.h> 29 30 #include <net/udp.h> 31 #include <net/ip_tunnels.h> 32 #include <net/rtnetlink.h> 33 #include <net/vxlan.h> 34 35 #include "datapath.h" 36 #include "vport.h" 37 #include "vport-internal_dev.h" 38 #include "vport-netdev.h" 39 40 static struct vport_ops ovs_netdev_vport_ops; 41 42 /* Must be called with rcu_read_lock. */ 43 static void netdev_port_receive(struct vport *vport, struct sk_buff *skb) 44 { 45 if (unlikely(!vport)) 46 goto error; 47 48 if (unlikely(skb_warn_if_lro(skb))) 49 goto error; 50 51 /* Make our own copy of the packet. Otherwise we will mangle the 52 * packet for anyone who came before us (e.g. tcpdump via AF_PACKET). 53 */ 54 skb = skb_share_check(skb, GFP_ATOMIC); 55 if (unlikely(!skb)) 56 return; 57 58 skb_push(skb, ETH_HLEN); 59 ovs_skb_postpush_rcsum(skb, skb->data, ETH_HLEN); 60 61 ovs_vport_receive(vport, skb, NULL); 62 return; 63 64 error: 65 kfree_skb(skb); 66 } 67 68 /* Called with rcu_read_lock and bottom-halves disabled. */ 69 static rx_handler_result_t netdev_frame_hook(struct sk_buff **pskb) 70 { 71 struct sk_buff *skb = *pskb; 72 struct vport *vport; 73 74 if (unlikely(skb->pkt_type == PACKET_LOOPBACK)) 75 return RX_HANDLER_PASS; 76 77 vport = ovs_netdev_get_vport(skb->dev); 78 79 netdev_port_receive(vport, skb); 80 81 return RX_HANDLER_CONSUMED; 82 } 83 84 static struct net_device *get_dpdev(const struct datapath *dp) 85 { 86 struct vport *local; 87 88 local = ovs_vport_ovsl(dp, OVSP_LOCAL); 89 BUG_ON(!local); 90 return local->dev; 91 } 92 93 static struct vport *netdev_link(struct vport *vport, const char *name) 94 { 95 int err; 96 97 vport->dev = dev_get_by_name(ovs_dp_get_net(vport->dp), name); 98 if (!vport->dev) { 99 err = -ENODEV; 100 goto error_free_vport; 101 } 102 103 if (vport->dev->flags & IFF_LOOPBACK || 104 vport->dev->type != ARPHRD_ETHER || 105 ovs_is_internal_dev(vport->dev)) { 106 err = -EINVAL; 107 goto error_put; 108 } 109 110 rtnl_lock(); 111 err = netdev_master_upper_dev_link(vport->dev, 112 get_dpdev(vport->dp)); 113 if (err) 114 goto error_unlock; 115 116 err = netdev_rx_handler_register(vport->dev, netdev_frame_hook, 117 vport); 118 if (err) 119 goto error_master_upper_dev_unlink; 120 121 dev_disable_lro(vport->dev); 122 dev_set_promiscuity(vport->dev, 1); 123 vport->dev->priv_flags |= IFF_OVS_DATAPATH; 124 rtnl_unlock(); 125 126 return vport; 127 128 error_master_upper_dev_unlink: 129 netdev_upper_dev_unlink(vport->dev, get_dpdev(vport->dp)); 130 error_unlock: 131 rtnl_unlock(); 132 error_put: 133 dev_put(vport->dev); 134 error_free_vport: 135 ovs_vport_free(vport); 136 return ERR_PTR(err); 137 } 138 139 static struct vport *netdev_create(const struct vport_parms *parms) 140 { 141 struct vport *vport; 142 143 vport = ovs_vport_alloc(0, &ovs_netdev_vport_ops, parms); 144 if (IS_ERR(vport)) 145 return vport; 146 147 return netdev_link(vport, parms->name); 148 } 149 150 static void free_port_rcu(struct rcu_head *rcu) 151 { 152 struct vport *vport = container_of(rcu, struct vport, rcu); 153 154 if (vport->dev) 155 dev_put(vport->dev); 156 ovs_vport_free(vport); 157 } 158 159 void ovs_netdev_detach_dev(struct vport *vport) 160 { 161 ASSERT_RTNL(); 162 vport->dev->priv_flags &= ~IFF_OVS_DATAPATH; 163 netdev_rx_handler_unregister(vport->dev); 164 netdev_upper_dev_unlink(vport->dev, 165 netdev_master_upper_dev_get(vport->dev)); 166 dev_set_promiscuity(vport->dev, -1); 167 } 168 169 static void netdev_destroy(struct vport *vport) 170 { 171 rtnl_lock(); 172 if (vport->dev->priv_flags & IFF_OVS_DATAPATH) 173 ovs_netdev_detach_dev(vport); 174 rtnl_unlock(); 175 176 call_rcu(&vport->rcu, free_port_rcu); 177 } 178 179 static unsigned int packet_length(const struct sk_buff *skb) 180 { 181 unsigned int length = skb->len - ETH_HLEN; 182 183 if (skb->protocol == htons(ETH_P_8021Q)) 184 length -= VLAN_HLEN; 185 186 return length; 187 } 188 189 static int netdev_send(struct vport *vport, struct sk_buff *skb) 190 { 191 int mtu = vport->dev->mtu; 192 int len; 193 194 if (unlikely(packet_length(skb) > mtu && !skb_is_gso(skb))) { 195 net_warn_ratelimited("%s: dropped over-mtu packet: %d > %d\n", 196 vport->dev->name, 197 packet_length(skb), mtu); 198 goto drop; 199 } 200 201 skb->dev = vport->dev; 202 len = skb->len; 203 dev_queue_xmit(skb); 204 205 return len; 206 207 drop: 208 kfree_skb(skb); 209 return 0; 210 } 211 212 /* Returns null if this device is not attached to a datapath. */ 213 struct vport *ovs_netdev_get_vport(struct net_device *dev) 214 { 215 if (likely(dev->priv_flags & IFF_OVS_DATAPATH)) 216 return (struct vport *) 217 rcu_dereference_rtnl(dev->rx_handler_data); 218 else 219 return NULL; 220 } 221 222 static struct vport_ops ovs_netdev_vport_ops = { 223 .type = OVS_VPORT_TYPE_NETDEV, 224 .create = netdev_create, 225 .destroy = netdev_destroy, 226 .send = netdev_send, 227 }; 228 229 /* Compat code for old userspace. */ 230 #if IS_ENABLED(CONFIG_VXLAN) 231 static struct vport_ops ovs_vxlan_netdev_vport_ops; 232 233 static int vxlan_get_options(const struct vport *vport, struct sk_buff *skb) 234 { 235 struct vxlan_dev *vxlan = netdev_priv(vport->dev); 236 __be16 dst_port = vxlan->cfg.dst_port; 237 238 if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, ntohs(dst_port))) 239 return -EMSGSIZE; 240 241 if (vxlan->flags & VXLAN_F_GBP) { 242 struct nlattr *exts; 243 244 exts = nla_nest_start(skb, OVS_TUNNEL_ATTR_EXTENSION); 245 if (!exts) 246 return -EMSGSIZE; 247 248 if (vxlan->flags & VXLAN_F_GBP && 249 nla_put_flag(skb, OVS_VXLAN_EXT_GBP)) 250 return -EMSGSIZE; 251 252 nla_nest_end(skb, exts); 253 } 254 255 return 0; 256 } 257 258 static const struct nla_policy exts_policy[OVS_VXLAN_EXT_MAX + 1] = { 259 [OVS_VXLAN_EXT_GBP] = { .type = NLA_FLAG, }, 260 }; 261 262 static int vxlan_configure_exts(struct vport *vport, struct nlattr *attr, 263 struct vxlan_config *conf) 264 { 265 struct nlattr *exts[OVS_VXLAN_EXT_MAX + 1]; 266 int err; 267 268 if (nla_len(attr) < sizeof(struct nlattr)) 269 return -EINVAL; 270 271 err = nla_parse_nested(exts, OVS_VXLAN_EXT_MAX, attr, exts_policy); 272 if (err < 0) 273 return err; 274 275 if (exts[OVS_VXLAN_EXT_GBP]) 276 conf->flags |= VXLAN_F_GBP; 277 278 return 0; 279 } 280 281 static struct vport *vxlan_tnl_create(const struct vport_parms *parms) 282 { 283 struct net *net = ovs_dp_get_net(parms->dp); 284 struct nlattr *options = parms->options; 285 struct net_device *dev; 286 struct vport *vport; 287 struct nlattr *a; 288 int err; 289 struct vxlan_config conf = { 290 .no_share = true, 291 .flags = VXLAN_F_FLOW_BASED | VXLAN_F_COLLECT_METADATA, 292 }; 293 294 if (!options) { 295 err = -EINVAL; 296 goto error; 297 } 298 299 a = nla_find_nested(options, OVS_TUNNEL_ATTR_DST_PORT); 300 if (a && nla_len(a) == sizeof(u16)) { 301 conf.dst_port = htons(nla_get_u16(a)); 302 } else { 303 /* Require destination port from userspace. */ 304 err = -EINVAL; 305 goto error; 306 } 307 308 vport = ovs_vport_alloc(0, &ovs_vxlan_netdev_vport_ops, parms); 309 if (IS_ERR(vport)) 310 return vport; 311 312 a = nla_find_nested(options, OVS_TUNNEL_ATTR_EXTENSION); 313 if (a) { 314 err = vxlan_configure_exts(vport, a, &conf); 315 if (err) { 316 ovs_vport_free(vport); 317 goto error; 318 } 319 } 320 321 rtnl_lock(); 322 dev = vxlan_dev_create(net, parms->name, NET_NAME_USER, &conf); 323 if (IS_ERR(dev)) { 324 rtnl_unlock(); 325 ovs_vport_free(vport); 326 return ERR_CAST(dev); 327 } 328 329 dev_change_flags(dev, dev->flags | IFF_UP); 330 rtnl_unlock(); 331 return vport; 332 error: 333 return ERR_PTR(err); 334 } 335 336 static struct vport *vxlan_create(const struct vport_parms *parms) 337 { 338 struct vport *vport; 339 340 vport = vxlan_tnl_create(parms); 341 if (IS_ERR(vport)) 342 return vport; 343 344 return netdev_link(vport, parms->name); 345 } 346 347 static void vxlan_destroy(struct vport *vport) 348 { 349 rtnl_lock(); 350 if (vport->dev->priv_flags & IFF_OVS_DATAPATH) 351 ovs_netdev_detach_dev(vport); 352 353 /* Early release so we can unregister the device */ 354 dev_put(vport->dev); 355 rtnl_delete_link(vport->dev); 356 vport->dev = NULL; 357 rtnl_unlock(); 358 359 call_rcu(&vport->rcu, free_port_rcu); 360 } 361 362 static int vxlan_get_egress_tun_info(struct vport *vport, struct sk_buff *skb, 363 struct ip_tunnel_info *egress_tun_info) 364 { 365 struct vxlan_dev *vxlan = netdev_priv(vport->dev); 366 struct net *net = ovs_dp_get_net(vport->dp); 367 __be16 dst_port = vxlan_dev_dst_port(vxlan); 368 __be16 src_port; 369 int port_min; 370 int port_max; 371 372 inet_get_local_port_range(net, &port_min, &port_max); 373 src_port = udp_flow_src_port(net, skb, 0, 0, true); 374 375 return ovs_tunnel_get_egress_info(egress_tun_info, net, 376 OVS_CB(skb)->egress_tun_info, 377 IPPROTO_UDP, skb->mark, 378 src_port, dst_port); 379 } 380 381 static struct vport_ops ovs_vxlan_netdev_vport_ops = { 382 .type = OVS_VPORT_TYPE_VXLAN, 383 .create = vxlan_create, 384 .destroy = vxlan_destroy, 385 .get_options = vxlan_get_options, 386 .send = netdev_send, 387 .get_egress_tun_info = vxlan_get_egress_tun_info, 388 }; 389 390 static int vxlan_compat_init(void) 391 { 392 return ovs_vport_ops_register(&ovs_vxlan_netdev_vport_ops); 393 } 394 395 static void vxlan_compat_exit(void) 396 { 397 ovs_vport_ops_unregister(&ovs_vxlan_netdev_vport_ops); 398 } 399 #else 400 static int vxlan_compat_init(void) 401 { 402 return 0; 403 } 404 405 static void vxlan_compat_exit(void) 406 { 407 } 408 #endif 409 410 int __init ovs_netdev_init(void) 411 { 412 int err; 413 414 err = ovs_vport_ops_register(&ovs_netdev_vport_ops); 415 if (err) 416 return err; 417 err = vxlan_compat_init(); 418 if (err) 419 vxlan_compat_exit(); 420 return err; 421 } 422 423 void ovs_netdev_exit(void) 424 { 425 ovs_vport_ops_unregister(&ovs_netdev_vport_ops); 426 vxlan_compat_exit(); 427 } 428