1 /* 2 * Linux NET3: GRE over IP protocol decoder. 3 * 4 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru) 5 * 6 * This program is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU General Public License 8 * as published by the Free Software Foundation; either version 9 * 2 of the License, or (at your option) any later version. 10 * 11 */ 12 13 #include <linux/capability.h> 14 #include <linux/module.h> 15 #include <linux/types.h> 16 #include <linux/kernel.h> 17 #include <asm/uaccess.h> 18 #include <linux/skbuff.h> 19 #include <linux/netdevice.h> 20 #include <linux/in.h> 21 #include <linux/tcp.h> 22 #include <linux/udp.h> 23 #include <linux/if_arp.h> 24 #include <linux/mroute.h> 25 #include <linux/init.h> 26 #include <linux/in6.h> 27 #include <linux/inetdevice.h> 28 #include <linux/igmp.h> 29 #include <linux/netfilter_ipv4.h> 30 #include <linux/etherdevice.h> 31 #include <linux/if_ether.h> 32 33 #include <net/sock.h> 34 #include <net/ip.h> 35 #include <net/icmp.h> 36 #include <net/protocol.h> 37 #include <net/ipip.h> 38 #include <net/arp.h> 39 #include <net/checksum.h> 40 #include <net/dsfield.h> 41 #include <net/inet_ecn.h> 42 #include <net/xfrm.h> 43 #include <net/net_namespace.h> 44 #include <net/netns/generic.h> 45 #include <net/rtnetlink.h> 46 47 #ifdef CONFIG_IPV6 48 #include <net/ipv6.h> 49 #include <net/ip6_fib.h> 50 #include <net/ip6_route.h> 51 #endif 52 53 /* 54 Problems & solutions 55 -------------------- 56 57 1. The most important issue is detecting local dead loops. 58 They would cause complete host lockup in transmit, which 59 would be "resolved" by stack overflow or, if queueing is enabled, 60 with infinite looping in net_bh. 61 62 We cannot track such dead loops during route installation, 63 it is infeasible task. The most general solutions would be 64 to keep skb->encapsulation counter (sort of local ttl), 65 and silently drop packet when it expires. 
It is the best
   solution, but it supposes maintaining new variable in ALL
   skb, even if no tunneling is used.

   Current solution: t->recursion lock breaks dead loops. It looks
   like dev->tbusy flag, but I preferred new variable, because
   the semantics is different. One day, when hard_start_xmit
   will be multithreaded we will have to use skb->encapsulation.



   2. Networking dead loops would not kill routers, but would really
   kill network. IP hop limit plays role of "t->recursion" in this case,
   if we copy it from packet being encapsulated to upper header.
   It is very good solution, but it introduces two problems:

   - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
     do not work over tunnels.
   - traceroute does not work. I planned to relay ICMP from tunnel,
     so that this problem would be solved and traceroute output
     would be even more informative. This idea appeared to be wrong:
     only Linux complies to rfc1812 now (yes, guys, Linux is the only
     true router now :-)), all routers (at least, in neighbourhood of mine)
     return only 8 bytes of payload. It is the end.

   Hence, if we want that OSPF worked or traceroute said something reasonable,
   we should search for another solution.

   One of them is to parse packet trying to detect inner encapsulation
   made by our node. It is difficult or even impossible, especially,
   taking into account fragmentation. To be short, it is not a solution at all.

   Current solution: The solution was UNEXPECTEDLY SIMPLE.
   We force DF flag on tunnels with preconfigured hop limit,
   that is ALL. :-) Well, it does not remove the problem completely,
   but exponential growth of network traffic is changed to linear
   (branches, that exceed pmtu are pruned) and tunnel mtu
   quickly degrades to value <68, where looping stops.
   Yes, it is not good if there exists a router in the loop,
   which does not force DF, even when encapsulating packets have DF set.
105 But it is not our problem! Nobody could accuse us, we made 106 all that we could make. Even if it is your gated who injected 107 fatal route to network, even if it were you who configured 108 fatal static route: you are innocent. :-) 109 110 111 112 3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain 113 practically identical code. It would be good to glue them 114 together, but it is not very evident, how to make them modular. 115 sit is integral part of IPv6, ipip and gre are naturally modular. 116 We could extract common parts (hash table, ioctl etc) 117 to a separate module (ip_tunnel.c). 118 119 Alexey Kuznetsov. 120 */ 121 122 static struct rtnl_link_ops ipgre_link_ops __read_mostly; 123 static int ipgre_tunnel_init(struct net_device *dev); 124 static void ipgre_tunnel_setup(struct net_device *dev); 125 static int ipgre_tunnel_bind_dev(struct net_device *dev); 126 127 /* Fallback tunnel: no source, no destination, no key, no options */ 128 129 static int ipgre_fb_tunnel_init(struct net_device *dev); 130 131 #define HASH_SIZE 16 132 133 static int ipgre_net_id; 134 struct ipgre_net { 135 struct ip_tunnel *tunnels[4][HASH_SIZE]; 136 137 struct net_device *fb_tunnel_dev; 138 }; 139 140 /* Tunnel hash table */ 141 142 /* 143 4 hash tables: 144 145 3: (remote,local) 146 2: (remote,*) 147 1: (*,local) 148 0: (*,*) 149 150 We require exact key match i.e. if a key is present in packet 151 it will match only tunnel with the same key; if it is not present, 152 it will match only keyless tunnel. 153 154 All keysless packets, if not matched configured keyless tunnels 155 will match fallback tunnel. 156 */ 157 158 #define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF) 159 160 #define tunnels_r_l tunnels[3] 161 #define tunnels_r tunnels[2] 162 #define tunnels_l tunnels[1] 163 #define tunnels_wc tunnels[0] 164 165 static DEFINE_RWLOCK(ipgre_lock); 166 167 /* Given src, dst and key, find appropriate for input tunnel. 
*/ 168 169 static struct ip_tunnel * ipgre_tunnel_lookup(struct net *net, 170 __be32 remote, __be32 local, 171 __be32 key, __be16 gre_proto) 172 { 173 unsigned h0 = HASH(remote); 174 unsigned h1 = HASH(key); 175 struct ip_tunnel *t; 176 struct ip_tunnel *t2 = NULL; 177 struct ipgre_net *ign = net_generic(net, ipgre_net_id); 178 int dev_type = (gre_proto == htons(ETH_P_TEB)) ? 179 ARPHRD_ETHER : ARPHRD_IPGRE; 180 181 for (t = ign->tunnels_r_l[h0^h1]; t; t = t->next) { 182 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) { 183 if (t->parms.i_key == key && t->dev->flags & IFF_UP) { 184 if (t->dev->type == dev_type) 185 return t; 186 if (t->dev->type == ARPHRD_IPGRE && !t2) 187 t2 = t; 188 } 189 } 190 } 191 192 for (t = ign->tunnels_r[h0^h1]; t; t = t->next) { 193 if (remote == t->parms.iph.daddr) { 194 if (t->parms.i_key == key && t->dev->flags & IFF_UP) { 195 if (t->dev->type == dev_type) 196 return t; 197 if (t->dev->type == ARPHRD_IPGRE && !t2) 198 t2 = t; 199 } 200 } 201 } 202 203 for (t = ign->tunnels_l[h1]; t; t = t->next) { 204 if (local == t->parms.iph.saddr || 205 (local == t->parms.iph.daddr && 206 ipv4_is_multicast(local))) { 207 if (t->parms.i_key == key && t->dev->flags & IFF_UP) { 208 if (t->dev->type == dev_type) 209 return t; 210 if (t->dev->type == ARPHRD_IPGRE && !t2) 211 t2 = t; 212 } 213 } 214 } 215 216 for (t = ign->tunnels_wc[h1]; t; t = t->next) { 217 if (t->parms.i_key == key && t->dev->flags & IFF_UP) { 218 if (t->dev->type == dev_type) 219 return t; 220 if (t->dev->type == ARPHRD_IPGRE && !t2) 221 t2 = t; 222 } 223 } 224 225 if (t2) 226 return t2; 227 228 if (ign->fb_tunnel_dev->flags&IFF_UP) 229 return netdev_priv(ign->fb_tunnel_dev); 230 return NULL; 231 } 232 233 static struct ip_tunnel **__ipgre_bucket(struct ipgre_net *ign, 234 struct ip_tunnel_parm *parms) 235 { 236 __be32 remote = parms->iph.daddr; 237 __be32 local = parms->iph.saddr; 238 __be32 key = parms->i_key; 239 unsigned h = HASH(key); 240 int prio = 0; 241 242 
if (local) 243 prio |= 1; 244 if (remote && !ipv4_is_multicast(remote)) { 245 prio |= 2; 246 h ^= HASH(remote); 247 } 248 249 return &ign->tunnels[prio][h]; 250 } 251 252 static inline struct ip_tunnel **ipgre_bucket(struct ipgre_net *ign, 253 struct ip_tunnel *t) 254 { 255 return __ipgre_bucket(ign, &t->parms); 256 } 257 258 static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t) 259 { 260 struct ip_tunnel **tp = ipgre_bucket(ign, t); 261 262 t->next = *tp; 263 write_lock_bh(&ipgre_lock); 264 *tp = t; 265 write_unlock_bh(&ipgre_lock); 266 } 267 268 static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t) 269 { 270 struct ip_tunnel **tp; 271 272 for (tp = ipgre_bucket(ign, t); *tp; tp = &(*tp)->next) { 273 if (t == *tp) { 274 write_lock_bh(&ipgre_lock); 275 *tp = t->next; 276 write_unlock_bh(&ipgre_lock); 277 break; 278 } 279 } 280 } 281 282 static struct ip_tunnel *ipgre_tunnel_find(struct net *net, 283 struct ip_tunnel_parm *parms, 284 int type) 285 { 286 __be32 remote = parms->iph.daddr; 287 __be32 local = parms->iph.saddr; 288 __be32 key = parms->i_key; 289 struct ip_tunnel *t, **tp; 290 struct ipgre_net *ign = net_generic(net, ipgre_net_id); 291 292 for (tp = __ipgre_bucket(ign, parms); (t = *tp) != NULL; tp = &t->next) 293 if (local == t->parms.iph.saddr && 294 remote == t->parms.iph.daddr && 295 key == t->parms.i_key && 296 type == t->dev->type) 297 break; 298 299 return t; 300 } 301 302 static struct ip_tunnel * ipgre_tunnel_locate(struct net *net, 303 struct ip_tunnel_parm *parms, int create) 304 { 305 struct ip_tunnel *t, *nt; 306 struct net_device *dev; 307 char name[IFNAMSIZ]; 308 struct ipgre_net *ign = net_generic(net, ipgre_net_id); 309 310 t = ipgre_tunnel_find(net, parms, ARPHRD_IPGRE); 311 if (t || !create) 312 return t; 313 314 if (parms->name[0]) 315 strlcpy(name, parms->name, IFNAMSIZ); 316 else 317 sprintf(name, "gre%%d"); 318 319 dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup); 320 if (!dev) 321 
return NULL; 322 323 dev_net_set(dev, net); 324 325 if (strchr(name, '%')) { 326 if (dev_alloc_name(dev, name) < 0) 327 goto failed_free; 328 } 329 330 nt = netdev_priv(dev); 331 nt->parms = *parms; 332 dev->rtnl_link_ops = &ipgre_link_ops; 333 334 dev->mtu = ipgre_tunnel_bind_dev(dev); 335 336 if (register_netdevice(dev) < 0) 337 goto failed_free; 338 339 dev_hold(dev); 340 ipgre_tunnel_link(ign, nt); 341 return nt; 342 343 failed_free: 344 free_netdev(dev); 345 return NULL; 346 } 347 348 static void ipgre_tunnel_uninit(struct net_device *dev) 349 { 350 struct net *net = dev_net(dev); 351 struct ipgre_net *ign = net_generic(net, ipgre_net_id); 352 353 ipgre_tunnel_unlink(ign, netdev_priv(dev)); 354 dev_put(dev); 355 } 356 357 358 static void ipgre_err(struct sk_buff *skb, u32 info) 359 { 360 361 /* All the routers (except for Linux) return only 362 8 bytes of packet payload. It means, that precise relaying of 363 ICMP in the real Internet is absolutely infeasible. 364 365 Moreover, Cisco "wise men" put GRE key to the third word 366 in GRE header. It makes impossible maintaining even soft state for keyed 367 GRE tunnels with enabled checksum. Tell them "thank you". 368 369 Well, I wonder, rfc1812 was written by Cisco employee, 370 what the hell these idiots break standrads established 371 by themself??? 
372 */ 373 374 struct iphdr *iph = (struct iphdr*)skb->data; 375 __be16 *p = (__be16*)(skb->data+(iph->ihl<<2)); 376 int grehlen = (iph->ihl<<2) + 4; 377 const int type = icmp_hdr(skb)->type; 378 const int code = icmp_hdr(skb)->code; 379 struct ip_tunnel *t; 380 __be16 flags; 381 382 flags = p[0]; 383 if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) { 384 if (flags&(GRE_VERSION|GRE_ROUTING)) 385 return; 386 if (flags&GRE_KEY) { 387 grehlen += 4; 388 if (flags&GRE_CSUM) 389 grehlen += 4; 390 } 391 } 392 393 /* If only 8 bytes returned, keyed message will be dropped here */ 394 if (skb_headlen(skb) < grehlen) 395 return; 396 397 switch (type) { 398 default: 399 case ICMP_PARAMETERPROB: 400 return; 401 402 case ICMP_DEST_UNREACH: 403 switch (code) { 404 case ICMP_SR_FAILED: 405 case ICMP_PORT_UNREACH: 406 /* Impossible event. */ 407 return; 408 case ICMP_FRAG_NEEDED: 409 /* Soft state for pmtu is maintained by IP core. */ 410 return; 411 default: 412 /* All others are translated to HOST_UNREACH. 413 rfc2003 contains "deep thoughts" about NET_UNREACH, 414 I believe they are just ether pollution. --ANK 415 */ 416 break; 417 } 418 break; 419 case ICMP_TIME_EXCEEDED: 420 if (code != ICMP_EXC_TTL) 421 return; 422 break; 423 } 424 425 read_lock(&ipgre_lock); 426 t = ipgre_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr, 427 flags & GRE_KEY ? 
428 *(((__be32 *)p) + (grehlen / 4) - 1) : 0, 429 p[1]); 430 if (t == NULL || t->parms.iph.daddr == 0 || 431 ipv4_is_multicast(t->parms.iph.daddr)) 432 goto out; 433 434 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) 435 goto out; 436 437 if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO) 438 t->err_count++; 439 else 440 t->err_count = 1; 441 t->err_time = jiffies; 442 out: 443 read_unlock(&ipgre_lock); 444 return; 445 } 446 447 static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb) 448 { 449 if (INET_ECN_is_ce(iph->tos)) { 450 if (skb->protocol == htons(ETH_P_IP)) { 451 IP_ECN_set_ce(ip_hdr(skb)); 452 } else if (skb->protocol == htons(ETH_P_IPV6)) { 453 IP6_ECN_set_ce(ipv6_hdr(skb)); 454 } 455 } 456 } 457 458 static inline u8 459 ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb) 460 { 461 u8 inner = 0; 462 if (skb->protocol == htons(ETH_P_IP)) 463 inner = old_iph->tos; 464 else if (skb->protocol == htons(ETH_P_IPV6)) 465 inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph); 466 return INET_ECN_encapsulate(tos, inner); 467 } 468 469 static int ipgre_rcv(struct sk_buff *skb) 470 { 471 struct iphdr *iph; 472 u8 *h; 473 __be16 flags; 474 __sum16 csum = 0; 475 __be32 key = 0; 476 u32 seqno = 0; 477 struct ip_tunnel *tunnel; 478 int offset = 4; 479 __be16 gre_proto; 480 unsigned int len; 481 482 if (!pskb_may_pull(skb, 16)) 483 goto drop_nolock; 484 485 iph = ip_hdr(skb); 486 h = skb->data; 487 flags = *(__be16*)h; 488 489 if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) { 490 /* - Version must be 0. 491 - We do not support routing headers. 
492 */ 493 if (flags&(GRE_VERSION|GRE_ROUTING)) 494 goto drop_nolock; 495 496 if (flags&GRE_CSUM) { 497 switch (skb->ip_summed) { 498 case CHECKSUM_COMPLETE: 499 csum = csum_fold(skb->csum); 500 if (!csum) 501 break; 502 /* fall through */ 503 case CHECKSUM_NONE: 504 skb->csum = 0; 505 csum = __skb_checksum_complete(skb); 506 skb->ip_summed = CHECKSUM_COMPLETE; 507 } 508 offset += 4; 509 } 510 if (flags&GRE_KEY) { 511 key = *(__be32*)(h + offset); 512 offset += 4; 513 } 514 if (flags&GRE_SEQ) { 515 seqno = ntohl(*(__be32*)(h + offset)); 516 offset += 4; 517 } 518 } 519 520 gre_proto = *(__be16 *)(h + 2); 521 522 read_lock(&ipgre_lock); 523 if ((tunnel = ipgre_tunnel_lookup(dev_net(skb->dev), 524 iph->saddr, iph->daddr, key, 525 gre_proto))) { 526 struct net_device_stats *stats = &tunnel->dev->stats; 527 528 secpath_reset(skb); 529 530 skb->protocol = gre_proto; 531 /* WCCP version 1 and 2 protocol decoding. 532 * - Change protocol to IP 533 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header 534 */ 535 if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) { 536 skb->protocol = htons(ETH_P_IP); 537 if ((*(h + offset) & 0xF0) != 0x40) 538 offset += 4; 539 } 540 541 skb->mac_header = skb->network_header; 542 __pskb_pull(skb, offset); 543 skb_postpull_rcsum(skb, skb_transport_header(skb), offset); 544 skb->pkt_type = PACKET_HOST; 545 #ifdef CONFIG_NET_IPGRE_BROADCAST 546 if (ipv4_is_multicast(iph->daddr)) { 547 /* Looped back packet, drop it! 
*/ 548 if (skb->rtable->fl.iif == 0) 549 goto drop; 550 stats->multicast++; 551 skb->pkt_type = PACKET_BROADCAST; 552 } 553 #endif 554 555 if (((flags&GRE_CSUM) && csum) || 556 (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) { 557 stats->rx_crc_errors++; 558 stats->rx_errors++; 559 goto drop; 560 } 561 if (tunnel->parms.i_flags&GRE_SEQ) { 562 if (!(flags&GRE_SEQ) || 563 (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) { 564 stats->rx_fifo_errors++; 565 stats->rx_errors++; 566 goto drop; 567 } 568 tunnel->i_seqno = seqno + 1; 569 } 570 571 len = skb->len; 572 573 /* Warning: All skb pointers will be invalidated! */ 574 if (tunnel->dev->type == ARPHRD_ETHER) { 575 if (!pskb_may_pull(skb, ETH_HLEN)) { 576 stats->rx_length_errors++; 577 stats->rx_errors++; 578 goto drop; 579 } 580 581 iph = ip_hdr(skb); 582 skb->protocol = eth_type_trans(skb, tunnel->dev); 583 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); 584 } 585 586 stats->rx_packets++; 587 stats->rx_bytes += len; 588 skb->dev = tunnel->dev; 589 dst_release(skb->dst); 590 skb->dst = NULL; 591 nf_reset(skb); 592 593 skb_reset_network_header(skb); 594 ipgre_ecn_decapsulate(iph, skb); 595 596 netif_rx(skb); 597 read_unlock(&ipgre_lock); 598 return(0); 599 } 600 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); 601 602 drop: 603 read_unlock(&ipgre_lock); 604 drop_nolock: 605 kfree_skb(skb); 606 return(0); 607 } 608 609 static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) 610 { 611 struct ip_tunnel *tunnel = netdev_priv(dev); 612 struct net_device_stats *stats = &tunnel->dev->stats; 613 struct iphdr *old_iph = ip_hdr(skb); 614 struct iphdr *tiph; 615 u8 tos; 616 __be16 df; 617 struct rtable *rt; /* Route to the other host */ 618 struct net_device *tdev; /* Device to other host */ 619 struct iphdr *iph; /* Our new IP header */ 620 unsigned int max_headroom; /* The extra header space needed */ 621 int gre_hlen; 622 __be32 dst; 623 int mtu; 624 625 if (tunnel->recursion++) { 626 
stats->collisions++; 627 goto tx_error; 628 } 629 630 if (dev->type == ARPHRD_ETHER) 631 IPCB(skb)->flags = 0; 632 633 if (dev->header_ops && dev->type == ARPHRD_IPGRE) { 634 gre_hlen = 0; 635 tiph = (struct iphdr*)skb->data; 636 } else { 637 gre_hlen = tunnel->hlen; 638 tiph = &tunnel->parms.iph; 639 } 640 641 if ((dst = tiph->daddr) == 0) { 642 /* NBMA tunnel */ 643 644 if (skb->dst == NULL) { 645 stats->tx_fifo_errors++; 646 goto tx_error; 647 } 648 649 if (skb->protocol == htons(ETH_P_IP)) { 650 rt = skb->rtable; 651 if ((dst = rt->rt_gateway) == 0) 652 goto tx_error_icmp; 653 } 654 #ifdef CONFIG_IPV6 655 else if (skb->protocol == htons(ETH_P_IPV6)) { 656 struct in6_addr *addr6; 657 int addr_type; 658 struct neighbour *neigh = skb->dst->neighbour; 659 660 if (neigh == NULL) 661 goto tx_error; 662 663 addr6 = (struct in6_addr*)&neigh->primary_key; 664 addr_type = ipv6_addr_type(addr6); 665 666 if (addr_type == IPV6_ADDR_ANY) { 667 addr6 = &ipv6_hdr(skb)->daddr; 668 addr_type = ipv6_addr_type(addr6); 669 } 670 671 if ((addr_type & IPV6_ADDR_COMPATv4) == 0) 672 goto tx_error_icmp; 673 674 dst = addr6->s6_addr32[3]; 675 } 676 #endif 677 else 678 goto tx_error; 679 } 680 681 tos = tiph->tos; 682 if (tos&1) { 683 if (skb->protocol == htons(ETH_P_IP)) 684 tos = old_iph->tos; 685 tos &= ~1; 686 } 687 688 { 689 struct flowi fl = { .oif = tunnel->parms.link, 690 .nl_u = { .ip4_u = 691 { .daddr = dst, 692 .saddr = tiph->saddr, 693 .tos = RT_TOS(tos) } }, 694 .proto = IPPROTO_GRE }; 695 if (ip_route_output_key(dev_net(dev), &rt, &fl)) { 696 stats->tx_carrier_errors++; 697 goto tx_error; 698 } 699 } 700 tdev = rt->u.dst.dev; 701 702 if (tdev == dev) { 703 ip_rt_put(rt); 704 stats->collisions++; 705 goto tx_error; 706 } 707 708 df = tiph->frag_off; 709 if (df) 710 mtu = dst_mtu(&rt->u.dst) - dev->hard_header_len - tunnel->hlen; 711 else 712 mtu = skb->dst ? 
dst_mtu(skb->dst) : dev->mtu; 713 714 if (skb->dst) 715 skb->dst->ops->update_pmtu(skb->dst, mtu); 716 717 if (skb->protocol == htons(ETH_P_IP)) { 718 df |= (old_iph->frag_off&htons(IP_DF)); 719 720 if ((old_iph->frag_off&htons(IP_DF)) && 721 mtu < ntohs(old_iph->tot_len)) { 722 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); 723 ip_rt_put(rt); 724 goto tx_error; 725 } 726 } 727 #ifdef CONFIG_IPV6 728 else if (skb->protocol == htons(ETH_P_IPV6)) { 729 struct rt6_info *rt6 = (struct rt6_info*)skb->dst; 730 731 if (rt6 && mtu < dst_mtu(skb->dst) && mtu >= IPV6_MIN_MTU) { 732 if ((tunnel->parms.iph.daddr && 733 !ipv4_is_multicast(tunnel->parms.iph.daddr)) || 734 rt6->rt6i_dst.plen == 128) { 735 rt6->rt6i_flags |= RTF_MODIFIED; 736 skb->dst->metrics[RTAX_MTU-1] = mtu; 737 } 738 } 739 740 if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) { 741 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev); 742 ip_rt_put(rt); 743 goto tx_error; 744 } 745 } 746 #endif 747 748 if (tunnel->err_count > 0) { 749 if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) { 750 tunnel->err_count--; 751 752 dst_link_failure(skb); 753 } else 754 tunnel->err_count = 0; 755 } 756 757 max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen; 758 759 if (skb_headroom(skb) < max_headroom || skb_shared(skb)|| 760 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { 761 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); 762 if (!new_skb) { 763 ip_rt_put(rt); 764 stats->tx_dropped++; 765 dev_kfree_skb(skb); 766 tunnel->recursion--; 767 return 0; 768 } 769 if (skb->sk) 770 skb_set_owner_w(new_skb, skb->sk); 771 dev_kfree_skb(skb); 772 skb = new_skb; 773 old_iph = ip_hdr(skb); 774 } 775 776 skb_reset_transport_header(skb); 777 skb_push(skb, gre_hlen); 778 skb_reset_network_header(skb); 779 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); 780 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | 781 IPSKB_REROUTED); 782 dst_release(skb->dst); 
783 skb->dst = &rt->u.dst; 784 785 /* 786 * Push down and install the IPIP header. 787 */ 788 789 iph = ip_hdr(skb); 790 iph->version = 4; 791 iph->ihl = sizeof(struct iphdr) >> 2; 792 iph->frag_off = df; 793 iph->protocol = IPPROTO_GRE; 794 iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb); 795 iph->daddr = rt->rt_dst; 796 iph->saddr = rt->rt_src; 797 798 if ((iph->ttl = tiph->ttl) == 0) { 799 if (skb->protocol == htons(ETH_P_IP)) 800 iph->ttl = old_iph->ttl; 801 #ifdef CONFIG_IPV6 802 else if (skb->protocol == htons(ETH_P_IPV6)) 803 iph->ttl = ((struct ipv6hdr*)old_iph)->hop_limit; 804 #endif 805 else 806 iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT); 807 } 808 809 ((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags; 810 ((__be16 *)(iph + 1))[1] = (dev->type == ARPHRD_ETHER) ? 811 htons(ETH_P_TEB) : skb->protocol; 812 813 if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) { 814 __be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4); 815 816 if (tunnel->parms.o_flags&GRE_SEQ) { 817 ++tunnel->o_seqno; 818 *ptr = htonl(tunnel->o_seqno); 819 ptr--; 820 } 821 if (tunnel->parms.o_flags&GRE_KEY) { 822 *ptr = tunnel->parms.o_key; 823 ptr--; 824 } 825 if (tunnel->parms.o_flags&GRE_CSUM) { 826 *ptr = 0; 827 *(__sum16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr)); 828 } 829 } 830 831 nf_reset(skb); 832 833 IPTUNNEL_XMIT(); 834 tunnel->recursion--; 835 return 0; 836 837 tx_error_icmp: 838 dst_link_failure(skb); 839 840 tx_error: 841 stats->tx_errors++; 842 dev_kfree_skb(skb); 843 tunnel->recursion--; 844 return 0; 845 } 846 847 static int ipgre_tunnel_bind_dev(struct net_device *dev) 848 { 849 struct net_device *tdev = NULL; 850 struct ip_tunnel *tunnel; 851 struct iphdr *iph; 852 int hlen = LL_MAX_HEADER; 853 int mtu = ETH_DATA_LEN; 854 int addend = sizeof(struct iphdr) + 4; 855 856 tunnel = netdev_priv(dev); 857 iph = &tunnel->parms.iph; 858 859 /* Guess output device to choose reasonable mtu and needed_headroom */ 860 861 if (iph->daddr) { 
862 struct flowi fl = { .oif = tunnel->parms.link, 863 .nl_u = { .ip4_u = 864 { .daddr = iph->daddr, 865 .saddr = iph->saddr, 866 .tos = RT_TOS(iph->tos) } }, 867 .proto = IPPROTO_GRE }; 868 struct rtable *rt; 869 if (!ip_route_output_key(dev_net(dev), &rt, &fl)) { 870 tdev = rt->u.dst.dev; 871 ip_rt_put(rt); 872 } 873 874 if (dev->type != ARPHRD_ETHER) 875 dev->flags |= IFF_POINTOPOINT; 876 } 877 878 if (!tdev && tunnel->parms.link) 879 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link); 880 881 if (tdev) { 882 hlen = tdev->hard_header_len + tdev->needed_headroom; 883 mtu = tdev->mtu; 884 } 885 dev->iflink = tunnel->parms.link; 886 887 /* Precalculate GRE options length */ 888 if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) { 889 if (tunnel->parms.o_flags&GRE_CSUM) 890 addend += 4; 891 if (tunnel->parms.o_flags&GRE_KEY) 892 addend += 4; 893 if (tunnel->parms.o_flags&GRE_SEQ) 894 addend += 4; 895 } 896 dev->needed_headroom = addend + hlen; 897 mtu -= dev->hard_header_len - addend; 898 899 if (mtu < 68) 900 mtu = 68; 901 902 tunnel->hlen = addend; 903 904 return mtu; 905 } 906 907 static int 908 ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) 909 { 910 int err = 0; 911 struct ip_tunnel_parm p; 912 struct ip_tunnel *t; 913 struct net *net = dev_net(dev); 914 struct ipgre_net *ign = net_generic(net, ipgre_net_id); 915 916 switch (cmd) { 917 case SIOCGETTUNNEL: 918 t = NULL; 919 if (dev == ign->fb_tunnel_dev) { 920 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) { 921 err = -EFAULT; 922 break; 923 } 924 t = ipgre_tunnel_locate(net, &p, 0); 925 } 926 if (t == NULL) 927 t = netdev_priv(dev); 928 memcpy(&p, &t->parms, sizeof(p)); 929 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) 930 err = -EFAULT; 931 break; 932 933 case SIOCADDTUNNEL: 934 case SIOCCHGTUNNEL: 935 err = -EPERM; 936 if (!capable(CAP_NET_ADMIN)) 937 goto done; 938 939 err = -EFAULT; 940 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) 
941 goto done; 942 943 err = -EINVAL; 944 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE || 945 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) || 946 ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING))) 947 goto done; 948 if (p.iph.ttl) 949 p.iph.frag_off |= htons(IP_DF); 950 951 if (!(p.i_flags&GRE_KEY)) 952 p.i_key = 0; 953 if (!(p.o_flags&GRE_KEY)) 954 p.o_key = 0; 955 956 t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL); 957 958 if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) { 959 if (t != NULL) { 960 if (t->dev != dev) { 961 err = -EEXIST; 962 break; 963 } 964 } else { 965 unsigned nflags=0; 966 967 t = netdev_priv(dev); 968 969 if (ipv4_is_multicast(p.iph.daddr)) 970 nflags = IFF_BROADCAST; 971 else if (p.iph.daddr) 972 nflags = IFF_POINTOPOINT; 973 974 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) { 975 err = -EINVAL; 976 break; 977 } 978 ipgre_tunnel_unlink(ign, t); 979 t->parms.iph.saddr = p.iph.saddr; 980 t->parms.iph.daddr = p.iph.daddr; 981 t->parms.i_key = p.i_key; 982 t->parms.o_key = p.o_key; 983 memcpy(dev->dev_addr, &p.iph.saddr, 4); 984 memcpy(dev->broadcast, &p.iph.daddr, 4); 985 ipgre_tunnel_link(ign, t); 986 netdev_state_change(dev); 987 } 988 } 989 990 if (t) { 991 err = 0; 992 if (cmd == SIOCCHGTUNNEL) { 993 t->parms.iph.ttl = p.iph.ttl; 994 t->parms.iph.tos = p.iph.tos; 995 t->parms.iph.frag_off = p.iph.frag_off; 996 if (t->parms.link != p.link) { 997 t->parms.link = p.link; 998 dev->mtu = ipgre_tunnel_bind_dev(dev); 999 netdev_state_change(dev); 1000 } 1001 } 1002 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p))) 1003 err = -EFAULT; 1004 } else 1005 err = (cmd == SIOCADDTUNNEL ? 
-ENOBUFS : -ENOENT); 1006 break; 1007 1008 case SIOCDELTUNNEL: 1009 err = -EPERM; 1010 if (!capable(CAP_NET_ADMIN)) 1011 goto done; 1012 1013 if (dev == ign->fb_tunnel_dev) { 1014 err = -EFAULT; 1015 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) 1016 goto done; 1017 err = -ENOENT; 1018 if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL) 1019 goto done; 1020 err = -EPERM; 1021 if (t == netdev_priv(ign->fb_tunnel_dev)) 1022 goto done; 1023 dev = t->dev; 1024 } 1025 unregister_netdevice(dev); 1026 err = 0; 1027 break; 1028 1029 default: 1030 err = -EINVAL; 1031 } 1032 1033 done: 1034 return err; 1035 } 1036 1037 static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu) 1038 { 1039 struct ip_tunnel *tunnel = netdev_priv(dev); 1040 if (new_mtu < 68 || 1041 new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen) 1042 return -EINVAL; 1043 dev->mtu = new_mtu; 1044 return 0; 1045 } 1046 1047 /* Nice toy. Unfortunately, useless in real life :-) 1048 It allows to construct virtual multiprotocol broadcast "LAN" 1049 over the Internet, provided multicast routing is tuned. 1050 1051 1052 I have no idea was this bicycle invented before me, 1053 so that I had to set ARPHRD_IPGRE to a random value. 1054 I have an impression, that Cisco could make something similar, 1055 but this feature is apparently missing in IOS<=11.2(8). 1056 1057 I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks 1058 with broadcast 224.66.66.66. If you have access to mbone, play with me :-) 1059 1060 ping -t 255 224.66.66.66 1061 1062 If nobody answers, mbone does not work. 1063 1064 ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255 1065 ip addr add 10.66.66.<somewhat>/24 dev Universe 1066 ifconfig Universe up 1067 ifconfig Universe add fe80::<Your_real_addr>/10 1068 ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96 1069 ftp 10.66.66.66 1070 ... 1071 ftp fec0:6666:6666::193.233.7.65 1072 ... 
 */

/*
 * header_ops->create for IPGRE devices: push the pre-built tunnel IP
 * header followed by the 4-byte base GRE header (o_flags + protocol
 * type).  Returns the pushed header length when the destination is
 * fully resolved, or minus the header length when the caller must
 * still fill in the destination (header_ops->create convention).
 */
static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
			unsigned short type,
			const void *daddr, const void *saddr, unsigned len)
{
	struct ip_tunnel *t = netdev_priv(dev);
	struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
	__be16 *p = (__be16*)(iph+1);

	/* Start from the template IP header kept in the tunnel parms;
	 * the two __be16 words after it are the GRE flags and type. */
	memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
	p[0] = t->parms.o_flags;
	p[1] = htons(type);

	/*
	 *	Set the source hardware address.
	 */

	if (saddr)
		memcpy(&iph->saddr, saddr, 4);

	if (daddr) {
		memcpy(&iph->daddr, daddr, 4);
		return t->hlen;
	}
	/* No daddr from the caller: a preconfigured unicast destination
	 * in the tunnel parms is sufficient. */
	if (iph->daddr && !ipv4_is_multicast(iph->daddr))
		return t->hlen;

	/* Destination still unknown (NBMA / multicast case). */
	return -t->hlen;
}

/*
 * header_ops->parse: report the outer IP source address as the
 * 4-byte "hardware" address of the sender.
 */
static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
{
	struct iphdr *iph = (struct iphdr*) skb_mac_header(skb);
	memcpy(haddr, &iph->saddr, 4);
	return 4;
}

static const struct header_ops ipgre_header_ops = {
	.create	= ipgre_header,
	.parse	= ipgre_header_parse,
};

#ifdef CONFIG_NET_IPGRE_BROADCAST
/*
 * dev->open for broadcast-mode (multicast destination) GRE tunnels:
 * route towards the group to find the underlying device, remember it
 * in t->mlink and join the multicast group on it.
 */
static int ipgre_open(struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);

	if (ipv4_is_multicast(t->parms.iph.daddr)) {
		struct flowi fl = { .oif = t->parms.link,
				    .nl_u = { .ip4_u =
					      { .daddr = t->parms.iph.daddr,
						.saddr = t->parms.iph.saddr,
						.tos = RT_TOS(t->parms.iph.tos) } },
				    .proto = IPPROTO_GRE };
		struct rtable *rt;
		if (ip_route_output_key(dev_net(dev), &rt, &fl))
			return -EADDRNOTAVAIL;
		/* From here on, "dev" is the underlying output device,
		 * not the tunnel device. */
		dev = rt->u.dst.dev;
		ip_rt_put(rt);
		if (__in_dev_get_rtnl(dev) == NULL)
			return -EADDRNOTAVAIL;
		t->mlink = dev->ifindex;	/* consumed by ipgre_close() */
		ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
	}
	return 0;
}

/* dev->stop: leave the multicast group joined by ipgre_open(), if any. */
static int ipgre_close(struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);
	if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
		struct in_device *in_dev;
		in_dev = inetdev_by_index(dev_net(dev), t->mlink);
		if (in_dev) {
			ip_mc_dec_group(in_dev, t->parms.iph.daddr);
			in_dev_put(in_dev);
		}
	}
	return 0;
}

#endif

/* netdev setup callback for plain "gre" (ARPHRD_IPGRE) devices. */
static void ipgre_tunnel_setup(struct net_device *dev)
{
	dev->init		= ipgre_tunnel_init;
	dev->uninit		= ipgre_tunnel_uninit;
	dev->destructor 	= free_netdev;
	dev->hard_start_xmit	= ipgre_tunnel_xmit;
	dev->do_ioctl		= ipgre_tunnel_ioctl;
	dev->change_mtu		= ipgre_tunnel_change_mtu;

	dev->type		= ARPHRD_IPGRE;
	/* Room / MTU budget for outer IP header + 4-byte base GRE header. */
	dev->needed_headroom 	= LL_MAX_HEADER + sizeof(struct iphdr) + 4;
	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 4;
	dev->flags		= IFF_NOARP;
	dev->iflink		= 0;
	dev->addr_len		= 4;
	dev->features		|= NETIF_F_NETNS_LOCAL;
}

/*
 * dev->init for regular GRE tunnels: copy tunnel endpoint addresses
 * into dev_addr/broadcast and select header_ops / open / stop
 * depending on whether the destination is multicast (broadcast mode)
 * or unset (NBMA mode).
 */
static int ipgre_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel;
	struct iphdr *iph;

	tunnel = netdev_priv(dev);
	iph = &tunnel->parms.iph;

	tunnel->dev = dev;
	strcpy(tunnel->parms.name, dev->name);

	memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
	memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);

	if (iph->daddr) {
#ifdef CONFIG_NET_IPGRE_BROADCAST
		if (ipv4_is_multicast(iph->daddr)) {
			/* Broadcast mode needs a known local address to
			 * resolve the outgoing device in ipgre_open(). */
			if (!iph->saddr)
				return -EINVAL;
			dev->flags = IFF_BROADCAST;
			dev->header_ops = &ipgre_header_ops;
			dev->open = ipgre_open;
			dev->stop = ipgre_close;
		}
#endif
	} else
		dev->header_ops = &ipgre_header_ops;

	return 0;
}

/*
 * dev->init for the per-netns fallback device "gre0": minimal IP
 * header template, registered as the wildcard tunnel (tunnels_wc[0]).
 */
static int ipgre_fb_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;
	struct ipgre_net *ign = net_generic(dev_net(dev), ipgre_net_id);

	tunnel->dev = dev;
	strcpy(tunnel->parms.name, dev->name);

	iph->version		= 4;
	iph->protocol		= IPPROTO_GRE;
	iph->ihl		= 5;
	tunnel->hlen		= sizeof(struct iphdr) + 4;

	/* Reference held for the wildcard slot; see netns teardown. */
	dev_hold(dev);
	ign->tunnels_wc[0]	= tunnel;
	return 0;
}


static struct net_protocol ipgre_protocol = {
	.handler	=	ipgre_rcv,
	.err_handler	=	ipgre_err,
	.netns_ok	=	1,
};

/*
 * Unregister every tunnel device in all 4 hash priorities (keyed /
 * unkeyed x local-addr hash).  Caller holds RTNL.
 */
static void ipgre_destroy_tunnels(struct ipgre_net *ign)
{
	int prio;

	for (prio = 0; prio < 4; prio++) {
		int h;
		for (h = 0; h < HASH_SIZE; h++) {
			struct ip_tunnel *t;
			/* unregister_netdevice() unlinks t, so the list
			 * head advances each iteration. */
			while ((t = ign->tunnels[prio][h]) != NULL)
				unregister_netdevice(t->dev);
		}
	}
}

/* pernet init: allocate per-netns state and create the fallback gre0. */
static int ipgre_init_net(struct net *net)
{
	int err;
	struct ipgre_net *ign;

	err = -ENOMEM;
	ign = kzalloc(sizeof(struct ipgre_net), GFP_KERNEL);
	if (ign == NULL)
		goto err_alloc;

	err = net_assign_generic(net, ipgre_net_id, ign);
	if (err < 0)
		goto err_assign;

	ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
					   ipgre_tunnel_setup);
	if (!ign->fb_tunnel_dev) {
		err = -ENOMEM;
		goto err_alloc_dev;
	}

	ign->fb_tunnel_dev->init = ipgre_fb_tunnel_init;
	dev_net_set(ign->fb_tunnel_dev, net);
	ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops;

	if ((err = register_netdev(ign->fb_tunnel_dev)))
		goto err_reg_dev;

	return 0;

err_reg_dev:
	free_netdev(ign->fb_tunnel_dev);
err_alloc_dev:
	/* nothing */
err_assign:
	kfree(ign);
err_alloc:
	return err;
}

/* pernet exit: tear down all tunnels under RTNL, then free state. */
static void ipgre_exit_net(struct net *net)
{
	struct ipgre_net *ign;

	ign = net_generic(net, ipgre_net_id);
	rtnl_lock();
	ipgre_destroy_tunnels(ign);
	rtnl_unlock();
	kfree(ign);
}

static struct pernet_operations ipgre_net_ops = {
	.init = ipgre_init_net,
	.exit = ipgre_exit_net,
};

/*
 * rtnl validate for "gre" links: reject GRE flag bits we do not
 * implement (version != 0 and source routing).
 */
static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
{
	__be16 flags;

	if (!data)
		return 0;

	flags = 0;
	if (data[IFLA_GRE_IFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
	if (data[IFLA_GRE_OFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
	if (flags & (GRE_VERSION|GRE_ROUTING))
		return -EINVAL;

	return 0;
}

/*
 * rtnl validate for "gretap" links: additionally check the Ethernet
 * MAC (if given) and require a non-zero remote address, then fall
 * through to the common GRE checks.
 */
static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
{
	__be32 daddr;

	if (tb[IFLA_ADDRESS]) {
		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
			return -EINVAL;
		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
			return -EADDRNOTAVAIL;
	}

	if (!data)
		goto out;

	if (data[IFLA_GRE_REMOTE]) {
		memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
		if (!daddr)
			return -EINVAL;
	}

out:
	return ipgre_tunnel_validate(tb, data);
}

/*
 * Translate IFLA_GRE_* netlink attributes into ip_tunnel_parm.
 * Missing attributes leave the zeroed defaults, except PMTU discovery
 * which defaults to on (DF set).
 */
static void ipgre_netlink_parms(struct nlattr *data[],
				struct ip_tunnel_parm *parms)
{
	memset(parms, 0, sizeof(*parms));

	parms->iph.protocol = IPPROTO_GRE;

	if (!data)
		return;

	if (data[IFLA_GRE_LINK])
		parms->link = nla_get_u32(data[IFLA_GRE_LINK]);

	if (data[IFLA_GRE_IFLAGS])
		parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]);

	if (data[IFLA_GRE_OFLAGS])
		parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]);

	if (data[IFLA_GRE_IKEY])
		parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);

	if (data[IFLA_GRE_OKEY])
		parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);

	if (data[IFLA_GRE_LOCAL])
		parms->iph.saddr = nla_get_be32(data[IFLA_GRE_LOCAL]);

	if (data[IFLA_GRE_REMOTE])
		parms->iph.daddr = nla_get_be32(data[IFLA_GRE_REMOTE]);

	if (data[IFLA_GRE_TTL])
		parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);

	if (data[IFLA_GRE_TOS])
		parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);

	/* Default is PMTU discovery enabled unless explicitly disabled. */
	if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
		parms->iph.frag_off = htons(IP_DF);
}

/* dev->init for gretap devices: bind to the underlying route/device. */
static int ipgre_tap_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel;

	tunnel = netdev_priv(dev);

	tunnel->dev = dev;
	strcpy(tunnel->parms.name, dev->name);

	ipgre_tunnel_bind_dev(dev);

	return 0;
}

/* netdev setup callback for "gretap" (Ethernet-over-GRE) devices. */
static void ipgre_tap_setup(struct net_device *dev)
{

	ether_setup(dev);

	dev->init		= ipgre_tap_init;
	dev->uninit		= ipgre_tunnel_uninit;
	dev->destructor 	= free_netdev;
	dev->hard_start_xmit	= ipgre_tunnel_xmit;
	dev->change_mtu		= ipgre_tunnel_change_mtu;

	dev->iflink		= 0;
	dev->features		|= NETIF_F_NETNS_LOCAL;
}

/*
 * rtnl newlink: reject duplicates, pick a random MAC for gretap if
 * none was supplied, bind the device (which computes the usable MTU)
 * and register + link it into the tunnel hash.
 */
static int ipgre_newlink(struct net_device *dev, struct nlattr *tb[],
			 struct nlattr *data[])
{
	struct ip_tunnel *nt;
	struct net *net = dev_net(dev);
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
	int mtu;
	int err;

	nt = netdev_priv(dev);
	ipgre_netlink_parms(data, &nt->parms);

	if (ipgre_tunnel_find(net, &nt->parms, dev->type))
		return -EEXIST;

	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
		random_ether_addr(dev->dev_addr);

	mtu = ipgre_tunnel_bind_dev(dev);
	if (!tb[IFLA_MTU])
		dev->mtu = mtu;

	err = register_netdevice(dev);
	if (err)
		goto out;

	dev_hold(dev);
	ipgre_tunnel_link(ign, nt);

out:
	return err;
}

/*
 * rtnl changelink: apply new parameters to an existing device.  If
 * the new endpoints hash to a different tunnel slot, the device is
 * unlinked, its keyed/addressed fields updated and relinked.
 */
static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
			    struct nlattr *data[])
{
	struct ip_tunnel *t, *nt;
	struct net *net = dev_net(dev);
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
	struct ip_tunnel_parm p;
	int mtu;

	/* The fallback device's parameters are fixed. */
	if (dev == ign->fb_tunnel_dev)
		return -EINVAL;

	nt = netdev_priv(dev);
	ipgre_netlink_parms(data, &p);

	t = ipgre_tunnel_locate(net, &p, 0);

	if (t) {
		if (t->dev != dev)
			return -EEXIST;
		/* NOTE(review): when the located tunnel is this device,
		 * saddr/daddr/i_key are left as-is (only the common
		 * fields below are applied) — presumably because an
		 * exact hash match means they are unchanged; verify. */
	} else {
		unsigned nflags = 0;

		t = nt;

		if (ipv4_is_multicast(p.iph.daddr))
			nflags = IFF_BROADCAST;
		else if (p.iph.daddr)
			nflags = IFF_POINTOPOINT;

		/* Cannot switch between broadcast/p2p/NBMA modes on a
		 * live device. */
		if ((dev->flags ^ nflags) &
		    (IFF_POINTOPOINT | IFF_BROADCAST))
			return -EINVAL;

		/* Rehash: the hash chain depends on daddr/saddr/i_key. */
		ipgre_tunnel_unlink(ign, t);
		t->parms.iph.saddr = p.iph.saddr;
		t->parms.iph.daddr = p.iph.daddr;
		t->parms.i_key = p.i_key;
		memcpy(dev->dev_addr, &p.iph.saddr, 4);
		memcpy(dev->broadcast, &p.iph.daddr, 4);
		ipgre_tunnel_link(ign, t);
		netdev_state_change(dev);
	}

	/* Fields that do not affect hashing are applied unconditionally. */
	t->parms.o_key = p.o_key;
	t->parms.iph.ttl = p.iph.ttl;
	t->parms.iph.tos = p.iph.tos;
	t->parms.iph.frag_off = p.iph.frag_off;

	if (t->parms.link != p.link) {
		t->parms.link = p.link;
		mtu = ipgre_tunnel_bind_dev(dev);
		if (!tb[IFLA_MTU])
			dev->mtu = mtu;
		netdev_state_change(dev);
	}

	return 0;
}

/* Worst-case netlink attribute payload for ipgre_fill_info(). */
static size_t ipgre_get_size(const struct net_device *dev)
{
	return
		/* IFLA_GRE_LINK */
		nla_total_size(4) +
		/* IFLA_GRE_IFLAGS */
		nla_total_size(2) +
		/* IFLA_GRE_OFLAGS */
		nla_total_size(2) +
		/* IFLA_GRE_IKEY */
		nla_total_size(4) +
		/* IFLA_GRE_OKEY */
		nla_total_size(4) +
		/* IFLA_GRE_LOCAL */
		nla_total_size(4) +
		/* IFLA_GRE_REMOTE */
		nla_total_size(4) +
		/* IFLA_GRE_TTL */
		nla_total_size(1) +
		/* IFLA_GRE_TOS */
		nla_total_size(1) +
		/* IFLA_GRE_PMTUDISC */
		nla_total_size(1) +
		0;
}

/* Dump the tunnel parameters as IFLA_GRE_* attributes. */
static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);
	struct ip_tunnel_parm *p = &t->parms;

	NLA_PUT_U32(skb, IFLA_GRE_LINK, p->link);
	NLA_PUT_BE16(skb, IFLA_GRE_IFLAGS, p->i_flags);
	NLA_PUT_BE16(skb, IFLA_GRE_OFLAGS, p->o_flags);
	NLA_PUT_BE32(skb, IFLA_GRE_IKEY, p->i_key);
	NLA_PUT_BE32(skb, IFLA_GRE_OKEY, p->o_key);
	NLA_PUT_BE32(skb, IFLA_GRE_LOCAL, p->iph.saddr);
	NLA_PUT_BE32(skb, IFLA_GRE_REMOTE, p->iph.daddr);
	NLA_PUT_U8(skb, IFLA_GRE_TTL, p->iph.ttl);
	NLA_PUT_U8(skb, IFLA_GRE_TOS, p->iph.tos);
	NLA_PUT_U8(skb, IFLA_GRE_PMTUDISC, !!(p->iph.frag_off & htons(IP_DF)));

	return 0;

nla_put_failure:
	return -EMSGSIZE;
}

static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
	[IFLA_GRE_LINK]		= { .type = NLA_U32 },
	[IFLA_GRE_IFLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_OFLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_IKEY]		= { .type = NLA_U32 },
	[IFLA_GRE_OKEY]		= { .type = NLA_U32 },
	[IFLA_GRE_LOCAL]	= { .len = FIELD_SIZEOF(struct iphdr, saddr) },
	[IFLA_GRE_REMOTE]	= { .len = FIELD_SIZEOF(struct iphdr, daddr) },
	[IFLA_GRE_TTL]		= { .type = NLA_U8 },
	[IFLA_GRE_TOS]		= { .type = NLA_U8 },
	[IFLA_GRE_PMTUDISC]	= { .type = NLA_U8 },
};

static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
	.kind		= "gre",
	.maxtype	= IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= ipgre_tunnel_setup,
	.validate	= ipgre_tunnel_validate,
	.newlink	= ipgre_newlink,
	.changelink	= ipgre_changelink,
	.get_size	= ipgre_get_size,
	.fill_info	= ipgre_fill_info,
};

static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
	.kind		= "gretap",
	.maxtype	= IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= ipgre_tap_setup,
	.validate	= ipgre_tap_validate,
	.newlink	= ipgre_newlink,
	.changelink	= ipgre_changelink,
	.get_size	= ipgre_get_size,
	.fill_info	= ipgre_fill_info,
};

/*
 *	And now the modules code and kernel interface.
1597 */ 1598 1599 static int __init ipgre_init(void) 1600 { 1601 int err; 1602 1603 printk(KERN_INFO "GRE over IPv4 tunneling driver\n"); 1604 1605 if (inet_add_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) { 1606 printk(KERN_INFO "ipgre init: can't add protocol\n"); 1607 return -EAGAIN; 1608 } 1609 1610 err = register_pernet_gen_device(&ipgre_net_id, &ipgre_net_ops); 1611 if (err < 0) 1612 goto gen_device_failed; 1613 1614 err = rtnl_link_register(&ipgre_link_ops); 1615 if (err < 0) 1616 goto rtnl_link_failed; 1617 1618 err = rtnl_link_register(&ipgre_tap_ops); 1619 if (err < 0) 1620 goto tap_ops_failed; 1621 1622 out: 1623 return err; 1624 1625 tap_ops_failed: 1626 rtnl_link_unregister(&ipgre_link_ops); 1627 rtnl_link_failed: 1628 unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops); 1629 gen_device_failed: 1630 inet_del_protocol(&ipgre_protocol, IPPROTO_GRE); 1631 goto out; 1632 } 1633 1634 static void __exit ipgre_fini(void) 1635 { 1636 rtnl_link_unregister(&ipgre_tap_ops); 1637 rtnl_link_unregister(&ipgre_link_ops); 1638 unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops); 1639 if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) 1640 printk(KERN_INFO "ipgre close: can't remove protocol\n"); 1641 } 1642 1643 module_init(ipgre_init); 1644 module_exit(ipgre_fini); 1645 MODULE_LICENSE("GPL"); 1646 MODULE_ALIAS_RTNL_LINK("gre"); 1647 MODULE_ALIAS_RTNL_LINK("gretap"); 1648