1 #include <linux/etherdevice.h> 2 #include <linux/if_macvlan.h> 3 #include <linux/interrupt.h> 4 #include <linux/nsproxy.h> 5 #include <linux/compat.h> 6 #include <linux/if_tun.h> 7 #include <linux/module.h> 8 #include <linux/skbuff.h> 9 #include <linux/cache.h> 10 #include <linux/sched.h> 11 #include <linux/types.h> 12 #include <linux/init.h> 13 #include <linux/wait.h> 14 #include <linux/cdev.h> 15 #include <linux/fs.h> 16 17 #include <net/net_namespace.h> 18 #include <net/rtnetlink.h> 19 #include <net/sock.h> 20 #include <linux/virtio_net.h> 21 22 /* 23 * A macvtap queue is the central object of this driver, it connects 24 * an open character device to a macvlan interface. There can be 25 * multiple queues on one interface, which map back to queues 26 * implemented in hardware on the underlying device. 27 * 28 * macvtap_proto is used to allocate queues through the sock allocation 29 * mechanism. 30 * 31 * TODO: multiqueue support is currently not implemented, even though 32 * macvtap is basically prepared for that. We will need to add this 33 * here as well as in virtio-net and qemu to get line rate on 10gbit 34 * adapters from a guest. 35 */ 36 struct macvtap_queue { 37 struct sock sk; 38 struct socket sock; 39 struct macvlan_dev *vlan; 40 struct file *file; 41 unsigned int flags; 42 }; 43 44 static struct proto macvtap_proto = { 45 .name = "macvtap", 46 .owner = THIS_MODULE, 47 .obj_size = sizeof (struct macvtap_queue), 48 }; 49 50 /* 51 * Minor number matches netdev->ifindex, so need a potentially 52 * large value. This also makes it possible to split the 53 * tap functionality out again in the future by offering it 54 * from other drivers besides macvtap. As long as every device 55 * only has one tap, the interface numbers assure that the 56 * device nodes are unique. 57 */ 58 static unsigned int macvtap_major; 59 #define MACVTAP_NUM_DEVS 65536 60 static struct class *macvtap_class; 61 static struct cdev macvtap_cdev; 62 63 static const struct proto_ops macvtap_socket_ops; 64 65 /* 66 * RCU usage: 67 * The macvtap_queue and the macvlan_dev are loosely coupled, the 68 * pointers from one to the other can only be read while rcu_read_lock 69 * or macvtap_lock is held. 70 * 71 * Both the file and the macvlan_dev hold a reference on the macvtap_queue 72 * through sock_hold(&q->sk). When the macvlan_dev goes away first, 73 * q->vlan becomes inaccessible. When the files gets closed, 74 * macvtap_get_queue() fails. 75 * 76 * There may still be references to the struct sock inside of the 77 * queue from outbound SKBs, but these never reference back to the 78 * file or the dev. The data structure is freed through __sk_free 79 * when both our references and any pending SKBs are gone. 80 */ 81 static DEFINE_SPINLOCK(macvtap_lock); 82 83 /* 84 * Choose the next free queue, for now there is only one 85 */ 86 static int macvtap_set_queue(struct net_device *dev, struct file *file, 87 struct macvtap_queue *q) 88 { 89 struct macvlan_dev *vlan = netdev_priv(dev); 90 int err = -EBUSY; 91 92 spin_lock(&macvtap_lock); 93 if (rcu_dereference(vlan->tap)) 94 goto out; 95 96 err = 0; 97 rcu_assign_pointer(q->vlan, vlan); 98 rcu_assign_pointer(vlan->tap, q); 99 sock_hold(&q->sk); 100 101 q->file = file; 102 file->private_data = q; 103 104 out: 105 spin_unlock(&macvtap_lock); 106 return err; 107 } 108 109 /* 110 * The file owning the queue got closed, give up both 111 * the reference that the files holds as well as the 112 * one from the macvlan_dev if that still exists. 113 * 114 * Using the spinlock makes sure that we don't get 115 * to the queue again after destroying it. 116 */ 117 static void macvtap_put_queue(struct macvtap_queue *q) 118 { 119 struct macvlan_dev *vlan; 120 121 spin_lock(&macvtap_lock); 122 vlan = rcu_dereference(q->vlan); 123 if (vlan) { 124 rcu_assign_pointer(vlan->tap, NULL); 125 rcu_assign_pointer(q->vlan, NULL); 126 sock_put(&q->sk); 127 } 128 129 spin_unlock(&macvtap_lock); 130 131 synchronize_rcu(); 132 sock_put(&q->sk); 133 } 134 135 /* 136 * Since we only support one queue, just dereference the pointer. 137 */ 138 static struct macvtap_queue *macvtap_get_queue(struct net_device *dev, 139 struct sk_buff *skb) 140 { 141 struct macvlan_dev *vlan = netdev_priv(dev); 142 143 return rcu_dereference(vlan->tap); 144 } 145 146 /* 147 * The net_device is going away, give up the reference 148 * that it holds on the queue (all the queues one day) 149 * and safely set the pointer from the queues to NULL. 150 */ 151 static void macvtap_del_queues(struct net_device *dev) 152 { 153 struct macvlan_dev *vlan = netdev_priv(dev); 154 struct macvtap_queue *q; 155 156 spin_lock(&macvtap_lock); 157 q = rcu_dereference(vlan->tap); 158 if (!q) { 159 spin_unlock(&macvtap_lock); 160 return; 161 } 162 163 rcu_assign_pointer(vlan->tap, NULL); 164 rcu_assign_pointer(q->vlan, NULL); 165 spin_unlock(&macvtap_lock); 166 167 synchronize_rcu(); 168 sock_put(&q->sk); 169 } 170 171 /* 172 * Forward happens for data that gets sent from one macvlan 173 * endpoint to another one in bridge mode. We just take 174 * the skb and put it into the receive queue. 175 */ 176 static int macvtap_forward(struct net_device *dev, struct sk_buff *skb) 177 { 178 struct macvtap_queue *q = macvtap_get_queue(dev, skb); 179 if (!q) 180 return -ENOLINK; 181 182 skb_queue_tail(&q->sk.sk_receive_queue, skb); 183 wake_up_interruptible_poll(q->sk.sk_sleep, POLLIN | POLLRDNORM | POLLRDBAND); 184 return 0; 185 } 186 187 /* 188 * Receive is for data from the external interface (lowerdev), 189 * in case of macvtap, we can treat that the same way as 190 * forward, which macvlan cannot. 191 */ 192 static int macvtap_receive(struct sk_buff *skb) 193 { 194 skb_push(skb, ETH_HLEN); 195 return macvtap_forward(skb->dev, skb); 196 } 197 198 static int macvtap_newlink(struct net *src_net, 199 struct net_device *dev, 200 struct nlattr *tb[], 201 struct nlattr *data[]) 202 { 203 struct device *classdev; 204 dev_t devt; 205 int err; 206 207 err = macvlan_common_newlink(src_net, dev, tb, data, 208 macvtap_receive, macvtap_forward); 209 if (err) 210 goto out; 211 212 devt = MKDEV(MAJOR(macvtap_major), dev->ifindex); 213 214 classdev = device_create(macvtap_class, &dev->dev, devt, 215 dev, "tap%d", dev->ifindex); 216 if (IS_ERR(classdev)) { 217 err = PTR_ERR(classdev); 218 macvtap_del_queues(dev); 219 } 220 221 out: 222 return err; 223 } 224 225 static void macvtap_dellink(struct net_device *dev, 226 struct list_head *head) 227 { 228 device_destroy(macvtap_class, 229 MKDEV(MAJOR(macvtap_major), dev->ifindex)); 230 231 macvtap_del_queues(dev); 232 macvlan_dellink(dev, head); 233 } 234 235 static struct rtnl_link_ops macvtap_link_ops __read_mostly = { 236 .kind = "macvtap", 237 .newlink = macvtap_newlink, 238 .dellink = macvtap_dellink, 239 }; 240 241 242 static void macvtap_sock_write_space(struct sock *sk) 243 { 244 if (!sock_writeable(sk) || 245 !test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags)) 246 return; 247 248 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) 249 wake_up_interruptible_poll(sk->sk_sleep, POLLOUT | POLLWRNORM | POLLWRBAND); 250 } 251 252 static int macvtap_open(struct inode *inode, struct file *file) 253 { 254 struct net *net = current->nsproxy->net_ns; 255 struct net_device *dev = dev_get_by_index(net, iminor(inode)); 256 struct macvtap_queue *q; 257 int err; 258 259 err = -ENODEV; 260 if (!dev) 261 goto out; 262 263 /* check if this is a macvtap device */ 264 err = -EINVAL; 265 if (dev->rtnl_link_ops != &macvtap_link_ops) 266 goto out; 267 268 err = -ENOMEM; 269 q = (struct macvtap_queue *)sk_alloc(net, AF_UNSPEC, GFP_KERNEL, 270 &macvtap_proto); 271 if (!q) 272 goto out; 273 274 init_waitqueue_head(&q->sock.wait); 275 q->sock.type = SOCK_RAW; 276 q->sock.state = SS_CONNECTED; 277 q->sock.file = file; 278 q->sock.ops = &macvtap_socket_ops; 279 sock_init_data(&q->sock, &q->sk); 280 q->sk.sk_write_space = macvtap_sock_write_space; 281 q->flags = IFF_VNET_HDR | IFF_NO_PI | IFF_TAP; 282 283 err = macvtap_set_queue(dev, file, q); 284 if (err) 285 sock_put(&q->sk); 286 287 out: 288 if (dev) 289 dev_put(dev); 290 291 return err; 292 } 293 294 static int macvtap_release(struct inode *inode, struct file *file) 295 { 296 struct macvtap_queue *q = file->private_data; 297 macvtap_put_queue(q); 298 return 0; 299 } 300 301 static unsigned int macvtap_poll(struct file *file, poll_table * wait) 302 { 303 struct macvtap_queue *q = file->private_data; 304 unsigned int mask = POLLERR; 305 306 if (!q) 307 goto out; 308 309 mask = 0; 310 poll_wait(file, &q->sock.wait, wait); 311 312 if (!skb_queue_empty(&q->sk.sk_receive_queue)) 313 mask |= POLLIN | POLLRDNORM; 314 315 if (sock_writeable(&q->sk) || 316 (!test_and_set_bit(SOCK_ASYNC_NOSPACE, &q->sock.flags) && 317 sock_writeable(&q->sk))) 318 mask |= POLLOUT | POLLWRNORM; 319 320 out: 321 return mask; 322 } 323 324 static inline struct sk_buff *macvtap_alloc_skb(struct sock *sk, size_t prepad, 325 size_t len, size_t linear, 326 int noblock, int *err) 327 { 328 struct sk_buff *skb; 329 330 /* Under a page? Don't bother with paged skb. */ 331 if (prepad + len < PAGE_SIZE || !linear) 332 linear = len; 333 334 skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock, 335 err); 336 if (!skb) 337 return NULL; 338 339 skb_reserve(skb, prepad); 340 skb_put(skb, linear); 341 skb->data_len = len - linear; 342 skb->len += len - linear; 343 344 return skb; 345 } 346 347 /* 348 * macvtap_skb_from_vnet_hdr and macvtap_skb_to_vnet_hdr should 349 * be shared with the tun/tap driver. 350 */ 351 static int macvtap_skb_from_vnet_hdr(struct sk_buff *skb, 352 struct virtio_net_hdr *vnet_hdr) 353 { 354 unsigned short gso_type = 0; 355 if (vnet_hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { 356 switch (vnet_hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { 357 case VIRTIO_NET_HDR_GSO_TCPV4: 358 gso_type = SKB_GSO_TCPV4; 359 break; 360 case VIRTIO_NET_HDR_GSO_TCPV6: 361 gso_type = SKB_GSO_TCPV6; 362 break; 363 case VIRTIO_NET_HDR_GSO_UDP: 364 gso_type = SKB_GSO_UDP; 365 break; 366 default: 367 return -EINVAL; 368 } 369 370 if (vnet_hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN) 371 gso_type |= SKB_GSO_TCP_ECN; 372 373 if (vnet_hdr->gso_size == 0) 374 return -EINVAL; 375 } 376 377 if (vnet_hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) { 378 if (!skb_partial_csum_set(skb, vnet_hdr->csum_start, 379 vnet_hdr->csum_offset)) 380 return -EINVAL; 381 } 382 383 if (vnet_hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { 384 skb_shinfo(skb)->gso_size = vnet_hdr->gso_size; 385 skb_shinfo(skb)->gso_type = gso_type; 386 387 /* Header must be checked, and gso_segs computed. */ 388 skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; 389 skb_shinfo(skb)->gso_segs = 0; 390 } 391 return 0; 392 } 393 394 static int macvtap_skb_to_vnet_hdr(const struct sk_buff *skb, 395 struct virtio_net_hdr *vnet_hdr) 396 { 397 memset(vnet_hdr, 0, sizeof(*vnet_hdr)); 398 399 if (skb_is_gso(skb)) { 400 struct skb_shared_info *sinfo = skb_shinfo(skb); 401 402 /* This is a hint as to how much should be linear. */ 403 vnet_hdr->hdr_len = skb_headlen(skb); 404 vnet_hdr->gso_size = sinfo->gso_size; 405 if (sinfo->gso_type & SKB_GSO_TCPV4) 406 vnet_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; 407 else if (sinfo->gso_type & SKB_GSO_TCPV6) 408 vnet_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6; 409 else if (sinfo->gso_type & SKB_GSO_UDP) 410 vnet_hdr->gso_type = VIRTIO_NET_HDR_GSO_UDP; 411 else 412 BUG(); 413 if (sinfo->gso_type & SKB_GSO_TCP_ECN) 414 vnet_hdr->gso_type |= VIRTIO_NET_HDR_GSO_ECN; 415 } else 416 vnet_hdr->gso_type = VIRTIO_NET_HDR_GSO_NONE; 417 418 if (skb->ip_summed == CHECKSUM_PARTIAL) { 419 vnet_hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; 420 vnet_hdr->csum_start = skb->csum_start - 421 skb_headroom(skb); 422 vnet_hdr->csum_offset = skb->csum_offset; 423 } /* else everything is zero */ 424 425 return 0; 426 } 427 428 429 /* Get packet from user space buffer */ 430 static ssize_t macvtap_get_user(struct macvtap_queue *q, 431 const struct iovec *iv, size_t count, 432 int noblock) 433 { 434 struct sk_buff *skb; 435 struct macvlan_dev *vlan; 436 size_t len = count; 437 int err; 438 struct virtio_net_hdr vnet_hdr = { 0 }; 439 int vnet_hdr_len = 0; 440 441 if (q->flags & IFF_VNET_HDR) { 442 vnet_hdr_len = sizeof(vnet_hdr); 443 444 err = -EINVAL; 445 if ((len -= vnet_hdr_len) < 0) 446 goto err; 447 448 err = memcpy_fromiovecend((void *)&vnet_hdr, iv, 0, 449 vnet_hdr_len); 450 if (err < 0) 451 goto err; 452 if ((vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && 453 vnet_hdr.csum_start + vnet_hdr.csum_offset + 2 > 454 vnet_hdr.hdr_len) 455 vnet_hdr.hdr_len = vnet_hdr.csum_start + 456 vnet_hdr.csum_offset + 2; 457 err = -EINVAL; 458 if (vnet_hdr.hdr_len > len) 459 goto err; 460 } 461 462 err = -EINVAL; 463 if (unlikely(len < ETH_HLEN)) 464 goto err; 465 466 skb = macvtap_alloc_skb(&q->sk, NET_IP_ALIGN, len, vnet_hdr.hdr_len, 467 noblock, &err); 468 if (!skb) 469 goto err; 470 471 err = skb_copy_datagram_from_iovec(skb, 0, iv, vnet_hdr_len, len); 472 if (err) 473 goto err_kfree; 474 475 skb_set_network_header(skb, ETH_HLEN); 476 skb_reset_mac_header(skb); 477 skb->protocol = eth_hdr(skb)->h_proto; 478 479 if (vnet_hdr_len) { 480 err = macvtap_skb_from_vnet_hdr(skb, &vnet_hdr); 481 if (err) 482 goto err_kfree; 483 } 484 485 rcu_read_lock_bh(); 486 vlan = rcu_dereference(q->vlan); 487 if (vlan) 488 macvlan_start_xmit(skb, vlan->dev); 489 else 490 kfree_skb(skb); 491 rcu_read_unlock_bh(); 492 493 return count; 494 495 err_kfree: 496 kfree_skb(skb); 497 498 err: 499 rcu_read_lock_bh(); 500 vlan = rcu_dereference(q->vlan); 501 if (vlan) 502 netdev_get_tx_queue(vlan->dev, 0)->tx_dropped++; 503 rcu_read_unlock_bh(); 504 505 return err; 506 } 507 508 static ssize_t macvtap_aio_write(struct kiocb *iocb, const struct iovec *iv, 509 unsigned long count, loff_t pos) 510 { 511 struct file *file = iocb->ki_filp; 512 ssize_t result = -ENOLINK; 513 struct macvtap_queue *q = file->private_data; 514 515 result = macvtap_get_user(q, iv, iov_length(iv, count), 516 file->f_flags & O_NONBLOCK); 517 return result; 518 } 519 520 /* Put packet to the user space buffer */ 521 static ssize_t macvtap_put_user(struct macvtap_queue *q, 522 const struct sk_buff *skb, 523 const struct iovec *iv, int len) 524 { 525 struct macvlan_dev *vlan; 526 int ret; 527 int vnet_hdr_len = 0; 528 529 if (q->flags & IFF_VNET_HDR) { 530 struct virtio_net_hdr vnet_hdr; 531 vnet_hdr_len = sizeof (vnet_hdr); 532 if ((len -= vnet_hdr_len) < 0) 533 return -EINVAL; 534 535 ret = macvtap_skb_to_vnet_hdr(skb, &vnet_hdr); 536 if (ret) 537 return ret; 538 539 if (memcpy_toiovecend(iv, (void *)&vnet_hdr, 0, vnet_hdr_len)) 540 return -EFAULT; 541 } 542 543 len = min_t(int, skb->len, len); 544 545 ret = skb_copy_datagram_const_iovec(skb, 0, iv, vnet_hdr_len, len); 546 547 rcu_read_lock_bh(); 548 vlan = rcu_dereference(q->vlan); 549 if (vlan) 550 macvlan_count_rx(vlan, len, ret == 0, 0); 551 rcu_read_unlock_bh(); 552 553 return ret ? ret : (len + vnet_hdr_len); 554 } 555 556 static ssize_t macvtap_do_read(struct macvtap_queue *q, struct kiocb *iocb, 557 const struct iovec *iv, unsigned long len, 558 int noblock) 559 { 560 DECLARE_WAITQUEUE(wait, current); 561 struct sk_buff *skb; 562 ssize_t ret = 0; 563 564 add_wait_queue(q->sk.sk_sleep, &wait); 565 while (len) { 566 current->state = TASK_INTERRUPTIBLE; 567 568 /* Read frames from the queue */ 569 skb = skb_dequeue(&q->sk.sk_receive_queue); 570 if (!skb) { 571 if (noblock) { 572 ret = -EAGAIN; 573 break; 574 } 575 if (signal_pending(current)) { 576 ret = -ERESTARTSYS; 577 break; 578 } 579 /* Nothing to read, let's sleep */ 580 schedule(); 581 continue; 582 } 583 ret = macvtap_put_user(q, skb, iv, len); 584 kfree_skb(skb); 585 break; 586 } 587 588 current->state = TASK_RUNNING; 589 remove_wait_queue(q->sk.sk_sleep, &wait); 590 return ret; 591 } 592 593 static ssize_t macvtap_aio_read(struct kiocb *iocb, const struct iovec *iv, 594 unsigned long count, loff_t pos) 595 { 596 struct file *file = iocb->ki_filp; 597 struct macvtap_queue *q = file->private_data; 598 ssize_t len, ret = 0; 599 600 len = iov_length(iv, count); 601 if (len < 0) { 602 ret = -EINVAL; 603 goto out; 604 } 605 606 ret = macvtap_do_read(q, iocb, iv, len, file->f_flags & O_NONBLOCK); 607 ret = min_t(ssize_t, ret, len); /* XXX copied from tun.c. Why? */ 608 out: 609 return ret; 610 } 611 612 /* 613 * provide compatibility with generic tun/tap interface 614 */ 615 static long macvtap_ioctl(struct file *file, unsigned int cmd, 616 unsigned long arg) 617 { 618 struct macvtap_queue *q = file->private_data; 619 struct macvlan_dev *vlan; 620 void __user *argp = (void __user *)arg; 621 struct ifreq __user *ifr = argp; 622 unsigned int __user *up = argp; 623 unsigned int u; 624 int ret; 625 626 switch (cmd) { 627 case TUNSETIFF: 628 /* ignore the name, just look at flags */ 629 if (get_user(u, &ifr->ifr_flags)) 630 return -EFAULT; 631 632 ret = 0; 633 if ((u & ~IFF_VNET_HDR) != (IFF_NO_PI | IFF_TAP)) 634 ret = -EINVAL; 635 else 636 q->flags = u; 637 638 return ret; 639 640 case TUNGETIFF: 641 rcu_read_lock_bh(); 642 vlan = rcu_dereference(q->vlan); 643 if (vlan) 644 dev_hold(vlan->dev); 645 rcu_read_unlock_bh(); 646 647 if (!vlan) 648 return -ENOLINK; 649 650 ret = 0; 651 if (copy_to_user(&ifr->ifr_name, q->vlan->dev->name, IFNAMSIZ) || 652 put_user(q->flags, &ifr->ifr_flags)) 653 ret = -EFAULT; 654 dev_put(vlan->dev); 655 return ret; 656 657 case TUNGETFEATURES: 658 if (put_user(IFF_TAP | IFF_NO_PI | IFF_VNET_HDR, up)) 659 return -EFAULT; 660 return 0; 661 662 case TUNSETSNDBUF: 663 if (get_user(u, up)) 664 return -EFAULT; 665 666 q->sk.sk_sndbuf = u; 667 return 0; 668 669 case TUNSETOFFLOAD: 670 /* let the user check for future flags */ 671 if (arg & ~(TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | 672 TUN_F_TSO_ECN | TUN_F_UFO)) 673 return -EINVAL; 674 675 /* TODO: only accept frames with the features that 676 got enabled for forwarded frames */ 677 if (!(q->flags & IFF_VNET_HDR)) 678 return -EINVAL; 679 return 0; 680 681 default: 682 return -EINVAL; 683 } 684 } 685 686 #ifdef CONFIG_COMPAT 687 static long macvtap_compat_ioctl(struct file *file, unsigned int cmd, 688 unsigned long arg) 689 { 690 return macvtap_ioctl(file, cmd, (unsigned long)compat_ptr(arg)); 691 } 692 #endif 693 694 static const struct file_operations macvtap_fops = { 695 .owner = THIS_MODULE, 696 .open = macvtap_open, 697 .release = macvtap_release, 698 .aio_read = macvtap_aio_read, 699 .aio_write = macvtap_aio_write, 700 .poll = macvtap_poll, 701 .llseek = no_llseek, 702 .unlocked_ioctl = macvtap_ioctl, 703 #ifdef CONFIG_COMPAT 704 .compat_ioctl = macvtap_compat_ioctl, 705 #endif 706 }; 707 708 static int macvtap_sendmsg(struct kiocb *iocb, struct socket *sock, 709 struct msghdr *m, size_t total_len) 710 { 711 struct macvtap_queue *q = container_of(sock, struct macvtap_queue, sock); 712 return macvtap_get_user(q, m->msg_iov, total_len, 713 m->msg_flags & MSG_DONTWAIT); 714 } 715 716 static int macvtap_recvmsg(struct kiocb *iocb, struct socket *sock, 717 struct msghdr *m, size_t total_len, 718 int flags) 719 { 720 struct macvtap_queue *q = container_of(sock, struct macvtap_queue, sock); 721 int ret; 722 if (flags & ~(MSG_DONTWAIT|MSG_TRUNC)) 723 return -EINVAL; 724 ret = macvtap_do_read(q, iocb, m->msg_iov, total_len, 725 flags & MSG_DONTWAIT); 726 if (ret > total_len) { 727 m->msg_flags |= MSG_TRUNC; 728 ret = flags & MSG_TRUNC ? ret : total_len; 729 } 730 return ret; 731 } 732 733 /* Ops structure to mimic raw sockets with tun */ 734 static const struct proto_ops macvtap_socket_ops = { 735 .sendmsg = macvtap_sendmsg, 736 .recvmsg = macvtap_recvmsg, 737 }; 738 739 /* Get an underlying socket object from tun file. Returns error unless file is 740 * attached to a device. The returned object works like a packet socket, it 741 * can be used for sock_sendmsg/sock_recvmsg. The caller is responsible for 742 * holding a reference to the file for as long as the socket is in use. */ 743 struct socket *macvtap_get_socket(struct file *file) 744 { 745 struct macvtap_queue *q; 746 if (file->f_op != &macvtap_fops) 747 return ERR_PTR(-EINVAL); 748 q = file->private_data; 749 if (!q) 750 return ERR_PTR(-EBADFD); 751 return &q->sock; 752 } 753 EXPORT_SYMBOL_GPL(macvtap_get_socket); 754 755 static int macvtap_init(void) 756 { 757 int err; 758 759 err = alloc_chrdev_region(&macvtap_major, 0, 760 MACVTAP_NUM_DEVS, "macvtap"); 761 if (err) 762 goto out1; 763 764 cdev_init(&macvtap_cdev, &macvtap_fops); 765 err = cdev_add(&macvtap_cdev, macvtap_major, MACVTAP_NUM_DEVS); 766 if (err) 767 goto out2; 768 769 macvtap_class = class_create(THIS_MODULE, "macvtap"); 770 if (IS_ERR(macvtap_class)) { 771 err = PTR_ERR(macvtap_class); 772 goto out3; 773 } 774 775 err = macvlan_link_register(&macvtap_link_ops); 776 if (err) 777 goto out4; 778 779 return 0; 780 781 out4: 782 class_unregister(macvtap_class); 783 out3: 784 cdev_del(&macvtap_cdev); 785 out2: 786 unregister_chrdev_region(macvtap_major, MACVTAP_NUM_DEVS); 787 out1: 788 return err; 789 } 790 module_init(macvtap_init); 791 792 static void macvtap_exit(void) 793 { 794 rtnl_link_unregister(&macvtap_link_ops); 795 class_unregister(macvtap_class); 796 cdev_del(&macvtap_cdev); 797 unregister_chrdev_region(macvtap_major, MACVTAP_NUM_DEVS); 798 } 799 module_exit(macvtap_exit); 800 801 MODULE_ALIAS_RTNL_LINK("macvtap"); 802 MODULE_AUTHOR("Arnd Bergmann <arnd@arndb.de>"); 803 MODULE_LICENSE("GPL"); 804