/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright 2015 Joyent, Inc. All rights reserved.
 */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/stropts.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/socket_proto.h>
#include <sys/sockio.h>
#include <sys/strsun.h>
#include <sys/kstat.h>
#include <sys/modctl.h>
#include <sys/policy.h>
#include <sys/priv_const.h>
#include <sys/tihdr.h>
#include <sys/zone.h>
#include <sys/time.h>
#include <sys/ethernet.h>
#include <sys/llc1.h>
#include <fs/sockfs/sockcommon.h>
#include <net/if.h>
#include <inet/ip_arp.h>

#include <sys/dls.h>
#include <sys/mac.h>
#include <sys/mac_client.h>
#include <sys/mac_provider.h>
#include <sys/mac_client_priv.h>

#include <netpacket/packet.h>

/*
 * Internal helpers.  Those not defined near the top of the file are
 * defined further down, after the sockfs downcall implementations.
 */
static void pfp_close(mac_handle_t, mac_client_handle_t);
static int pfp_dl_to_arphrd(int);
static int pfp_getpacket_sockopt(sock_lower_handle_t, int, void *,
    socklen_t *);
static int pfp_ifreq_getlinkid(intptr_t, struct ifreq *, datalink_id_t *, int);
static int pfp_lifreq_getlinkid(intptr_t, struct lifreq *, datalink_id_t *,
    int);
static int pfp_open_index(int, mac_handle_t *, mac_client_handle_t *,
    cred_t *);
static void pfp_packet(void *, mac_resource_handle_t, mblk_t *, boolean_t);
static void pfp_release_bpf(struct pfpsock *);
static int pfp_set_promisc(struct pfpsock *, mac_client_promisc_type_t);
static int pfp_setsocket_sockopt(sock_lower_handle_t, int, const void *,
    socklen_t);
static int pfp_setpacket_sockopt(sock_lower_handle_t, int, const void *,
    socklen_t);

/*
 * PFP sockfs operations
 * Most are currently no-ops because they have no meaning for a connectionless
 * socket.
 */
static void sdpfp_activate(sock_lower_handle_t, sock_upper_handle_t,
    sock_upcalls_t *, int, struct cred *);
static int sdpfp_bind(sock_lower_handle_t, struct sockaddr *, socklen_t,
    struct cred *);
static int sdpfp_close(sock_lower_handle_t, int, struct cred *);
static void sdpfp_clr_flowctrl(sock_lower_handle_t);
static int sdpfp_getsockopt(sock_lower_handle_t, int, int, void *,
    socklen_t *, struct cred *);
static int sdpfp_ioctl(sock_lower_handle_t, int, intptr_t, int, int32_t *,
    struct cred *);
static int sdpfp_senduio(sock_lower_handle_t, struct uio *, struct nmsghdr *,
    struct cred *);
static int sdpfp_setsockopt(sock_lower_handle_t, int, int, const void *,
    socklen_t, struct cred *);

static sock_lower_handle_t sockpfp_create(int, int, int, sock_downcalls_t **,
    uint_t *, int *, int, cred_t *);

static int sockpfp_init(void);
static void sockpfp_fini(void);

/*
 * Global kstat state.  pfp_kstats below is only a template holding the
 * counter names and types; sockpfp_init() copies it into ks_stats, which is
 * the live set of counters that the rest of the module increments and that
 * is published through pfp_ksp.
 */
static kstat_t *pfp_ksp;
static pfp_kstats_t ks_stats;
static pfp_kstats_t pfp_kstats = {
	/*
	 * Each one of these kstats is a different return path in handling
	 * a packet received from the mac layer.
	 */
	{ "recvMacHeaderFail",	KSTAT_DATA_UINT64 },
	{ "recvBadProtocol",	KSTAT_DATA_UINT64 },
	{ "recvAllocbFail",	KSTAT_DATA_UINT64 },
	{ "recvOk",		KSTAT_DATA_UINT64 },
	{ "recvFail",		KSTAT_DATA_UINT64 },
	{ "recvFiltered",	KSTAT_DATA_UINT64 },
	{ "recvFlowControl",	KSTAT_DATA_UINT64 },
	/*
	 * A global set of counters is maintained to track the behaviour
	 * of the system (kernel & applications) in sending packets.
	 */
	{ "sendUnbound",	KSTAT_DATA_UINT64 },
	{ "sendFailed",		KSTAT_DATA_UINT64 },
	{ "sendTooBig",		KSTAT_DATA_UINT64 },
	{ "sendAllocFail",	KSTAT_DATA_UINT64 },
	{ "sendUiomoveFail",	KSTAT_DATA_UINT64 },
	{ "sendNoMemory",	KSTAT_DATA_UINT64 },
	{ "sendOpenFail",	KSTAT_DATA_UINT64 },
	{ "sendWrongFamily",	KSTAT_DATA_UINT64 },
	{ "sendShortMsg",	KSTAT_DATA_UINT64 },
	{ "sendOk",		KSTAT_DATA_UINT64 }
};

/*
 * Downcall vector handed to sockfs by sockpfp_create().  The initialiser is
 * positional, so the order of the entries must match the member order of
 * sock_downcalls_t (declared outside this file).
 */
sock_downcalls_t pfp_downcalls = {
	sdpfp_activate,
	sock_accept_notsupp,
	sdpfp_bind,
	sock_listen_notsupp,
	sock_connect_notsupp,
	sock_getpeername_notsupp,
	sock_getsockname_notsupp,
	sdpfp_getsockopt,
	sdpfp_setsockopt,
	sock_send_notsupp,
	sdpfp_senduio,
	NULL,
	sock_poll_notsupp,
	sock_shutdown_notsupp,
	sdpfp_clr_flowctrl,
	sdpfp_ioctl,
	sdpfp_close,
};

/*
 * Socket module registration record; referenced by modlsockmod below.
 */
static smod_reg_t sinfo = {
	SOCKMOD_VERSION,
	"sockpfp",
	SOCK_UC_VERSION,
	SOCK_DC_VERSION,
	sockpfp_create,
	NULL
};

/*
 * Protocol translation table used by sockpfp_create().  Column 0 is the
 * protocol number passed to socket(); column 1 is the value stored in
 * ps_proto for it.
 */
static int accepted_protos[3][2] = {
	{ ETH_P_ALL,	0 },
	{ ETH_P_802_2,	LLC_SNAP_SAP },
	{ ETH_P_803_3,	0 },
};

/*
 * This sets an upper bound on the size of the receive buffer for a PF_PACKET
 * socket. More properly, this should be controlled through ipadm, ala TCP, UDP,
 * SCTP, etc. Until that's done, this provides a hard cap of 4 MB and allows an
 * opportunity for it to be changed, should it be needed.
170 */ 171 int sockmod_pfp_rcvbuf_max = 1024 * 1024 * 4; 172 173 /* 174 * Module linkage information for the kernel. 175 */ 176 static struct modlsockmod modlsockmod = { 177 &mod_sockmodops, "PF Packet socket module", &sinfo 178 }; 179 180 static struct modlinkage modlinkage = { 181 MODREV_1, 182 &modlsockmod, 183 NULL 184 }; 185 186 int 187 _init(void) 188 { 189 int error; 190 191 error = sockpfp_init(); 192 if (error != 0) 193 return (error); 194 195 error = mod_install(&modlinkage); 196 if (error != 0) 197 sockpfp_fini(); 198 199 return (error); 200 } 201 202 int 203 _fini(void) 204 { 205 int error; 206 207 error = mod_remove(&modlinkage); 208 if (error == 0) 209 sockpfp_fini(); 210 211 return (error); 212 } 213 214 int 215 _info(struct modinfo *modinfop) 216 { 217 return (mod_info(&modlinkage, modinfop)); 218 } 219 220 /* 221 * sockpfp_init: called as part of the initialisation of the module when 222 * loaded into the kernel. 223 * 224 * Being able to create and record the kstats data in the kernel is not 225 * considered to be vital to the operation of this kernel module, thus 226 * its failure is tolerated. 227 */ 228 static int 229 sockpfp_init(void) 230 { 231 (void) memset(&ks_stats, 0, sizeof (ks_stats)); 232 233 (void) memcpy(&ks_stats, &pfp_kstats, sizeof (pfp_kstats)); 234 235 pfp_ksp = kstat_create("pfpacket", 0, "global", "misc", 236 KSTAT_TYPE_NAMED, sizeof (pfp_kstats) / sizeof (kstat_named_t), 237 KSTAT_FLAG_VIRTUAL); 238 if (pfp_ksp != NULL) { 239 pfp_ksp->ks_data = &ks_stats; 240 kstat_install(pfp_ksp); 241 } 242 243 return (0); 244 } 245 246 /* 247 * sockpfp_fini: called when the operating system wants to unload the 248 * socket module from the kernel. 
249 */ 250 static void 251 sockpfp_fini(void) 252 { 253 if (pfp_ksp != NULL) 254 kstat_delete(pfp_ksp); 255 } 256 257 /* 258 * Due to sockets being created read-write by default, all PF_PACKET sockets 259 * therefore require the NET_RAWACCESS priviliege, even if the socket is only 260 * being used for reading packets from. 261 * 262 * This create function enforces this module only being used with PF_PACKET 263 * sockets and the policy that we support via the config file in sock2path.d: 264 * PF_PACKET sockets must be either SOCK_DGRAM or SOCK_RAW. 265 */ 266 /* ARGSUSED */ 267 static sock_lower_handle_t 268 sockpfp_create(int family, int type, int proto, 269 sock_downcalls_t **sock_downcalls, uint_t *smodep, int *errorp, 270 int sflags, cred_t *cred) 271 { 272 struct pfpsock *ps; 273 int kmflags; 274 int newproto; 275 int i; 276 277 if (secpolicy_net_rawaccess(cred) != 0) { 278 *errorp = EACCES; 279 return (NULL); 280 } 281 282 if (family != AF_PACKET) { 283 *errorp = EAFNOSUPPORT; 284 return (NULL); 285 } 286 287 if ((type != SOCK_RAW) && (type != SOCK_DGRAM)) { 288 *errorp = ESOCKTNOSUPPORT; 289 return (NULL); 290 } 291 292 /* 293 * First check to see if the protocol number passed in via the socket 294 * creation should be mapped to a different number for internal use. 295 */ 296 for (i = 0, newproto = -1; 297 i < sizeof (accepted_protos)/ sizeof (accepted_protos[0]); i++) { 298 if (accepted_protos[i][0] == proto) { 299 newproto = accepted_protos[i][1]; 300 break; 301 } 302 } 303 304 /* 305 * If the mapping of the protocol that was under 0x800 failed to find 306 * a local equivalent then fail the socket creation. If the protocol 307 * for the socket is over 0x800 and it was not found in the mapping 308 * table above, then use the value as is. 309 */ 310 if (newproto == -1) { 311 if (proto < 0x800) { 312 *errorp = ENOPROTOOPT; 313 return (NULL); 314 } 315 newproto = proto; 316 } 317 proto = newproto; 318 319 kmflags = (sflags & SOCKET_NOSLEEP) ? 
KM_NOSLEEP : KM_SLEEP; 320 ps = kmem_zalloc(sizeof (*ps), kmflags); 321 if (ps == NULL) { 322 *errorp = ENOMEM; 323 return (NULL); 324 } 325 326 ps->ps_type = type; 327 ps->ps_proto = proto; 328 rw_init(&ps->ps_bpflock, NULL, RW_DRIVER, NULL); 329 mutex_init(&ps->ps_lock, NULL, MUTEX_DRIVER, NULL); 330 331 *sock_downcalls = &pfp_downcalls; 332 /* 333 * Setting this causes bytes from a packet that do not fit into the 334 * destination user buffer to be discarded. Thus the API is one 335 * packet per receive and callers are required to use a buffer large 336 * enough for the biggest packet that the interface can provide. 337 */ 338 *smodep = SM_ATOMIC; 339 340 return ((sock_lower_handle_t)ps); 341 } 342 343 /* ************************************************************************* */ 344 345 /* 346 * pfp_packet is the callback function that is given to the mac layer for 347 * PF_PACKET to receive packets with. One packet at a time is passed into 348 * this function from the mac layer. Each packet is a private copy given 349 * to PF_PACKET to modify or free as it wishes and does not harm the original 350 * packet from which it was cloned. 351 */ 352 /* ARGSUSED */ 353 static void 354 pfp_packet(void *arg, mac_resource_handle_t mrh, mblk_t *mp, boolean_t flag) 355 { 356 struct T_unitdata_ind *tunit; 357 struct sockaddr_ll *sll; 358 struct sockaddr_ll *sol; 359 mac_header_info_t hdr; 360 struct pfpsock *ps; 361 size_t tusz; 362 mblk_t *mp0; 363 int error; 364 365 if (mp == NULL) 366 return; 367 368 ps = arg; 369 if (ps->ps_flow_ctrld) { 370 ps->ps_flow_ctrl_drops++; 371 ps->ps_stats.tp_drops++; 372 ks_stats.kp_recv_flow_cntrld.value.ui64++; 373 freemsg(mp); 374 return; 375 } 376 377 if (mac_header_info(ps->ps_mh, mp, &hdr) != 0) { 378 /* 379 * Can't decode the packet header information so drop it. 
380 */ 381 ps->ps_stats.tp_drops++; 382 ks_stats.kp_recv_mac_hdr_fail.value.ui64++; 383 freemsg(mp); 384 return; 385 } 386 387 if (mac_type(ps->ps_mh) == DL_ETHER && 388 hdr.mhi_bindsap == ETHERTYPE_VLAN) { 389 struct ether_vlan_header *evhp; 390 struct ether_vlan_header evh; 391 392 hdr.mhi_hdrsize = sizeof (struct ether_vlan_header); 393 hdr.mhi_istagged = B_TRUE; 394 395 if (MBLKL(mp) >= sizeof (*evhp)) { 396 evhp = (struct ether_vlan_header *)mp->b_rptr; 397 } else { 398 int sz = sizeof (*evhp); 399 char *s = (char *)&evh; 400 mblk_t *tmp; 401 int len; 402 403 for (tmp = mp; sz > 0 && tmp != NULL; 404 tmp = tmp->b_cont) { 405 len = min(sz, MBLKL(tmp)); 406 bcopy(tmp->b_rptr, s, len); 407 sz -= len; 408 } 409 evhp = &evh; 410 } 411 hdr.mhi_tci = ntohs(evhp->ether_tci); 412 hdr.mhi_bindsap = ntohs(evhp->ether_type); 413 } 414 415 if ((ps->ps_proto != 0) && (ps->ps_proto != hdr.mhi_bindsap)) { 416 /* 417 * The packet is not of interest to this socket so 418 * drop it on the floor. Here the SAP is being used 419 * as a very course filter. 420 */ 421 ps->ps_stats.tp_drops++; 422 ks_stats.kp_recv_bad_proto.value.ui64++; 423 freemsg(mp); 424 return; 425 } 426 427 /* 428 * This field is not often set, even for ethernet, 429 * by mac_header_info, so compute it if it is 0. 430 */ 431 if (hdr.mhi_pktsize == 0) 432 hdr.mhi_pktsize = msgdsize(mp); 433 434 /* 435 * If a BPF filter is present, pass the raw packet into that. 436 * A failed match will result in zero being returned, indicating 437 * that this socket is not interested in the packet. 
438 */ 439 if (ps->ps_bpf.bf_len != 0) { 440 uchar_t *buffer; 441 int buflen; 442 443 buflen = MBLKL(mp); 444 if (hdr.mhi_pktsize == buflen) { 445 buffer = mp->b_rptr; 446 } else { 447 buflen = 0; 448 buffer = (uchar_t *)mp; 449 } 450 rw_enter(&ps->ps_bpflock, RW_READER); 451 if (bpf_filter(ps->ps_bpf.bf_insns, buffer, 452 hdr.mhi_pktsize, buflen) == 0) { 453 rw_exit(&ps->ps_bpflock); 454 ps->ps_stats.tp_drops++; 455 ks_stats.kp_recv_filtered.value.ui64++; 456 freemsg(mp); 457 return; 458 } 459 rw_exit(&ps->ps_bpflock); 460 } 461 462 if (ps->ps_type == SOCK_DGRAM) { 463 /* 464 * SOCK_DGRAM socket expect a "layer 3" packet, so advance 465 * past the link layer header. 466 */ 467 mp->b_rptr += hdr.mhi_hdrsize; 468 hdr.mhi_pktsize -= hdr.mhi_hdrsize; 469 } 470 471 tusz = sizeof (struct T_unitdata_ind) + sizeof (struct sockaddr_ll); 472 if (ps->ps_auxdata) { 473 tusz += _TPI_ALIGN_TOPT(sizeof (struct tpacket_auxdata)); 474 tusz += _TPI_ALIGN_TOPT(sizeof (struct T_opthdr)); 475 } 476 477 /* 478 * It is tempting to think that this could be optimised by having 479 * the base mblk_t allocated and hung off the pfpsock structure, 480 * except that then another one would need to be allocated for the 481 * sockaddr_ll that is included. Even creating a template to copy 482 * from is of questionable value, as read-write from one structure 483 * to the other is going to be slower than all of the initialisation. 
484 */ 485 mp0 = allocb(tusz, BPRI_HI); 486 if (mp0 == NULL) { 487 ps->ps_stats.tp_drops++; 488 ks_stats.kp_recv_alloc_fail.value.ui64++; 489 freemsg(mp); 490 return; 491 } 492 493 (void) memset(mp0->b_rptr, 0, tusz); 494 495 mp0->b_datap->db_type = M_PROTO; 496 mp0->b_wptr = mp0->b_rptr + tusz; 497 498 tunit = (struct T_unitdata_ind *)mp0->b_rptr; 499 tunit->PRIM_type = T_UNITDATA_IND; 500 tunit->SRC_length = sizeof (struct sockaddr); 501 tunit->SRC_offset = sizeof (*tunit); 502 503 sol = (struct sockaddr_ll *)&ps->ps_sock; 504 sll = (struct sockaddr_ll *)(mp0->b_rptr + sizeof (*tunit)); 505 sll->sll_ifindex = sol->sll_ifindex; 506 sll->sll_hatype = (uint16_t)hdr.mhi_origsap; 507 sll->sll_halen = sol->sll_halen; 508 if (hdr.mhi_saddr != NULL) 509 (void) memcpy(sll->sll_addr, hdr.mhi_saddr, sll->sll_halen); 510 511 switch (hdr.mhi_dsttype) { 512 case MAC_ADDRTYPE_MULTICAST : 513 sll->sll_pkttype = PACKET_MULTICAST; 514 break; 515 case MAC_ADDRTYPE_BROADCAST : 516 sll->sll_pkttype = PACKET_BROADCAST; 517 break; 518 case MAC_ADDRTYPE_UNICAST : 519 if (memcmp(sol->sll_addr, hdr.mhi_daddr, sol->sll_halen) == 0) 520 sll->sll_pkttype = PACKET_HOST; 521 else 522 sll->sll_pkttype = PACKET_OTHERHOST; 523 break; 524 } 525 526 if (ps->ps_auxdata) { 527 struct tpacket_auxdata *aux; 528 struct T_opthdr *topt; 529 530 tunit->OPT_offset = _TPI_ALIGN_TOPT(tunit->SRC_offset + 531 sizeof (struct sockaddr_ll)); 532 tunit->OPT_length = _TPI_ALIGN_TOPT(sizeof (struct T_opthdr)) + 533 _TPI_ALIGN_TOPT(sizeof (struct tpacket_auxdata)); 534 535 topt = (struct T_opthdr *)(mp0->b_rptr + tunit->OPT_offset); 536 aux = (struct tpacket_auxdata *) 537 ((char *)topt + _TPI_ALIGN_TOPT(sizeof (*topt))); 538 539 topt->len = tunit->OPT_length; 540 topt->level = SOL_PACKET; 541 topt->name = PACKET_AUXDATA; 542 topt->status = 0; 543 /* 544 * libpcap doesn't seem to use any other field, 545 * so it isn't clear how they should be filled in. 
546 */ 547 aux->tp_vlan_vci = hdr.mhi_tci; 548 } 549 550 linkb(mp0, mp); 551 552 (void) gethrestime(&ps->ps_timestamp); 553 554 ps->ps_upcalls->su_recv(ps->ps_upper, mp0, hdr.mhi_pktsize, 0, 555 &error, NULL); 556 557 if (error == 0) { 558 ps->ps_stats.tp_packets++; 559 ks_stats.kp_recv_ok.value.ui64++; 560 } else { 561 mutex_enter(&ps->ps_lock); 562 if (error == ENOSPC) { 563 ps->ps_upcalls->su_recv(ps->ps_upper, NULL, 0, 0, 564 &error, NULL); 565 if (error == ENOSPC) 566 ps->ps_flow_ctrld = B_TRUE; 567 } 568 mutex_exit(&ps->ps_lock); 569 ps->ps_stats.tp_drops++; 570 ks_stats.kp_recv_fail.value.ui64++; 571 } 572 } 573 574 /* 575 * Bind a PF_PACKET socket to a network interface. 576 * 577 * The default operation of this bind() is to place the socket (and thus the 578 * network interface) into promiscuous mode. It is then up to the application 579 * to turn that down by issuing the relevant ioctls, if desired. 580 */ 581 /* ARGSUSED */ 582 static int 583 sdpfp_bind(sock_lower_handle_t handle, struct sockaddr *addr, 584 socklen_t addrlen, struct cred *cred) 585 { 586 struct sockaddr_ll *addr_ll, *sol; 587 mac_client_handle_t mch; 588 struct pfpsock *ps; 589 mac_handle_t mh; 590 int error; 591 592 ps = (struct pfpsock *)handle; 593 if (ps->ps_bound) 594 return (EINVAL); 595 596 addr_ll = (struct sockaddr_ll *)addr; 597 598 error = pfp_open_index(addr_ll->sll_ifindex, &mh, &mch, cred); 599 if (error != 0) 600 return (error); 601 /* 602 * Ensure that each socket is only bound once. 603 */ 604 mutex_enter(&ps->ps_lock); 605 if (ps->ps_mh != 0) { 606 mutex_exit(&ps->ps_lock); 607 pfp_close(mh, mch); 608 return (EADDRINUSE); 609 } 610 ps->ps_mh = mh; 611 ps->ps_mch = mch; 612 mutex_exit(&ps->ps_lock); 613 614 /* 615 * Cache all of the information from bind so that it's in an easy 616 * place to get at when packets are received. 
617 */ 618 sol = (struct sockaddr_ll *)&ps->ps_sock; 619 sol->sll_family = AF_PACKET; 620 sol->sll_ifindex = addr_ll->sll_ifindex; 621 sol->sll_protocol = addr_ll->sll_protocol; 622 sol->sll_halen = mac_addr_len(ps->ps_mh); 623 mac_unicast_primary_get(ps->ps_mh, sol->sll_addr); 624 mac_sdu_get(ps->ps_mh, NULL, &ps->ps_max_sdu); 625 ps->ps_linkid = addr_ll->sll_ifindex; 626 627 error = mac_promisc_add(ps->ps_mch, MAC_CLIENT_PROMISC_ALL, 628 pfp_packet, ps, &ps->ps_phd, MAC_PROMISC_FLAGS_VLAN_TAG_STRIP); 629 if (error == 0) { 630 ps->ps_promisc = MAC_CLIENT_PROMISC_ALL; 631 ps->ps_bound = B_TRUE; 632 } 633 634 return (error); 635 } 636 637 /* ARGSUSED */ 638 static void 639 sdpfp_activate(sock_lower_handle_t lower, sock_upper_handle_t upper, 640 sock_upcalls_t *upcalls, int flags, cred_t *cred) 641 { 642 struct pfpsock *ps; 643 644 ps = (struct pfpsock *)lower; 645 ps->ps_upper = upper; 646 ps->ps_upcalls = upcalls; 647 } 648 649 /* 650 * This module only implements getting socket options for the new socket 651 * option level (SOL_PACKET) that it introduces. All other requests are 652 * passed back to the sockfs layer. 653 */ 654 /* ARGSUSED */ 655 static int 656 sdpfp_getsockopt(sock_lower_handle_t handle, int level, int option_name, 657 void *optval, socklen_t *optlenp, struct cred *cred) 658 { 659 struct pfpsock *ps; 660 int error = 0; 661 662 ps = (struct pfpsock *)handle; 663 664 switch (level) { 665 case SOL_PACKET : 666 error = pfp_getpacket_sockopt(handle, option_name, optval, 667 optlenp); 668 break; 669 670 case SOL_SOCKET : 671 if (option_name == SO_RCVBUF) { 672 if (*optlenp < sizeof (int32_t)) 673 return (EINVAL); 674 *((int32_t *)optval) = ps->ps_rcvbuf; 675 *optlenp = sizeof (int32_t); 676 } else { 677 error = ENOPROTOOPT; 678 } 679 break; 680 681 default : 682 /* 683 * If sockfs code receives this error in return from the 684 * getsockopt downcall it handles the option locally, if 685 * it can. 
686 */ 687 error = ENOPROTOOPT; 688 break; 689 } 690 691 return (error); 692 } 693 694 /* 695 * PF_PACKET supports setting socket options at only two levels: 696 * SOL_SOCKET and SOL_PACKET. 697 */ 698 /* ARGSUSED */ 699 static int 700 sdpfp_setsockopt(sock_lower_handle_t handle, int level, int option_name, 701 const void *optval, socklen_t optlen, struct cred *cred) 702 { 703 int error = 0; 704 705 switch (level) { 706 case SOL_SOCKET : 707 error = pfp_setsocket_sockopt(handle, option_name, optval, 708 optlen); 709 break; 710 case SOL_PACKET : 711 error = pfp_setpacket_sockopt(handle, option_name, optval, 712 optlen); 713 break; 714 default : 715 error = EINVAL; 716 break; 717 } 718 719 return (error); 720 } 721 722 /* 723 * This function is incredibly inefficient for sending any packet that 724 * comes with a msghdr asking to be sent to an interface to which the 725 * socket has not been bound. Some possibilities here are keeping a 726 * cache of all open mac's and mac_client's, for the purpose of sending, 727 * and closing them after some amount of inactivity. Clearly, applications 728 * should not be written to use one socket for multiple interfaces if 729 * performance is desired with the code as is. 
730 */ 731 /* ARGSUSED */ 732 static int 733 sdpfp_senduio(sock_lower_handle_t handle, struct uio *uiop, 734 struct nmsghdr *msg, struct cred *cred) 735 { 736 struct sockaddr_ll *sol; 737 mac_client_handle_t mch; 738 struct pfpsock *ps; 739 boolean_t new_open; 740 mac_handle_t mh; 741 size_t mpsize; 742 uint_t maxsdu; 743 mblk_t *mp0; 744 mblk_t *mp; 745 int error; 746 747 mp = NULL; 748 mp0 = NULL; 749 new_open = B_FALSE; 750 ps = (struct pfpsock *)handle; 751 mh = ps->ps_mh; 752 mch = ps->ps_mch; 753 maxsdu = ps->ps_max_sdu; 754 755 sol = (struct sockaddr_ll *)msg->msg_name; 756 if (sol == NULL) { 757 /* 758 * If no sockaddr_ll has been provided with the send call, 759 * use the one constructed when the socket was bound to an 760 * interface and fail if it hasn't been bound. 761 */ 762 if (!ps->ps_bound) { 763 ks_stats.kp_send_unbound.value.ui64++; 764 return (EPROTO); 765 } 766 sol = (struct sockaddr_ll *)&ps->ps_sock; 767 } else { 768 /* 769 * Verify the sockaddr_ll message passed down before using 770 * it to send a packet out with. If it refers to an interface 771 * that has not been bound, it is necessary to open it. 
772 */ 773 struct sockaddr_ll *sll; 774 775 if (msg->msg_namelen < sizeof (struct sockaddr_ll)) { 776 ks_stats.kp_send_short_msg.value.ui64++; 777 return (EINVAL); 778 } 779 780 if (sol->sll_family != AF_PACKET) { 781 ks_stats.kp_send_wrong_family.value.ui64++; 782 return (EAFNOSUPPORT); 783 } 784 785 sll = (struct sockaddr_ll *)&ps->ps_sock; 786 if (sol->sll_ifindex != sll->sll_ifindex) { 787 error = pfp_open_index(sol->sll_ifindex, &mh, &mch, 788 cred); 789 if (error != 0) { 790 ks_stats.kp_send_open_fail.value.ui64++; 791 return (error); 792 } 793 mac_sdu_get(mh, NULL, &maxsdu); 794 new_open = B_TRUE; 795 } 796 } 797 798 mpsize = uiop->uio_resid; 799 if (mpsize > maxsdu) { 800 ks_stats.kp_send_too_big.value.ui64++; 801 error = EMSGSIZE; 802 goto done; 803 } 804 805 if ((mp = allocb(mpsize, BPRI_HI)) == NULL) { 806 ks_stats.kp_send_alloc_fail.value.ui64++; 807 error = ENOBUFS; 808 goto done; 809 } 810 811 mp->b_wptr = mp->b_rptr + mpsize; 812 error = uiomove(mp->b_rptr, mpsize, UIO_WRITE, uiop); 813 if (error != 0) { 814 ks_stats.kp_send_uiomove_fail.value.ui64++; 815 goto done; 816 } 817 818 if (ps->ps_type == SOCK_DGRAM) { 819 mp0 = mac_header(mh, sol->sll_addr, sol->sll_protocol, mp, 0); 820 if (mp0 == NULL) { 821 ks_stats.kp_send_no_memory.value.ui64++; 822 error = ENOBUFS; 823 goto done; 824 } 825 linkb(mp0, mp); 826 mp = mp0; 827 } 828 829 /* 830 * As this is sending datagrams and no promise is made about 831 * how or if a packet will be sent/delivered, no effort is to 832 * be expended in recovering from a situation where the packet 833 * cannot be sent - it is just dropped. 
834 */ 835 error = mac_tx(mch, mp, 0, MAC_DROP_ON_NO_DESC, NULL); 836 if (error == 0) { 837 mp = NULL; 838 ks_stats.kp_send_ok.value.ui64++; 839 } else { 840 ks_stats.kp_send_failed.value.ui64++; 841 } 842 843 done: 844 845 if (new_open) { 846 ASSERT(mch != ps->ps_mch); 847 ASSERT(mh != ps->ps_mh); 848 pfp_close(mh, mch); 849 } 850 if (mp != NULL) 851 freemsg(mp); 852 853 return (error); 854 855 } 856 857 /* 858 * There's no use of a lock here, or at the bottom of pfp_packet() where 859 * ps_flow_ctrld is set to true, because in a situation where these two 860 * are racing to set the flag one way or the other, the end result is 861 * going to be ultimately determined by the scheduler anyway - which of 862 * the two threads gets the lock first? In such an operational environment, 863 * we've got packets arriving too fast to be delt with so packets are going 864 * to be dropped. Grabbing a lock just makes the drop more expensive. 865 */ 866 static void 867 sdpfp_clr_flowctrl(sock_lower_handle_t handle) 868 { 869 struct pfpsock *ps; 870 871 ps = (struct pfpsock *)handle; 872 873 mutex_enter(&ps->ps_lock); 874 ps->ps_flow_ctrld = B_FALSE; 875 mutex_exit(&ps->ps_lock); 876 } 877 878 /* 879 * The implementation of this ioctl() handler is intended to function 880 * in the absence of a bind() being made before it is called. Thus the 881 * function calls mac_open() itself to provide a handle 882 * This function is structured like this: 883 * - determine the linkid for the interface being targetted 884 * - open the interface with said linkid 885 * - perform ioctl 886 * - copy results back to caller 887 * 888 * The ioctls that interact with interface flags have been implented below 889 * to assume that the interface is always up and running (IFF_RUNNING) and 890 * to use the state of this socket to determine whether or not the network 891 * interface is in promiscuous mode. 
Thus an ioctl to get the interface flags 892 * of an interface that has been put in promiscuous mode by another socket 893 * (in the same program or different), will not report that status. 894 */ 895 /* ARGSUSED */ 896 static int 897 sdpfp_ioctl(sock_lower_handle_t handle, int cmd, intptr_t arg, int mod, 898 int32_t *rval, struct cred *cr) 899 { 900 struct timeval tival; 901 mac_client_promisc_type_t mtype; 902 struct sockaddr_dl *sock; 903 datalink_id_t linkid; 904 struct lifreq lifreq; 905 struct ifreq ifreq; 906 struct pfpsock *ps; 907 mac_handle_t mh; 908 int error; 909 910 ps = (struct pfpsock *)handle; 911 912 switch (cmd) { 913 /* 914 * ioctls that work on "struct lifreq" 915 */ 916 case SIOCSLIFFLAGS : 917 case SIOCGLIFINDEX : 918 case SIOCGLIFFLAGS : 919 case SIOCGLIFMTU : 920 case SIOCGLIFHWADDR : 921 error = pfp_lifreq_getlinkid(arg, &lifreq, &linkid, mod); 922 if (error != 0) 923 return (error); 924 break; 925 926 /* 927 * ioctls that work on "struct ifreq". 928 * Not all of these have a "struct lifreq" partner, for example 929 * SIOCGIFHWADDR, for the simple reason that the logical interface 930 * does not have a hardware address. 
931 */ 932 case SIOCSIFFLAGS : 933 case SIOCGIFINDEX : 934 case SIOCGIFFLAGS : 935 case SIOCGIFMTU : 936 case SIOCGIFHWADDR : 937 error = pfp_ifreq_getlinkid(arg, &ifreq, &linkid, mod); 938 if (error != 0) 939 return (error); 940 break; 941 942 case SIOCGSTAMP : 943 tival.tv_sec = (time_t)ps->ps_timestamp.tv_sec; 944 tival.tv_usec = ps->ps_timestamp.tv_nsec / 1000; 945 if (get_udatamodel() == DATAMODEL_NATIVE) { 946 error = ddi_copyout(&tival, (void *)arg, 947 sizeof (tival), mod); 948 } 949 #ifdef _SYSCALL32_IMPL 950 else { 951 struct timeval32 tv32; 952 TIMEVAL_TO_TIMEVAL32(&tv32, &tival); 953 error = ddi_copyout(&tv32, (void *)arg, 954 sizeof (tv32), mod); 955 } 956 #endif 957 return (error); 958 } 959 960 error = mac_open_by_linkid(linkid, &mh); 961 if (error != 0) 962 return (error); 963 964 switch (cmd) { 965 case SIOCGLIFINDEX : 966 lifreq.lifr_index = linkid; 967 break; 968 969 case SIOCGIFINDEX : 970 ifreq.ifr_index = linkid; 971 break; 972 973 case SIOCGIFFLAGS : 974 ifreq.ifr_flags = IFF_RUNNING; 975 if (ps->ps_promisc == MAC_CLIENT_PROMISC_ALL) 976 ifreq.ifr_flags |= IFF_PROMISC; 977 break; 978 979 case SIOCGLIFFLAGS : 980 lifreq.lifr_flags = IFF_RUNNING; 981 if (ps->ps_promisc == MAC_CLIENT_PROMISC_ALL) 982 lifreq.lifr_flags |= IFF_PROMISC; 983 break; 984 985 case SIOCSIFFLAGS : 986 if (linkid != ps->ps_linkid) { 987 error = EINVAL; 988 } else { 989 if ((ifreq.ifr_flags & IFF_PROMISC) != 0) 990 mtype = MAC_CLIENT_PROMISC_ALL; 991 else 992 mtype = MAC_CLIENT_PROMISC_FILTERED; 993 error = pfp_set_promisc(ps, mtype); 994 } 995 break; 996 997 case SIOCSLIFFLAGS : 998 if (linkid != ps->ps_linkid) { 999 error = EINVAL; 1000 } else { 1001 if ((lifreq.lifr_flags & IFF_PROMISC) != 0) 1002 mtype = MAC_CLIENT_PROMISC_ALL; 1003 else 1004 mtype = MAC_CLIENT_PROMISC_FILTERED; 1005 error = pfp_set_promisc(ps, mtype); 1006 } 1007 break; 1008 1009 case SIOCGIFMTU : 1010 mac_sdu_get(mh, NULL, &ifreq.ifr_mtu); 1011 break; 1012 1013 case SIOCGLIFMTU : 1014 mac_sdu_get(mh, 
NULL, &lifreq.lifr_mtu); 1015 break; 1016 1017 case SIOCGIFHWADDR : 1018 if (mac_addr_len(mh) > sizeof (ifreq.ifr_addr.sa_data)) { 1019 error = EPFNOSUPPORT; 1020 break; 1021 } 1022 1023 if (mac_addr_len(mh) == 0) { 1024 (void) memset(ifreq.ifr_addr.sa_data, 0, 1025 sizeof (ifreq.ifr_addr.sa_data)); 1026 } else { 1027 mac_unicast_primary_get(mh, 1028 (uint8_t *)ifreq.ifr_addr.sa_data); 1029 } 1030 1031 /* 1032 * The behaviour here in setting sa_family is consistent 1033 * with what applications such as tcpdump would expect 1034 * for a Linux PF_PACKET socket. 1035 */ 1036 ifreq.ifr_addr.sa_family = pfp_dl_to_arphrd(mac_type(mh)); 1037 break; 1038 1039 case SIOCGLIFHWADDR : 1040 lifreq.lifr_type = 0; 1041 sock = (struct sockaddr_dl *)&lifreq.lifr_addr; 1042 1043 if (mac_addr_len(mh) > sizeof (sock->sdl_data)) { 1044 error = EPFNOSUPPORT; 1045 break; 1046 } 1047 1048 /* 1049 * Fill in the sockaddr_dl with link layer details. Of note, 1050 * the index is returned as 0 for a couple of reasons: 1051 * (1) there is no public API that uses or requires it 1052 * (2) the MAC index is currently 32bits and sdl_index is 16. 1053 */ 1054 sock->sdl_family = AF_LINK; 1055 sock->sdl_index = 0; 1056 sock->sdl_type = mac_type(mh); 1057 sock->sdl_nlen = 0; 1058 sock->sdl_alen = mac_addr_len(mh); 1059 sock->sdl_slen = 0; 1060 if (mac_addr_len(mh) == 0) { 1061 (void) memset(sock->sdl_data, 0, 1062 sizeof (sock->sdl_data)); 1063 } else { 1064 mac_unicast_primary_get(mh, (uint8_t *)sock->sdl_data); 1065 } 1066 break; 1067 1068 default : 1069 break; 1070 } 1071 1072 mac_close(mh); 1073 1074 if (error == 0) { 1075 /* 1076 * Only the "GET" ioctls need to copy data back to userace. 
1077 */ 1078 switch (cmd) { 1079 case SIOCGLIFINDEX : 1080 case SIOCGLIFFLAGS : 1081 case SIOCGLIFMTU : 1082 case SIOCGLIFHWADDR : 1083 error = ddi_copyout(&lifreq, (void *)arg, 1084 sizeof (lifreq), mod); 1085 break; 1086 1087 case SIOCGIFINDEX : 1088 case SIOCGIFFLAGS : 1089 case SIOCGIFMTU : 1090 case SIOCGIFHWADDR : 1091 error = ddi_copyout(&ifreq, (void *)arg, 1092 sizeof (ifreq), mod); 1093 break; 1094 default : 1095 break; 1096 } 1097 } 1098 1099 return (error); 1100 } 1101 1102 /* 1103 * Closing the socket requires that all open references to network 1104 * interfaces be closed. 1105 */ 1106 /* ARGSUSED */ 1107 static int 1108 sdpfp_close(sock_lower_handle_t handle, int flag, struct cred *cr) 1109 { 1110 struct pfpsock *ps = (struct pfpsock *)handle; 1111 1112 if (ps->ps_phd != 0) { 1113 mac_promisc_remove(ps->ps_phd); 1114 ps->ps_phd = 0; 1115 } 1116 1117 if (ps->ps_mch != 0) { 1118 mac_client_close(ps->ps_mch, 0); 1119 ps->ps_mch = 0; 1120 } 1121 1122 if (ps->ps_mh != 0) { 1123 mac_close(ps->ps_mh); 1124 ps->ps_mh = 0; 1125 } 1126 1127 kmem_free(ps, sizeof (*ps)); 1128 1129 return (0); 1130 } 1131 1132 /* ************************************************************************* */ 1133 1134 /* 1135 * Given a pointer (arg) to a "struct ifreq" (potentially in user space), 1136 * determine the linkid for the interface name stored in that structure. 1137 * name is used as a buffer so that we can ensure a trailing \0 is appended 1138 * to the name safely. 
1139 */ 1140 static int 1141 pfp_ifreq_getlinkid(intptr_t arg, struct ifreq *ifreqp, 1142 datalink_id_t *linkidp, int mode) 1143 { 1144 char name[IFNAMSIZ + 1]; 1145 int error; 1146 1147 if (ddi_copyin((void *)arg, ifreqp, sizeof (*ifreqp), mode) != 0) 1148 return (EFAULT); 1149 1150 (void) strlcpy(name, ifreqp->ifr_name, sizeof (name)); 1151 1152 error = dls_mgmt_get_linkid(name, linkidp); 1153 if (error != 0) 1154 error = dls_devnet_macname2linkid(name, linkidp); 1155 1156 return (error); 1157 } 1158 1159 /* 1160 * Given a pointer (arg) to a "struct lifreq" (potentially in user space), 1161 * determine the linkid for the interface name stored in that structure. 1162 * name is used as a buffer so that we can ensure a trailing \0 is appended 1163 * to the name safely. 1164 */ 1165 static int 1166 pfp_lifreq_getlinkid(intptr_t arg, struct lifreq *lifreqp, 1167 datalink_id_t *linkidp, int mode) 1168 { 1169 char name[LIFNAMSIZ + 1]; 1170 int error; 1171 1172 if (ddi_copyin((void *)arg, lifreqp, sizeof (*lifreqp), mode) != 0) 1173 return (EFAULT); 1174 1175 (void) strlcpy(name, lifreqp->lifr_name, sizeof (name)); 1176 1177 error = dls_mgmt_get_linkid(name, linkidp); 1178 if (error != 0) 1179 error = dls_devnet_macname2linkid(name, linkidp); 1180 1181 return (error); 1182 } 1183 1184 /* 1185 * Although there are several new SOL_PACKET options that can be set and 1186 * are specific to this implementation of PF_PACKET, the current API does 1187 * not support doing a get on them to retrieve accompanying status. Thus 1188 * it is only currently possible to use SOL_PACKET with getsockopt to 1189 * retrieve statistical information. This remains consistant with the 1190 * Linux API at the time of writing. 
1191 */ 1192 static int 1193 pfp_getpacket_sockopt(sock_lower_handle_t handle, int option_name, 1194 void *optval, socklen_t *optlenp) 1195 { 1196 struct pfpsock *ps; 1197 struct tpacket_stats_short tpss; 1198 int error = 0; 1199 1200 ps = (struct pfpsock *)handle; 1201 1202 switch (option_name) { 1203 case PACKET_STATISTICS : 1204 if (*optlenp < sizeof (ps->ps_stats)) { 1205 error = EINVAL; 1206 break; 1207 } 1208 *optlenp = sizeof (ps->ps_stats); 1209 bcopy(&ps->ps_stats, optval, sizeof (ps->ps_stats)); 1210 break; 1211 case PACKET_STATISTICS_SHORT : 1212 if (*optlenp < sizeof (tpss)) { 1213 error = EINVAL; 1214 break; 1215 } 1216 *optlenp = sizeof (tpss); 1217 tpss.tp_packets = ps->ps_stats.tp_packets; 1218 tpss.tp_drops = ps->ps_stats.tp_drops; 1219 bcopy(&tpss, optval, sizeof (tpss)); 1220 break; 1221 default : 1222 error = EINVAL; 1223 break; 1224 } 1225 1226 return (error); 1227 } 1228 1229 /* 1230 * The SOL_PACKET level for socket options supports three options, 1231 * PACKET_ADD_MEMBERSHIP, PACKET_DROP_MEMBERSHIP and PACKET_AUXDATA. 1232 * This function is responsible for mapping the two socket options 1233 * that manage multicast membership into the appropriate internal 1234 * function calls to bring the option into effect. Whilst direct 1235 * changes to the multicast membership (ADD/DROP) groups is handled 1236 * by calls directly into the mac module, changes to the promiscuos 1237 * mode are vectored through pfp_set_promisc() so that the logic for 1238 * managing the promiscuous mode is in one place. 
1239 */ 1240 /* ARGSUSED */ 1241 static int 1242 pfp_setpacket_sockopt(sock_lower_handle_t handle, int option_name, 1243 const void *optval, socklen_t optlen) 1244 { 1245 struct packet_mreq mreq; 1246 struct pfpsock *ps; 1247 int error = 0; 1248 int opt; 1249 1250 ps = (struct pfpsock *)handle; 1251 if (!ps->ps_bound) 1252 return (EPROTO); 1253 1254 if ((option_name == PACKET_ADD_MEMBERSHIP) || 1255 (option_name == PACKET_DROP_MEMBERSHIP)) { 1256 if (!ps->ps_bound) 1257 return (EPROTO); 1258 bcopy(optval, &mreq, sizeof (mreq)); 1259 if (ps->ps_linkid != mreq.mr_ifindex) 1260 return (EINVAL); 1261 } 1262 1263 switch (option_name) { 1264 case PACKET_ADD_MEMBERSHIP : 1265 switch (mreq.mr_type) { 1266 case PACKET_MR_MULTICAST : 1267 if (mreq.mr_alen != 1268 ((struct sockaddr_ll *)&ps->ps_sock)->sll_halen) 1269 return (EINVAL); 1270 1271 error = mac_multicast_add(ps->ps_mch, mreq.mr_address); 1272 break; 1273 1274 case PACKET_MR_PROMISC : 1275 error = pfp_set_promisc(ps, MAC_CLIENT_PROMISC_ALL); 1276 break; 1277 1278 case PACKET_MR_ALLMULTI : 1279 error = pfp_set_promisc(ps, MAC_CLIENT_PROMISC_MULTI); 1280 break; 1281 } 1282 break; 1283 1284 case PACKET_DROP_MEMBERSHIP : 1285 switch (mreq.mr_type) { 1286 case PACKET_MR_MULTICAST : 1287 if (mreq.mr_alen != 1288 ((struct sockaddr_ll *)&ps->ps_sock)->sll_halen) 1289 return (EINVAL); 1290 1291 mac_multicast_remove(ps->ps_mch, mreq.mr_address); 1292 break; 1293 1294 case PACKET_MR_PROMISC : 1295 if (ps->ps_promisc != MAC_CLIENT_PROMISC_ALL) 1296 return (EINVAL); 1297 error = pfp_set_promisc(ps, 1298 MAC_CLIENT_PROMISC_FILTERED); 1299 break; 1300 1301 case PACKET_MR_ALLMULTI : 1302 if (ps->ps_promisc != MAC_CLIENT_PROMISC_MULTI) 1303 return (EINVAL); 1304 error = pfp_set_promisc(ps, 1305 MAC_CLIENT_PROMISC_FILTERED); 1306 break; 1307 } 1308 break; 1309 1310 case PACKET_AUXDATA : 1311 if (optlen == sizeof (int)) { 1312 opt = *(int *)optval; 1313 ps->ps_auxdata = (opt != 0); 1314 } else { 1315 error = EINVAL; 1316 } 1317 break; 
1318 default : 1319 error = EINVAL; 1320 break; 1321 } 1322 1323 return (error); 1324 } 1325 1326 /* 1327 * There are only two special setsockopt's for SOL_SOCKET with PF_PACKET: 1328 * SO_ATTACH_FILTER and SO_DETACH_FILTER. 1329 * 1330 * Both of these setsockopt values are candidates for being handled by the 1331 * socket layer itself in future, however this requires understanding how 1332 * they would interact with all other sockets. 1333 */ 1334 static int 1335 pfp_setsocket_sockopt(sock_lower_handle_t handle, int option_name, 1336 const void *optval, socklen_t optlen) 1337 { 1338 struct bpf_program prog; 1339 struct bpf_insn *fcode; 1340 struct pfpsock *ps; 1341 struct sock_proto_props sopp; 1342 int error = 0; 1343 int size; 1344 1345 ps = (struct pfpsock *)handle; 1346 1347 switch (option_name) { 1348 case SO_ATTACH_FILTER : 1349 #ifdef _LP64 1350 if (optlen == sizeof (struct bpf_program32)) { 1351 struct bpf_program32 prog32; 1352 1353 bcopy(optval, &prog32, sizeof (prog32)); 1354 prog.bf_len = prog32.bf_len; 1355 prog.bf_insns = (void *)(uint64_t)prog32.bf_insns; 1356 } else 1357 #endif 1358 if (optlen == sizeof (struct bpf_program)) { 1359 bcopy(optval, &prog, sizeof (prog)); 1360 } else if (optlen != sizeof (struct bpf_program)) { 1361 return (EINVAL); 1362 } 1363 if (prog.bf_len > BPF_MAXINSNS) 1364 return (EINVAL); 1365 1366 size = prog.bf_len * sizeof (*prog.bf_insns); 1367 fcode = kmem_alloc(size, KM_SLEEP); 1368 if (ddi_copyin(prog.bf_insns, fcode, size, 0) != 0) { 1369 kmem_free(fcode, size); 1370 return (EFAULT); 1371 } 1372 1373 if (bpf_validate(fcode, (int)prog.bf_len)) { 1374 rw_enter(&ps->ps_bpflock, RW_WRITER); 1375 pfp_release_bpf(ps); 1376 ps->ps_bpf.bf_insns = fcode; 1377 ps->ps_bpf.bf_len = size; 1378 rw_exit(&ps->ps_bpflock); 1379 1380 return (0); 1381 } 1382 kmem_free(fcode, size); 1383 error = EINVAL; 1384 break; 1385 1386 case SO_DETACH_FILTER : 1387 pfp_release_bpf(ps); 1388 break; 1389 1390 case SO_RCVBUF : 1391 size = *(int32_t 
*)optval; 1392 if (size > sockmod_pfp_rcvbuf_max || size < 0) 1393 return (ENOBUFS); 1394 sopp.sopp_flags = SOCKOPT_RCVHIWAT; 1395 sopp.sopp_rxhiwat = size; 1396 ps->ps_upcalls->su_set_proto_props(ps->ps_upper, &sopp); 1397 ps->ps_rcvbuf = size; 1398 break; 1399 1400 default : 1401 error = ENOPROTOOPT; 1402 break; 1403 } 1404 1405 return (error); 1406 } 1407 1408 /* 1409 * pfp_open_index is an internal function used to open a MAC device by 1410 * its index. Both a mac_handle_t and mac_client_handle_t are acquired 1411 * because some of the interfaces provided by the mac layer require either 1412 * only the mac_handle_t or both it and mac_handle_t. 1413 * 1414 * Whilst inside the kernel we can access data structures supporting any 1415 * zone, access to interfaces from non-global zones is restricted to those 1416 * interfaces (if any) that are exclusively assigned to a zone. 1417 */ 1418 static int 1419 pfp_open_index(int index, mac_handle_t *mhp, mac_client_handle_t *mcip, 1420 cred_t *cred) 1421 { 1422 mac_client_handle_t mch; 1423 zoneid_t ifzoneid; 1424 mac_handle_t mh; 1425 zoneid_t zoneid; 1426 int error; 1427 1428 mh = 0; 1429 mch = 0; 1430 error = mac_open_by_linkid(index, &mh); 1431 if (error != 0) 1432 goto bad_open; 1433 1434 error = mac_client_open(mh, &mch, NULL, 1435 MAC_OPEN_FLAGS_USE_DATALINK_NAME); 1436 if (error != 0) 1437 goto bad_open; 1438 1439 zoneid = crgetzoneid(cred); 1440 if (zoneid != GLOBAL_ZONEID) { 1441 mac_perim_handle_t perim; 1442 1443 mac_perim_enter_by_mh(mh, &perim); 1444 error = dls_link_getzid(mac_name(mh), &ifzoneid); 1445 mac_perim_exit(perim); 1446 if (error != 0) 1447 goto bad_open; 1448 if (ifzoneid != zoneid) { 1449 error = EACCES; 1450 goto bad_open; 1451 } 1452 } 1453 1454 *mcip = mch; 1455 *mhp = mh; 1456 1457 return (0); 1458 bad_open: 1459 if (mch != 0) 1460 mac_client_close(mch, 0); 1461 if (mh != 0) 1462 mac_close(mh); 1463 return (error); 1464 } 1465 1466 static void 1467 pfp_close(mac_handle_t mh, 
mac_client_handle_t mch) 1468 { 1469 mac_client_close(mch, 0); 1470 mac_close(mh); 1471 } 1472 1473 /* 1474 * The purpose of this function is to provide a single place where we free 1475 * the loaded BPF program and reset all pointers/counters associated with 1476 * it. 1477 */ 1478 static void 1479 pfp_release_bpf(struct pfpsock *ps) 1480 { 1481 if (ps->ps_bpf.bf_len != 0) { 1482 kmem_free(ps->ps_bpf.bf_insns, ps->ps_bpf.bf_len); 1483 ps->ps_bpf.bf_len = 0; 1484 ps->ps_bpf.bf_insns = NULL; 1485 } 1486 } 1487 1488 /* 1489 * Set the promiscuous mode of a network interface. 1490 * This function only calls the mac layer when there is a change to the 1491 * status of a network interface's promiscous mode. Tracking of how many 1492 * sockets have the network interface in promiscuous mode, and thus the 1493 * control over the physical device's status, is left to the mac layer. 1494 */ 1495 static int 1496 pfp_set_promisc(struct pfpsock *ps, mac_client_promisc_type_t turnon) 1497 { 1498 int error = 0; 1499 int flags; 1500 1501 /* 1502 * There are 4 combinations of turnon/ps_promisc. 1503 * This if handles 2 (both false, both true) and the if() below 1504 * handles the remaining one - when change is required. 1505 */ 1506 if (turnon == ps->ps_promisc) 1507 return (error); 1508 1509 if (ps->ps_phd != 0) { 1510 mac_promisc_remove(ps->ps_phd); 1511 ps->ps_phd = 0; 1512 1513 /* 1514 * ps_promisc is set here in case the call to mac_promisc_add 1515 * fails: leaving it to indicate that the interface is still 1516 * in some sort of promiscuous mode is false. 
1517 */ 1518 if (ps->ps_promisc != MAC_CLIENT_PROMISC_FILTERED) { 1519 ps->ps_promisc = MAC_CLIENT_PROMISC_FILTERED; 1520 flags = MAC_PROMISC_FLAGS_NO_PHYS; 1521 } else { 1522 flags = 0; 1523 } 1524 flags |= MAC_PROMISC_FLAGS_VLAN_TAG_STRIP; 1525 } 1526 1527 error = mac_promisc_add(ps->ps_mch, turnon, pfp_packet, ps, 1528 &ps->ps_phd, flags); 1529 if (error == 0) 1530 ps->ps_promisc = turnon; 1531 1532 return (error); 1533 } 1534 1535 /* 1536 * This table maps the MAC types in Solaris to the ARPHRD_* values used 1537 * on Linux. This is used with the SIOCGIFHWADDR/SIOCGLIFHWADDR ioctl. 1538 * 1539 * The symbols in this table are *not* pulled in from <net/if_arp.h>, 1540 * they are pulled from <netpacket/packet.h>, thus it acts as a source 1541 * of supplementary information to the ARP table. 1542 */ 1543 static uint_t arphrd_to_dl[][2] = { 1544 { ARPHRD_IEEE80211, DL_WIFI }, 1545 { ARPHRD_TUNNEL, DL_IPV4 }, 1546 { ARPHRD_TUNNEL, DL_IPV6 }, 1547 { ARPHRD_TUNNEL, DL_6TO4 }, 1548 { ARPHRD_AX25, DL_X25 }, 1549 { ARPHRD_ATM, DL_ATM }, 1550 { 0, 0 } 1551 }; 1552 1553 static int 1554 pfp_dl_to_arphrd(int dltype) 1555 { 1556 int i; 1557 1558 for (i = 0; arphrd_to_dl[i][0] != 0; i++) 1559 if (arphrd_to_dl[i][1] == dltype) 1560 return (arphrd_to_dl[i][0]); 1561 return (arp_hw_type(dltype)); 1562 } 1563