xref: /illumos-gate/usr/src/uts/common/inet/sockmods/sockmod_pfp.c (revision d2b5b2d357ee3172eacb6860be1891259902203d)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 #include <sys/types.h>
27 #include <sys/param.h>
28 #include <sys/systm.h>
29 #include <sys/stropts.h>
30 #include <sys/socket.h>
31 #include <sys/socketvar.h>
32 #include <sys/socket_proto.h>
33 #include <sys/sockio.h>
34 #include <sys/strsun.h>
35 #include <sys/kstat.h>
36 #include <sys/modctl.h>
37 #include <sys/policy.h>
38 #include <sys/priv_const.h>
39 #include <sys/tihdr.h>
40 #include <sys/zone.h>
41 #include <sys/time.h>
42 #include <fs/sockfs/sockcommon.h>
43 #include <net/if.h>
44 
45 #include <sys/dls.h>
46 #include <sys/mac.h>
47 #include <sys/mac_client.h>
48 #include <sys/mac_provider.h>
49 #include <sys/mac_client_priv.h>
50 
51 #include <netpacket/packet.h>
52 
/*
 * Forward declarations for the module's internal helper routines.
 */
static void pfp_close(mac_handle_t, mac_client_handle_t);
static int pfp_dl_to_arphrd(int);
static int pfp_getpacket_sockopt(sock_lower_handle_t, int, void *,
    socklen_t *);
static int pfp_ifreq_getlinkid(intptr_t, struct ifreq *, datalink_id_t *);
static int pfp_lifreq_getlinkid(intptr_t, struct lifreq *, datalink_id_t *);
static int pfp_open_index(int, mac_handle_t *, mac_client_handle_t *,
    cred_t *);
/* Receive callback registered with the mac layer by sdpfp_bind(). */
static void pfp_packet(void *, mac_resource_handle_t, mblk_t *, boolean_t);
static void pfp_release_bpf(struct pfpsock *);
static int pfp_set_promisc(struct pfpsock *, mac_client_promisc_type_t);
static int pfp_setsocket_sockopt(sock_lower_handle_t, int, const void *,
    socklen_t);
static int pfp_setpacket_sockopt(sock_lower_handle_t, int, const void *,
    socklen_t);

/*
 * PFP sockfs operations
 * Most are currently no-ops because they have no meaning for a connectionless
 * socket.
 */
static void sdpfp_activate(sock_lower_handle_t, sock_upper_handle_t,
    sock_upcalls_t *, int, struct cred *);
static int sdpfp_bind(sock_lower_handle_t, struct sockaddr *, socklen_t,
    struct cred *);
static int sdpfp_close(sock_lower_handle_t, int, struct cred *);
static void sdpfp_clr_flowctrl(sock_lower_handle_t);
static int sdpfp_getsockopt(sock_lower_handle_t, int, int, void *,
    socklen_t *, struct cred *);
static int sdpfp_ioctl(sock_lower_handle_t, int, intptr_t, int, int32_t *,
    struct cred *);
static int sdpfp_senduio(sock_lower_handle_t, struct uio *, struct nmsghdr *,
    struct cred *);
static int sdpfp_setsockopt(sock_lower_handle_t, int, int, const void *,
    socklen_t, struct cred *);

/* Socket creation entry point; registered with sockfs through sinfo. */
static sock_lower_handle_t sockpfp_create(int, int, int, sock_downcalls_t **,
    uint_t *, int *, int, cred_t *);

/* Module-wide kstat setup and teardown, called from _init() and _fini(). */
static int sockpfp_init(void);
static void sockpfp_fini(void);
94 
/*
 * Module-wide statistics.
 *
 * pfp_ksp is the handle of the named kstat created by sockpfp_init(); it
 * remains NULL if that creation failed. ks_stats holds the live counters
 * that the receive and send paths increment. pfp_kstats is the template
 * (counter names and data types only) that sockpfp_init() copies into
 * ks_stats when the module is loaded.
 */
static kstat_t *pfp_ksp;
static pfp_kstats_t ks_stats;
static pfp_kstats_t pfp_kstats = {
	/*
	 * Each one of these kstats is a different return path in handling
	 * a packet received from the mac layer.
	 */
	{ "recvMacHeaderFail",	KSTAT_DATA_UINT64 },
	{ "recvBadProtocol",	KSTAT_DATA_UINT64 },
	{ "recvAllocbFail",	KSTAT_DATA_UINT64 },
	{ "recvOk",		KSTAT_DATA_UINT64 },
	{ "recvFail",		KSTAT_DATA_UINT64 },
	{ "recvFiltered",	KSTAT_DATA_UINT64 },
	{ "recvFlowControl",	KSTAT_DATA_UINT64 },
	/*
	 * A global set of counters is maintained to track the behaviour
	 * of the system (kernel & applications) in sending packets.
	 */
	{ "sendUnbound",	KSTAT_DATA_UINT64 },
	{ "sendFailed",		KSTAT_DATA_UINT64 },
	{ "sendTooBig",		KSTAT_DATA_UINT64 },
	{ "sendAllocFail",	KSTAT_DATA_UINT64 },
	{ "sendUiomoveFail",	KSTAT_DATA_UINT64 },
	{ "sendNoMemory",	KSTAT_DATA_UINT64 },
	{ "sendOpenFail",	KSTAT_DATA_UINT64 },
	{ "sendWrongFamily",	KSTAT_DATA_UINT64 },
	{ "sendShortMsg",	KSTAT_DATA_UINT64 },
	{ "sendOk",		KSTAT_DATA_UINT64 }
};
124 
/*
 * Downcall vector handed back to sockfs by sockpfp_create(). The entries
 * are positional. Operations that this module does not implement are
 * filled with the sock_*_notsupp routines; the ones it does implement are
 * the sdpfp_*() functions in this file.
 */
sock_downcalls_t pfp_downcalls = {
	sdpfp_activate,
	sock_accept_notsupp,
	sdpfp_bind,
	sock_listen_notsupp,
	sock_connect_notsupp,
	sock_getpeername_notsupp,
	sock_getsockname_notsupp,
	sdpfp_getsockopt,
	sdpfp_setsockopt,
	sock_send_notsupp,
	sdpfp_senduio,
	/*
	 * NOTE(review): presumably the receive-uio slot, left unset because
	 * data is pushed upward via su_recv() - confirm against the
	 * sock_downcalls_t definition.
	 */
	NULL,
	sock_poll_notsupp,
	sock_shutdown_notsupp,
	sdpfp_clr_flowctrl,
	sdpfp_ioctl,
	sdpfp_close,
};
144 
/*
 * Socket module registration record: names this module "sockpfp" and
 * nominates sockpfp_create() as the routine used to create its sockets.
 */
static smod_reg_t sinfo = {
	SOCKMOD_VERSION,
	"sockpfp",
	SOCK_UC_VERSION,
	SOCK_DC_VERSION,
	sockpfp_create,
	NULL
};

/*
 * Module linkage information for the kernel.
 */
static struct modlsockmod modlsockmod = {
	&mod_sockmodops, "PF Packet socket module", &sinfo
};

/* Passed to mod_install(), mod_remove() and mod_info() below. */
static struct modlinkage modlinkage = {
	MODREV_1,
	&modlsockmod,
	NULL
};
166 
167 int
168 _init(void)
169 {
170 	int error;
171 
172 	error = sockpfp_init();
173 	if (error != 0)
174 		return (error);
175 
176 	error = mod_install(&modlinkage);
177 	if (error != 0)
178 		sockpfp_fini();
179 
180 	return (error);
181 }
182 
183 int
184 _fini(void)
185 {
186 	int error;
187 
188 	error = mod_remove(&modlinkage);
189 	if (error == 0)
190 		sockpfp_fini();
191 
192 	return (error);
193 }
194 
195 int
196 _info(struct modinfo *modinfop)
197 {
198 	return (mod_info(&modlinkage, modinfop));
199 }
200 
201 /*
202  * sockpfp_init: called as part of the initialisation of the module when
203  * loaded into the kernel.
204  *
205  * Being able to create and record the kstats data in the kernel is not
206  * considered to be vital to the operation of this kernel module, thus
207  * its failure is tolerated.
208  */
209 static int
210 sockpfp_init(void)
211 {
212 	(void) memset(&ks_stats, 0, sizeof (ks_stats));
213 
214 	(void) memcpy(&ks_stats, &pfp_kstats, sizeof (pfp_kstats));
215 
216 	pfp_ksp = kstat_create("pfpacket", 0, "global", "misc",
217 	    KSTAT_TYPE_NAMED, sizeof (pfp_kstats) / sizeof (kstat_named_t),
218 	    KSTAT_FLAG_VIRTUAL);
219 	if (pfp_ksp != NULL) {
220 		pfp_ksp->ks_data = &ks_stats;
221 		kstat_install(pfp_ksp);
222 	}
223 
224 	return (0);
225 }
226 
227 /*
228  * sockpfp_fini: called when the operating system wants to unload the
229  * socket module from the kernel.
230  */
231 static void
232 sockpfp_fini(void)
233 {
234 	if (pfp_ksp != NULL)
235 		kstat_delete(pfp_ksp);
236 }
237 
238 /*
239  * Due to sockets being created read-write by default, all PF_PACKET sockets
240  * therefore require the NET_RAWACCESS priviliege, even if the socket is only
241  * being used for reading packets from.
242  *
243  * This create function enforces this module only being used with PF_PACKET
244  * sockets and the policy that we support via the config file in sock2path.d:
245  * PF_PACKET sockets must be either SOCK_DGRAM or SOCK_RAW.
246  */
247 /* ARGSUSED */
248 static sock_lower_handle_t
249 sockpfp_create(int family, int type, int proto,
250     sock_downcalls_t **sock_downcalls, uint_t *smodep, int *errorp,
251     int sflags, cred_t *cred)
252 {
253 	struct pfpsock *ps;
254 	int kmflags;
255 
256 	if (secpolicy_net_rawaccess(cred) != 0) {
257 		*errorp = EACCES;
258 		return (NULL);
259 	}
260 
261 	if (family != AF_PACKET) {
262 		*errorp = EAFNOSUPPORT;
263 		return (NULL);
264 	}
265 
266 	if ((type != SOCK_RAW) && (type != SOCK_DGRAM)) {
267 		*errorp = ESOCKTNOSUPPORT;
268 		return (NULL);
269 	}
270 
271 	kmflags = (sflags & SOCKET_NOSLEEP) ? KM_NOSLEEP : KM_SLEEP;
272 	ps = kmem_zalloc(sizeof (*ps), kmflags);
273 	if (ps == NULL) {
274 		*errorp = ENOMEM;
275 		return (NULL);
276 	}
277 
278 	ps->ps_type = type;
279 	ps->ps_proto = proto;
280 	rw_init(&ps->ps_bpflock, NULL, RW_DRIVER, NULL);
281 	mutex_init(&ps->ps_lock, NULL, MUTEX_DRIVER, NULL);
282 
283 	*sock_downcalls = &pfp_downcalls;
284 	/*
285 	 * Setting this causes bytes from a packet that do not fit into the
286 	 * destination user buffer to be discarded. Thus the API is one
287 	 * packet per receive and callers are required to use a buffer large
288 	 * enough for the biggest packet that the interface can provide.
289 	 */
290 	*smodep = SM_ATOMIC;
291 
292 	return ((sock_lower_handle_t)ps);
293 }
294 
295 /* ************************************************************************* */
296 
297 /*
298  * pfp_packet is the callback function that is given to the mac layer for
299  * PF_PACKET to receive packets with. One packet at a time is passed into
300  * this function from the mac layer. Each packet is a private copy given
301  * to PF_PACKET to modify or free as it wishes and does not harm the original
302  * packet from which it was cloned.
303  */
304 /* ARGSUSED */
305 static void
306 pfp_packet(void *arg, mac_resource_handle_t mrh, mblk_t *mp, boolean_t flag)
307 {
308 	struct T_unitdata_ind *tunit;
309 	struct sockaddr_ll *sll;
310 	struct sockaddr_ll *sol;
311 	mac_header_info_t hdr;
312 	struct pfpsock *ps;
313 	size_t tusz;
314 	mblk_t *mp0;
315 	int error;
316 
317 	if (mp == NULL)
318 		return;
319 
320 	ps = arg;
321 	if (ps->ps_flow_ctrld) {
322 		ps->ps_flow_ctrl_drops++;
323 		ps->ps_stats.tp_drops++;
324 		ks_stats.kp_recv_flow_cntrld.value.ui64++;
325 		freemsg(mp);
326 		return;
327 	}
328 
329 	if (mac_header_info(ps->ps_mh, mp, &hdr) != 0) {
330 		/*
331 		 * Can't decode the packet header information so drop it.
332 		 */
333 		ps->ps_stats.tp_drops++;
334 		ks_stats.kp_recv_mac_hdr_fail.value.ui64++;
335 		freemsg(mp);
336 		return;
337 	}
338 
339 	if (mac_type(ps->ps_mh) == DL_ETHER &&
340 	    hdr.mhi_bindsap == ETHERTYPE_VLAN) {
341 		struct ether_vlan_header *evhp;
342 		struct ether_vlan_header evh;
343 
344 		hdr.mhi_hdrsize = sizeof (struct ether_vlan_header);
345 		hdr.mhi_istagged = B_TRUE;
346 
347 		if (MBLKL(mp) >= sizeof (*evhp)) {
348 			evhp = (struct ether_vlan_header *)mp->b_rptr;
349 		} else {
350 			int sz = sizeof (*evhp);
351 			char *s = (char *)&evh;
352 			mblk_t *tmp;
353 			int len;
354 
355 			for (tmp = mp; sz > 0 && tmp != NULL;
356 			    tmp = tmp->b_cont) {
357 				len = min(sz, MBLKL(tmp));
358 				bcopy(tmp->b_rptr, s, len);
359 				sz -= len;
360 			}
361 			evhp = &evh;
362 		}
363 		hdr.mhi_tci = ntohs(evhp->ether_tci);
364 		hdr.mhi_bindsap = ntohs(evhp->ether_type);
365 	}
366 
367 	if ((ps->ps_proto != 0) && (ps->ps_proto != hdr.mhi_bindsap)) {
368 		/*
369 		 * The packet is not of interest to this socket so
370 		 * drop it on the floor. Here the SAP is being used
371 		 * as a very course filter.
372 		 */
373 		ps->ps_stats.tp_drops++;
374 		ks_stats.kp_recv_bad_proto.value.ui64++;
375 		freemsg(mp);
376 		return;
377 	}
378 
379 	/*
380 	 * This field is not often set, even for ethernet,
381 	 * by mac_header_info, so compute it if it is 0.
382 	 */
383 	if (hdr.mhi_pktsize == 0)
384 		hdr.mhi_pktsize = msgdsize(mp);
385 
386 	/*
387 	 * If a BPF filter is present, pass the raw packet into that.
388 	 * A failed match will result in zero being returned, indicating
389 	 * that this socket is not interested in the packet.
390 	 */
391 	if (ps->ps_bpf.bf_len != 0) {
392 		uchar_t *buffer;
393 		int buflen;
394 
395 		buflen = MBLKL(mp);
396 		if (hdr.mhi_pktsize == buflen) {
397 			buffer = mp->b_rptr;
398 		} else {
399 			buflen = 0;
400 			buffer = (uchar_t *)mp;
401 		}
402 		rw_enter(&ps->ps_bpflock, RW_READER);
403 		if (bpf_filter(ps->ps_bpf.bf_insns, buffer,
404 		    hdr.mhi_pktsize, buflen) == 0) {
405 			rw_exit(&ps->ps_bpflock);
406 			ps->ps_stats.tp_drops++;
407 			ks_stats.kp_recv_filtered.value.ui64++;
408 			freemsg(mp);
409 			return;
410 		}
411 		rw_exit(&ps->ps_bpflock);
412 	}
413 
414 	if (ps->ps_type == SOCK_DGRAM) {
415 		/*
416 		 * SOCK_DGRAM socket expect a "layer 3" packet, so advance
417 		 * past the link layer header.
418 		 */
419 		mp->b_rptr += hdr.mhi_hdrsize;
420 		hdr.mhi_pktsize -= hdr.mhi_hdrsize;
421 	}
422 
423 	tusz = sizeof (struct T_unitdata_ind) + sizeof (struct sockaddr_ll);
424 	if (ps->ps_auxdata) {
425 		tusz += _TPI_ALIGN_TOPT(sizeof (struct tpacket_auxdata));
426 		tusz += _TPI_ALIGN_TOPT(sizeof (struct T_opthdr));
427 	}
428 
429 	/*
430 	 * It is tempting to think that this could be optimised by having
431 	 * the base mblk_t allocated and hung off the pfpsock structure,
432 	 * except that then another one would need to be allocated for the
433 	 * sockaddr_ll that is included. Even creating a template to copy
434 	 * from is of questionable value, as read-write from one structure
435 	 * to the other is going to be slower than all of the initialisation.
436 	 */
437 	mp0 = allocb(tusz, BPRI_HI);
438 	if (mp0 == NULL) {
439 		ps->ps_stats.tp_drops++;
440 		ks_stats.kp_recv_alloc_fail.value.ui64++;
441 		freemsg(mp);
442 		return;
443 	}
444 
445 	(void) memset(mp0->b_rptr, 0, tusz);
446 
447 	mp0->b_datap->db_type = M_PROTO;
448 	mp0->b_wptr = mp0->b_rptr + tusz;
449 
450 	tunit = (struct T_unitdata_ind *)mp0->b_rptr;
451 	tunit->PRIM_type = T_UNITDATA_IND;
452 	tunit->SRC_length = sizeof (struct sockaddr);
453 	tunit->SRC_offset = sizeof (*tunit);
454 
455 	sol = (struct sockaddr_ll *)&ps->ps_sock;
456 	sll = (struct sockaddr_ll *)(mp0->b_rptr + sizeof (*tunit));
457 	sll->sll_ifindex = sol->sll_ifindex;
458 	sll->sll_hatype = (uint16_t)hdr.mhi_origsap;
459 	sll->sll_halen = sol->sll_halen;
460 	if (hdr.mhi_saddr != NULL)
461 		(void) memcpy(sll->sll_addr, hdr.mhi_saddr, sll->sll_halen);
462 
463 	switch (hdr.mhi_dsttype) {
464 	case MAC_ADDRTYPE_MULTICAST :
465 		sll->sll_pkttype = PACKET_MULTICAST;
466 		break;
467 	case MAC_ADDRTYPE_BROADCAST :
468 		sll->sll_pkttype = PACKET_BROADCAST;
469 		break;
470 	case MAC_ADDRTYPE_UNICAST :
471 		if (memcmp(sol->sll_addr, hdr.mhi_daddr, sol->sll_halen) == 0)
472 			sll->sll_pkttype = PACKET_HOST;
473 		else
474 			sll->sll_pkttype = PACKET_OTHERHOST;
475 		break;
476 	}
477 
478 	if (ps->ps_auxdata) {
479 		struct tpacket_auxdata *aux;
480 		struct T_opthdr *topt;
481 
482 		tunit->OPT_offset = _TPI_ALIGN_TOPT(tunit->SRC_offset +
483 		    sizeof (struct sockaddr_ll));
484 		tunit->OPT_length = _TPI_ALIGN_TOPT(sizeof (struct T_opthdr)) +
485 		    _TPI_ALIGN_TOPT(sizeof (struct tpacket_auxdata));
486 
487 		topt = (struct T_opthdr *)(mp0->b_rptr + tunit->OPT_offset);
488 		aux = (struct tpacket_auxdata *)
489 		    ((char *)topt + _TPI_ALIGN_TOPT(sizeof (*topt)));
490 
491 		topt->len = tunit->OPT_length;
492 		topt->level = SOL_PACKET;
493 		topt->name = PACKET_AUXDATA;
494 		topt->status = 0;
495 		/*
496 		 * libpcap doesn't seem to use any other field,
497 		 * so it isn't clear how they should be filled in.
498 		 */
499 		aux->tp_vlan_vci = hdr.mhi_tci;
500 	}
501 
502 	linkb(mp0, mp);
503 
504 	ps->ps_upcalls->su_recv(ps->ps_upper, mp0, hdr.mhi_pktsize, 0,
505 	    &error, NULL);
506 
507 	if (error == 0) {
508 		ps->ps_stats.tp_packets++;
509 		ks_stats.kp_recv_ok.value.ui64++;
510 	} else {
511 		mutex_enter(&ps->ps_lock);
512 		if (error == ENOSPC) {
513 			ps->ps_upcalls->su_recv(ps->ps_upper, NULL, 0, 0,
514 			    &error, NULL);
515 			if (error == ENOSPC)
516 				ps->ps_flow_ctrld = B_TRUE;
517 		}
518 		mutex_exit(&ps->ps_lock);
519 		ps->ps_stats.tp_drops++;
520 		ks_stats.kp_recv_fail.value.ui64++;
521 	}
522 }
523 
524 /*
525  * Bind a PF_PACKET socket to a network interface.
526  *
527  * The default operation of this bind() is to place the socket (and thus the
528  * network interface) into promiscuous mode. It is then up to the application
529  * to turn that down by issuing the relevant ioctls, if desired.
530  */
531 /* ARGSUSED */
532 static int
533 sdpfp_bind(sock_lower_handle_t handle, struct sockaddr *addr,
534     socklen_t addrlen, struct cred *cred)
535 {
536 	struct sockaddr_ll *addr_ll, *sol;
537 	mac_client_handle_t mch;
538 	struct pfpsock *ps;
539 	mac_handle_t mh;
540 	int error;
541 
542 	ps = (struct pfpsock *)handle;
543 	if (ps->ps_bound)
544 		return (EINVAL);
545 
546 	addr_ll = (struct sockaddr_ll *)addr;
547 
548 	error = pfp_open_index(addr_ll->sll_ifindex, &mh, &mch, cred);
549 	if (error != 0)
550 		return (error);
551 	/*
552 	 * Ensure that each socket is only bound once.
553 	 */
554 	mutex_enter(&ps->ps_lock);
555 	if (ps->ps_mh != 0) {
556 		mutex_exit(&ps->ps_lock);
557 		pfp_close(mh, mch);
558 		return (EADDRINUSE);
559 	}
560 	ps->ps_mh = mh;
561 	ps->ps_mch = mch;
562 	mutex_exit(&ps->ps_lock);
563 
564 	/*
565 	 * Cache all of the information from bind so that it's in an easy
566 	 * place to get at when packets are received.
567 	 */
568 	sol = (struct sockaddr_ll *)&ps->ps_sock;
569 	sol->sll_family = AF_PACKET;
570 	sol->sll_ifindex = addr_ll->sll_ifindex;
571 	sol->sll_protocol = addr_ll->sll_protocol;
572 	sol->sll_halen = mac_addr_len(ps->ps_mh);
573 	mac_unicast_primary_get(ps->ps_mh, sol->sll_addr);
574 	mac_sdu_get(ps->ps_mh, NULL, &ps->ps_max_sdu);
575 	ps->ps_linkid = addr_ll->sll_ifindex;
576 
577 	error = mac_promisc_add(ps->ps_mch, MAC_CLIENT_PROMISC_ALL,
578 	    pfp_packet, ps, &ps->ps_phd, MAC_PROMISC_FLAGS_VLAN_TAG_STRIP);
579 	if (error == 0) {
580 		ps->ps_promisc = MAC_CLIENT_PROMISC_ALL;
581 		ps->ps_bound = B_TRUE;
582 	}
583 
584 	return (error);
585 }
586 
587 /* ARGSUSED */
588 static void
589 sdpfp_activate(sock_lower_handle_t lower, sock_upper_handle_t upper,
590     sock_upcalls_t *upcalls, int flags, cred_t *cred)
591 {
592 	struct pfpsock *ps;
593 
594 	ps = (struct pfpsock *)lower;
595 	ps->ps_upper = upper;
596 	ps->ps_upcalls = upcalls;
597 }
598 
599 /*
600  * This module only implements getting socket options for the new socket
601  * option level (SOL_PACKET) that it introduces. All other requests are
602  * passed back to the sockfs layer.
603  */
604 /* ARGSUSED */
605 static int
606 sdpfp_getsockopt(sock_lower_handle_t handle, int level, int option_name,
607     void *optval, socklen_t *optlenp, struct cred *cred)
608 {
609 	int error = 0;
610 
611 	switch (level) {
612 	case SOL_PACKET :
613 		error = pfp_getpacket_sockopt(handle, option_name, optval,
614 		    optlenp);
615 		break;
616 	default :
617 		/*
618 		 * If sockfs code receives this error in return from the
619 		 * getsockopt downcall it handles the option locally, if
620 		 * it can. This implements SO_RCVBUF, etc.
621 		 */
622 		error = ENOPROTOOPT;
623 		break;
624 	}
625 
626 	return (error);
627 }
628 
629 /*
630  * PF_PACKET supports setting socket options at only two levels:
631  * SOL_SOCKET and SOL_PACKET.
632  */
633 /* ARGSUSED */
634 static int
635 sdpfp_setsockopt(sock_lower_handle_t handle, int level, int option_name,
636     const void *optval, socklen_t optlen, struct cred *cred)
637 {
638 	int error = 0;
639 
640 	switch (level) {
641 	case SOL_SOCKET :
642 		error = pfp_setsocket_sockopt(handle, option_name, optval,
643 		    optlen);
644 		break;
645 	case SOL_PACKET :
646 		error = pfp_setpacket_sockopt(handle, option_name, optval,
647 		    optlen);
648 		break;
649 	default :
650 		error = EINVAL;
651 		break;
652 	}
653 
654 	return (error);
655 }
656 
657 /*
658  * This function is incredibly inefficient for sending any packet that
659  * comes with a msghdr asking to be sent to an interface to which the
660  * socket has not been bound. Some possibilities here are keeping a
661  * cache of all open mac's and mac_client's, for the purpose of sending,
662  * and closing them after some amount of inactivity. Clearly, applications
663  * should not be written to use one socket for multiple interfaces if
664  * performance is desired with the code as is.
665  */
666 /* ARGSUSED */
667 static int
668 sdpfp_senduio(sock_lower_handle_t handle, struct uio *uiop,
669     struct nmsghdr *msg, struct cred *cred)
670 {
671 	struct sockaddr_ll *sol;
672 	mac_client_handle_t mch;
673 	struct pfpsock *ps;
674 	boolean_t new_open;
675 	mac_handle_t mh;
676 	size_t mpsize;
677 	uint_t maxsdu;
678 	mblk_t *mp0;
679 	mblk_t *mp;
680 	int error;
681 
682 	mp = NULL;
683 	mp0 = NULL;
684 	new_open = B_FALSE;
685 	ps = (struct pfpsock *)handle;
686 	mh = ps->ps_mh;
687 	mch = ps->ps_mch;
688 	maxsdu = ps->ps_max_sdu;
689 
690 	sol = (struct sockaddr_ll *)msg->msg_name;
691 	if (sol == NULL) {
692 		/*
693 		 * If no sockaddr_ll has been provided with the send call,
694 		 * use the one constructed when the socket was bound to an
695 		 * interface and fail if it hasn't been bound.
696 		 */
697 		if (!ps->ps_bound) {
698 			ks_stats.kp_send_unbound.value.ui64++;
699 			return (EPROTO);
700 		}
701 		sol = (struct sockaddr_ll *)&ps->ps_sock;
702 	} else {
703 		/*
704 		 * Verify the sockaddr_ll message passed down before using
705 		 * it to send a packet out with. If it refers to an interface
706 		 * that has not been bound, it is necessary to open it.
707 		 */
708 		struct sockaddr_ll *sll;
709 
710 		if (msg->msg_namelen < sizeof (struct sockaddr_ll)) {
711 			ks_stats.kp_send_short_msg.value.ui64++;
712 			return (EINVAL);
713 		}
714 
715 		if (sol->sll_family != AF_PACKET) {
716 			ks_stats.kp_send_wrong_family.value.ui64++;
717 			return (EAFNOSUPPORT);
718 		}
719 
720 		sll = (struct sockaddr_ll *)&ps->ps_sock;
721 		if (sol->sll_ifindex != sll->sll_ifindex) {
722 			error = pfp_open_index(sol->sll_ifindex, &mh, &mch,
723 			    cred);
724 			if (error != 0) {
725 				ks_stats.kp_send_open_fail.value.ui64++;
726 				return (error);
727 			}
728 			mac_sdu_get(mh, NULL, &maxsdu);
729 			new_open = B_TRUE;
730 		}
731 	}
732 
733 	mpsize = uiop->uio_resid;
734 	if (mpsize > maxsdu) {
735 		ks_stats.kp_send_too_big.value.ui64++;
736 		error = EMSGSIZE;
737 		goto done;
738 	}
739 
740 	if ((mp = allocb(mpsize, BPRI_HI)) == NULL) {
741 		ks_stats.kp_send_alloc_fail.value.ui64++;
742 		error = ENOBUFS;
743 		goto done;
744 	}
745 
746 	mp->b_wptr = mp->b_rptr + mpsize;
747 	error = uiomove(mp->b_rptr, mpsize, UIO_WRITE, uiop);
748 	if (error != 0) {
749 		ks_stats.kp_send_uiomove_fail.value.ui64++;
750 		goto done;
751 	}
752 
753 	if (ps->ps_type == SOCK_DGRAM) {
754 		mp0 = mac_header(mh, sol->sll_addr, sol->sll_protocol, mp, 0);
755 		if (mp0 == NULL) {
756 			ks_stats.kp_send_no_memory.value.ui64++;
757 			error = ENOBUFS;
758 			goto done;
759 		}
760 		linkb(mp0, mp);
761 		mp = mp0;
762 	}
763 
764 	/*
765 	 * As this is sending datagrams and no promise is made about
766 	 * how or if a packet will be sent/delivered, no effort is to
767 	 * be expended in recovering from a situation where the packet
768 	 * cannot be sent - it is just dropped.
769 	 */
770 	error = mac_tx(mch, mp, 0, MAC_DROP_ON_NO_DESC, NULL);
771 	if (error == 0) {
772 		mp = NULL;
773 		ks_stats.kp_send_ok.value.ui64++;
774 	} else {
775 		ks_stats.kp_send_failed.value.ui64++;
776 	}
777 
778 done:
779 
780 	if (new_open) {
781 		ASSERT(mch != ps->ps_mch);
782 		ASSERT(mh != ps->ps_mh);
783 		pfp_close(mh, mch);
784 	}
785 	if (mp != NULL)
786 		freemsg(mp);
787 
788 	return (error);
789 
790 }
791 
792 /*
793  * There's no use of a lock here, or at the bottom of pfp_packet() where
794  * ps_flow_ctrld is set to true, because in a situation where these two
795  * are racing to set the flag one way or the other, the end result is
796  * going to be ultimately determined by the scheduler anyway - which of
797  * the two threads gets the lock first? In such an operational environment,
798  * we've got packets arriving too fast to be delt with so packets are going
799  * to be dropped. Grabbing a lock just makes the drop more expensive.
800  */
801 static void
802 sdpfp_clr_flowctrl(sock_lower_handle_t handle)
803 {
804 	struct pfpsock *ps;
805 
806 	ps = (struct pfpsock *)handle;
807 
808 	mutex_enter(&ps->ps_lock);
809 	ps->ps_flow_ctrld = B_FALSE;
810 	mutex_exit(&ps->ps_lock);
811 }
812 
813 /*
814  * The implementation of this ioctl() handler is intended to function
815  * in the absence of a bind() being made before it is called. Thus the
816  * function calls mac_open() itself to provide a handle
817  * This function is structured like this:
818  * - determine the linkid for the interface being targetted
819  * - open the interface with said linkid
820  * - perform ioctl
821  * - copy results back to caller
822  *
823  * The ioctls that interact with interface flags have been implented below
824  * to assume that the interface is always up and running (IFF_RUNNING) and
825  * to use the state of this socket to determine whether or not the network
826  * interface is in promiscuous mode. Thus an ioctl to get the interface flags
827  * of an interface that has been put in promiscuous mode by another socket
828  * (in the same program or different), will not report that status.
829  */
830 /* ARGSUSED */
831 static int
832 sdpfp_ioctl(sock_lower_handle_t handle, int cmd, intptr_t arg, int mod,
833     int32_t *rval, struct cred *cr)
834 {
835 #if defined(_SYSCALL32)
836 	struct timeval32 tival;
837 #else
838 	struct timeval tival;
839 #endif
840 	mac_client_promisc_type_t mtype;
841 	datalink_id_t linkid;
842 	struct lifreq lifreq;
843 	struct ifreq ifreq;
844 	struct pfpsock *ps;
845 	mac_handle_t mh;
846 	timespec_t tv;
847 	int error;
848 
849 	switch (cmd) {
850 	/*
851 	 * ioctls that work on "struct lifreq"
852 	 */
853 	case SIOCSLIFFLAGS :
854 	case SIOCGLIFINDEX :
855 	case SIOCGLIFFLAGS :
856 	case SIOCGLIFMTU :
857 		error = pfp_lifreq_getlinkid(arg, &lifreq, &linkid);
858 		if (error != 0)
859 			return (error);
860 		break;
861 
862 	/*
863 	 * ioctls that work on "struct ifreq".
864 	 * Not all of these have a "struct lifreq" partner, for example
865 	 * SIOCGIFHWADDR, for the simple reason that the logical interface
866 	 * does not have a hardware address.
867 	 */
868 	case SIOCSIFFLAGS :
869 	case SIOCGIFINDEX :
870 	case SIOCGIFFLAGS :
871 	case SIOCGIFMTU :
872 	case SIOCGIFHWADDR :
873 		error = pfp_ifreq_getlinkid(arg, &ifreq, &linkid);
874 		if (error != 0)
875 			return (error);
876 		break;
877 	}
878 
879 	error =  mac_open_by_linkid(linkid, &mh);
880 	if (error != 0)
881 		return (error);
882 
883 	ps = (struct pfpsock *)handle;
884 
885 	switch (cmd) {
886 	case SIOCGLIFINDEX :
887 		lifreq.lifr_index = linkid;
888 		break;
889 
890 	case SIOCGIFINDEX :
891 		ifreq.ifr_index = linkid;
892 		break;
893 
894 	case SIOCGIFFLAGS :
895 		ifreq.ifr_flags = IFF_RUNNING;
896 		if (ps->ps_promisc == MAC_CLIENT_PROMISC_ALL)
897 			ifreq.ifr_flags |= IFF_PROMISC;
898 		break;
899 
900 	case SIOCGLIFFLAGS :
901 		lifreq.lifr_flags = IFF_RUNNING;
902 		if (ps->ps_promisc == MAC_CLIENT_PROMISC_ALL)
903 			lifreq.lifr_flags |= IFF_PROMISC;
904 		break;
905 
906 	case SIOCSIFFLAGS :
907 		if (linkid != ps->ps_linkid) {
908 			error = EINVAL;
909 		} else {
910 			if ((ifreq.ifr_flags & IFF_PROMISC) != 0)
911 				mtype = MAC_CLIENT_PROMISC_ALL;
912 			else
913 				mtype = MAC_CLIENT_PROMISC_FILTERED;
914 			error = pfp_set_promisc(ps, mtype);
915 		}
916 		break;
917 
918 	case SIOCSLIFFLAGS :
919 		if (linkid != ps->ps_linkid) {
920 			error = EINVAL;
921 		} else {
922 			if ((lifreq.lifr_flags & IFF_PROMISC) != 0)
923 				mtype = MAC_CLIENT_PROMISC_ALL;
924 			else
925 				mtype = MAC_CLIENT_PROMISC_FILTERED;
926 			error = pfp_set_promisc(ps, mtype);
927 		}
928 		break;
929 
930 	case SIOCGIFMTU :
931 		mac_sdu_get(mh, NULL, &ifreq.ifr_mtu);
932 		break;
933 
934 	case SIOCGLIFMTU :
935 		mac_sdu_get(mh, NULL, &lifreq.lifr_mtu);
936 		break;
937 
938 	case SIOCGIFHWADDR :
939 		mac_unicast_primary_get(mh, (uint8_t *)ifreq.ifr_addr.sa_data);
940 		ifreq.ifr_addr.sa_family = pfp_dl_to_arphrd(mac_type(mh));
941 		break;
942 
943 	case SIOCGSTAMP :
944 		(void) gethrestime(&tv);
945 		tival.tv_sec = (time_t)tv.tv_sec;
946 		tival.tv_usec = tv.tv_nsec / 1000;
947 		error = ddi_copyout(&tival, (void *)arg, sizeof (tival), 0);
948 		break;
949 
950 	default :
951 		break;
952 	}
953 
954 	mac_close(mh);
955 
956 	if (error == 0) {
957 		/*
958 		 * Only the "GET" ioctls need to copy data back to userace.
959 		 */
960 		switch (cmd) {
961 		case SIOCGLIFINDEX :
962 		case SIOCGLIFFLAGS :
963 		case SIOCGLIFMTU :
964 			error = ddi_copyout(&lifreq, (void *)arg,
965 			    sizeof (lifreq), 0);
966 			break;
967 
968 		case SIOCGIFINDEX :
969 		case SIOCGIFFLAGS :
970 		case SIOCGIFMTU :
971 		case SIOCGIFHWADDR :
972 			error = ddi_copyout(&ifreq, (void *)arg,
973 			    sizeof (ifreq), 0);
974 			break;
975 		default :
976 			break;
977 		}
978 	}
979 
980 	return (error);
981 }
982 
983 /*
984  * Closing the socket requires that all open references to network
985  * interfaces be closed.
986  */
987 /* ARGSUSED */
988 static int
989 sdpfp_close(sock_lower_handle_t handle, int flag, struct cred *cr)
990 {
991 	struct pfpsock *ps = (struct pfpsock *)handle;
992 
993 	if (ps->ps_phd != 0) {
994 		mac_promisc_remove(ps->ps_phd);
995 		ps->ps_phd = 0;
996 	}
997 
998 	if (ps->ps_mch != 0) {
999 		mac_client_close(ps->ps_mch, 0);
1000 		ps->ps_mch = 0;
1001 	}
1002 
1003 	if (ps->ps_mh != 0) {
1004 		mac_close(ps->ps_mh);
1005 		ps->ps_mh = 0;
1006 	}
1007 
1008 	kmem_free(ps, sizeof (*ps));
1009 
1010 	return (0);
1011 }
1012 
1013 /* ************************************************************************* */
1014 
1015 /*
1016  * Given a pointer (arg) to a "struct ifreq" (potentially in user space),
1017  * determine the linkid for the interface name stored in that structure.
1018  * name is used as a buffer so that we can ensure a trailing \0 is appended
1019  * to the name safely.
1020  */
1021 static int
1022 pfp_ifreq_getlinkid(intptr_t arg, struct ifreq *ifreqp,
1023     datalink_id_t *linkidp)
1024 {
1025 	char name[IFNAMSIZ + 1];
1026 	int error;
1027 
1028 	if (ddi_copyin((void *)arg, ifreqp, sizeof (*ifreqp), 0) != 0)
1029 		return (EFAULT);
1030 
1031 	(void) strlcpy(name, ifreqp->ifr_name, sizeof (name));
1032 
1033 	error = dls_mgmt_get_linkid(name, linkidp);
1034 	if (error != 0)
1035 		error = dls_devnet_macname2linkid(name, linkidp);
1036 
1037 	return (error);
1038 }
1039 
1040 /*
1041  * Given a pointer (arg) to a "struct lifreq" (potentially in user space),
1042  * determine the linkid for the interface name stored in that structure.
1043  * name is used as a buffer so that we can ensure a trailing \0 is appended
1044  * to the name safely.
1045  */
1046 static int
1047 pfp_lifreq_getlinkid(intptr_t arg, struct lifreq *lifreqp,
1048     datalink_id_t *linkidp)
1049 {
1050 	char name[LIFNAMSIZ + 1];
1051 	int error;
1052 
1053 	if (ddi_copyin((void *)arg, lifreqp, sizeof (*lifreqp), 0) != 0)
1054 		return (EFAULT);
1055 
1056 	(void) strlcpy(name, lifreqp->lifr_name, sizeof (name));
1057 
1058 	error = dls_mgmt_get_linkid(name, linkidp);
1059 	if (error != 0)
1060 		error = dls_devnet_macname2linkid(name, linkidp);
1061 
1062 	return (error);
1063 }
1064 
1065 /*
1066  * Although there are several new SOL_PACKET options that can be set and
1067  * are specific to this implementation of PF_PACKET, the current API does
1068  * not support doing a get on them to retrieve accompanying status. Thus
1069  * it is only currently possible to use SOL_PACKET with getsockopt to
1070  * retrieve statistical information. This remains consistant with the
1071  * Linux API at the time of writing.
1072  */
1073 static int
1074 pfp_getpacket_sockopt(sock_lower_handle_t handle, int option_name,
1075     void *optval, socklen_t *optlenp)
1076 {
1077 	struct pfpsock *ps;
1078 	int error = 0;
1079 
1080 	ps = (struct pfpsock *)handle;
1081 
1082 	switch (option_name) {
1083 	case PACKET_STATISTICS :
1084 		if (*optlenp < sizeof (ps->ps_stats)) {
1085 			error = EINVAL;
1086 			break;
1087 		}
1088 		*optlenp = sizeof (ps->ps_stats);
1089 		bcopy(&ps->ps_stats, optval, sizeof (ps->ps_stats));
1090 		break;
1091 	default :
1092 		error = EINVAL;
1093 		break;
1094 	}
1095 
1096 	return (error);
1097 }
1098 
1099 /*
1100  * The SOL_PACKET level for socket options supports three options,
1101  * PACKET_ADD_MEMBERSHIP, PACKET_DROP_MEMBERSHIP and PACKET_AUXDATA.
1102  * This function is responsible for mapping the two socket options
1103  * that manage multicast membership into the appropriate internal
1104  * function calls to bring the option into effect. Whilst direct
1105  * changes to the multicast membership (ADD/DROP) groups is handled
1106  * by calls directly into the mac module, changes to the promiscuos
1107  * mode are vectored through pfp_set_promisc() so that the logic for
1108  * managing the promiscuous mode is in one place.
1109  */
1110 /* ARGSUSED */
1111 static int
1112 pfp_setpacket_sockopt(sock_lower_handle_t handle, int option_name,
1113     const void *optval, socklen_t optlen)
1114 {
1115 	struct packet_mreq mreq;
1116 	struct pfpsock *ps;
1117 	int error = 0;
1118 	int opt;
1119 
1120 	ps = (struct pfpsock *)handle;
1121 	if (!ps->ps_bound)
1122 		return (EPROTO);
1123 
1124 	if ((option_name == PACKET_ADD_MEMBERSHIP) ||
1125 	    (option_name == PACKET_DROP_MEMBERSHIP)) {
1126 		if (!ps->ps_bound)
1127 			return (EPROTO);
1128 		bcopy(optval, &mreq, sizeof (mreq));
1129 		if (ps->ps_linkid != mreq.mr_ifindex)
1130 			return (EINVAL);
1131 
1132 		if (mreq.mr_alen !=
1133 		    ((struct sockaddr_ll *)&ps->ps_sock)->sll_halen)
1134 			return (EINVAL);
1135 	}
1136 
1137 	switch (option_name) {
1138 	case PACKET_ADD_MEMBERSHIP :
1139 		switch (mreq.mr_type) {
1140 		case PACKET_MR_MULTICAST :
1141 			error = mac_multicast_add(ps->ps_mch, mreq.mr_address);
1142 			break;
1143 
1144 		case PACKET_MR_PROMISC :
1145 			error = pfp_set_promisc(ps, MAC_CLIENT_PROMISC_ALL);
1146 			break;
1147 
1148 		case PACKET_MR_ALLMULTI :
1149 			error = pfp_set_promisc(ps, MAC_CLIENT_PROMISC_MULTI);
1150 			break;
1151 		}
1152 		break;
1153 
1154 	case PACKET_DROP_MEMBERSHIP :
1155 		switch (mreq.mr_type) {
1156 		case PACKET_MR_MULTICAST :
1157 			mac_multicast_remove(ps->ps_mch, mreq.mr_address);
1158 			break;
1159 
1160 		case PACKET_MR_PROMISC :
1161 			if (ps->ps_promisc != MAC_CLIENT_PROMISC_ALL)
1162 				return (EINVAL);
1163 			error = pfp_set_promisc(ps,
1164 			    MAC_CLIENT_PROMISC_FILTERED);
1165 			break;
1166 
1167 		case PACKET_MR_ALLMULTI :
1168 			if (ps->ps_promisc != MAC_CLIENT_PROMISC_MULTI)
1169 				return (EINVAL);
1170 			error = pfp_set_promisc(ps,
1171 			    MAC_CLIENT_PROMISC_FILTERED);
1172 			break;
1173 		}
1174 		break;
1175 
1176 	case PACKET_AUXDATA :
1177 		if (optlen == sizeof (int)) {
1178 			opt = *(int *)optval;
1179 			ps->ps_auxdata = (opt != 0);
1180 		} else {
1181 			error = EINVAL;
1182 		}
1183 		break;
1184 	default :
1185 		error = EINVAL;
1186 		break;
1187 	}
1188 
1189 	return (error);
1190 }
1191 
1192 /*
1193  * There are only two special setsockopt's for SOL_SOCKET with PF_PACKET:
1194  * SO_ATTACH_FILTER and SO_DETACH_FILTER. All other setsockopt requests
1195  * that are for SOL_SOCKET are passed back to the socket layer for its
1196  * generic implementation.
1197  *
1198  * Both of these setsockopt values are candidates for being handled by the
1199  * socket layer itself in future, however this requires understanding how
1200  * they would interact with all other sockets.
1201  */
1202 static int
1203 pfp_setsocket_sockopt(sock_lower_handle_t handle, int option_name,
1204     const void *optval, socklen_t optlen)
1205 {
1206 	struct bpf_program prog;
1207 	struct bpf_insn *fcode;
1208 	struct pfpsock *ps;
1209 	int error = 0;
1210 	int size;
1211 
1212 	ps = (struct pfpsock *)handle;
1213 
1214 	switch (option_name) {
1215 	case SO_ATTACH_FILTER :
1216 #ifdef _LP64
1217 		if (optlen == sizeof (struct bpf_program32)) {
1218 			struct bpf_program32 prog32;
1219 
1220 			bcopy(optval, &prog32, sizeof (prog32));
1221 			prog.bf_len = prog32.bf_len;
1222 			prog.bf_insns = (void *)(uint64_t)prog32.bf_insns;
1223 		} else
1224 #endif
1225 		if (optlen == sizeof (struct bpf_program)) {
1226 			bcopy(optval, &prog, sizeof (prog));
1227 		} else if (optlen != sizeof (struct bpf_program)) {
1228 			return (EINVAL);
1229 		}
1230 
1231 		size = prog.bf_len * sizeof (*prog.bf_insns);
1232 		fcode = kmem_alloc(size, KM_SLEEP);
1233 		if (ddi_copyin(prog.bf_insns, fcode, size, 0) != 0) {
1234 			kmem_free(fcode, size);
1235 			return (EFAULT);
1236 		}
1237 
1238 		if (bpf_validate(fcode, (int)prog.bf_len)) {
1239 			rw_enter(&ps->ps_bpflock, RW_WRITER);
1240 			pfp_release_bpf(ps);
1241 			ps->ps_bpf.bf_insns = fcode;
1242 			ps->ps_bpf.bf_len = size;
1243 			rw_exit(&ps->ps_bpflock);
1244 
1245 			return (0);
1246 		}
1247 		kmem_free(fcode, size);
1248 		error = EINVAL;
1249 		break;
1250 
1251 	case SO_DETACH_FILTER :
1252 		pfp_release_bpf(ps);
1253 		break;
1254 	default :
1255 		/*
1256 		 * If sockfs code receives this error in return from the
1257 		 * getsockopt downcall it handles the option locally, if
1258 		 * it can. This implements SO_RCVBUF, etc.
1259 		 */
1260 		error = ENOPROTOOPT;
1261 		break;
1262 	}
1263 
1264 	return (error);
1265 }
1266 
1267 /*
1268  * pfp_open_index is an internal function used to open a MAC device by
1269  * its index. Both a mac_handle_t and mac_client_handle_t are acquired
1270  * because some of the interfaces provided by the mac layer require either
1271  * only the mac_handle_t or both it and mac_handle_t.
1272  *
1273  * Whilst inside the kernel we can access data structures supporting any
1274  * zone, access to interfaces from non-global zones is restricted to those
1275  * interfaces (if any) that are exclusively assigned to a zone.
1276  */
1277 static int
1278 pfp_open_index(int index, mac_handle_t *mhp, mac_client_handle_t *mcip,
1279     cred_t *cred)
1280 {
1281 	mac_client_handle_t mch;
1282 	zoneid_t ifzoneid;
1283 	mac_handle_t mh;
1284 	zoneid_t zoneid;
1285 	int error;
1286 
1287 	mh = 0;
1288 	mch = 0;
1289 	error = mac_open_by_linkid(index, &mh);
1290 	if (error != 0)
1291 		goto bad_open;
1292 
1293 	error = mac_client_open(mh, &mch, NULL,
1294 	    MAC_OPEN_FLAGS_USE_DATALINK_NAME);
1295 	if (error != 0)
1296 		goto bad_open;
1297 
1298 	zoneid = crgetzoneid(cred);
1299 	if (zoneid != GLOBAL_ZONEID) {
1300 		mac_perim_handle_t perim;
1301 
1302 		mac_perim_enter_by_mh(mh, &perim);
1303 		error = dls_link_getzid(mac_client_name(mch), &ifzoneid);
1304 		mac_perim_exit(perim);
1305 		if (error != 0)
1306 			goto bad_open;
1307 		if (ifzoneid != zoneid) {
1308 			error = EACCES;
1309 			goto bad_open;
1310 		}
1311 	}
1312 
1313 	*mcip = mch;
1314 	*mhp = mh;
1315 
1316 	return (0);
1317 bad_open:
1318 	if (mch != 0)
1319 		mac_client_close(mch, 0);
1320 	if (mh != 0)
1321 		mac_close(mh);
1322 	return (error);
1323 }
1324 
1325 static void
1326 pfp_close(mac_handle_t mh, mac_client_handle_t mch)
1327 {
1328 	mac_client_close(mch, 0);
1329 	mac_close(mh);
1330 }
1331 
1332 /*
1333  * The purpose of this function is to provide a single place where we free
1334  * the loaded BPF program and reset all pointers/counters associated with
1335  * it.
1336  */
1337 static void
1338 pfp_release_bpf(struct pfpsock *ps)
1339 {
1340 	if (ps->ps_bpf.bf_len != 0) {
1341 		kmem_free(ps->ps_bpf.bf_insns, ps->ps_bpf.bf_len);
1342 		ps->ps_bpf.bf_len = 0;
1343 		ps->ps_bpf.bf_insns = NULL;
1344 	}
1345 }
1346 
1347 /*
1348  * Set the promiscuous mode of a network interface.
1349  * This function only calls the mac layer when there is a change to the
1350  * status of a network interface's promiscous mode. Tracking of how many
1351  * sockets have the network interface in promiscuous mode, and thus the
1352  * control over the physical device's status, is left to the mac layer.
1353  */
1354 static int
1355 pfp_set_promisc(struct pfpsock *ps, mac_client_promisc_type_t turnon)
1356 {
1357 	int error = 0;
1358 	int flags;
1359 
1360 	/*
1361 	 * There are 4 combinations of turnon/ps_promisc.
1362 	 * This if handles 2 (both false, both true) and the if() below
1363 	 * handles the remaining one - when change is required.
1364 	 */
1365 	if (turnon == ps->ps_promisc)
1366 		return (error);
1367 
1368 	if (ps->ps_phd != 0) {
1369 		mac_promisc_remove(ps->ps_phd);
1370 		ps->ps_phd = 0;
1371 
1372 		/*
1373 		 * ps_promisc is set here in case the call to mac_promisc_add
1374 		 * fails: leaving it to indicate that the interface is still
1375 		 * in some sort of promiscuous mode is false.
1376 		 */
1377 		if (ps->ps_promisc != MAC_CLIENT_PROMISC_FILTERED) {
1378 			ps->ps_promisc = MAC_CLIENT_PROMISC_FILTERED;
1379 			flags = MAC_PROMISC_FLAGS_NO_PHYS;
1380 		} else {
1381 			flags = 0;
1382 		}
1383 		flags |= MAC_PROMISC_FLAGS_VLAN_TAG_STRIP;
1384 	}
1385 
1386 	error = mac_promisc_add(ps->ps_mch, turnon, pfp_packet, ps,
1387 	    &ps->ps_phd, flags);
1388 	if (error == 0)
1389 		ps->ps_promisc = turnon;
1390 
1391 	return (error);
1392 }
1393 
1394 /*
1395  * This table maps the MAC types in Solaris to the ARPHRD_* values used
1396  * on Linux. This is used with the SIOCGIFHWADDR ioctl.
1397  */
1398 static uint_t arphrd_to_dl[][2] = {
1399 	{ ARPHRD_ETHER,		DL_ETHER },
1400 	{ ARPHRD_IEEE80211,	DL_WIFI },
1401 	{ 0,			0 }
1402 };
1403 
1404 static int
1405 pfp_dl_to_arphrd(int dltype)
1406 {
1407 	int i;
1408 
1409 	for (i = 0; arphrd_to_dl[i][0] != 0; i++)
1410 		if (arphrd_to_dl[i][1] == dltype)
1411 			return (arphrd_to_dl[i][0]);
1412 	return (0);
1413 }
1414