xref: /titanic_50/usr/src/uts/common/inet/sockmods/sockmod_pfp.c (revision f5c2e7ea56aaa46a9976476fb0cb1f02b9426f07)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 #include <sys/types.h>
27 #include <sys/param.h>
28 #include <sys/systm.h>
29 #include <sys/stropts.h>
30 #include <sys/socket.h>
31 #include <sys/socketvar.h>
32 #include <sys/socket_proto.h>
33 #include <sys/sockio.h>
34 #include <sys/strsun.h>
35 #include <sys/kstat.h>
36 #include <sys/modctl.h>
37 #include <sys/policy.h>
38 #include <sys/priv_const.h>
39 #include <sys/tihdr.h>
40 #include <sys/zone.h>
41 #include <sys/time.h>
42 #include <sys/ethernet.h>
43 #include <sys/llc1.h>
44 #include <fs/sockfs/sockcommon.h>
45 #include <net/if.h>
46 #include <inet/ip_arp.h>
47 
48 #include <sys/dls.h>
49 #include <sys/mac.h>
50 #include <sys/mac_client.h>
51 #include <sys/mac_provider.h>
52 #include <sys/mac_client_priv.h>
53 
54 #include <netpacket/packet.h>
55 
56 static void pfp_close(mac_handle_t, mac_client_handle_t);
57 static int pfp_dl_to_arphrd(int);
58 static int pfp_getpacket_sockopt(sock_lower_handle_t, int, void *,
59     socklen_t *);
60 static int pfp_ifreq_getlinkid(intptr_t, struct ifreq *, datalink_id_t *);
61 static int pfp_lifreq_getlinkid(intptr_t, struct lifreq *, datalink_id_t *);
62 static int pfp_open_index(int, mac_handle_t *, mac_client_handle_t *,
63     cred_t *);
64 static void pfp_packet(void *, mac_resource_handle_t, mblk_t *, boolean_t);
65 static void pfp_release_bpf(struct pfpsock *);
66 static int pfp_set_promisc(struct pfpsock *, mac_client_promisc_type_t);
67 static int pfp_setsocket_sockopt(sock_lower_handle_t, int, const void *,
68     socklen_t);
69 static int pfp_setpacket_sockopt(sock_lower_handle_t, int, const void *,
70     socklen_t);
71 
72 /*
73  * PFP sockfs operations
74  * Most are currently no-ops because they have no meaning for a connectionless
75  * socket.
76  */
77 static void sdpfp_activate(sock_lower_handle_t, sock_upper_handle_t,
78     sock_upcalls_t *, int, struct cred *);
79 static int sdpfp_bind(sock_lower_handle_t, struct sockaddr *, socklen_t,
80     struct cred *);
81 static int sdpfp_close(sock_lower_handle_t, int, struct cred *);
82 static void sdpfp_clr_flowctrl(sock_lower_handle_t);
83 static int sdpfp_getsockopt(sock_lower_handle_t, int, int, void *,
84     socklen_t *, struct cred *);
85 static int sdpfp_ioctl(sock_lower_handle_t, int, intptr_t, int, int32_t *,
86     struct cred *);
87 static int sdpfp_senduio(sock_lower_handle_t, struct uio *, struct nmsghdr *,
88     struct cred *);
89 static int sdpfp_setsockopt(sock_lower_handle_t, int, int, const void *,
90     socklen_t, struct cred *);
91 
92 static sock_lower_handle_t sockpfp_create(int, int, int, sock_downcalls_t **,
93     uint_t *, int *, int, cred_t *);
94 
95 static int sockpfp_init(void);
96 static void sockpfp_fini(void);
97 
98 static kstat_t *pfp_ksp;	/* kstat handle set by sockpfp_init(); NULL if kstat_create() failed */
99 static pfp_kstats_t ks_stats;	/* live counters: bumped on the receive/send paths, installed as ks_data */
100 static pfp_kstats_t pfp_kstats = {	/* name/type template that sockpfp_init() copies into ks_stats */
101 	/*
102 	 * Each one of these kstats is a different return path in handling
103 	 * a packet received from the mac layer.
104 	 */
105 	{ "recvMacHeaderFail",	KSTAT_DATA_UINT64 },
106 	{ "recvBadProtocol",	KSTAT_DATA_UINT64 },
107 	{ "recvAllocbFail",	KSTAT_DATA_UINT64 },
108 	{ "recvOk",		KSTAT_DATA_UINT64 },
109 	{ "recvFail",		KSTAT_DATA_UINT64 },
110 	{ "recvFiltered",	KSTAT_DATA_UINT64 },
111 	{ "recvFlowControl",	KSTAT_DATA_UINT64 },
112 	/*
113 	 * A global set of counters is maintained to track the behaviour
114 	 * of the system (kernel & applications) in sending packets.
115 	 */
116 	{ "sendUnbound",	KSTAT_DATA_UINT64 },
117 	{ "sendFailed",		KSTAT_DATA_UINT64 },
118 	{ "sendTooBig",		KSTAT_DATA_UINT64 },
119 	{ "sendAllocFail",	KSTAT_DATA_UINT64 },
120 	{ "sendUiomoveFail",	KSTAT_DATA_UINT64 },
121 	{ "sendNoMemory",	KSTAT_DATA_UINT64 },
122 	{ "sendOpenFail",	KSTAT_DATA_UINT64 },
123 	{ "sendWrongFamily",	KSTAT_DATA_UINT64 },
124 	{ "sendShortMsg",	KSTAT_DATA_UINT64 },
125 	{ "sendOk",		KSTAT_DATA_UINT64 }	/* NOTE(review): entries are positional; order must match pfp_kstats_t - confirm against its declaration */
126 };
127 
128 sock_downcalls_t pfp_downcalls = {	/* downcall vector returned through *sock_downcalls by sockpfp_create() */
129 	sdpfp_activate,
130 	sock_accept_notsupp,	/* sock_*_notsupp: presumably generic "not supported" stubs supplied by sockfs */
131 	sdpfp_bind,
132 	sock_listen_notsupp,
133 	sock_connect_notsupp,
134 	sock_getpeername_notsupp,
135 	sock_getsockname_notsupp,
136 	sdpfp_getsockopt,
137 	sdpfp_setsockopt,
138 	sock_send_notsupp,
139 	sdpfp_senduio,
140 	NULL,	/* NOTE(review): slot left empty - confirm which downcall this is and that sockfs tolerates NULL here */
141 	sock_poll_notsupp,
142 	sock_shutdown_notsupp,
143 	sdpfp_clr_flowctrl,
144 	sdpfp_ioctl,
145 	sdpfp_close,
146 };
147 
148 static smod_reg_t sinfo = {	/* socket module registration record, referenced by modlsockmod */
149 	SOCKMOD_VERSION,
150 	"sockpfp",	/* module name */
151 	SOCK_UC_VERSION,
152 	SOCK_DC_VERSION,
153 	sockpfp_create,	/* entry point that builds the per-socket pfpsock state */
154 	NULL
155 };
156 
157 static int accepted_protos[3][2] = {	/* column 0: protocol given at socket creation; column 1: value sockpfp_create() stores in ps_proto */
158 	{ ETH_P_ALL,	0 },	/* stored as 0, which pfp_packet() treats as "do not filter on SAP" */
159 	{ ETH_P_802_2,	LLC_SNAP_SAP },
160 	{ ETH_P_803_3,	0 },
161 };
162 
163 /*
164  * Module linkage information for the kernel.
165  */
166 static struct modlsockmod modlsockmod = {	/* ties the sinfo registration record to this loadable module */
167 	&mod_sockmodops, "PF Packet socket module", &sinfo
168 };
169 
170 static struct modlinkage modlinkage = {	/* passed to mod_install(), mod_remove() and mod_info() by _init/_fini/_info */
171 	MODREV_1,
172 	&modlsockmod,
173 	NULL
174 };
175 
176 int
177 _init(void)
178 {
179 	int error;
180 
181 	error = sockpfp_init();
182 	if (error != 0)
183 		return (error);
184 
185 	error = mod_install(&modlinkage);
186 	if (error != 0)
187 		sockpfp_fini();
188 
189 	return (error);
190 }
191 
192 int
193 _fini(void)
194 {
195 	int error;
196 
197 	error = mod_remove(&modlinkage);
198 	if (error == 0)
199 		sockpfp_fini();
200 
201 	return (error);
202 }
203 
204 int
205 _info(struct modinfo *modinfop)
206 {
207 	return (mod_info(&modlinkage, modinfop));
208 }
209 
210 /*
211  * sockpfp_init: called as part of the initialisation of the module when
212  * loaded into the kernel.
213  *
214  * Being able to create and record the kstats data in the kernel is not
215  * considered to be vital to the operation of this kernel module, thus
216  * its failure is tolerated.
217  */
218 static int
219 sockpfp_init(void)
220 {
221 	(void) memset(&ks_stats, 0, sizeof (ks_stats));
222 
223 	(void) memcpy(&ks_stats, &pfp_kstats, sizeof (pfp_kstats));
224 
225 	pfp_ksp = kstat_create("pfpacket", 0, "global", "misc",
226 	    KSTAT_TYPE_NAMED, sizeof (pfp_kstats) / sizeof (kstat_named_t),
227 	    KSTAT_FLAG_VIRTUAL);
228 	if (pfp_ksp != NULL) {
229 		pfp_ksp->ks_data = &ks_stats;
230 		kstat_install(pfp_ksp);
231 	}
232 
233 	return (0);
234 }
235 
236 /*
237  * sockpfp_fini: called when the operating system wants to unload the
238  * socket module from the kernel.
239  */
240 static void
241 sockpfp_fini(void)
242 {
243 	if (pfp_ksp != NULL)
244 		kstat_delete(pfp_ksp);
245 }
246 
247 /*
248  * Due to sockets being created read-write by default, all PF_PACKET sockets
249  * therefore require the NET_RAWACCESS priviliege, even if the socket is only
250  * being used for reading packets from.
251  *
252  * This create function enforces this module only being used with PF_PACKET
253  * sockets and the policy that we support via the config file in sock2path.d:
254  * PF_PACKET sockets must be either SOCK_DGRAM or SOCK_RAW.
255  */
256 /* ARGSUSED */
257 static sock_lower_handle_t
258 sockpfp_create(int family, int type, int proto,
259     sock_downcalls_t **sock_downcalls, uint_t *smodep, int *errorp,
260     int sflags, cred_t *cred)
261 {
262 	struct pfpsock *ps;
263 	int kmflags;
264 	int newproto;
265 	int i;
266 
267 	if (secpolicy_net_rawaccess(cred) != 0) {
268 		*errorp = EACCES;
269 		return (NULL);
270 	}
271 
272 	if (family != AF_PACKET) {
273 		*errorp = EAFNOSUPPORT;
274 		return (NULL);
275 	}
276 
277 	if ((type != SOCK_RAW) && (type != SOCK_DGRAM)) {
278 		*errorp = ESOCKTNOSUPPORT;
279 		return (NULL);
280 	}
281 
282 	/*
283 	 * First check to see if the protocol number passed in via the socket
284 	 * creation should be mapped to a different number for internal use.
285 	 */
286 	for (i = 0, newproto = -1;
287 	    i < sizeof (accepted_protos)/ sizeof (accepted_protos[0]); i++) {
288 		if (accepted_protos[i][0] == proto) {
289 			newproto = accepted_protos[i][1];
290 			break;
291 		}
292 	}
293 
294 	/*
295 	 * If the mapping of the protocol that was under 0x800 failed to find
296 	 * a local equivalent then fail the socket creation. If the protocol
297 	 * for the socket is over 0x800 and it was not found in the mapping
298 	 * table above, then use the value as is.
299 	 */
300 	if (newproto == -1) {
301 		if (proto < 0x800) {
302 			*errorp = ENOPROTOOPT;
303 			return (NULL);
304 		}
305 		newproto = proto;
306 	}
307 	proto = newproto;
308 
309 	kmflags = (sflags & SOCKET_NOSLEEP) ? KM_NOSLEEP : KM_SLEEP;
310 	ps = kmem_zalloc(sizeof (*ps), kmflags);
311 	if (ps == NULL) {
312 		*errorp = ENOMEM;
313 		return (NULL);
314 	}
315 
316 	ps->ps_type = type;
317 	ps->ps_proto = proto;
318 	rw_init(&ps->ps_bpflock, NULL, RW_DRIVER, NULL);
319 	mutex_init(&ps->ps_lock, NULL, MUTEX_DRIVER, NULL);
320 
321 	*sock_downcalls = &pfp_downcalls;
322 	/*
323 	 * Setting this causes bytes from a packet that do not fit into the
324 	 * destination user buffer to be discarded. Thus the API is one
325 	 * packet per receive and callers are required to use a buffer large
326 	 * enough for the biggest packet that the interface can provide.
327 	 */
328 	*smodep = SM_ATOMIC;
329 
330 	return ((sock_lower_handle_t)ps);
331 }
332 
333 /* ************************************************************************* */
334 
335 /*
336  * pfp_packet is the callback function that is given to the mac layer for
337  * PF_PACKET to receive packets with. One packet at a time is passed into
338  * this function from the mac layer. Each packet is a private copy given
339  * to PF_PACKET to modify or free as it wishes and does not harm the original
340  * packet from which it was cloned.
341  */
342 /* ARGSUSED */
343 static void
344 pfp_packet(void *arg, mac_resource_handle_t mrh, mblk_t *mp, boolean_t flag)
345 {
346 	struct T_unitdata_ind *tunit;
347 	struct sockaddr_ll *sll;
348 	struct sockaddr_ll *sol;
349 	mac_header_info_t hdr;
350 	struct pfpsock *ps;
351 	size_t tusz;
352 	mblk_t *mp0;
353 	int error;
354 
355 	if (mp == NULL)
356 		return;
357 
358 	ps = arg;
359 	if (ps->ps_flow_ctrld) {
360 		ps->ps_flow_ctrl_drops++;
361 		ps->ps_stats.tp_drops++;
362 		ks_stats.kp_recv_flow_cntrld.value.ui64++;
363 		freemsg(mp);
364 		return;
365 	}
366 
367 	if (mac_header_info(ps->ps_mh, mp, &hdr) != 0) {
368 		/*
369 		 * Can't decode the packet header information so drop it.
370 		 */
371 		ps->ps_stats.tp_drops++;
372 		ks_stats.kp_recv_mac_hdr_fail.value.ui64++;
373 		freemsg(mp);
374 		return;
375 	}
376 
377 	if (mac_type(ps->ps_mh) == DL_ETHER &&
378 	    hdr.mhi_bindsap == ETHERTYPE_VLAN) {
379 		struct ether_vlan_header *evhp;
380 		struct ether_vlan_header evh;
381 
382 		hdr.mhi_hdrsize = sizeof (struct ether_vlan_header);
383 		hdr.mhi_istagged = B_TRUE;
384 
385 		if (MBLKL(mp) >= sizeof (*evhp)) {
386 			evhp = (struct ether_vlan_header *)mp->b_rptr;
387 		} else {
388 			int sz = sizeof (*evhp);
389 			char *s = (char *)&evh;
390 			mblk_t *tmp;
391 			int len;
392 
393 			for (tmp = mp; sz > 0 && tmp != NULL;
394 			    tmp = tmp->b_cont) {
395 				len = min(sz, MBLKL(tmp));
396 				bcopy(tmp->b_rptr, s, len);
397 				sz -= len;
398 			}
399 			evhp = &evh;
400 		}
401 		hdr.mhi_tci = ntohs(evhp->ether_tci);
402 		hdr.mhi_bindsap = ntohs(evhp->ether_type);
403 	}
404 
405 	if ((ps->ps_proto != 0) && (ps->ps_proto != hdr.mhi_bindsap)) {
406 		/*
407 		 * The packet is not of interest to this socket so
408 		 * drop it on the floor. Here the SAP is being used
409 		 * as a very course filter.
410 		 */
411 		ps->ps_stats.tp_drops++;
412 		ks_stats.kp_recv_bad_proto.value.ui64++;
413 		freemsg(mp);
414 		return;
415 	}
416 
417 	/*
418 	 * This field is not often set, even for ethernet,
419 	 * by mac_header_info, so compute it if it is 0.
420 	 */
421 	if (hdr.mhi_pktsize == 0)
422 		hdr.mhi_pktsize = msgdsize(mp);
423 
424 	/*
425 	 * If a BPF filter is present, pass the raw packet into that.
426 	 * A failed match will result in zero being returned, indicating
427 	 * that this socket is not interested in the packet.
428 	 */
429 	if (ps->ps_bpf.bf_len != 0) {
430 		uchar_t *buffer;
431 		int buflen;
432 
433 		buflen = MBLKL(mp);
434 		if (hdr.mhi_pktsize == buflen) {
435 			buffer = mp->b_rptr;
436 		} else {
437 			buflen = 0;
438 			buffer = (uchar_t *)mp;
439 		}
440 		rw_enter(&ps->ps_bpflock, RW_READER);
441 		if (bpf_filter(ps->ps_bpf.bf_insns, buffer,
442 		    hdr.mhi_pktsize, buflen) == 0) {
443 			rw_exit(&ps->ps_bpflock);
444 			ps->ps_stats.tp_drops++;
445 			ks_stats.kp_recv_filtered.value.ui64++;
446 			freemsg(mp);
447 			return;
448 		}
449 		rw_exit(&ps->ps_bpflock);
450 	}
451 
452 	if (ps->ps_type == SOCK_DGRAM) {
453 		/*
454 		 * SOCK_DGRAM socket expect a "layer 3" packet, so advance
455 		 * past the link layer header.
456 		 */
457 		mp->b_rptr += hdr.mhi_hdrsize;
458 		hdr.mhi_pktsize -= hdr.mhi_hdrsize;
459 	}
460 
461 	tusz = sizeof (struct T_unitdata_ind) + sizeof (struct sockaddr_ll);
462 	if (ps->ps_auxdata) {
463 		tusz += _TPI_ALIGN_TOPT(sizeof (struct tpacket_auxdata));
464 		tusz += _TPI_ALIGN_TOPT(sizeof (struct T_opthdr));
465 	}
466 
467 	/*
468 	 * It is tempting to think that this could be optimised by having
469 	 * the base mblk_t allocated and hung off the pfpsock structure,
470 	 * except that then another one would need to be allocated for the
471 	 * sockaddr_ll that is included. Even creating a template to copy
472 	 * from is of questionable value, as read-write from one structure
473 	 * to the other is going to be slower than all of the initialisation.
474 	 */
475 	mp0 = allocb(tusz, BPRI_HI);
476 	if (mp0 == NULL) {
477 		ps->ps_stats.tp_drops++;
478 		ks_stats.kp_recv_alloc_fail.value.ui64++;
479 		freemsg(mp);
480 		return;
481 	}
482 
483 	(void) memset(mp0->b_rptr, 0, tusz);
484 
485 	mp0->b_datap->db_type = M_PROTO;
486 	mp0->b_wptr = mp0->b_rptr + tusz;
487 
488 	tunit = (struct T_unitdata_ind *)mp0->b_rptr;
489 	tunit->PRIM_type = T_UNITDATA_IND;
490 	tunit->SRC_length = sizeof (struct sockaddr);
491 	tunit->SRC_offset = sizeof (*tunit);
492 
493 	sol = (struct sockaddr_ll *)&ps->ps_sock;
494 	sll = (struct sockaddr_ll *)(mp0->b_rptr + sizeof (*tunit));
495 	sll->sll_ifindex = sol->sll_ifindex;
496 	sll->sll_hatype = (uint16_t)hdr.mhi_origsap;
497 	sll->sll_halen = sol->sll_halen;
498 	if (hdr.mhi_saddr != NULL)
499 		(void) memcpy(sll->sll_addr, hdr.mhi_saddr, sll->sll_halen);
500 
501 	switch (hdr.mhi_dsttype) {
502 	case MAC_ADDRTYPE_MULTICAST :
503 		sll->sll_pkttype = PACKET_MULTICAST;
504 		break;
505 	case MAC_ADDRTYPE_BROADCAST :
506 		sll->sll_pkttype = PACKET_BROADCAST;
507 		break;
508 	case MAC_ADDRTYPE_UNICAST :
509 		if (memcmp(sol->sll_addr, hdr.mhi_daddr, sol->sll_halen) == 0)
510 			sll->sll_pkttype = PACKET_HOST;
511 		else
512 			sll->sll_pkttype = PACKET_OTHERHOST;
513 		break;
514 	}
515 
516 	if (ps->ps_auxdata) {
517 		struct tpacket_auxdata *aux;
518 		struct T_opthdr *topt;
519 
520 		tunit->OPT_offset = _TPI_ALIGN_TOPT(tunit->SRC_offset +
521 		    sizeof (struct sockaddr_ll));
522 		tunit->OPT_length = _TPI_ALIGN_TOPT(sizeof (struct T_opthdr)) +
523 		    _TPI_ALIGN_TOPT(sizeof (struct tpacket_auxdata));
524 
525 		topt = (struct T_opthdr *)(mp0->b_rptr + tunit->OPT_offset);
526 		aux = (struct tpacket_auxdata *)
527 		    ((char *)topt + _TPI_ALIGN_TOPT(sizeof (*topt)));
528 
529 		topt->len = tunit->OPT_length;
530 		topt->level = SOL_PACKET;
531 		topt->name = PACKET_AUXDATA;
532 		topt->status = 0;
533 		/*
534 		 * libpcap doesn't seem to use any other field,
535 		 * so it isn't clear how they should be filled in.
536 		 */
537 		aux->tp_vlan_vci = hdr.mhi_tci;
538 	}
539 
540 	linkb(mp0, mp);
541 
542 	ps->ps_upcalls->su_recv(ps->ps_upper, mp0, hdr.mhi_pktsize, 0,
543 	    &error, NULL);
544 
545 	if (error == 0) {
546 		ps->ps_stats.tp_packets++;
547 		ks_stats.kp_recv_ok.value.ui64++;
548 	} else {
549 		mutex_enter(&ps->ps_lock);
550 		if (error == ENOSPC) {
551 			ps->ps_upcalls->su_recv(ps->ps_upper, NULL, 0, 0,
552 			    &error, NULL);
553 			if (error == ENOSPC)
554 				ps->ps_flow_ctrld = B_TRUE;
555 		}
556 		mutex_exit(&ps->ps_lock);
557 		ps->ps_stats.tp_drops++;
558 		ks_stats.kp_recv_fail.value.ui64++;
559 	}
560 }
561 
562 /*
563  * Bind a PF_PACKET socket to a network interface.
564  *
565  * The default operation of this bind() is to place the socket (and thus the
566  * network interface) into promiscuous mode. It is then up to the application
567  * to turn that down by issuing the relevant ioctls, if desired.
568  */
569 /* ARGSUSED */
570 static int
571 sdpfp_bind(sock_lower_handle_t handle, struct sockaddr *addr,
572     socklen_t addrlen, struct cred *cred)
573 {
574 	struct sockaddr_ll *addr_ll, *sol;
575 	mac_client_handle_t mch;
576 	struct pfpsock *ps;
577 	mac_handle_t mh;
578 	int error;
579 
580 	ps = (struct pfpsock *)handle;
581 	if (ps->ps_bound)
582 		return (EINVAL);
583 
584 	addr_ll = (struct sockaddr_ll *)addr;
585 
586 	error = pfp_open_index(addr_ll->sll_ifindex, &mh, &mch, cred);
587 	if (error != 0)
588 		return (error);
589 	/*
590 	 * Ensure that each socket is only bound once.
591 	 */
592 	mutex_enter(&ps->ps_lock);
593 	if (ps->ps_mh != 0) {
594 		mutex_exit(&ps->ps_lock);
595 		pfp_close(mh, mch);
596 		return (EADDRINUSE);
597 	}
598 	ps->ps_mh = mh;
599 	ps->ps_mch = mch;
600 	mutex_exit(&ps->ps_lock);
601 
602 	/*
603 	 * Cache all of the information from bind so that it's in an easy
604 	 * place to get at when packets are received.
605 	 */
606 	sol = (struct sockaddr_ll *)&ps->ps_sock;
607 	sol->sll_family = AF_PACKET;
608 	sol->sll_ifindex = addr_ll->sll_ifindex;
609 	sol->sll_protocol = addr_ll->sll_protocol;
610 	sol->sll_halen = mac_addr_len(ps->ps_mh);
611 	mac_unicast_primary_get(ps->ps_mh, sol->sll_addr);
612 	mac_sdu_get(ps->ps_mh, NULL, &ps->ps_max_sdu);
613 	ps->ps_linkid = addr_ll->sll_ifindex;
614 
615 	error = mac_promisc_add(ps->ps_mch, MAC_CLIENT_PROMISC_ALL,
616 	    pfp_packet, ps, &ps->ps_phd, MAC_PROMISC_FLAGS_VLAN_TAG_STRIP);
617 	if (error == 0) {
618 		ps->ps_promisc = MAC_CLIENT_PROMISC_ALL;
619 		ps->ps_bound = B_TRUE;
620 	}
621 
622 	return (error);
623 }
624 
625 /* ARGSUSED */
626 static void
627 sdpfp_activate(sock_lower_handle_t lower, sock_upper_handle_t upper,
628     sock_upcalls_t *upcalls, int flags, cred_t *cred)
629 {
630 	struct pfpsock *ps;
631 
632 	ps = (struct pfpsock *)lower;
633 	ps->ps_upper = upper;
634 	ps->ps_upcalls = upcalls;
635 }
636 
637 /*
638  * This module only implements getting socket options for the new socket
639  * option level (SOL_PACKET) that it introduces. All other requests are
640  * passed back to the sockfs layer.
641  */
642 /* ARGSUSED */
643 static int
644 sdpfp_getsockopt(sock_lower_handle_t handle, int level, int option_name,
645     void *optval, socklen_t *optlenp, struct cred *cred)
646 {
647 	int error = 0;
648 
649 	switch (level) {
650 	case SOL_PACKET :
651 		error = pfp_getpacket_sockopt(handle, option_name, optval,
652 		    optlenp);
653 		break;
654 	default :
655 		/*
656 		 * If sockfs code receives this error in return from the
657 		 * getsockopt downcall it handles the option locally, if
658 		 * it can. This implements SO_RCVBUF, etc.
659 		 */
660 		error = ENOPROTOOPT;
661 		break;
662 	}
663 
664 	return (error);
665 }
666 
667 /*
668  * PF_PACKET supports setting socket options at only two levels:
669  * SOL_SOCKET and SOL_PACKET.
670  */
671 /* ARGSUSED */
672 static int
673 sdpfp_setsockopt(sock_lower_handle_t handle, int level, int option_name,
674     const void *optval, socklen_t optlen, struct cred *cred)
675 {
676 	int error = 0;
677 
678 	switch (level) {
679 	case SOL_SOCKET :
680 		error = pfp_setsocket_sockopt(handle, option_name, optval,
681 		    optlen);
682 		break;
683 	case SOL_PACKET :
684 		error = pfp_setpacket_sockopt(handle, option_name, optval,
685 		    optlen);
686 		break;
687 	default :
688 		error = EINVAL;
689 		break;
690 	}
691 
692 	return (error);
693 }
694 
695 /*
696  * This function is incredibly inefficient for sending any packet that
697  * comes with a msghdr asking to be sent to an interface to which the
698  * socket has not been bound. Some possibilities here are keeping a
699  * cache of all open mac's and mac_client's, for the purpose of sending,
700  * and closing them after some amount of inactivity. Clearly, applications
701  * should not be written to use one socket for multiple interfaces if
702  * performance is desired with the code as is.
703  */
704 /* ARGSUSED */
705 static int
706 sdpfp_senduio(sock_lower_handle_t handle, struct uio *uiop,
707     struct nmsghdr *msg, struct cred *cred)
708 {
709 	struct sockaddr_ll *sol;
710 	mac_client_handle_t mch;
711 	struct pfpsock *ps;
712 	boolean_t new_open;
713 	mac_handle_t mh;
714 	size_t mpsize;
715 	uint_t maxsdu;
716 	mblk_t *mp0;
717 	mblk_t *mp;
718 	int error;
719 
720 	mp = NULL;
721 	mp0 = NULL;
722 	new_open = B_FALSE;
723 	ps = (struct pfpsock *)handle;
724 	mh = ps->ps_mh;
725 	mch = ps->ps_mch;
726 	maxsdu = ps->ps_max_sdu;
727 
728 	sol = (struct sockaddr_ll *)msg->msg_name;
729 	if (sol == NULL) {
730 		/*
731 		 * If no sockaddr_ll has been provided with the send call,
732 		 * use the one constructed when the socket was bound to an
733 		 * interface and fail if it hasn't been bound.
734 		 */
735 		if (!ps->ps_bound) {
736 			ks_stats.kp_send_unbound.value.ui64++;
737 			return (EPROTO);
738 		}
739 		sol = (struct sockaddr_ll *)&ps->ps_sock;
740 	} else {
741 		/*
742 		 * Verify the sockaddr_ll message passed down before using
743 		 * it to send a packet out with. If it refers to an interface
744 		 * that has not been bound, it is necessary to open it.
745 		 */
746 		struct sockaddr_ll *sll;
747 
748 		if (msg->msg_namelen < sizeof (struct sockaddr_ll)) {
749 			ks_stats.kp_send_short_msg.value.ui64++;
750 			return (EINVAL);
751 		}
752 
753 		if (sol->sll_family != AF_PACKET) {
754 			ks_stats.kp_send_wrong_family.value.ui64++;
755 			return (EAFNOSUPPORT);
756 		}
757 
758 		sll = (struct sockaddr_ll *)&ps->ps_sock;
759 		if (sol->sll_ifindex != sll->sll_ifindex) {
760 			error = pfp_open_index(sol->sll_ifindex, &mh, &mch,
761 			    cred);
762 			if (error != 0) {
763 				ks_stats.kp_send_open_fail.value.ui64++;
764 				return (error);
765 			}
766 			mac_sdu_get(mh, NULL, &maxsdu);
767 			new_open = B_TRUE;
768 		}
769 	}
770 
771 	mpsize = uiop->uio_resid;
772 	if (mpsize > maxsdu) {
773 		ks_stats.kp_send_too_big.value.ui64++;
774 		error = EMSGSIZE;
775 		goto done;
776 	}
777 
778 	if ((mp = allocb(mpsize, BPRI_HI)) == NULL) {
779 		ks_stats.kp_send_alloc_fail.value.ui64++;
780 		error = ENOBUFS;
781 		goto done;
782 	}
783 
784 	mp->b_wptr = mp->b_rptr + mpsize;
785 	error = uiomove(mp->b_rptr, mpsize, UIO_WRITE, uiop);
786 	if (error != 0) {
787 		ks_stats.kp_send_uiomove_fail.value.ui64++;
788 		goto done;
789 	}
790 
791 	if (ps->ps_type == SOCK_DGRAM) {
792 		mp0 = mac_header(mh, sol->sll_addr, sol->sll_protocol, mp, 0);
793 		if (mp0 == NULL) {
794 			ks_stats.kp_send_no_memory.value.ui64++;
795 			error = ENOBUFS;
796 			goto done;
797 		}
798 		linkb(mp0, mp);
799 		mp = mp0;
800 	}
801 
802 	/*
803 	 * As this is sending datagrams and no promise is made about
804 	 * how or if a packet will be sent/delivered, no effort is to
805 	 * be expended in recovering from a situation where the packet
806 	 * cannot be sent - it is just dropped.
807 	 */
808 	error = mac_tx(mch, mp, 0, MAC_DROP_ON_NO_DESC, NULL);
809 	if (error == 0) {
810 		mp = NULL;
811 		ks_stats.kp_send_ok.value.ui64++;
812 	} else {
813 		ks_stats.kp_send_failed.value.ui64++;
814 	}
815 
816 done:
817 
818 	if (new_open) {
819 		ASSERT(mch != ps->ps_mch);
820 		ASSERT(mh != ps->ps_mh);
821 		pfp_close(mh, mch);
822 	}
823 	if (mp != NULL)
824 		freemsg(mp);
825 
826 	return (error);
827 
828 }
829 
830 /*
831  * There's no use of a lock here, or at the bottom of pfp_packet() where
832  * ps_flow_ctrld is set to true, because in a situation where these two
833  * are racing to set the flag one way or the other, the end result is
834  * going to be ultimately determined by the scheduler anyway - which of
835  * the two threads gets the lock first? In such an operational environment,
836  * we've got packets arriving too fast to be delt with so packets are going
837  * to be dropped. Grabbing a lock just makes the drop more expensive.
838  */
839 static void
840 sdpfp_clr_flowctrl(sock_lower_handle_t handle)
841 {
842 	struct pfpsock *ps;
843 
844 	ps = (struct pfpsock *)handle;
845 
846 	mutex_enter(&ps->ps_lock);
847 	ps->ps_flow_ctrld = B_FALSE;
848 	mutex_exit(&ps->ps_lock);
849 }
850 
851 /*
852  * The implementation of this ioctl() handler is intended to function
853  * in the absence of a bind() being made before it is called. Thus the
854  * function calls mac_open() itself to provide a handle
855  * This function is structured like this:
856  * - determine the linkid for the interface being targetted
857  * - open the interface with said linkid
858  * - perform ioctl
859  * - copy results back to caller
860  *
861  * The ioctls that interact with interface flags have been implented below
862  * to assume that the interface is always up and running (IFF_RUNNING) and
863  * to use the state of this socket to determine whether or not the network
864  * interface is in promiscuous mode. Thus an ioctl to get the interface flags
865  * of an interface that has been put in promiscuous mode by another socket
866  * (in the same program or different), will not report that status.
867  */
868 /* ARGSUSED */
869 static int
870 sdpfp_ioctl(sock_lower_handle_t handle, int cmd, intptr_t arg, int mod,
871     int32_t *rval, struct cred *cr)
872 {
873 #if defined(_SYSCALL32)
874 	struct timeval32 tival;
875 #else
876 	struct timeval tival;
877 #endif
878 	mac_client_promisc_type_t mtype;
879 	struct sockaddr_dl *sock;
880 	datalink_id_t linkid;
881 	struct lifreq lifreq;
882 	struct ifreq ifreq;
883 	struct pfpsock *ps;
884 	mac_handle_t mh;
885 	timespec_t tv;
886 	int error;
887 
888 	switch (cmd) {
889 	/*
890 	 * ioctls that work on "struct lifreq"
891 	 */
892 	case SIOCSLIFFLAGS :
893 	case SIOCGLIFINDEX :
894 	case SIOCGLIFFLAGS :
895 	case SIOCGLIFMTU :
896 	case SIOCGLIFHWADDR :
897 		error = pfp_lifreq_getlinkid(arg, &lifreq, &linkid);
898 		if (error != 0)
899 			return (error);
900 		break;
901 
902 	/*
903 	 * ioctls that work on "struct ifreq".
904 	 * Not all of these have a "struct lifreq" partner, for example
905 	 * SIOCGIFHWADDR, for the simple reason that the logical interface
906 	 * does not have a hardware address.
907 	 */
908 	case SIOCSIFFLAGS :
909 	case SIOCGIFINDEX :
910 	case SIOCGIFFLAGS :
911 	case SIOCGIFMTU :
912 	case SIOCGIFHWADDR :
913 		error = pfp_ifreq_getlinkid(arg, &ifreq, &linkid);
914 		if (error != 0)
915 			return (error);
916 		break;
917 	}
918 
919 	error =  mac_open_by_linkid(linkid, &mh);
920 	if (error != 0)
921 		return (error);
922 
923 	ps = (struct pfpsock *)handle;
924 
925 	switch (cmd) {
926 	case SIOCGLIFINDEX :
927 		lifreq.lifr_index = linkid;
928 		break;
929 
930 	case SIOCGIFINDEX :
931 		ifreq.ifr_index = linkid;
932 		break;
933 
934 	case SIOCGIFFLAGS :
935 		ifreq.ifr_flags = IFF_RUNNING;
936 		if (ps->ps_promisc == MAC_CLIENT_PROMISC_ALL)
937 			ifreq.ifr_flags |= IFF_PROMISC;
938 		break;
939 
940 	case SIOCGLIFFLAGS :
941 		lifreq.lifr_flags = IFF_RUNNING;
942 		if (ps->ps_promisc == MAC_CLIENT_PROMISC_ALL)
943 			lifreq.lifr_flags |= IFF_PROMISC;
944 		break;
945 
946 	case SIOCSIFFLAGS :
947 		if (linkid != ps->ps_linkid) {
948 			error = EINVAL;
949 		} else {
950 			if ((ifreq.ifr_flags & IFF_PROMISC) != 0)
951 				mtype = MAC_CLIENT_PROMISC_ALL;
952 			else
953 				mtype = MAC_CLIENT_PROMISC_FILTERED;
954 			error = pfp_set_promisc(ps, mtype);
955 		}
956 		break;
957 
958 	case SIOCSLIFFLAGS :
959 		if (linkid != ps->ps_linkid) {
960 			error = EINVAL;
961 		} else {
962 			if ((lifreq.lifr_flags & IFF_PROMISC) != 0)
963 				mtype = MAC_CLIENT_PROMISC_ALL;
964 			else
965 				mtype = MAC_CLIENT_PROMISC_FILTERED;
966 			error = pfp_set_promisc(ps, mtype);
967 		}
968 		break;
969 
970 	case SIOCGIFMTU :
971 		mac_sdu_get(mh, NULL, &ifreq.ifr_mtu);
972 		break;
973 
974 	case SIOCGLIFMTU :
975 		mac_sdu_get(mh, NULL, &lifreq.lifr_mtu);
976 		break;
977 
978 	case SIOCGIFHWADDR :
979 		if (mac_addr_len(mh) > sizeof (ifreq.ifr_addr.sa_data)) {
980 			error = EPFNOSUPPORT;
981 			break;
982 		}
983 
984 		if (mac_addr_len(mh) == 0) {
985 			(void) memset(ifreq.ifr_addr.sa_data, 0,
986 			    sizeof (ifreq.ifr_addr.sa_data));
987 		} else {
988 			mac_unicast_primary_get(mh,
989 			    (uint8_t *)ifreq.ifr_addr.sa_data);
990 		}
991 
992 		/*
993 		 * The behaviour here in setting sa_family is consistent
994 		 * with what applications such as tcpdump would expect
995 		 * for a Linux PF_PACKET socket.
996 		 */
997 		ifreq.ifr_addr.sa_family = pfp_dl_to_arphrd(mac_type(mh));
998 		break;
999 
1000 	case SIOCGLIFHWADDR :
1001 		lifreq.lifr_type = 0;
1002 		sock = (struct sockaddr_dl *)&lifreq.lifr_addr;
1003 
1004 		if (mac_addr_len(mh) > sizeof (sock->sdl_data)) {
1005 			error = EPFNOSUPPORT;
1006 			break;
1007 		}
1008 
1009 		/*
1010 		 * Fill in the sockaddr_dl with link layer details. Of note,
1011 		 * the index is returned as 0 for a couple of reasons:
1012 		 * (1) there is no public API that uses or requires it
1013 		 * (2) the MAC index is currently 32bits and sdl_index is 16.
1014 		 */
1015 		sock->sdl_family = AF_LINK;
1016 		sock->sdl_index = 0;
1017 		sock->sdl_type = mac_type(mh);
1018 		sock->sdl_nlen = 0;
1019 		sock->sdl_alen = mac_addr_len(mh);
1020 		sock->sdl_slen = 0;
1021 		if (mac_addr_len(mh) == 0) {
1022 			(void) memset(sock->sdl_data, 0,
1023 			    sizeof (sock->sdl_data));
1024 		} else {
1025 			mac_unicast_primary_get(mh, (uint8_t *)sock->sdl_data);
1026 		}
1027 		break;
1028 
1029 	case SIOCGSTAMP :
1030 		(void) gethrestime(&tv);
1031 		tival.tv_sec = (time_t)tv.tv_sec;
1032 		tival.tv_usec = tv.tv_nsec / 1000;
1033 		error = ddi_copyout(&tival, (void *)arg, sizeof (tival), 0);
1034 		break;
1035 
1036 	default :
1037 		break;
1038 	}
1039 
1040 	mac_close(mh);
1041 
1042 	if (error == 0) {
1043 		/*
1044 		 * Only the "GET" ioctls need to copy data back to userace.
1045 		 */
1046 		switch (cmd) {
1047 		case SIOCGLIFINDEX :
1048 		case SIOCGLIFFLAGS :
1049 		case SIOCGLIFMTU :
1050 		case SIOCGLIFHWADDR :
1051 			error = ddi_copyout(&lifreq, (void *)arg,
1052 			    sizeof (lifreq), 0);
1053 			break;
1054 
1055 		case SIOCGIFINDEX :
1056 		case SIOCGIFFLAGS :
1057 		case SIOCGIFMTU :
1058 		case SIOCGIFHWADDR :
1059 			error = ddi_copyout(&ifreq, (void *)arg,
1060 			    sizeof (ifreq), 0);
1061 			break;
1062 		default :
1063 			break;
1064 		}
1065 	}
1066 
1067 	return (error);
1068 }
1069 
1070 /*
1071  * Closing the socket requires that all open references to network
1072  * interfaces be closed.
1073  */
1074 /* ARGSUSED */
1075 static int
1076 sdpfp_close(sock_lower_handle_t handle, int flag, struct cred *cr)
1077 {
1078 	struct pfpsock *ps = (struct pfpsock *)handle;
1079 
1080 	if (ps->ps_phd != 0) {
1081 		mac_promisc_remove(ps->ps_phd);
1082 		ps->ps_phd = 0;
1083 	}
1084 
1085 	if (ps->ps_mch != 0) {
1086 		mac_client_close(ps->ps_mch, 0);
1087 		ps->ps_mch = 0;
1088 	}
1089 
1090 	if (ps->ps_mh != 0) {
1091 		mac_close(ps->ps_mh);
1092 		ps->ps_mh = 0;
1093 	}
1094 
1095 	kmem_free(ps, sizeof (*ps));
1096 
1097 	return (0);
1098 }
1099 
1100 /* ************************************************************************* */
1101 
1102 /*
1103  * Given a pointer (arg) to a "struct ifreq" (potentially in user space),
1104  * determine the linkid for the interface name stored in that structure.
1105  * name is used as a buffer so that we can ensure a trailing \0 is appended
1106  * to the name safely.
1107  */
1108 static int
1109 pfp_ifreq_getlinkid(intptr_t arg, struct ifreq *ifreqp,
1110     datalink_id_t *linkidp)
1111 {
1112 	char name[IFNAMSIZ + 1];
1113 	int error;
1114 
1115 	if (ddi_copyin((void *)arg, ifreqp, sizeof (*ifreqp), 0) != 0)
1116 		return (EFAULT);
1117 
1118 	(void) strlcpy(name, ifreqp->ifr_name, sizeof (name));
1119 
1120 	error = dls_mgmt_get_linkid(name, linkidp);
1121 	if (error != 0)
1122 		error = dls_devnet_macname2linkid(name, linkidp);
1123 
1124 	return (error);
1125 }
1126 
1127 /*
1128  * Given a pointer (arg) to a "struct lifreq" (potentially in user space),
1129  * determine the linkid for the interface name stored in that structure.
1130  * name is used as a buffer so that we can ensure a trailing \0 is appended
1131  * to the name safely.
1132  */
1133 static int
1134 pfp_lifreq_getlinkid(intptr_t arg, struct lifreq *lifreqp,
1135     datalink_id_t *linkidp)
1136 {
1137 	char name[LIFNAMSIZ + 1];
1138 	int error;
1139 
1140 	if (ddi_copyin((void *)arg, lifreqp, sizeof (*lifreqp), 0) != 0)
1141 		return (EFAULT);
1142 
1143 	(void) strlcpy(name, lifreqp->lifr_name, sizeof (name));
1144 
1145 	error = dls_mgmt_get_linkid(name, linkidp);
1146 	if (error != 0)
1147 		error = dls_devnet_macname2linkid(name, linkidp);
1148 
1149 	return (error);
1150 }
1151 
1152 /*
1153  * Although there are several new SOL_PACKET options that can be set and
1154  * are specific to this implementation of PF_PACKET, the current API does
1155  * not support doing a get on them to retrieve accompanying status. Thus
1156  * it is only currently possible to use SOL_PACKET with getsockopt to
1157  * retrieve statistical information. This remains consistant with the
1158  * Linux API at the time of writing.
1159  */
1160 static int
1161 pfp_getpacket_sockopt(sock_lower_handle_t handle, int option_name,
1162     void *optval, socklen_t *optlenp)
1163 {
1164 	struct pfpsock *ps;
1165 	int error = 0;
1166 
1167 	ps = (struct pfpsock *)handle;
1168 
1169 	switch (option_name) {
1170 	case PACKET_STATISTICS :
1171 		if (*optlenp < sizeof (ps->ps_stats)) {
1172 			error = EINVAL;
1173 			break;
1174 		}
1175 		*optlenp = sizeof (ps->ps_stats);
1176 		bcopy(&ps->ps_stats, optval, sizeof (ps->ps_stats));
1177 		break;
1178 	default :
1179 		error = EINVAL;
1180 		break;
1181 	}
1182 
1183 	return (error);
1184 }
1185 
1186 /*
1187  * The SOL_PACKET level for socket options supports three options,
1188  * PACKET_ADD_MEMBERSHIP, PACKET_DROP_MEMBERSHIP and PACKET_AUXDATA.
1189  * This function is responsible for mapping the two socket options
1190  * that manage multicast membership into the appropriate internal
1191  * function calls to bring the option into effect. Whilst direct
1192  * changes to the multicast membership (ADD/DROP) groups is handled
1193  * by calls directly into the mac module, changes to the promiscuos
1194  * mode are vectored through pfp_set_promisc() so that the logic for
1195  * managing the promiscuous mode is in one place.
1196  */
1197 /* ARGSUSED */
1198 static int
1199 pfp_setpacket_sockopt(sock_lower_handle_t handle, int option_name,
1200     const void *optval, socklen_t optlen)
1201 {
1202 	struct packet_mreq mreq;
1203 	struct pfpsock *ps;
1204 	int error = 0;
1205 	int opt;
1206 
1207 	ps = (struct pfpsock *)handle;
1208 	if (!ps->ps_bound)
1209 		return (EPROTO);
1210 
1211 	if ((option_name == PACKET_ADD_MEMBERSHIP) ||
1212 	    (option_name == PACKET_DROP_MEMBERSHIP)) {
1213 		if (!ps->ps_bound)
1214 			return (EPROTO);
1215 		bcopy(optval, &mreq, sizeof (mreq));
1216 		if (ps->ps_linkid != mreq.mr_ifindex)
1217 			return (EINVAL);
1218 	}
1219 
1220 	switch (option_name) {
1221 	case PACKET_ADD_MEMBERSHIP :
1222 		switch (mreq.mr_type) {
1223 		case PACKET_MR_MULTICAST :
1224 			if (mreq.mr_alen !=
1225 			    ((struct sockaddr_ll *)&ps->ps_sock)->sll_halen)
1226 				return (EINVAL);
1227 
1228 			error = mac_multicast_add(ps->ps_mch, mreq.mr_address);
1229 			break;
1230 
1231 		case PACKET_MR_PROMISC :
1232 			error = pfp_set_promisc(ps, MAC_CLIENT_PROMISC_ALL);
1233 			break;
1234 
1235 		case PACKET_MR_ALLMULTI :
1236 			error = pfp_set_promisc(ps, MAC_CLIENT_PROMISC_MULTI);
1237 			break;
1238 		}
1239 		break;
1240 
1241 	case PACKET_DROP_MEMBERSHIP :
1242 		switch (mreq.mr_type) {
1243 		case PACKET_MR_MULTICAST :
1244 			if (mreq.mr_alen !=
1245 			    ((struct sockaddr_ll *)&ps->ps_sock)->sll_halen)
1246 				return (EINVAL);
1247 
1248 			mac_multicast_remove(ps->ps_mch, mreq.mr_address);
1249 			break;
1250 
1251 		case PACKET_MR_PROMISC :
1252 			if (ps->ps_promisc != MAC_CLIENT_PROMISC_ALL)
1253 				return (EINVAL);
1254 			error = pfp_set_promisc(ps,
1255 			    MAC_CLIENT_PROMISC_FILTERED);
1256 			break;
1257 
1258 		case PACKET_MR_ALLMULTI :
1259 			if (ps->ps_promisc != MAC_CLIENT_PROMISC_MULTI)
1260 				return (EINVAL);
1261 			error = pfp_set_promisc(ps,
1262 			    MAC_CLIENT_PROMISC_FILTERED);
1263 			break;
1264 		}
1265 		break;
1266 
1267 	case PACKET_AUXDATA :
1268 		if (optlen == sizeof (int)) {
1269 			opt = *(int *)optval;
1270 			ps->ps_auxdata = (opt != 0);
1271 		} else {
1272 			error = EINVAL;
1273 		}
1274 		break;
1275 	default :
1276 		error = EINVAL;
1277 		break;
1278 	}
1279 
1280 	return (error);
1281 }
1282 
1283 /*
1284  * There are only two special setsockopt's for SOL_SOCKET with PF_PACKET:
1285  * SO_ATTACH_FILTER and SO_DETACH_FILTER. All other setsockopt requests
1286  * that are for SOL_SOCKET are passed back to the socket layer for its
1287  * generic implementation.
1288  *
1289  * Both of these setsockopt values are candidates for being handled by the
1290  * socket layer itself in future, however this requires understanding how
1291  * they would interact with all other sockets.
1292  */
1293 static int
1294 pfp_setsocket_sockopt(sock_lower_handle_t handle, int option_name,
1295     const void *optval, socklen_t optlen)
1296 {
1297 	struct bpf_program prog;
1298 	struct bpf_insn *fcode;
1299 	struct pfpsock *ps;
1300 	int error = 0;
1301 	int size;
1302 
1303 	ps = (struct pfpsock *)handle;
1304 
1305 	switch (option_name) {
1306 	case SO_ATTACH_FILTER :
1307 #ifdef _LP64
1308 		if (optlen == sizeof (struct bpf_program32)) {
1309 			struct bpf_program32 prog32;
1310 
1311 			bcopy(optval, &prog32, sizeof (prog32));
1312 			prog.bf_len = prog32.bf_len;
1313 			prog.bf_insns = (void *)(uint64_t)prog32.bf_insns;
1314 		} else
1315 #endif
1316 		if (optlen == sizeof (struct bpf_program)) {
1317 			bcopy(optval, &prog, sizeof (prog));
1318 		} else if (optlen != sizeof (struct bpf_program)) {
1319 			return (EINVAL);
1320 		}
1321 
1322 		size = prog.bf_len * sizeof (*prog.bf_insns);
1323 		fcode = kmem_alloc(size, KM_SLEEP);
1324 		if (ddi_copyin(prog.bf_insns, fcode, size, 0) != 0) {
1325 			kmem_free(fcode, size);
1326 			return (EFAULT);
1327 		}
1328 
1329 		if (bpf_validate(fcode, (int)prog.bf_len)) {
1330 			rw_enter(&ps->ps_bpflock, RW_WRITER);
1331 			pfp_release_bpf(ps);
1332 			ps->ps_bpf.bf_insns = fcode;
1333 			ps->ps_bpf.bf_len = size;
1334 			rw_exit(&ps->ps_bpflock);
1335 
1336 			return (0);
1337 		}
1338 		kmem_free(fcode, size);
1339 		error = EINVAL;
1340 		break;
1341 
1342 	case SO_DETACH_FILTER :
1343 		pfp_release_bpf(ps);
1344 		break;
1345 	default :
1346 		/*
1347 		 * If sockfs code receives this error in return from the
1348 		 * getsockopt downcall it handles the option locally, if
1349 		 * it can. This implements SO_RCVBUF, etc.
1350 		 */
1351 		error = ENOPROTOOPT;
1352 		break;
1353 	}
1354 
1355 	return (error);
1356 }
1357 
1358 /*
1359  * pfp_open_index is an internal function used to open a MAC device by
1360  * its index. Both a mac_handle_t and mac_client_handle_t are acquired
1361  * because some of the interfaces provided by the mac layer require either
1362  * only the mac_handle_t or both it and mac_handle_t.
1363  *
1364  * Whilst inside the kernel we can access data structures supporting any
1365  * zone, access to interfaces from non-global zones is restricted to those
1366  * interfaces (if any) that are exclusively assigned to a zone.
1367  */
1368 static int
1369 pfp_open_index(int index, mac_handle_t *mhp, mac_client_handle_t *mcip,
1370     cred_t *cred)
1371 {
1372 	mac_client_handle_t mch;
1373 	zoneid_t ifzoneid;
1374 	mac_handle_t mh;
1375 	zoneid_t zoneid;
1376 	int error;
1377 
1378 	mh = 0;
1379 	mch = 0;
1380 	error = mac_open_by_linkid(index, &mh);
1381 	if (error != 0)
1382 		goto bad_open;
1383 
1384 	error = mac_client_open(mh, &mch, NULL,
1385 	    MAC_OPEN_FLAGS_USE_DATALINK_NAME);
1386 	if (error != 0)
1387 		goto bad_open;
1388 
1389 	zoneid = crgetzoneid(cred);
1390 	if (zoneid != GLOBAL_ZONEID) {
1391 		mac_perim_handle_t perim;
1392 
1393 		mac_perim_enter_by_mh(mh, &perim);
1394 		error = dls_link_getzid(mac_client_name(mch), &ifzoneid);
1395 		mac_perim_exit(perim);
1396 		if (error != 0)
1397 			goto bad_open;
1398 		if (ifzoneid != zoneid) {
1399 			error = EACCES;
1400 			goto bad_open;
1401 		}
1402 	}
1403 
1404 	*mcip = mch;
1405 	*mhp = mh;
1406 
1407 	return (0);
1408 bad_open:
1409 	if (mch != 0)
1410 		mac_client_close(mch, 0);
1411 	if (mh != 0)
1412 		mac_close(mh);
1413 	return (error);
1414 }
1415 
1416 static void
1417 pfp_close(mac_handle_t mh, mac_client_handle_t mch)
1418 {
1419 	mac_client_close(mch, 0);
1420 	mac_close(mh);
1421 }
1422 
1423 /*
1424  * The purpose of this function is to provide a single place where we free
1425  * the loaded BPF program and reset all pointers/counters associated with
1426  * it.
1427  */
1428 static void
1429 pfp_release_bpf(struct pfpsock *ps)
1430 {
1431 	if (ps->ps_bpf.bf_len != 0) {
1432 		kmem_free(ps->ps_bpf.bf_insns, ps->ps_bpf.bf_len);
1433 		ps->ps_bpf.bf_len = 0;
1434 		ps->ps_bpf.bf_insns = NULL;
1435 	}
1436 }
1437 
1438 /*
1439  * Set the promiscuous mode of a network interface.
1440  * This function only calls the mac layer when there is a change to the
1441  * status of a network interface's promiscous mode. Tracking of how many
1442  * sockets have the network interface in promiscuous mode, and thus the
1443  * control over the physical device's status, is left to the mac layer.
1444  */
1445 static int
1446 pfp_set_promisc(struct pfpsock *ps, mac_client_promisc_type_t turnon)
1447 {
1448 	int error = 0;
1449 	int flags;
1450 
1451 	/*
1452 	 * There are 4 combinations of turnon/ps_promisc.
1453 	 * This if handles 2 (both false, both true) and the if() below
1454 	 * handles the remaining one - when change is required.
1455 	 */
1456 	if (turnon == ps->ps_promisc)
1457 		return (error);
1458 
1459 	if (ps->ps_phd != 0) {
1460 		mac_promisc_remove(ps->ps_phd);
1461 		ps->ps_phd = 0;
1462 
1463 		/*
1464 		 * ps_promisc is set here in case the call to mac_promisc_add
1465 		 * fails: leaving it to indicate that the interface is still
1466 		 * in some sort of promiscuous mode is false.
1467 		 */
1468 		if (ps->ps_promisc != MAC_CLIENT_PROMISC_FILTERED) {
1469 			ps->ps_promisc = MAC_CLIENT_PROMISC_FILTERED;
1470 			flags = MAC_PROMISC_FLAGS_NO_PHYS;
1471 		} else {
1472 			flags = 0;
1473 		}
1474 		flags |= MAC_PROMISC_FLAGS_VLAN_TAG_STRIP;
1475 	}
1476 
1477 	error = mac_promisc_add(ps->ps_mch, turnon, pfp_packet, ps,
1478 	    &ps->ps_phd, flags);
1479 	if (error == 0)
1480 		ps->ps_promisc = turnon;
1481 
1482 	return (error);
1483 }
1484 
1485 /*
1486  * This table maps the MAC types in Solaris to the ARPHRD_* values used
1487  * on Linux. This is used with the SIOCGIFHWADDR/SIOCGLIFHWADDR ioctl.
1488  *
1489  * The symbols in this table are *not* pulled in from <net/if_arp.h>,
1490  * they are pulled from <netpacket/packet.h>, thus it acts as a source
1491  * of supplementary information to the ARP table.
1492  */
1493 static uint_t arphrd_to_dl[][2] = {
1494 	{ ARPHRD_IEEE80211,	DL_WIFI },
1495 	{ ARPHRD_TUNNEL,	DL_IPV4 },
1496 	{ ARPHRD_TUNNEL,	DL_IPV6 },
1497 	{ ARPHRD_TUNNEL,	DL_6TO4 },
1498 	{ ARPHRD_AX25,		DL_X25 },
1499 	{ ARPHRD_ATM,		DL_ATM },
1500 	{ 0,			0 }
1501 };
1502 
1503 static int
1504 pfp_dl_to_arphrd(int dltype)
1505 {
1506 	int i;
1507 
1508 	for (i = 0; arphrd_to_dl[i][0] != 0; i++)
1509 		if (arphrd_to_dl[i][1] == dltype)
1510 			return (arphrd_to_dl[i][0]);
1511 	return (arp_hw_type(dltype));
1512 }
1513