1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright 2015 Joyent, Inc. All rights reserved.
25 */
26
27 #include <sys/types.h>
28 #include <sys/param.h>
29 #include <sys/systm.h>
30 #include <sys/stropts.h>
31 #include <sys/socket.h>
32 #include <sys/socketvar.h>
33 #include <sys/socket_proto.h>
34 #include <sys/sockio.h>
35 #include <sys/strsun.h>
36 #include <sys/kstat.h>
37 #include <sys/modctl.h>
38 #include <sys/policy.h>
39 #include <sys/priv_const.h>
40 #include <sys/tihdr.h>
41 #include <sys/zone.h>
42 #include <sys/time.h>
43 #include <sys/ethernet.h>
44 #include <sys/llc1.h>
45 #include <fs/sockfs/sockcommon.h>
46 #include <net/if.h>
47 #include <inet/ip_arp.h>
48
49 #include <sys/dls.h>
50 #include <sys/mac.h>
51 #include <sys/mac_client.h>
52 #include <sys/mac_provider.h>
53 #include <sys/mac_client_priv.h>
54
55 #include <netpacket/packet.h>
56
/*
 * Forward declarations of the file-local pfp_* helpers, so that they can be
 * referenced ahead of their definitions.
 */
static void pfp_close(mac_handle_t, mac_client_handle_t);
static int pfp_dl_to_arphrd(int);
static int pfp_getpacket_sockopt(sock_lower_handle_t, int, void *,
    socklen_t *);
static int pfp_ifreq_getlinkid(intptr_t, struct ifreq *, datalink_id_t *, int);
static int pfp_lifreq_getlinkid(intptr_t, struct lifreq *, datalink_id_t *,
    int);
static int pfp_open_index(int, mac_handle_t *, mac_client_handle_t *,
    cred_t *);
/* Receive callback handed to mac_promisc_add() by sdpfp_bind(). */
static void pfp_packet(void *, mac_resource_handle_t, mblk_t *, boolean_t);
static void pfp_release_bpf(struct pfpsock *);
static int pfp_set_promisc(struct pfpsock *, mac_client_promisc_type_t);
static int pfp_setsocket_sockopt(sock_lower_handle_t, int, const void *,
    socklen_t);
static int pfp_setpacket_sockopt(sock_lower_handle_t, int, const void *,
    socklen_t);
73
/*
 * PFP sockfs operations
 * Most are currently no-ops because they have no meaning for a connectionless
 * socket.
 *
 * The sdpfp_* functions declared here are the ones installed in the
 * pfp_downcalls table.
 */
static void sdpfp_activate(sock_lower_handle_t, sock_upper_handle_t,
    sock_upcalls_t *, int, struct cred *);
static int sdpfp_bind(sock_lower_handle_t, struct sockaddr *, socklen_t,
    struct cred *);
static int sdpfp_close(sock_lower_handle_t, int, struct cred *);
static void sdpfp_clr_flowctrl(sock_lower_handle_t);
static int sdpfp_getsockopt(sock_lower_handle_t, int, int, void *,
    socklen_t *, struct cred *);
static int sdpfp_ioctl(sock_lower_handle_t, int, intptr_t, int, int32_t *,
    struct cred *);
static int sdpfp_senduio(sock_lower_handle_t, struct uio *, struct nmsghdr *,
    struct cred *);
static int sdpfp_setsockopt(sock_lower_handle_t, int, int, const void *,
    socklen_t, struct cred *);

/* Socket creation entry point; registered through the sinfo structure. */
static sock_lower_handle_t sockpfp_create(int, int, int, sock_downcalls_t **,
    uint_t *, int *, int, cred_t *);

/* Module setup and teardown helpers, called from _init() and _fini(). */
static int sockpfp_init(void);
static void sockpfp_fini(void);
99
/*
 * Global statistics for the module:
 *  - pfp_ksp is the handle returned by kstat_create() in sockpfp_init(); it
 *    stays NULL if the kstat could not be created.
 *  - ks_stats holds the live counters that the rest of this file increments.
 *  - pfp_kstats is the template of counter names and types that
 *    sockpfp_init() copies into ks_stats.
 *
 * The initialiser below is positional, so entries must remain in the same
 * order as the members of pfp_kstats_t.
 */
static kstat_t *pfp_ksp;
static pfp_kstats_t ks_stats;
static pfp_kstats_t pfp_kstats = {
	/*
	 * Each one of these kstats is a different return path in handling
	 * a packet received from the mac layer.
	 */
	{ "recvMacHeaderFail",	KSTAT_DATA_UINT64 },
	{ "recvBadProtocol",	KSTAT_DATA_UINT64 },
	{ "recvAllocbFail",	KSTAT_DATA_UINT64 },
	{ "recvOk",		KSTAT_DATA_UINT64 },
	{ "recvFail",		KSTAT_DATA_UINT64 },
	{ "recvFiltered",	KSTAT_DATA_UINT64 },
	{ "recvFlowControl",	KSTAT_DATA_UINT64 },
	/*
	 * A global set of counters is maintained to track the behaviour
	 * of the system (kernel & applications) in sending packets.
	 */
	{ "sendUnbound",	KSTAT_DATA_UINT64 },
	{ "sendFailed",		KSTAT_DATA_UINT64 },
	{ "sendTooBig",		KSTAT_DATA_UINT64 },
	{ "sendAllocFail",	KSTAT_DATA_UINT64 },
	{ "sendUiomoveFail",	KSTAT_DATA_UINT64 },
	{ "sendNoMemory",	KSTAT_DATA_UINT64 },
	{ "sendOpenFail",	KSTAT_DATA_UINT64 },
	{ "sendWrongFamily",	KSTAT_DATA_UINT64 },
	{ "sendShortMsg",	KSTAT_DATA_UINT64 },
	{ "sendOk",		KSTAT_DATA_UINT64 }
};
129
/*
 * Downcall table that sockpfp_create() hands back to its caller for every
 * PF_PACKET socket. The initialiser is positional, so the entries must stay
 * in the order of the sock_downcalls_t members. Slots that this module does
 * not implement itself are filled with sock_*_notsupp functions that are
 * defined outside this file (presumably generic "not supported" stubs --
 * confirm against their definitions).
 */
sock_downcalls_t pfp_downcalls = {
	sdpfp_activate,
	sock_accept_notsupp,
	sdpfp_bind,
	sock_listen_notsupp,
	sock_connect_notsupp,
	sock_getpeername_notsupp,
	sock_getsockname_notsupp,
	sdpfp_getsockopt,
	sdpfp_setsockopt,
	sock_send_notsupp,
	sdpfp_senduio,
	NULL,			/* no handler supplied for this slot */
	sock_poll_notsupp,
	sock_shutdown_notsupp,
	sdpfp_clr_flowctrl,
	sdpfp_ioctl,
	sdpfp_close,
};
149
/*
 * Socket module registration record, referenced from modlsockmod. It names
 * the module ("sockpfp"), carries the upcall/downcall version constants and
 * points at sockpfp_create() as the function used to create sockets. The
 * initialiser is positional.
 */
static smod_reg_t sinfo = {
	SOCKMOD_VERSION,
	"sockpfp",
	SOCK_UC_VERSION,
	SOCK_DC_VERSION,
	sockpfp_create,
	NULL
};
158
/*
 * Protocol mapping table consulted by sockpfp_create(). Each row is
 * { protocol passed to socket creation, value stored in ps_proto }.
 * A stored value of 0 disables the SAP comparison in pfp_packet(), so such
 * a socket is offered every packet.
 */
static int accepted_protos[3][2] = {
	{ ETH_P_ALL,	0 },
	{ ETH_P_802_2,	LLC_SNAP_SAP },
	{ ETH_P_803_3,	0 },
};
164
/*
 * This sets an upper bound on the size of the receive buffer for a PF_PACKET
 * socket. More properly, this should be controlled through ipadm, a la TCP,
 * UDP, SCTP, etc. Until that's done, this provides a hard cap of 4 MB and
 * allows an opportunity for it to be changed, should it be needed.
 *
 * The variable is not referenced in the part of the file covered here; it is
 * presumably enforced where SO_RCVBUF is set -- confirm in
 * pfp_setsocket_sockopt().
 */
int sockmod_pfp_rcvbuf_max = 1024 * 1024 * 4;
172
/*
 * Module linkage information for the kernel.
 *
 * modlsockmod describes this module as a socket module and ties it to the
 * registration record in sinfo. modlinkage wraps that description and is the
 * object passed to mod_install(), mod_remove() and mod_info() by _init(),
 * _fini() and _info() respectively.
 */
static struct modlsockmod modlsockmod = {
	&mod_sockmodops, "PF Packet socket module", &sinfo
};

static struct modlinkage modlinkage = {
	MODREV_1,
	&modlsockmod,
	NULL
};
185
186 int
_init(void)187 _init(void)
188 {
189 int error;
190
191 error = sockpfp_init();
192 if (error != 0)
193 return (error);
194
195 error = mod_install(&modlinkage);
196 if (error != 0)
197 sockpfp_fini();
198
199 return (error);
200 }
201
202 int
_fini(void)203 _fini(void)
204 {
205 int error;
206
207 error = mod_remove(&modlinkage);
208 if (error == 0)
209 sockpfp_fini();
210
211 return (error);
212 }
213
214 int
_info(struct modinfo * modinfop)215 _info(struct modinfo *modinfop)
216 {
217 return (mod_info(&modlinkage, modinfop));
218 }
219
220 /*
221 * sockpfp_init: called as part of the initialisation of the module when
222 * loaded into the kernel.
223 *
224 * Being able to create and record the kstats data in the kernel is not
225 * considered to be vital to the operation of this kernel module, thus
226 * its failure is tolerated.
227 */
228 static int
sockpfp_init(void)229 sockpfp_init(void)
230 {
231 (void) memset(&ks_stats, 0, sizeof (ks_stats));
232
233 (void) memcpy(&ks_stats, &pfp_kstats, sizeof (pfp_kstats));
234
235 pfp_ksp = kstat_create("pfpacket", 0, "global", "misc",
236 KSTAT_TYPE_NAMED, sizeof (pfp_kstats) / sizeof (kstat_named_t),
237 KSTAT_FLAG_VIRTUAL);
238 if (pfp_ksp != NULL) {
239 pfp_ksp->ks_data = &ks_stats;
240 kstat_install(pfp_ksp);
241 }
242
243 return (0);
244 }
245
246 /*
247 * sockpfp_fini: called when the operating system wants to unload the
248 * socket module from the kernel.
249 */
250 static void
sockpfp_fini(void)251 sockpfp_fini(void)
252 {
253 if (pfp_ksp != NULL)
254 kstat_delete(pfp_ksp);
255 }
256
257 /*
258 * Due to sockets being created read-write by default, all PF_PACKET sockets
259 * therefore require the NET_RAWACCESS priviliege, even if the socket is only
260 * being used for reading packets from.
261 *
262 * This create function enforces this module only being used with PF_PACKET
263 * sockets and the policy that we support via the config file in sock2path.d:
264 * PF_PACKET sockets must be either SOCK_DGRAM or SOCK_RAW.
265 */
266 /* ARGSUSED */
267 static sock_lower_handle_t
sockpfp_create(int family,int type,int proto,sock_downcalls_t ** sock_downcalls,uint_t * smodep,int * errorp,int sflags,cred_t * cred)268 sockpfp_create(int family, int type, int proto,
269 sock_downcalls_t **sock_downcalls, uint_t *smodep, int *errorp,
270 int sflags, cred_t *cred)
271 {
272 struct pfpsock *ps;
273 int kmflags;
274 int newproto;
275 int i;
276
277 if (secpolicy_net_rawaccess(cred) != 0) {
278 *errorp = EACCES;
279 return (NULL);
280 }
281
282 if (family != AF_PACKET) {
283 *errorp = EAFNOSUPPORT;
284 return (NULL);
285 }
286
287 if ((type != SOCK_RAW) && (type != SOCK_DGRAM)) {
288 *errorp = ESOCKTNOSUPPORT;
289 return (NULL);
290 }
291
292 /*
293 * First check to see if the protocol number passed in via the socket
294 * creation should be mapped to a different number for internal use.
295 */
296 for (i = 0, newproto = -1;
297 i < sizeof (accepted_protos)/ sizeof (accepted_protos[0]); i++) {
298 if (accepted_protos[i][0] == proto) {
299 newproto = accepted_protos[i][1];
300 break;
301 }
302 }
303
304 /*
305 * If the mapping of the protocol that was under 0x800 failed to find
306 * a local equivalent then fail the socket creation. If the protocol
307 * for the socket is over 0x800 and it was not found in the mapping
308 * table above, then use the value as is.
309 */
310 if (newproto == -1) {
311 if (proto < 0x800) {
312 *errorp = ENOPROTOOPT;
313 return (NULL);
314 }
315 newproto = proto;
316 }
317 proto = newproto;
318
319 kmflags = (sflags & SOCKET_NOSLEEP) ? KM_NOSLEEP : KM_SLEEP;
320 ps = kmem_zalloc(sizeof (*ps), kmflags);
321 if (ps == NULL) {
322 *errorp = ENOMEM;
323 return (NULL);
324 }
325
326 ps->ps_type = type;
327 ps->ps_proto = proto;
328 rw_init(&ps->ps_bpflock, NULL, RW_DRIVER, NULL);
329 mutex_init(&ps->ps_lock, NULL, MUTEX_DRIVER, NULL);
330
331 *sock_downcalls = &pfp_downcalls;
332 /*
333 * Setting this causes bytes from a packet that do not fit into the
334 * destination user buffer to be discarded. Thus the API is one
335 * packet per receive and callers are required to use a buffer large
336 * enough for the biggest packet that the interface can provide.
337 */
338 *smodep = SM_ATOMIC;
339
340 return ((sock_lower_handle_t)ps);
341 }
342
343 /* ************************************************************************* */
344
345 /*
346 * pfp_packet is the callback function that is given to the mac layer for
347 * PF_PACKET to receive packets with. One packet at a time is passed into
348 * this function from the mac layer. Each packet is a private copy given
349 * to PF_PACKET to modify or free as it wishes and does not harm the original
350 * packet from which it was cloned.
351 */
352 /* ARGSUSED */
353 static void
pfp_packet(void * arg,mac_resource_handle_t mrh,mblk_t * mp,boolean_t flag)354 pfp_packet(void *arg, mac_resource_handle_t mrh, mblk_t *mp, boolean_t flag)
355 {
356 struct T_unitdata_ind *tunit;
357 struct sockaddr_ll *sll;
358 struct sockaddr_ll *sol;
359 mac_header_info_t hdr;
360 struct pfpsock *ps;
361 size_t tusz;
362 mblk_t *mp0;
363 int error;
364
365 if (mp == NULL)
366 return;
367
368 ps = arg;
369 if (ps->ps_flow_ctrld) {
370 ps->ps_flow_ctrl_drops++;
371 ps->ps_stats.tp_drops++;
372 ks_stats.kp_recv_flow_cntrld.value.ui64++;
373 freemsg(mp);
374 return;
375 }
376
377 if (mac_header_info(ps->ps_mh, mp, &hdr) != 0) {
378 /*
379 * Can't decode the packet header information so drop it.
380 */
381 ps->ps_stats.tp_drops++;
382 ks_stats.kp_recv_mac_hdr_fail.value.ui64++;
383 freemsg(mp);
384 return;
385 }
386
387 if (mac_type(ps->ps_mh) == DL_ETHER &&
388 hdr.mhi_bindsap == ETHERTYPE_VLAN) {
389 struct ether_vlan_header *evhp;
390 struct ether_vlan_header evh;
391
392 hdr.mhi_hdrsize = sizeof (struct ether_vlan_header);
393 hdr.mhi_istagged = B_TRUE;
394
395 if (MBLKL(mp) >= sizeof (*evhp)) {
396 evhp = (struct ether_vlan_header *)mp->b_rptr;
397 } else {
398 int sz = sizeof (*evhp);
399 char *s = (char *)&evh;
400 mblk_t *tmp;
401 int len;
402
403 for (tmp = mp; sz > 0 && tmp != NULL;
404 tmp = tmp->b_cont) {
405 len = min(sz, MBLKL(tmp));
406 bcopy(tmp->b_rptr, s, len);
407 sz -= len;
408 }
409 evhp = &evh;
410 }
411 hdr.mhi_tci = ntohs(evhp->ether_tci);
412 hdr.mhi_bindsap = ntohs(evhp->ether_type);
413 }
414
415 if ((ps->ps_proto != 0) && (ps->ps_proto != hdr.mhi_bindsap)) {
416 /*
417 * The packet is not of interest to this socket so
418 * drop it on the floor. Here the SAP is being used
419 * as a very course filter.
420 */
421 ps->ps_stats.tp_drops++;
422 ks_stats.kp_recv_bad_proto.value.ui64++;
423 freemsg(mp);
424 return;
425 }
426
427 /*
428 * This field is not often set, even for ethernet,
429 * by mac_header_info, so compute it if it is 0.
430 */
431 if (hdr.mhi_pktsize == 0)
432 hdr.mhi_pktsize = msgdsize(mp);
433
434 /*
435 * If a BPF filter is present, pass the raw packet into that.
436 * A failed match will result in zero being returned, indicating
437 * that this socket is not interested in the packet.
438 */
439 if (ps->ps_bpf.bf_len != 0) {
440 uchar_t *buffer;
441 int buflen;
442
443 buflen = MBLKL(mp);
444 if (hdr.mhi_pktsize == buflen) {
445 buffer = mp->b_rptr;
446 } else {
447 buflen = 0;
448 buffer = (uchar_t *)mp;
449 }
450 rw_enter(&ps->ps_bpflock, RW_READER);
451 if (bpf_filter(ps->ps_bpf.bf_insns, buffer,
452 hdr.mhi_pktsize, buflen) == 0) {
453 rw_exit(&ps->ps_bpflock);
454 ps->ps_stats.tp_drops++;
455 ks_stats.kp_recv_filtered.value.ui64++;
456 freemsg(mp);
457 return;
458 }
459 rw_exit(&ps->ps_bpflock);
460 }
461
462 if (ps->ps_type == SOCK_DGRAM) {
463 /*
464 * SOCK_DGRAM socket expect a "layer 3" packet, so advance
465 * past the link layer header.
466 */
467 mp->b_rptr += hdr.mhi_hdrsize;
468 hdr.mhi_pktsize -= hdr.mhi_hdrsize;
469 }
470
471 tusz = sizeof (struct T_unitdata_ind) + sizeof (struct sockaddr_ll);
472 if (ps->ps_auxdata) {
473 tusz += _TPI_ALIGN_TOPT(sizeof (struct tpacket_auxdata));
474 tusz += _TPI_ALIGN_TOPT(sizeof (struct T_opthdr));
475 }
476
477 /*
478 * It is tempting to think that this could be optimised by having
479 * the base mblk_t allocated and hung off the pfpsock structure,
480 * except that then another one would need to be allocated for the
481 * sockaddr_ll that is included. Even creating a template to copy
482 * from is of questionable value, as read-write from one structure
483 * to the other is going to be slower than all of the initialisation.
484 */
485 mp0 = allocb(tusz, BPRI_HI);
486 if (mp0 == NULL) {
487 ps->ps_stats.tp_drops++;
488 ks_stats.kp_recv_alloc_fail.value.ui64++;
489 freemsg(mp);
490 return;
491 }
492
493 (void) memset(mp0->b_rptr, 0, tusz);
494
495 mp0->b_datap->db_type = M_PROTO;
496 mp0->b_wptr = mp0->b_rptr + tusz;
497
498 tunit = (struct T_unitdata_ind *)mp0->b_rptr;
499 tunit->PRIM_type = T_UNITDATA_IND;
500 tunit->SRC_length = sizeof (struct sockaddr);
501 tunit->SRC_offset = sizeof (*tunit);
502
503 sol = &ps->ps_sock;
504 sll = (struct sockaddr_ll *)(mp0->b_rptr + sizeof (*tunit));
505 sll->sll_ifindex = sol->sll_ifindex;
506 sll->sll_hatype = (uint16_t)hdr.mhi_origsap;
507 sll->sll_halen = sol->sll_halen;
508 if (hdr.mhi_saddr != NULL)
509 (void) memcpy(sll->sll_addr, hdr.mhi_saddr, sll->sll_halen);
510
511 switch (hdr.mhi_dsttype) {
512 case MAC_ADDRTYPE_MULTICAST :
513 sll->sll_pkttype = PACKET_MULTICAST;
514 break;
515 case MAC_ADDRTYPE_BROADCAST :
516 sll->sll_pkttype = PACKET_BROADCAST;
517 break;
518 case MAC_ADDRTYPE_UNICAST :
519 if (memcmp(sol->sll_addr, hdr.mhi_daddr, sol->sll_halen) == 0)
520 sll->sll_pkttype = PACKET_HOST;
521 else
522 sll->sll_pkttype = PACKET_OTHERHOST;
523 break;
524 }
525
526 if (ps->ps_auxdata) {
527 struct tpacket_auxdata *aux;
528 struct T_opthdr *topt;
529
530 tunit->OPT_offset = _TPI_ALIGN_TOPT(tunit->SRC_offset +
531 sizeof (struct sockaddr_ll));
532 tunit->OPT_length = _TPI_ALIGN_TOPT(sizeof (struct T_opthdr)) +
533 _TPI_ALIGN_TOPT(sizeof (struct tpacket_auxdata));
534
535 topt = (struct T_opthdr *)(mp0->b_rptr + tunit->OPT_offset);
536 aux = (struct tpacket_auxdata *)
537 ((char *)topt + _TPI_ALIGN_TOPT(sizeof (*topt)));
538
539 topt->len = tunit->OPT_length;
540 topt->level = SOL_PACKET;
541 topt->name = PACKET_AUXDATA;
542 topt->status = 0;
543 /*
544 * libpcap doesn't seem to use any other field,
545 * so it isn't clear how they should be filled in.
546 */
547 aux->tp_vlan_vci = hdr.mhi_tci;
548 }
549
550 linkb(mp0, mp);
551
552 (void) gethrestime(&ps->ps_timestamp);
553
554 ps->ps_upcalls->su_recv(ps->ps_upper, mp0, hdr.mhi_pktsize, 0,
555 &error, NULL);
556
557 if (error == 0) {
558 ps->ps_stats.tp_packets++;
559 ks_stats.kp_recv_ok.value.ui64++;
560 } else {
561 mutex_enter(&ps->ps_lock);
562 if (error == ENOSPC) {
563 ps->ps_upcalls->su_recv(ps->ps_upper, NULL, 0, 0,
564 &error, NULL);
565 if (error == ENOSPC)
566 ps->ps_flow_ctrld = B_TRUE;
567 }
568 mutex_exit(&ps->ps_lock);
569 ps->ps_stats.tp_drops++;
570 ks_stats.kp_recv_fail.value.ui64++;
571 }
572 }
573
574 /*
575 * Bind a PF_PACKET socket to a network interface.
576 *
577 * The default operation of this bind() is to place the socket (and thus the
578 * network interface) into promiscuous mode. It is then up to the application
579 * to turn that down by issuing the relevant ioctls, if desired.
580 */
581 static int
sdpfp_bind(sock_lower_handle_t handle,struct sockaddr * addr,socklen_t addrlen,struct cred * cred)582 sdpfp_bind(sock_lower_handle_t handle, struct sockaddr *addr,
583 socklen_t addrlen, struct cred *cred)
584 {
585 struct sockaddr_ll *addr_ll, *sol;
586 mac_client_handle_t mch;
587 struct pfpsock *ps;
588 mac_handle_t mh;
589 int error;
590
591 ps = (struct pfpsock *)handle;
592 if (ps->ps_bound)
593 return (EINVAL);
594
595 if (addrlen < sizeof (struct sockaddr_ll) || addr == NULL)
596 return (EINVAL);
597
598 addr_ll = (struct sockaddr_ll *)addr;
599
600 error = pfp_open_index(addr_ll->sll_ifindex, &mh, &mch, cred);
601 if (error != 0)
602 return (error);
603 /*
604 * Ensure that each socket is only bound once.
605 */
606 mutex_enter(&ps->ps_lock);
607 if (ps->ps_mh != 0) {
608 mutex_exit(&ps->ps_lock);
609 pfp_close(mh, mch);
610 return (EADDRINUSE);
611 }
612 ps->ps_mh = mh;
613 ps->ps_mch = mch;
614 mutex_exit(&ps->ps_lock);
615
616 /*
617 * Cache all of the information from bind so that it's in an easy
618 * place to get at when packets are received.
619 */
620 sol = &ps->ps_sock;
621 sol->sll_family = AF_PACKET;
622 sol->sll_ifindex = addr_ll->sll_ifindex;
623 sol->sll_protocol = addr_ll->sll_protocol;
624 sol->sll_halen = mac_addr_len(ps->ps_mh);
625 mac_unicast_primary_get(ps->ps_mh, sol->sll_addr);
626 mac_sdu_get(ps->ps_mh, NULL, &ps->ps_max_sdu);
627 ps->ps_linkid = addr_ll->sll_ifindex;
628
629 error = mac_promisc_add(ps->ps_mch, MAC_CLIENT_PROMISC_ALL,
630 pfp_packet, ps, &ps->ps_phd, MAC_PROMISC_FLAGS_VLAN_TAG_STRIP);
631 if (error == 0) {
632 ps->ps_promisc = MAC_CLIENT_PROMISC_ALL;
633 ps->ps_bound = B_TRUE;
634 }
635
636 return (error);
637 }
638
639 /* ARGSUSED */
640 static void
sdpfp_activate(sock_lower_handle_t lower,sock_upper_handle_t upper,sock_upcalls_t * upcalls,int flags,cred_t * cred)641 sdpfp_activate(sock_lower_handle_t lower, sock_upper_handle_t upper,
642 sock_upcalls_t *upcalls, int flags, cred_t *cred)
643 {
644 struct pfpsock *ps;
645
646 ps = (struct pfpsock *)lower;
647 ps->ps_upper = upper;
648 ps->ps_upcalls = upcalls;
649 }
650
651 /*
652 * This module only implements getting socket options for the new socket
653 * option level (SOL_PACKET) that it introduces. All other requests are
654 * passed back to the sockfs layer.
655 */
656 /* ARGSUSED */
657 static int
sdpfp_getsockopt(sock_lower_handle_t handle,int level,int option_name,void * optval,socklen_t * optlenp,struct cred * cred)658 sdpfp_getsockopt(sock_lower_handle_t handle, int level, int option_name,
659 void *optval, socklen_t *optlenp, struct cred *cred)
660 {
661 struct pfpsock *ps;
662 int error = 0;
663
664 ps = (struct pfpsock *)handle;
665
666 switch (level) {
667 case SOL_PACKET :
668 error = pfp_getpacket_sockopt(handle, option_name, optval,
669 optlenp);
670 break;
671
672 case SOL_SOCKET :
673 if (option_name == SO_RCVBUF) {
674 if (*optlenp < sizeof (int32_t))
675 return (EINVAL);
676 *((int32_t *)optval) = ps->ps_rcvbuf;
677 *optlenp = sizeof (int32_t);
678 } else {
679 error = ENOPROTOOPT;
680 }
681 break;
682
683 default :
684 /*
685 * If sockfs code receives this error in return from the
686 * getsockopt downcall it handles the option locally, if
687 * it can.
688 */
689 error = ENOPROTOOPT;
690 break;
691 }
692
693 return (error);
694 }
695
696 /*
697 * PF_PACKET supports setting socket options at only two levels:
698 * SOL_SOCKET and SOL_PACKET.
699 */
700 /* ARGSUSED */
701 static int
sdpfp_setsockopt(sock_lower_handle_t handle,int level,int option_name,const void * optval,socklen_t optlen,struct cred * cred)702 sdpfp_setsockopt(sock_lower_handle_t handle, int level, int option_name,
703 const void *optval, socklen_t optlen, struct cred *cred)
704 {
705 int error = 0;
706
707 switch (level) {
708 case SOL_SOCKET :
709 error = pfp_setsocket_sockopt(handle, option_name, optval,
710 optlen);
711 break;
712 case SOL_PACKET :
713 error = pfp_setpacket_sockopt(handle, option_name, optval,
714 optlen);
715 break;
716 default :
717 error = EINVAL;
718 break;
719 }
720
721 return (error);
722 }
723
724 /*
725 * This function is incredibly inefficient for sending any packet that
726 * comes with a msghdr asking to be sent to an interface to which the
727 * socket has not been bound. Some possibilities here are keeping a
728 * cache of all open mac's and mac_client's, for the purpose of sending,
729 * and closing them after some amount of inactivity. Clearly, applications
730 * should not be written to use one socket for multiple interfaces if
731 * performance is desired with the code as is.
732 */
733 /* ARGSUSED */
734 static int
sdpfp_senduio(sock_lower_handle_t handle,struct uio * uiop,struct nmsghdr * msg,struct cred * cred)735 sdpfp_senduio(sock_lower_handle_t handle, struct uio *uiop,
736 struct nmsghdr *msg, struct cred *cred)
737 {
738 struct sockaddr_ll *sol;
739 mac_client_handle_t mch;
740 struct pfpsock *ps;
741 boolean_t new_open;
742 mac_handle_t mh;
743 size_t mpsize;
744 uint_t maxsdu;
745 mblk_t *mp0;
746 mblk_t *mp;
747 int error;
748
749 mp = NULL;
750 mp0 = NULL;
751 new_open = B_FALSE;
752 ps = (struct pfpsock *)handle;
753 mh = ps->ps_mh;
754 mch = ps->ps_mch;
755 maxsdu = ps->ps_max_sdu;
756
757 sol = (struct sockaddr_ll *)msg->msg_name;
758 if (sol == NULL) {
759 /*
760 * If no sockaddr_ll has been provided with the send call,
761 * use the one constructed when the socket was bound to an
762 * interface and fail if it hasn't been bound.
763 */
764 if (!ps->ps_bound) {
765 ks_stats.kp_send_unbound.value.ui64++;
766 return (EPROTO);
767 }
768 sol = &ps->ps_sock;
769 } else {
770 /*
771 * Verify the sockaddr_ll message passed down before using
772 * it to send a packet out with. If it refers to an interface
773 * that has not been bound, it is necessary to open it.
774 */
775 struct sockaddr_ll *sll;
776
777 if (msg->msg_namelen < sizeof (struct sockaddr_ll)) {
778 ks_stats.kp_send_short_msg.value.ui64++;
779 return (EINVAL);
780 }
781
782 if (sol->sll_family != AF_PACKET) {
783 ks_stats.kp_send_wrong_family.value.ui64++;
784 return (EAFNOSUPPORT);
785 }
786
787 sll = &ps->ps_sock;
788 if (sol->sll_ifindex != sll->sll_ifindex) {
789 error = pfp_open_index(sol->sll_ifindex, &mh, &mch,
790 cred);
791 if (error != 0) {
792 ks_stats.kp_send_open_fail.value.ui64++;
793 return (error);
794 }
795 mac_sdu_get(mh, NULL, &maxsdu);
796 new_open = B_TRUE;
797 }
798 }
799
800 mpsize = uiop->uio_resid;
801 if (mpsize > maxsdu) {
802 ks_stats.kp_send_too_big.value.ui64++;
803 error = EMSGSIZE;
804 goto done;
805 }
806
807 if ((mp = allocb(mpsize, BPRI_HI)) == NULL) {
808 ks_stats.kp_send_alloc_fail.value.ui64++;
809 error = ENOBUFS;
810 goto done;
811 }
812
813 mp->b_wptr = mp->b_rptr + mpsize;
814 error = uiomove(mp->b_rptr, mpsize, UIO_WRITE, uiop);
815 if (error != 0) {
816 ks_stats.kp_send_uiomove_fail.value.ui64++;
817 goto done;
818 }
819
820 if (ps->ps_type == SOCK_DGRAM) {
821 mp0 = mac_header(mh, sol->sll_addr, sol->sll_protocol, mp, 0);
822 if (mp0 == NULL) {
823 ks_stats.kp_send_no_memory.value.ui64++;
824 error = ENOBUFS;
825 goto done;
826 }
827 linkb(mp0, mp);
828 mp = mp0;
829 }
830
831 /*
832 * As this is sending datagrams and no promise is made about
833 * how or if a packet will be sent/delivered, no effort is to
834 * be expended in recovering from a situation where the packet
835 * cannot be sent - it is just dropped.
836 */
837 error = mac_tx(mch, mp, 0, MAC_DROP_ON_NO_DESC, NULL);
838 if (error == 0) {
839 mp = NULL;
840 ks_stats.kp_send_ok.value.ui64++;
841 } else {
842 ks_stats.kp_send_failed.value.ui64++;
843 }
844
845 done:
846
847 if (new_open) {
848 ASSERT(mch != ps->ps_mch);
849 ASSERT(mh != ps->ps_mh);
850 pfp_close(mh, mch);
851 }
852 if (mp != NULL)
853 freemsg(mp);
854
855 return (error);
856
857 }
858
859 /*
860 * There's no use of a lock here, or at the bottom of pfp_packet() where
861 * ps_flow_ctrld is set to true, because in a situation where these two
862 * are racing to set the flag one way or the other, the end result is
863 * going to be ultimately determined by the scheduler anyway - which of
864 * the two threads gets the lock first? In such an operational environment,
865 * we've got packets arriving too fast to be delt with so packets are going
866 * to be dropped. Grabbing a lock just makes the drop more expensive.
867 */
868 static void
sdpfp_clr_flowctrl(sock_lower_handle_t handle)869 sdpfp_clr_flowctrl(sock_lower_handle_t handle)
870 {
871 struct pfpsock *ps;
872
873 ps = (struct pfpsock *)handle;
874
875 mutex_enter(&ps->ps_lock);
876 ps->ps_flow_ctrld = B_FALSE;
877 mutex_exit(&ps->ps_lock);
878 }
879
880 /*
881 * The implementation of this ioctl() handler is intended to function
882 * in the absence of a bind() being made before it is called. Thus the
883 * function calls mac_open() itself to provide a handle
884 * This function is structured like this:
885 * - determine the linkid for the interface being targetted
886 * - open the interface with said linkid
887 * - perform ioctl
888 * - copy results back to caller
889 *
890 * The ioctls that interact with interface flags have been implented below
891 * to assume that the interface is always up and running (IFF_RUNNING) and
892 * to use the state of this socket to determine whether or not the network
893 * interface is in promiscuous mode. Thus an ioctl to get the interface flags
894 * of an interface that has been put in promiscuous mode by another socket
895 * (in the same program or different), will not report that status.
896 */
897 /* ARGSUSED */
898 static int
sdpfp_ioctl(sock_lower_handle_t handle,int cmd,intptr_t arg,int mod,int32_t * rval,struct cred * cr)899 sdpfp_ioctl(sock_lower_handle_t handle, int cmd, intptr_t arg, int mod,
900 int32_t *rval, struct cred *cr)
901 {
902 struct timeval tival;
903 mac_client_promisc_type_t mtype;
904 struct sockaddr_dl *sock;
905 datalink_id_t linkid;
906 struct lifreq lifreq;
907 struct ifreq ifreq;
908 struct pfpsock *ps;
909 mac_handle_t mh;
910 int error;
911
912 ps = (struct pfpsock *)handle;
913
914 switch (cmd) {
915 /*
916 * ioctls that work on "struct lifreq"
917 */
918 case SIOCSLIFFLAGS :
919 case SIOCGLIFINDEX :
920 case SIOCGLIFFLAGS :
921 case SIOCGLIFMTU :
922 case SIOCGLIFHWADDR :
923 error = pfp_lifreq_getlinkid(arg, &lifreq, &linkid, mod);
924 if (error != 0)
925 return (error);
926 break;
927
928 /*
929 * ioctls that work on "struct ifreq".
930 * Not all of these have a "struct lifreq" partner, for example
931 * SIOCGIFHWADDR, for the simple reason that the logical interface
932 * does not have a hardware address.
933 */
934 case SIOCSIFFLAGS :
935 case SIOCGIFINDEX :
936 case SIOCGIFFLAGS :
937 case SIOCGIFMTU :
938 case SIOCGIFHWADDR :
939 error = pfp_ifreq_getlinkid(arg, &ifreq, &linkid, mod);
940 if (error != 0)
941 return (error);
942 break;
943
944 case SIOCGSTAMP :
945 tival.tv_sec = (time_t)ps->ps_timestamp.tv_sec;
946 tival.tv_usec = ps->ps_timestamp.tv_nsec / 1000;
947 if (get_udatamodel() == DATAMODEL_NATIVE) {
948 error = ddi_copyout(&tival, (void *)arg,
949 sizeof (tival), mod);
950 }
951 #ifdef _SYSCALL32_IMPL
952 else {
953 struct timeval32 tv32;
954 TIMEVAL_TO_TIMEVAL32(&tv32, &tival);
955 error = ddi_copyout(&tv32, (void *)arg,
956 sizeof (tv32), mod);
957 }
958 #endif
959 return (error);
960 }
961
962 error = mac_open_by_linkid(linkid, &mh);
963 if (error != 0)
964 return (error);
965
966 switch (cmd) {
967 case SIOCGLIFINDEX :
968 lifreq.lifr_index = linkid;
969 break;
970
971 case SIOCGIFINDEX :
972 ifreq.ifr_index = linkid;
973 break;
974
975 case SIOCGIFFLAGS :
976 ifreq.ifr_flags = IFF_RUNNING;
977 if (ps->ps_promisc == MAC_CLIENT_PROMISC_ALL)
978 ifreq.ifr_flags |= IFF_PROMISC;
979 break;
980
981 case SIOCGLIFFLAGS :
982 lifreq.lifr_flags = IFF_RUNNING;
983 if (ps->ps_promisc == MAC_CLIENT_PROMISC_ALL)
984 lifreq.lifr_flags |= IFF_PROMISC;
985 break;
986
987 case SIOCSIFFLAGS :
988 if (linkid != ps->ps_linkid) {
989 error = EINVAL;
990 } else {
991 if ((ifreq.ifr_flags & IFF_PROMISC) != 0)
992 mtype = MAC_CLIENT_PROMISC_ALL;
993 else
994 mtype = MAC_CLIENT_PROMISC_FILTERED;
995 error = pfp_set_promisc(ps, mtype);
996 }
997 break;
998
999 case SIOCSLIFFLAGS :
1000 if (linkid != ps->ps_linkid) {
1001 error = EINVAL;
1002 } else {
1003 if ((lifreq.lifr_flags & IFF_PROMISC) != 0)
1004 mtype = MAC_CLIENT_PROMISC_ALL;
1005 else
1006 mtype = MAC_CLIENT_PROMISC_FILTERED;
1007 error = pfp_set_promisc(ps, mtype);
1008 }
1009 break;
1010
1011 case SIOCGIFMTU :
1012 mac_sdu_get(mh, NULL, &ifreq.ifr_mtu);
1013 break;
1014
1015 case SIOCGLIFMTU :
1016 mac_sdu_get(mh, NULL, &lifreq.lifr_mtu);
1017 break;
1018
1019 case SIOCGIFHWADDR :
1020 if (mac_addr_len(mh) > sizeof (ifreq.ifr_addr.sa_data)) {
1021 error = EPFNOSUPPORT;
1022 break;
1023 }
1024
1025 if (mac_addr_len(mh) == 0) {
1026 (void) memset(ifreq.ifr_addr.sa_data, 0,
1027 sizeof (ifreq.ifr_addr.sa_data));
1028 } else {
1029 mac_unicast_primary_get(mh,
1030 (uint8_t *)ifreq.ifr_addr.sa_data);
1031 }
1032
1033 /*
1034 * The behaviour here in setting sa_family is consistent
1035 * with what applications such as tcpdump would expect
1036 * for a Linux PF_PACKET socket.
1037 */
1038 ifreq.ifr_addr.sa_family = pfp_dl_to_arphrd(mac_type(mh));
1039 break;
1040
1041 case SIOCGLIFHWADDR :
1042 lifreq.lifr_type = 0;
1043 sock = (struct sockaddr_dl *)&lifreq.lifr_addr;
1044
1045 if (mac_addr_len(mh) > sizeof (sock->sdl_data)) {
1046 error = EPFNOSUPPORT;
1047 break;
1048 }
1049
1050 /*
1051 * Fill in the sockaddr_dl with link layer details. Of note,
1052 * the index is returned as 0 for a couple of reasons:
1053 * (1) there is no public API that uses or requires it
1054 * (2) the MAC index is currently 32bits and sdl_index is 16.
1055 */
1056 sock->sdl_family = AF_LINK;
1057 sock->sdl_index = 0;
1058 sock->sdl_type = mac_type(mh);
1059 sock->sdl_nlen = 0;
1060 sock->sdl_alen = mac_addr_len(mh);
1061 sock->sdl_slen = 0;
1062 if (mac_addr_len(mh) == 0) {
1063 (void) memset(sock->sdl_data, 0,
1064 sizeof (sock->sdl_data));
1065 } else {
1066 mac_unicast_primary_get(mh, (uint8_t *)sock->sdl_data);
1067 }
1068 break;
1069
1070 default :
1071 break;
1072 }
1073
1074 mac_close(mh);
1075
1076 if (error == 0) {
1077 /*
1078 * Only the "GET" ioctls need to copy data back to userace.
1079 */
1080 switch (cmd) {
1081 case SIOCGLIFINDEX :
1082 case SIOCGLIFFLAGS :
1083 case SIOCGLIFMTU :
1084 case SIOCGLIFHWADDR :
1085 error = ddi_copyout(&lifreq, (void *)arg,
1086 sizeof (lifreq), mod);
1087 break;
1088
1089 case SIOCGIFINDEX :
1090 case SIOCGIFFLAGS :
1091 case SIOCGIFMTU :
1092 case SIOCGIFHWADDR :
1093 error = ddi_copyout(&ifreq, (void *)arg,
1094 sizeof (ifreq), mod);
1095 break;
1096 default :
1097 break;
1098 }
1099 }
1100
1101 return (error);
1102 }
1103
1104 /*
1105 * Closing the socket requires that all open references to network
1106 * interfaces be closed.
1107 */
1108 /* ARGSUSED */
1109 static int
sdpfp_close(sock_lower_handle_t handle,int flag,struct cred * cr)1110 sdpfp_close(sock_lower_handle_t handle, int flag, struct cred *cr)
1111 {
1112 struct pfpsock *ps = (struct pfpsock *)handle;
1113
1114 if (ps->ps_phd != 0) {
1115 mac_promisc_remove(ps->ps_phd);
1116 ps->ps_phd = 0;
1117 }
1118
1119 if (ps->ps_mch != 0) {
1120 mac_client_close(ps->ps_mch, 0);
1121 ps->ps_mch = 0;
1122 }
1123
1124 if (ps->ps_mh != 0) {
1125 mac_close(ps->ps_mh);
1126 ps->ps_mh = 0;
1127 }
1128
1129 kmem_free(ps, sizeof (*ps));
1130
1131 return (0);
1132 }
1133
1134 /* ************************************************************************* */
1135
1136 /*
1137 * Given a pointer (arg) to a "struct ifreq" (potentially in user space),
1138 * determine the linkid for the interface name stored in that structure.
1139 * name is used as a buffer so that we can ensure a trailing \0 is appended
1140 * to the name safely.
1141 */
1142 static int
pfp_ifreq_getlinkid(intptr_t arg,struct ifreq * ifreqp,datalink_id_t * linkidp,int mode)1143 pfp_ifreq_getlinkid(intptr_t arg, struct ifreq *ifreqp,
1144 datalink_id_t *linkidp, int mode)
1145 {
1146 char name[IFNAMSIZ + 1];
1147 int error;
1148
1149 if (ddi_copyin((void *)arg, ifreqp, sizeof (*ifreqp), mode) != 0)
1150 return (EFAULT);
1151
1152 (void) strlcpy(name, ifreqp->ifr_name, sizeof (name));
1153
1154 error = dls_mgmt_get_linkid(name, linkidp);
1155 if (error != 0)
1156 error = dls_devnet_macname2linkid(name, linkidp);
1157
1158 return (error);
1159 }
1160
1161 /*
1162 * Given a pointer (arg) to a "struct lifreq" (potentially in user space),
1163 * determine the linkid for the interface name stored in that structure.
1164 * name is used as a buffer so that we can ensure a trailing \0 is appended
1165 * to the name safely.
1166 */
1167 static int
pfp_lifreq_getlinkid(intptr_t arg,struct lifreq * lifreqp,datalink_id_t * linkidp,int mode)1168 pfp_lifreq_getlinkid(intptr_t arg, struct lifreq *lifreqp,
1169 datalink_id_t *linkidp, int mode)
1170 {
1171 char name[LIFNAMSIZ + 1];
1172 int error;
1173
1174 if (ddi_copyin((void *)arg, lifreqp, sizeof (*lifreqp), mode) != 0)
1175 return (EFAULT);
1176
1177 (void) strlcpy(name, lifreqp->lifr_name, sizeof (name));
1178
1179 error = dls_mgmt_get_linkid(name, linkidp);
1180 if (error != 0)
1181 error = dls_devnet_macname2linkid(name, linkidp);
1182
1183 return (error);
1184 }
1185
1186 /*
1187 * Although there are several new SOL_PACKET options that can be set and
1188 * are specific to this implementation of PF_PACKET, the current API does
1189 * not support doing a get on them to retrieve accompanying status. Thus
1190 * it is only currently possible to use SOL_PACKET with getsockopt to
1191 * retrieve statistical information. This remains consistant with the
1192 * Linux API at the time of writing.
1193 */
1194 static int
pfp_getpacket_sockopt(sock_lower_handle_t handle,int option_name,void * optval,socklen_t * optlenp)1195 pfp_getpacket_sockopt(sock_lower_handle_t handle, int option_name,
1196 void *optval, socklen_t *optlenp)
1197 {
1198 struct pfpsock *ps;
1199 struct tpacket_stats_short tpss;
1200 int error = 0;
1201
1202 ps = (struct pfpsock *)handle;
1203
1204 switch (option_name) {
1205 case PACKET_STATISTICS :
1206 if (*optlenp < sizeof (ps->ps_stats)) {
1207 error = EINVAL;
1208 break;
1209 }
1210 *optlenp = sizeof (ps->ps_stats);
1211 bcopy(&ps->ps_stats, optval, sizeof (ps->ps_stats));
1212 break;
1213 case PACKET_STATISTICS_SHORT :
1214 if (*optlenp < sizeof (tpss)) {
1215 error = EINVAL;
1216 break;
1217 }
1218 *optlenp = sizeof (tpss);
1219 tpss.tp_packets = ps->ps_stats.tp_packets;
1220 tpss.tp_drops = ps->ps_stats.tp_drops;
1221 bcopy(&tpss, optval, sizeof (tpss));
1222 break;
1223 default :
1224 error = EINVAL;
1225 break;
1226 }
1227
1228 return (error);
1229 }
1230
1231 /*
1232 * The SOL_PACKET level for socket options supports three options,
1233 * PACKET_ADD_MEMBERSHIP, PACKET_DROP_MEMBERSHIP and PACKET_AUXDATA.
1234 * This function is responsible for mapping the two socket options
1235 * that manage multicast membership into the appropriate internal
1236 * function calls to bring the option into effect. Whilst direct
1237 * changes to the multicast membership (ADD/DROP) groups is handled
1238 * by calls directly into the mac module, changes to the promiscuos
1239 * mode are vectored through pfp_set_promisc() so that the logic for
1240 * managing the promiscuous mode is in one place.
1241 */
1242 /* ARGSUSED */
1243 static int
pfp_setpacket_sockopt(sock_lower_handle_t handle,int option_name,const void * optval,socklen_t optlen)1244 pfp_setpacket_sockopt(sock_lower_handle_t handle, int option_name,
1245 const void *optval, socklen_t optlen)
1246 {
1247 struct packet_mreq mreq;
1248 struct pfpsock *ps;
1249 int error = 0;
1250 int opt;
1251
1252 ps = (struct pfpsock *)handle;
1253 if (!ps->ps_bound)
1254 return (EPROTO);
1255
1256 if ((option_name == PACKET_ADD_MEMBERSHIP) ||
1257 (option_name == PACKET_DROP_MEMBERSHIP)) {
1258 if (!ps->ps_bound)
1259 return (EPROTO);
1260 bcopy(optval, &mreq, sizeof (mreq));
1261 if (ps->ps_linkid != mreq.mr_ifindex)
1262 return (EINVAL);
1263 }
1264
1265 switch (option_name) {
1266 case PACKET_ADD_MEMBERSHIP :
1267 switch (mreq.mr_type) {
1268 case PACKET_MR_MULTICAST :
1269 if (mreq.mr_alen != ps->ps_sock.sll_halen)
1270 return (EINVAL);
1271
1272 error = mac_multicast_add(ps->ps_mch, mreq.mr_address);
1273 break;
1274
1275 case PACKET_MR_PROMISC :
1276 error = pfp_set_promisc(ps, MAC_CLIENT_PROMISC_ALL);
1277 break;
1278
1279 case PACKET_MR_ALLMULTI :
1280 error = pfp_set_promisc(ps, MAC_CLIENT_PROMISC_MULTI);
1281 break;
1282 }
1283 break;
1284
1285 case PACKET_DROP_MEMBERSHIP :
1286 switch (mreq.mr_type) {
1287 case PACKET_MR_MULTICAST :
1288 if (mreq.mr_alen != ps->ps_sock.sll_halen)
1289 return (EINVAL);
1290
1291 mac_multicast_remove(ps->ps_mch, mreq.mr_address);
1292 break;
1293
1294 case PACKET_MR_PROMISC :
1295 if (ps->ps_promisc != MAC_CLIENT_PROMISC_ALL)
1296 return (EINVAL);
1297 error = pfp_set_promisc(ps,
1298 MAC_CLIENT_PROMISC_FILTERED);
1299 break;
1300
1301 case PACKET_MR_ALLMULTI :
1302 if (ps->ps_promisc != MAC_CLIENT_PROMISC_MULTI)
1303 return (EINVAL);
1304 error = pfp_set_promisc(ps,
1305 MAC_CLIENT_PROMISC_FILTERED);
1306 break;
1307 }
1308 break;
1309
1310 case PACKET_AUXDATA :
1311 if (optlen == sizeof (int)) {
1312 opt = *(int *)optval;
1313 ps->ps_auxdata = (opt != 0);
1314 } else {
1315 error = EINVAL;
1316 }
1317 break;
1318 default :
1319 error = EINVAL;
1320 break;
1321 }
1322
1323 return (error);
1324 }
1325
1326 /*
1327 * There are only two special setsockopt's for SOL_SOCKET with PF_PACKET:
1328 * SO_ATTACH_FILTER and SO_DETACH_FILTER.
1329 *
1330 * Both of these setsockopt values are candidates for being handled by the
1331 * socket layer itself in future, however this requires understanding how
1332 * they would interact with all other sockets.
1333 */
1334 static int
pfp_setsocket_sockopt(sock_lower_handle_t handle,int option_name,const void * optval,socklen_t optlen)1335 pfp_setsocket_sockopt(sock_lower_handle_t handle, int option_name,
1336 const void *optval, socklen_t optlen)
1337 {
1338 struct bpf_program prog;
1339 struct bpf_insn *fcode;
1340 struct pfpsock *ps;
1341 struct sock_proto_props sopp;
1342 int error = 0;
1343 int size;
1344
1345 ps = (struct pfpsock *)handle;
1346
1347 switch (option_name) {
1348 case SO_ATTACH_FILTER :
1349 #ifdef _LP64
1350 if (optlen == sizeof (struct bpf_program32)) {
1351 struct bpf_program32 prog32;
1352
1353 bcopy(optval, &prog32, sizeof (prog32));
1354 prog.bf_len = prog32.bf_len;
1355 prog.bf_insns = (void *)(uint64_t)prog32.bf_insns;
1356 } else
1357 #endif
1358 if (optlen == sizeof (struct bpf_program)) {
1359 bcopy(optval, &prog, sizeof (prog));
1360 } else if (optlen != sizeof (struct bpf_program)) {
1361 return (EINVAL);
1362 }
1363 if (prog.bf_len > BPF_MAXINSNS)
1364 return (EINVAL);
1365
1366 size = prog.bf_len * sizeof (*prog.bf_insns);
1367 fcode = kmem_alloc(size, KM_SLEEP);
1368 if (ddi_copyin(prog.bf_insns, fcode, size, 0) != 0) {
1369 kmem_free(fcode, size);
1370 return (EFAULT);
1371 }
1372
1373 if (bpf_validate(fcode, (int)prog.bf_len)) {
1374 rw_enter(&ps->ps_bpflock, RW_WRITER);
1375 pfp_release_bpf(ps);
1376 ps->ps_bpf.bf_insns = fcode;
1377 ps->ps_bpf.bf_len = size;
1378 rw_exit(&ps->ps_bpflock);
1379
1380 return (0);
1381 }
1382 kmem_free(fcode, size);
1383 error = EINVAL;
1384 break;
1385
1386 case SO_DETACH_FILTER :
1387 pfp_release_bpf(ps);
1388 break;
1389
1390 case SO_RCVBUF :
1391 size = *(int32_t *)optval;
1392 if (size > sockmod_pfp_rcvbuf_max || size < 0)
1393 return (ENOBUFS);
1394 sopp.sopp_flags = SOCKOPT_RCVHIWAT;
1395 sopp.sopp_rxhiwat = size;
1396 ps->ps_upcalls->su_set_proto_props(ps->ps_upper, &sopp);
1397 ps->ps_rcvbuf = size;
1398 break;
1399
1400 default :
1401 error = ENOPROTOOPT;
1402 break;
1403 }
1404
1405 return (error);
1406 }
1407
1408 /*
1409 * pfp_open_index is an internal function used to open a MAC device by
1410 * its index. Both a mac_handle_t and mac_client_handle_t are acquired
1411 * because some of the interfaces provided by the mac layer require either
1412 * only the mac_handle_t or both it and mac_handle_t.
1413 *
1414 * Whilst inside the kernel we can access data structures supporting any
1415 * zone, access to interfaces from non-global zones is restricted to those
1416 * interfaces (if any) that are exclusively assigned to a zone.
1417 */
1418 static int
pfp_open_index(int index,mac_handle_t * mhp,mac_client_handle_t * mcip,cred_t * cred)1419 pfp_open_index(int index, mac_handle_t *mhp, mac_client_handle_t *mcip,
1420 cred_t *cred)
1421 {
1422 mac_client_handle_t mch;
1423 zoneid_t ifzoneid;
1424 mac_handle_t mh;
1425 zoneid_t zoneid;
1426 int error;
1427
1428 mh = 0;
1429 mch = 0;
1430 error = mac_open_by_linkid(index, &mh);
1431 if (error != 0)
1432 goto bad_open;
1433
1434 error = mac_client_open(mh, &mch, NULL,
1435 MAC_OPEN_FLAGS_USE_DATALINK_NAME);
1436 if (error != 0)
1437 goto bad_open;
1438
1439 zoneid = crgetzoneid(cred);
1440 if (zoneid != GLOBAL_ZONEID) {
1441 mac_perim_handle_t perim;
1442
1443 mac_perim_enter_by_mh(mh, &perim);
1444 error = dls_link_getzid(mac_name(mh), &ifzoneid);
1445 mac_perim_exit(perim);
1446 if (error != 0)
1447 goto bad_open;
1448 if (ifzoneid != zoneid) {
1449 error = EACCES;
1450 goto bad_open;
1451 }
1452 }
1453
1454 *mcip = mch;
1455 *mhp = mh;
1456
1457 return (0);
1458 bad_open:
1459 if (mch != 0)
1460 mac_client_close(mch, 0);
1461 if (mh != 0)
1462 mac_close(mh);
1463 return (error);
1464 }
1465
1466 static void
pfp_close(mac_handle_t mh,mac_client_handle_t mch)1467 pfp_close(mac_handle_t mh, mac_client_handle_t mch)
1468 {
1469 mac_client_close(mch, 0);
1470 mac_close(mh);
1471 }
1472
1473 /*
1474 * The purpose of this function is to provide a single place where we free
1475 * the loaded BPF program and reset all pointers/counters associated with
1476 * it.
1477 */
1478 static void
pfp_release_bpf(struct pfpsock * ps)1479 pfp_release_bpf(struct pfpsock *ps)
1480 {
1481 if (ps->ps_bpf.bf_len != 0) {
1482 kmem_free(ps->ps_bpf.bf_insns, ps->ps_bpf.bf_len);
1483 ps->ps_bpf.bf_len = 0;
1484 ps->ps_bpf.bf_insns = NULL;
1485 }
1486 }
1487
1488 /*
1489 * Set the promiscuous mode of a network interface.
1490 * This function only calls the mac layer when there is a change to the
1491 * status of a network interface's promiscous mode. Tracking of how many
1492 * sockets have the network interface in promiscuous mode, and thus the
1493 * control over the physical device's status, is left to the mac layer.
1494 */
1495 static int
pfp_set_promisc(struct pfpsock * ps,mac_client_promisc_type_t turnon)1496 pfp_set_promisc(struct pfpsock *ps, mac_client_promisc_type_t turnon)
1497 {
1498 int error = 0;
1499 int flags;
1500
1501 /*
1502 * There are 4 combinations of turnon/ps_promisc.
1503 * This if handles 2 (both false, both true) and the if() below
1504 * handles the remaining one - when change is required.
1505 */
1506 if (turnon == ps->ps_promisc)
1507 return (error);
1508
1509 if (ps->ps_phd != 0) {
1510 mac_promisc_remove(ps->ps_phd);
1511 ps->ps_phd = 0;
1512
1513 /*
1514 * ps_promisc is set here in case the call to mac_promisc_add
1515 * fails: leaving it to indicate that the interface is still
1516 * in some sort of promiscuous mode is false.
1517 */
1518 if (ps->ps_promisc != MAC_CLIENT_PROMISC_FILTERED) {
1519 ps->ps_promisc = MAC_CLIENT_PROMISC_FILTERED;
1520 flags = MAC_PROMISC_FLAGS_NO_PHYS;
1521 } else {
1522 flags = 0;
1523 }
1524 flags |= MAC_PROMISC_FLAGS_VLAN_TAG_STRIP;
1525 }
1526
1527 error = mac_promisc_add(ps->ps_mch, turnon, pfp_packet, ps,
1528 &ps->ps_phd, flags);
1529 if (error == 0)
1530 ps->ps_promisc = turnon;
1531
1532 return (error);
1533 }
1534
1535 /*
1536 * This table maps the MAC types in Solaris to the ARPHRD_* values used
1537 * on Linux. This is used with the SIOCGIFHWADDR/SIOCGLIFHWADDR ioctl.
1538 *
1539 * The symbols in this table are *not* pulled in from <net/if_arp.h>,
1540 * they are pulled from <netpacket/packet.h>, thus it acts as a source
1541 * of supplementary information to the ARP table.
1542 */
1543 static uint_t arphrd_to_dl[][2] = {
1544 { ARPHRD_IEEE80211, DL_WIFI },
1545 { ARPHRD_TUNNEL, DL_IPV4 },
1546 { ARPHRD_TUNNEL, DL_IPV6 },
1547 { ARPHRD_TUNNEL, DL_6TO4 },
1548 { ARPHRD_AX25, DL_X25 },
1549 { ARPHRD_ATM, DL_ATM },
1550 { 0, 0 }
1551 };
1552
1553 static int
pfp_dl_to_arphrd(int dltype)1554 pfp_dl_to_arphrd(int dltype)
1555 {
1556 int i;
1557
1558 for (i = 0; arphrd_to_dl[i][0] != 0; i++)
1559 if (arphrd_to_dl[i][1] == dltype)
1560 return (arphrd_to_dl[i][0]);
1561 return (arp_hw_type(dltype));
1562 }
1563