xref: /titanic_52/usr/src/uts/common/inet/ipf/solaris.c (revision f936286c99fb83153e4bfd870eb2830a990a82c1)
1 /*
2  * Copyright (C) 1993-2001, 2003 by Darren Reed.
3  *
4  * See the IPFILTER.LICENCE file for details on licencing.
5  *
6  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
7  * Use is subject to license terms.
8  *
9  * Copyright (c) 2014, Joyent, Inc.  All rights reserved.
10  */
11 
12 /*
13  * ipfilter kernel module mutexes and locking:
14  *
15  * Enabling ipfilter creates a per-netstack ipf_stack_t object that is
16  * stored in the ipf_stacks list, which is protected by ipf_stack_lock.
17  * ipf_stack_t objects are accessed in three contexts:
18  *
19  * 1) administering that filter (eg: ioctls handled with iplioctl())
20  * 2) reading log data (eg: iplread() / iplwrite())
21  * 3) filtering packets (eg: ipf_hook4_* and ipf_hook6_* pfhooks
22  *    functions)
23  *
24  * Each ipf_stack_t has a RW lock, ifs_ipf_global, protecting access to the
25  * whole structure. The structure also has locks protecting the various
26  * data structures used for filtering. The following guidelines should be
27  * followed for ipf_stack_t locks:
28  *
29  * - ipf_stack_lock must be held when accessing the ipf_stacks list
30  * - ipf_stack_lock should be held before acquiring ifs_ipf_global for
31  *   a stack (the exception to this is ipf_stack_destroy(), which removes
32  *   the ipf_stack_t from the list, then drops ipf_stack_lock before
33  *   acquiring ifs_ipf_global)
34  * - ifs_ipf_global must be held when accessing an ipf_stack_t in that list:
35  *   - The write lock is held only during stack creation / destruction
36  *   - The read lock should be held for all other accesses
37  * - To alter the filtering data in the administrative context, one must:
38  *   - acquire the read lock for ifs_ipf_global
39  *   - then acquire the write lock for the data in question
40  * - In the filtering path, the read lock needs to be held for each type of
41  *   filtering data used
42  * - ifs_ipf_global does not need to be held in the filtering path:
43  *   - The filtering hooks don't need to modify the stack itself
44  *   - The ipf_stack_t will not be destroyed until the hooks are unregistered.
45  *     This requires a write lock on the hook, ensuring that no active hooks
46  *     (eg: the filtering path) are running, and that the hooks won't be run
47  *     afterward.
48  *
49  * Note that there is a deadlock possible when calling net_hook_register()
50  * or net_hook_unregister() with ifs_ipf_global held: see the comments in
51  * iplattach() and ipldetach() for details.
52  */
53 
54 #include <sys/systm.h>
55 #include <sys/types.h>
56 #include <sys/param.h>
57 #include <sys/errno.h>
58 #include <sys/uio.h>
59 #include <sys/buf.h>
60 #include <sys/modctl.h>
61 #include <sys/open.h>
62 #include <sys/kmem.h>
63 #include <sys/conf.h>
64 #include <sys/cmn_err.h>
65 #include <sys/stat.h>
66 #include <sys/cred.h>
67 #include <sys/dditypes.h>
68 #include <sys/poll.h>
69 #include <sys/autoconf.h>
70 #include <sys/byteorder.h>
71 #include <sys/socket.h>
72 #include <sys/dlpi.h>
73 #include <sys/stropts.h>
74 #include <sys/kstat.h>
75 #include <sys/sockio.h>
76 #include <sys/neti.h>
77 #include <sys/hook.h>
78 #include <net/if.h>
79 #if SOLARIS2 >= 6
80 #include <net/if_types.h>
81 #endif
82 #include <net/af.h>
83 #include <net/route.h>
84 #include <netinet/in.h>
85 #include <netinet/in_systm.h>
86 #include <netinet/if_ether.h>
87 #include <netinet/ip.h>
88 #include <netinet/ip_var.h>
89 #include <netinet/tcp.h>
90 #include <netinet/udp.h>
91 #include <netinet/tcpip.h>
92 #include <netinet/ip_icmp.h>
93 #include <sys/ddi.h>
94 #include <sys/sunddi.h>
95 #include "netinet/ip_compat.h"
96 #include "netinet/ipl.h"
97 #include "netinet/ip_fil.h"
98 #include "netinet/ip_nat.h"
99 #include "netinet/ip_frag.h"
100 #include "netinet/ip_auth.h"
101 #include "netinet/ip_state.h"
102 #include "netinet/ipf_stack.h"
103 
104 extern	int	iplwrite __P((dev_t, struct uio *, cred_t *));
105 
106 static	int	ipf_getinfo __P((dev_info_t *, ddi_info_cmd_t,
107 		    void *, void **));
108 #if SOLARIS2 < 10
109 static	int	ipf_identify __P((dev_info_t *));
110 #endif
111 static	int	ipf_attach __P((dev_info_t *, ddi_attach_cmd_t));
112 static	int	ipf_detach __P((dev_info_t *, ddi_detach_cmd_t));
113 static	void	*ipf_stack_create __P((const netid_t));
114 static	void	ipf_stack_destroy __P((const netid_t, void *));
115 static	void	ipf_stack_shutdown __P((const netid_t, void *));
116 static	int	ipf_property_g_update __P((dev_info_t *));
117 static	char	*ipf_devfiles[] = { IPL_NAME, IPNAT_NAME, IPSTATE_NAME,
118 				    IPAUTH_NAME, IPSYNC_NAME, IPSCAN_NAME,
119 				    IPLOOKUP_NAME, NULL };
120 extern void 	*ipf_state;	/* DDI state */
121 extern vmem_t	*ipf_minor;	/* minor number arena */
122 
123 static struct cb_ops ipf_cb_ops = {
124 	iplopen,
125 	iplclose,
126 	nodev,		/* strategy */
127 	nodev,		/* print */
128 	nodev,		/* dump */
129 	iplread,
130 	iplwrite,	/* write */
131 	iplioctl,	/* ioctl */
132 	nodev,		/* devmap */
133 	nodev,		/* mmap */
134 	nodev,		/* segmap */
135 	nochpoll,	/* poll */
136 	ddi_prop_op,
137 	NULL,
138 	D_MTSAFE,
139 #if SOLARIS2 > 4
140 	CB_REV,
141 	nodev,		/* aread */
142 	nodev,		/* awrite */
143 #endif
144 };
145 
146 static struct dev_ops ipf_ops = {
147 	DEVO_REV,
148 	0,
149 	ipf_getinfo,
150 #if SOLARIS2 >= 10
151 	nulldev,
152 #else
153 	ipf_identify,
154 #endif
155 	nulldev,
156 	ipf_attach,
157 	ipf_detach,
158 	nodev,		/* reset */
159 	&ipf_cb_ops,
160 	(struct bus_ops *)0,
161 	NULL,
162 	ddi_quiesce_not_needed,		/* quiesce */
163 };
164 
165 
166 static net_instance_t *ipfncb = NULL;
167 static ipf_stack_t *ipf_stacks = NULL;
168 static kmutex_t ipf_stack_lock;
169 extern struct mod_ops mod_driverops;
170 static struct modldrv iplmod = {
171 	&mod_driverops, IPL_VERSION, &ipf_ops };
172 static struct modlinkage modlink1 = { MODREV_1, &iplmod, NULL };
173 
#if SOLARIS2 >= 6
/*
 * Table of { interface type, size } pairs, one row per IFT_* value.
 * Only IFT_ETHER (14) and IFT_FDDI (24) carry a non-zero size; rows
 * beyond the listed initializers are implicitly zero.
 * NOTE(review): the second column is presumably the link-layer header
 * length in bytes -- confirm against the users of this table.
 */
static	size_t	hdrsizes[57][2] = {
	{ 0,			0 },
	{ IFT_OTHER,		0 },
	{ IFT_1822,		0 },
	{ IFT_HDH1822,		0 },
	{ IFT_X25DDN,		0 },
	{ IFT_X25,		0 },
	{ IFT_ETHER,		14 },
	{ IFT_ISO88023,		0 },
	{ IFT_ISO88024,		0 },
	{ IFT_ISO88025,		0 },
	{ IFT_ISO88026,		0 },
	{ IFT_STARLAN,		0 },
	{ IFT_P10,		0 },
	{ IFT_P80,		0 },
	{ IFT_HY,		0 },
	{ IFT_FDDI,		24 },
	{ IFT_LAPB,		0 },
	{ IFT_SDLC,		0 },
	{ IFT_T1,		0 },
	{ IFT_CEPT,		0 },
	{ IFT_ISDNBASIC,	0 },
	{ IFT_ISDNPRIMARY,	0 },
	{ IFT_PTPSERIAL,	0 },
	{ IFT_PPP,		0 },
	{ IFT_LOOP,		0 },
	{ IFT_EON,		0 },
	{ IFT_XETHER,		0 },
	{ IFT_NSIP,		0 },
	{ IFT_SLIP,		0 },
	{ IFT_ULTRA,		0 },
	{ IFT_DS3,		0 },
	{ IFT_SIP,		0 },
	{ IFT_FRELAY,		0 },
	{ IFT_RS232,		0 },
	{ IFT_PARA,		0 },
	{ IFT_ARCNET,		0 },
	{ IFT_ARCNETPLUS,	0 },
	{ IFT_ATM,		0 },
	{ IFT_MIOX25,		0 },
	{ IFT_SONET,		0 },
	{ IFT_X25PLE,		0 },
	{ IFT_ISO88022LLC,	0 },
	{ IFT_LOCALTALK,	0 },
	{ IFT_SMDSDXI,		0 },
	{ IFT_FRELAYDCE,	0 },
	{ IFT_V35,		0 },
	{ IFT_HSSI,		0 },
	{ IFT_HIPPI,		0 },
	{ IFT_MODEM,		0 },
	{ IFT_AAL5,		0 },
	{ IFT_SONETPATH,	0 },
	{ IFT_SONETVT,		0 },
	{ IFT_SMDSICIP,		0 },
	{ IFT_PROPVIRTUAL,	0 },
	{ IFT_PROPMUX,		0 },
};
#endif /* SOLARIS2 >= 6 */
233 
234 dev_info_t *ipf_dev_info = NULL;
235 
236 static const filter_kstats_t ipf_kstat_tmp = {
237 	{ "pass",			KSTAT_DATA_ULONG },
238 	{ "block",			KSTAT_DATA_ULONG },
239 	{ "nomatch",			KSTAT_DATA_ULONG },
240 	{ "short",			KSTAT_DATA_ULONG },
241 	{ "pass, logged",		KSTAT_DATA_ULONG },
242 	{ "block, logged",		KSTAT_DATA_ULONG },
243 	{ "nomatch, logged",		KSTAT_DATA_ULONG },
244 	{ "logged",			KSTAT_DATA_ULONG },
245 	{ "skip",			KSTAT_DATA_ULONG },
246 	{ "return sent",		KSTAT_DATA_ULONG },
247 	{ "acct",			KSTAT_DATA_ULONG },
248 	{ "bad frag state alloc",	KSTAT_DATA_ULONG },
249 	{ "new frag state kept",	KSTAT_DATA_ULONG },
250 	{ "new frag state compl. pkt",	KSTAT_DATA_ULONG },
251 	{ "bad pkt state alloc",	KSTAT_DATA_ULONG },
252 	{ "new pkt kept state",		KSTAT_DATA_ULONG },
253 	{ "cachehit",			KSTAT_DATA_ULONG },
254 	{ "tcp cksum bad",		KSTAT_DATA_ULONG },
255 	{{ "pullup ok",			KSTAT_DATA_ULONG },
256 	{ "pullup nok",			KSTAT_DATA_ULONG }},
257 	{ "src != route",		KSTAT_DATA_ULONG },
258 	{ "ttl invalid",		KSTAT_DATA_ULONG },
259 	{ "bad ip pkt",			KSTAT_DATA_ULONG },
260 	{ "ipv6 pkt",			KSTAT_DATA_ULONG },
261 	{ "dropped:pps ceiling",	KSTAT_DATA_ULONG },
262 	{ "ip upd. fail",		KSTAT_DATA_ULONG }
263 };
264 
265 
266 static int	ipf_kstat_update(kstat_t *ksp, int rwflag);
267 
268 static void
269 ipf_kstat_init(ipf_stack_t *ifs, boolean_t from_gz)
270 {
271 	ifs->ifs_kstatp[0] = net_kstat_create(ifs->ifs_netid,
272 	    (from_gz ? "ipf_gz" : "ipf"),
273 	    0, "inbound", "net", KSTAT_TYPE_NAMED,
274 	    sizeof (filter_kstats_t) / sizeof (kstat_named_t), 0);
275 	if (ifs->ifs_kstatp[0] != NULL) {
276 		bcopy(&ipf_kstat_tmp, ifs->ifs_kstatp[0]->ks_data,
277 		    sizeof (filter_kstats_t));
278 		ifs->ifs_kstatp[0]->ks_update = ipf_kstat_update;
279 		ifs->ifs_kstatp[0]->ks_private = &ifs->ifs_frstats[0];
280 		kstat_install(ifs->ifs_kstatp[0]);
281 	}
282 
283 	ifs->ifs_kstatp[1] = net_kstat_create(ifs->ifs_netid,
284 	    (from_gz ? "ipf_gz" : "ipf"),
285 	    0, "outbound", "net", KSTAT_TYPE_NAMED,
286 	    sizeof (filter_kstats_t) / sizeof (kstat_named_t), 0);
287 	if (ifs->ifs_kstatp[1] != NULL) {
288 		bcopy(&ipf_kstat_tmp, ifs->ifs_kstatp[1]->ks_data,
289 		    sizeof (filter_kstats_t));
290 		ifs->ifs_kstatp[1]->ks_update = ipf_kstat_update;
291 		ifs->ifs_kstatp[1]->ks_private = &ifs->ifs_frstats[1];
292 		kstat_install(ifs->ifs_kstatp[1]);
293 	}
294 
295 #ifdef	IPFDEBUG
296 	cmn_err(CE_NOTE, "IP Filter: ipf_kstat_init(%p) installed %p, %p",
297 	    ifs, ifs->ifs_kstatp[0], ifs->ifs_kstatp[1]);
298 #endif
299 }
300 
301 
302 static void
303 ipf_kstat_fini(ipf_stack_t *ifs)
304 {
305 	int i;
306 
307 	for (i = 0; i < 2; i++) {
308 		if (ifs->ifs_kstatp[i] != NULL) {
309 			net_kstat_delete(ifs->ifs_netid, ifs->ifs_kstatp[i]);
310 			ifs->ifs_kstatp[i] = NULL;
311 		}
312 	}
313 }
314 
315 
316 static int
317 ipf_kstat_update(kstat_t *ksp, int rwflag)
318 {
319 	filter_kstats_t	*fkp;
320 	filterstats_t	*fsp;
321 
322 	if (ksp == NULL || ksp->ks_data == NULL)
323 		return (EIO);
324 
325 	if (rwflag == KSTAT_WRITE)
326 		return (EACCES);
327 
328 	fkp = ksp->ks_data;
329 	fsp = ksp->ks_private;
330 
331 	fkp->fks_pass.value.ul		= fsp->fr_pass;
332 	fkp->fks_block.value.ul		= fsp->fr_block;
333 	fkp->fks_nom.value.ul		= fsp->fr_nom;
334 	fkp->fks_short.value.ul		= fsp->fr_short;
335 	fkp->fks_ppkl.value.ul		= fsp->fr_ppkl;
336 	fkp->fks_bpkl.value.ul		= fsp->fr_bpkl;
337 	fkp->fks_npkl.value.ul		= fsp->fr_npkl;
338 	fkp->fks_pkl.value.ul		= fsp->fr_pkl;
339 	fkp->fks_skip.value.ul		= fsp->fr_skip;
340 	fkp->fks_ret.value.ul		= fsp->fr_ret;
341 	fkp->fks_acct.value.ul		= fsp->fr_acct;
342 	fkp->fks_bnfr.value.ul		= fsp->fr_bnfr;
343 	fkp->fks_nfr.value.ul		= fsp->fr_nfr;
344 	fkp->fks_cfr.value.ul		= fsp->fr_cfr;
345 	fkp->fks_bads.value.ul		= fsp->fr_bads;
346 	fkp->fks_ads.value.ul		= fsp->fr_ads;
347 	fkp->fks_chit.value.ul		= fsp->fr_chit;
348 	fkp->fks_tcpbad.value.ul 	= fsp->fr_tcpbad;
349 	fkp->fks_pull[0].value.ul 	= fsp->fr_pull[0];
350 	fkp->fks_pull[1].value.ul 	= fsp->fr_pull[1];
351 	fkp->fks_badsrc.value.ul 	= fsp->fr_badsrc;
352 	fkp->fks_badttl.value.ul 	= fsp->fr_badttl;
353 	fkp->fks_bad.value.ul		= fsp->fr_bad;
354 	fkp->fks_ipv6.value.ul		= fsp->fr_ipv6;
355 	fkp->fks_ppshit.value.ul 	= fsp->fr_ppshit;
356 	fkp->fks_ipud.value.ul		= fsp->fr_ipud;
357 
358 	return (0);
359 }
360 
361 int
362 _init()
363 {
364 	int ipfinst;
365 
366 	ipfinst = mod_install(&modlink1);
367 #ifdef	IPFDEBUG
368 	cmn_err(CE_NOTE, "IP Filter: _init() = %d", ipfinst);
369 #endif
370 	mutex_init(&ipf_stack_lock, NULL, MUTEX_DRIVER, NULL);
371 	return (ipfinst);
372 }
373 
374 
375 int
376 _fini(void)
377 {
378 	int ipfinst;
379 
380 	ipfinst = mod_remove(&modlink1);
381 #ifdef	IPFDEBUG
382 	cmn_err(CE_NOTE, "IP Filter: _fini() = %d", ipfinst);
383 #endif
384 	return (ipfinst);
385 }
386 
387 
388 int
389 _info(modinfop)
390 struct modinfo *modinfop;
391 {
392 	int ipfinst;
393 
394 	ipfinst = mod_info(&modlink1, modinfop);
395 #ifdef	IPFDEBUG
396 	cmn_err(CE_NOTE, "IP Filter: _info(%p) = %d", modinfop, ipfinst);
397 #endif
398 	return (ipfinst);
399 }
400 
401 
402 #if SOLARIS2 < 10
403 static int ipf_identify(dip)
404 dev_info_t *dip;
405 {
406 #ifdef	IPFDEBUG
407 	cmn_err(CE_NOTE, "IP Filter: ipf_identify(%p)", dip);
408 #endif
409 	if (strcmp(ddi_get_name(dip), "ipf") == 0)
410 		return (DDI_IDENTIFIED);
411 	return (DDI_NOT_IDENTIFIED);
412 }
413 #endif
414 
415 /*
416  * Initialize things for IPF for each stack instance
417  */
418 static void *
419 ipf_stack_create_one(const netid_t id, const zoneid_t zid, boolean_t from_gz,
420     ipf_stack_t *ifs_gz)
421 {
422 	ipf_stack_t	*ifs;
423 
424 #ifdef IPFDEBUG
425 	cmn_err(CE_NOTE, "IP Filter:stack_create_one id=%d global=%d", id,
426 	    global);
427 #endif
428 
429 	ifs = (ipf_stack_t *)kmem_alloc(sizeof (*ifs), KM_SLEEP);
430 	bzero(ifs, sizeof (*ifs));
431 
432 	ifs->ifs_hook4_physical_in	= B_FALSE;
433 	ifs->ifs_hook4_physical_out	= B_FALSE;
434 	ifs->ifs_hook4_nic_events	= B_FALSE;
435 	ifs->ifs_hook4_loopback_in	= B_FALSE;
436 	ifs->ifs_hook4_loopback_out	= B_FALSE;
437 	ifs->ifs_hook6_physical_in	= B_FALSE;
438 	ifs->ifs_hook6_physical_out	= B_FALSE;
439 	ifs->ifs_hook6_nic_events	= B_FALSE;
440 	ifs->ifs_hook6_loopback_in	= B_FALSE;
441 	ifs->ifs_hook6_loopback_out	= B_FALSE;
442 
443 	/*
444 	 * Initialize mutex's
445 	 */
446 	RWLOCK_INIT(&ifs->ifs_ipf_global, "ipf filter load/unload mutex");
447 	RWLOCK_INIT(&ifs->ifs_ipf_mutex, "ipf filter rwlock");
448 	RWLOCK_INIT(&ifs->ifs_ipf_frcache, "ipf cache rwlock");
449 	ifs->ifs_netid = id;
450 	ifs->ifs_zone = zid;
451 	ifs->ifs_gz_controlled = from_gz;
452 	ifs->ifs_gz_cont_ifs = ifs_gz;
453 
454 	ipf_kstat_init(ifs, from_gz);
455 
456 #ifdef IPFDEBUG
457 	cmn_err(CE_CONT, "IP Filter:stack_create zone=%d", ifs->ifs_zone);
458 #endif
459 
460 	/*
461 	 * Lock people out while we set things up.
462 	 */
463 	WRITE_ENTER(&ifs->ifs_ipf_global);
464 	ipftuneable_alloc(ifs);
465 	RWLOCK_EXIT(&ifs->ifs_ipf_global);
466 
467 	/* Limit to global stack */
468 	if (ifs->ifs_zone == GLOBAL_ZONEID)
469 		cmn_err(CE_CONT, "!%s, running.\n", ipfilter_version);
470 
471 	mutex_enter(&ipf_stack_lock);
472 	if (ipf_stacks != NULL)
473 		ipf_stacks->ifs_pnext = &ifs->ifs_next;
474 	ifs->ifs_next = ipf_stacks;
475 	ifs->ifs_pnext = &ipf_stacks;
476 	ipf_stacks = ifs;
477 	mutex_exit(&ipf_stack_lock);
478 
479 	return (ifs);
480 }
481 
482 static void *
483 ipf_stack_create(const netid_t id)
484 {
485 	ipf_stack_t	*ifs = NULL;
486 	zoneid_t	zid = net_getzoneidbynetid(id);
487 
488 	/*
489 	 * Create two ipfilter stacks for a zone - the first can only be
490 	 * controlled from the global zone, and the second is owned by
491 	 * the zone itself. There is no need to create a GZ-controlled
492 	 * stack for the global zone, since we're already in the global
493 	 * zone. See the "GZ-controlled and per-zone stacks" comment block in
494 	 * ip_fil_solaris.c for details.
495 	 */
496 	if (zid != GLOBAL_ZONEID)
497 		ifs = ipf_stack_create_one(id, zid, B_TRUE, NULL);
498 
499 	return (ipf_stack_create_one(id, zid, B_FALSE, ifs));
500 }
501 
502 /*
503  * Find an ipfilter stack for the given zone. Return the GZ-controlled or
504  * per-zone stack if set by an earlier SIOCIPFZONESET ioctl call. See the
505  * "GZ-controlled and per-zone stacks" comment block in ip_fil_solaris.c for
506  * details.
507  *
508  * This function returns with the ipf_stack_t's ifs_ipf_global
509  * read lock held (if the stack is found). See the "ipfilter kernel module
510  * mutexes and locking" comment block at the top of this file.
511  */
512 ipf_stack_t *
513 ipf_find_stack(const zoneid_t orig_zone, ipf_devstate_t *isp)
514 {
515 	ipf_stack_t *ifs;
516 	boolean_t gz_stack;
517 	zoneid_t zone;
518 
519 	/*
520 	 * If we're in the GZ, determine if we're acting on a zone's stack,
521 	 * and whether or not that stack is the GZ-controlled or in-zone
522 	 * one.  See the "GZ and per-zone stacks" note at the top of this
523 	 * file.
524 	 */
525 	if (orig_zone == GLOBAL_ZONEID &&
526 	    (isp->ipfs_zoneid != IPFS_ZONE_UNSET)) {
527 		/* Global zone, and we've set the zoneid for this fd already */
528 
529 		if (orig_zone == isp->ipfs_zoneid) {
530 			/* There's only a per-zone stack for the GZ */
531 			gz_stack = B_FALSE;
532 		} else {
533 			gz_stack = isp->ipfs_gz;
534 		}
535 
536 		zone = isp->ipfs_zoneid;
537 	} else {
538 		/*
539 		 * Non-global zone or GZ without having set a zoneid: act on
540 		 * the per-zone stack of the zone that this ioctl originated
541 		 * from.
542 		 */
543 		gz_stack = B_FALSE;
544 		zone = orig_zone;
545 	}
546 
547 	mutex_enter(&ipf_stack_lock);
548 	for (ifs = ipf_stacks; ifs != NULL; ifs = ifs->ifs_next) {
549 		if (ifs->ifs_zone == zone && ifs->ifs_gz_controlled == gz_stack)
550 			break;
551 	}
552 
553 	if (ifs != NULL) {
554 		READ_ENTER(&ifs->ifs_ipf_global);
555 	}
556 	mutex_exit(&ipf_stack_lock);
557 	return (ifs);
558 }
559 
560 static int ipf_detach_check_zone(ipf_stack_t *ifs)
561 {
562 	/*
563 	 * Make sure we're the only one's modifying things.  With
564 	 * this lock others should just fall out of the loop.
565 	 */
566 	READ_ENTER(&ifs->ifs_ipf_global);
567 	if (ifs->ifs_fr_running == 1) {
568 		RWLOCK_EXIT(&ifs->ifs_ipf_global);
569 		return (-1);
570 	}
571 
572 	/*
573 	 * Make sure there is no active filter rule.
574 	 */
575 	if (ifs->ifs_ipfilter[0][ifs->ifs_fr_active] ||
576 	    ifs->ifs_ipfilter[1][ifs->ifs_fr_active] ||
577 	    ifs->ifs_ipfilter6[0][ifs->ifs_fr_active] ||
578 	    ifs->ifs_ipfilter6[1][ifs->ifs_fr_active]) {
579 		RWLOCK_EXIT(&ifs->ifs_ipf_global);
580 		return (-1);
581 	}
582 
583 	RWLOCK_EXIT(&ifs->ifs_ipf_global);
584 
585 	return (0);
586 }
587 
588 
589 static int ipf_detach_check_all()
590 {
591 	ipf_stack_t *ifs;
592 
593 	mutex_enter(&ipf_stack_lock);
594 	for (ifs = ipf_stacks; ifs != NULL; ifs = ifs->ifs_next)
595 		if (ipf_detach_check_zone(ifs) != 0)
596 			break;
597 	mutex_exit(&ipf_stack_lock);
598 	return ((ifs == NULL) ? 0 : -1);
599 }
600 
601 
602 /*
603  * Remove ipf kstats for both the per-zone ipf stack and the
604  * GZ-controlled stack for the same zone, if it exists.
605  */
606 /* ARGSUSED */
607 static void
608 ipf_stack_shutdown(const netid_t id, void *arg)
609 {
610 	ipf_stack_t *ifs = (ipf_stack_t *)arg;
611 
612 	/*
613 	 * The GZ-controlled stack
614 	 */
615 	if (ifs->ifs_gz_cont_ifs != NULL)
616 		ipf_kstat_fini(ifs->ifs_gz_cont_ifs);
617 
618 	/*
619 	 * The per-zone stack
620 	 */
621 	ipf_kstat_fini(ifs);
622 }
623 
624 
625 /*
626  * Destroy things for ipf for one stack.
627  */
628 /* ARGSUSED */
629 static void
630 ipf_stack_destroy_one(const netid_t id, ipf_stack_t *ifs)
631 {
632 	timeout_id_t tid;
633 
634 #ifdef IPFDEBUG
635 	(void) printf("ipf_stack_destroy_one(%p)\n", (void *)ifs);
636 #endif
637 
638 	/*
639 	 * Make sure we're the only one's modifying things.  With
640 	 * this lock others should just fall out of the loop.
641 	 */
642 	WRITE_ENTER(&ifs->ifs_ipf_global);
643 	if (ifs->ifs_fr_running == -2) {
644 		RWLOCK_EXIT(&ifs->ifs_ipf_global);
645 		return;
646 	}
647 	ifs->ifs_fr_running = -2;
648 	tid = ifs->ifs_fr_timer_id;
649 	ifs->ifs_fr_timer_id = NULL;
650 	RWLOCK_EXIT(&ifs->ifs_ipf_global);
651 
652 	mutex_enter(&ipf_stack_lock);
653 	if (ifs->ifs_next != NULL)
654 		ifs->ifs_next->ifs_pnext = ifs->ifs_pnext;
655 	*ifs->ifs_pnext = ifs->ifs_next;
656 	mutex_exit(&ipf_stack_lock);
657 
658 	if (tid != NULL)
659 		(void) untimeout(tid);
660 
661 	WRITE_ENTER(&ifs->ifs_ipf_global);
662 	if (ipldetach(ifs) != 0) {
663 		printf("ipf_stack_destroy_one: ipldetach failed\n");
664 	}
665 
666 	ipftuneable_free(ifs);
667 
668 	RWLOCK_EXIT(&ifs->ifs_ipf_global);
669 	RW_DESTROY(&ifs->ifs_ipf_mutex);
670 	RW_DESTROY(&ifs->ifs_ipf_frcache);
671 	RW_DESTROY(&ifs->ifs_ipf_global);
672 
673 	KFREE(ifs);
674 }
675 
676 
677 /*
678  * Destroy things for ipf for both the per-zone ipf stack and the
679  * GZ-controlled stack for the same zone, if it exists. See the "GZ-controlled
680  * and per-zone stacks" comment block in ip_fil_solaris.c for details.
681  */
682 /* ARGSUSED */
683 static void
684 ipf_stack_destroy(const netid_t id, void *arg)
685 {
686 	ipf_stack_t *ifs = (ipf_stack_t *)arg;
687 
688 	/*
689 	 * The GZ-controlled stack
690 	 */
691 	if (ifs->ifs_gz_cont_ifs != NULL)
692 		ipf_stack_destroy_one(id, ifs->ifs_gz_cont_ifs);
693 
694 	/*
695 	 * The per-zone stack
696 	 */
697 	ipf_stack_destroy_one(id, ifs);
698 }
699 
700 
701 static int ipf_attach(dip, cmd)
702 dev_info_t *dip;
703 ddi_attach_cmd_t cmd;
704 {
705 	char *s;
706 	int i;
707 	int instance;
708 
709 #ifdef	IPFDEBUG
710 	cmn_err(CE_NOTE, "IP Filter: ipf_attach(%p,%x)", dip, cmd);
711 #endif
712 
713 	switch (cmd)
714 	{
715 	case DDI_ATTACH:
716 		instance = ddi_get_instance(dip);
717 		/* Only one instance of ipf (instance 0) can be attached. */
718 		if (instance > 0)
719 			return (DDI_FAILURE);
720 
721 #ifdef	IPFDEBUG
722 		cmn_err(CE_CONT, "IP Filter: attach ipf instance %d", instance);
723 #endif
724 
725 		(void) ipf_property_g_update(dip);
726 
727 		if (ddi_soft_state_init(&ipf_state, sizeof (ipf_devstate_t), 1)
728 		    != 0) {
729 			ddi_prop_remove_all(dip);
730 			return (DDI_FAILURE);
731 		}
732 
733 		for (i = 0; ((s = ipf_devfiles[i]) != NULL); i++) {
734 			s = strrchr(s, '/');
735 			if (s == NULL)
736 				continue;
737 			s++;
738 			if (ddi_create_minor_node(dip, s, S_IFCHR, i,
739 			    DDI_PSEUDO, 0) == DDI_FAILURE)
740 				goto attach_failed;
741 		}
742 
743 		ipf_dev_info = dip;
744 
745 		ipfncb = net_instance_alloc(NETINFO_VERSION);
746 		if (ipfncb == NULL)
747 			goto attach_failed;
748 
749 		ipfncb->nin_name = "ipf";
750 		ipfncb->nin_create = ipf_stack_create;
751 		ipfncb->nin_destroy = ipf_stack_destroy;
752 		ipfncb->nin_shutdown = ipf_stack_shutdown;
753 		if (net_instance_register(ipfncb) == DDI_FAILURE) {
754 			net_instance_free(ipfncb);
755 			goto attach_failed;
756 		}
757 
758 		ipf_minor = vmem_create("ipf_minor", (void *)1, UINT32_MAX - 1,
759 		    1, NULL, NULL, NULL, 0, VM_SLEEP | VMC_IDENTIFIER);
760 
761 #ifdef IPFDEBUG
762 		cmn_err(CE_CONT, "IP Filter:stack_create callback_reg=%d", i);
763 #endif
764 
765 		return (DDI_SUCCESS);
766 		/* NOTREACHED */
767 	default:
768 		break;
769 	}
770 
771 attach_failed:
772 	ddi_remove_minor_node(dip, NULL);
773 	ddi_prop_remove_all(dip);
774 	ddi_soft_state_fini(&ipf_state);
775 	return (DDI_FAILURE);
776 }
777 
778 
779 static int ipf_detach(dip, cmd)
780 dev_info_t *dip;
781 ddi_detach_cmd_t cmd;
782 {
783 	int i;
784 
785 #ifdef	IPFDEBUG
786 	cmn_err(CE_NOTE, "IP Filter: ipf_detach(%p,%x)", dip, cmd);
787 #endif
788 	switch (cmd) {
789 	case DDI_DETACH:
790 		if (ipf_detach_check_all() != 0)
791 			return (DDI_FAILURE);
792 
793 		/*
794 		 * Undo what we did in ipf_attach, freeing resources
795 		 * and removing things we installed.  The system
796 		 * framework guarantees we are not active with this devinfo
797 		 * node in any other entry points at this time.
798 		 */
799 		ddi_prop_remove_all(dip);
800 		i = ddi_get_instance(dip);
801 		ddi_remove_minor_node(dip, NULL);
802 		if (i > 0) {
803 			cmn_err(CE_CONT, "IP Filter: still attached (%d)\n", i);
804 			return (DDI_FAILURE);
805 		}
806 
807 		vmem_destroy(ipf_minor);
808 		ddi_soft_state_fini(&ipf_state);
809 
810 		(void) net_instance_unregister(ipfncb);
811 		net_instance_free(ipfncb);
812 
813 		return (DDI_SUCCESS);
814 		/* NOTREACHED */
815 	default:
816 		break;
817 	}
818 	cmn_err(CE_NOTE, "IP Filter: failed to detach\n");
819 	return (DDI_FAILURE);
820 }
821 
822 
823 /*ARGSUSED*/
824 static int ipf_getinfo(dip, infocmd, arg, result)
825 dev_info_t *dip;
826 ddi_info_cmd_t infocmd;
827 void *arg, **result;
828 {
829 	int error;
830 
831 	error = DDI_FAILURE;
832 #ifdef	IPFDEBUG
833 	cmn_err(CE_NOTE, "IP Filter: ipf_getinfo(%p,%x,%p)", dip, infocmd, arg);
834 #endif
835 	switch (infocmd) {
836 	case DDI_INFO_DEVT2DEVINFO:
837 		*result = ipf_dev_info;
838 		error = DDI_SUCCESS;
839 		break;
840 	case DDI_INFO_DEVT2INSTANCE:
841 		*result = (void *)0;
842 		error = DDI_SUCCESS;
843 		break;
844 	default:
845 		break;
846 	}
847 	return (error);
848 }
849 
850 
851 /*
852  * Fetch configuration file values that have been entered into the ipf.conf
853  * driver file.
854  */
855 static int ipf_property_g_update(dip)
856 dev_info_t *dip;
857 {
858 #ifdef DDI_NO_AUTODETACH
859 	if (ddi_prop_update_int(DDI_DEV_T_NONE, dip,
860 				DDI_NO_AUTODETACH, 1) != DDI_PROP_SUCCESS) {
861 		cmn_err(CE_WARN, "!updating DDI_NO_AUTODETACH failed");
862 		return (DDI_FAILURE);
863 	}
864 #else
865 	if (ddi_prop_update_int(DDI_DEV_T_NONE, dip,
866 				"ddi-no-autodetach", 1) != DDI_PROP_SUCCESS) {
867 		cmn_err(CE_WARN, "!updating ddi-no-autodetach failed");
868 		return (DDI_FAILURE);
869 	}
870 #endif
871 
872 	return (DDI_SUCCESS);
873 }
874 
875 int
876 ipf_property_update(dip, ifs)
877 dev_info_t *dip;
878 ipf_stack_t *ifs;
879 {
880 	ipftuneable_t *ipft;
881 	char *name;
882 	uint_t one;
883 	int *i32p;
884 	int err, rv = 0;
885 
886 	for (ipft = ifs->ifs_ipf_tuneables;
887 		(name = ipft->ipft_name) != NULL; ipft++) {
888 		one = 1;
889 		i32p = NULL;
890 		err = ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dip,
891 						0, name, &i32p, &one);
892 		if (err == DDI_PROP_NOT_FOUND)
893 			continue;
894 #ifdef	IPFDEBUG
895 		cmn_err(CE_CONT, "IP Filter: lookup_int(%s) = %d\n",
896 			name, err);
897 #endif
898 		if (err != DDI_PROP_SUCCESS) {
899 			rv = err;
900 			continue;
901 		}
902 
903 		if (*i32p >= ipft->ipft_min &&
904 		    *i32p <= ipft->ipft_max) {
905 			if (ipft->ipft_sz == sizeof (uint32_t)) {
906 				*ipft->ipft_pint = *i32p;
907 			} else if (ipft->ipft_sz == sizeof (uint64_t)) {
908 				*ipft->ipft_plong = *i32p;
909 			}
910 		}
911 
912 		ddi_prop_free(i32p);
913 	}
914 
915 	return (rv);
916 }
917