xref: /illumos-gate/usr/src/uts/common/io/ppp/sppptun/sppptun.c (revision a6e6969cf9cfe2070eae4cd6071f76b0fa4f539f)
1 /*
2  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
3  * Use is subject to license terms.
4  */
5 
6 #pragma ident	"%Z%%M%	%I%	%E% SMI"
7 
8 #include <sys/types.h>
9 #include <sys/debug.h>
10 #include <sys/param.h>
11 #include <sys/stat.h>
12 #include <sys/systm.h>
13 #include <sys/socket.h>
14 #include <sys/stream.h>
15 #include <sys/stropts.h>
16 #include <sys/errno.h>
17 #include <sys/time.h>
18 #include <sys/cmn_err.h>
19 #include <sys/sdt.h>
20 #include <sys/conf.h>
21 #include <sys/dlpi.h>
22 #include <sys/ddi.h>
23 #include <sys/kstat.h>
24 #include <sys/strsun.h>
25 #include <sys/bitmap.h>
26 #include <sys/sysmacros.h>
27 #include <sys/note.h>
28 #include <sys/policy.h>
29 #include <net/ppp_defs.h>
30 #include <net/pppio.h>
31 #include <net/sppptun.h>
32 #include <net/pppoe.h>
33 #include <netinet/in.h>
34 
35 #include "s_common.h"
36 #include "sppptun_mod.h"
37 #include "sppptun_impl.h"
38 
/* Defaults for the sppptun_init_cnt and sppptun_pctofmem tunables. */
#define	NTUN_PERCENT 5			/* Percent of memory to use */
#define	NTUN_INITIAL 16			/* Initial number of sppptun slots */
41 
/*
 * Description strings for the driver and module flavours.
 *
 * INTERNAL_BUILD is used to tag official Solaris sources.  Please do
 * not define it when building this software outside of Sun
 * Microsystems.  Without it, the ANU-tagged strings and a build time
 * stamp are compiled in instead.
 */
#ifndef INTERNAL_BUILD
const char sppptun_driver_description[] = "ANU PPP tundrv $Revision: $";
const char sppptun_module_description[] = "ANU PPP tunmod $Revision: $";

/* Build stamp; " DEBUG" is appended when DEBUG is defined. */
/* LINTED */
static const char buildtime[] = "Built " __DATE__ " at " __TIME__
#ifdef DEBUG
" DEBUG"
#endif
"\n";
#else	/* INTERNAL_BUILD */
/* MODINFO is limited to 32 characters. */
const char sppptun_driver_description[] = "PPP 4.0 tunnel driver";
const char sppptun_module_description[] = "PPP 4.0 tunnel module";
#endif	/* INTERNAL_BUILD */
62 
63 /*
64  * Tunable values; these are similar to the values used in ptms_conf.c.
65  * Override these settings via /etc/system.
66  */
67 uint_t	sppptun_cnt = 0;		/* Minimum number of tunnels */
68 size_t	sppptun_max_pty = 0;		/* Maximum number of tunnels */
69 uint_t	sppptun_init_cnt = NTUN_INITIAL; /* Initial number of tunnel slots */
70 uint_t	sppptun_pctofmem = NTUN_PERCENT; /* Percent of memory to use */
71 
72 typedef struct ether_dest_s {
73 	ether_addr_t addr;
74 	ushort_t type;
75 } ether_dest_t;
76 
/*
 * Assemble a 32-bit big-endian value from the four bytes at x.  The
 * bytes are read one at a time, so x need not be aligned.  Each byte is
 * widened to uint32_t (and masked to eight bits) before it is shifted;
 * shifting a promoted int would push a leading byte >= 0x80 into the
 * sign bit and sign-extend the result when it is widened.
 */
#define	GETLONG(x)	\
	((((uint32_t)(x)[0] & 0xff) << 24) | \
	(((uint32_t)(x)[1] & 0xff) << 16) | \
	(((uint32_t)(x)[2] & 0xff) << 8) | \
	((uint32_t)(x)[3] & 0xff))
79 
80 static const char *tll_kstats_list[] = { TLL_KSTATS_NAMES };
81 static const char *tcl_kstats_list[] = { TCL_KSTATS_NAMES };
82 
/*
 * Named-kstat counter helpers.
 *
 * KREF yields the 64-bit counter "vn" inside kstat block "m" of the
 * structure pointed to by "p"; KINCR and KDECR pre-increment and
 * pre-decrement it.  The pointer argument and the expansions are fully
 * parenthesized so that the macros stay correct for any pointer
 * expression and in any surrounding expression.
 */
#define	KREF(p, m, vn)	((p)->m.vn.value.ui64)
#define	KINCR(p, m, vn)	(++KREF(p, m, vn))
#define	KDECR(p, m, vn)	(--KREF(p, m, vn))

/* Lower-stream counters; these expect a local "tunll_t *tll" in scope. */
#define	KLINCR(vn)	KINCR(tll, tll_kstats, vn)
#define	KLDECR(vn)	KDECR(tll, tll_kstats, vn)

/* Client counters; these expect a local "tuncl_t *tcl" in scope. */
#define	KCINCR(vn)	KINCR(tcl, tcl_kstats, vn)
#define	KCDECR(vn)	KDECR(tcl, tcl_kstats, vn)
92 
93 static int	sppptun_open(queue_t *, dev_t *, int, int, cred_t *);
94 static int	sppptun_close(queue_t *);
95 static void	sppptun_urput(queue_t *, mblk_t *);
96 static void	sppptun_uwput(queue_t *, mblk_t *);
97 static int	sppptun_ursrv(queue_t *);
98 static int	sppptun_uwsrv(queue_t *);
99 static void	sppptun_lrput(queue_t *, mblk_t *);
100 static void	sppptun_lwput(queue_t *, mblk_t *);
101 
102 /*
103  * This is the hash table of clients.  Clients are the programs that
104  * open /dev/sppptun as a device.  There may be a large number of
105  * these; one per tunneled PPP session.
106  *
107  * Note: slots are offset from minor node value by 1 because
108  * vmem_alloc returns 0 for failure.
109  *
110  * The tcl_slots array entries are modified only when exclusive on
111  * both inner and outer perimeters.  This ensures that threads on
112  * shared perimeters always view this as unchanging memory with no
113  * need to lock around accesses.  (Specifically, the tcl_slots array
114  * is modified by entry to sppptun_open, sppptun_close, and _fini.)
115  */
116 static tuncl_t **tcl_slots = NULL;	/* Slots for tuncl_t */
117 static size_t tcl_nslots = 0;		/* Size of slot array */
118 static size_t tcl_minormax = 0;		/* Maximum number of tunnels */
119 static size_t tcl_inuse = 0;		/* # of tunnels currently allocated */
120 static krwlock_t tcl_rwlock;
121 static struct kmem_cache *tcl_cache = NULL;	/* tunnel cache */
122 static vmem_t *tcl_minor_arena = NULL; /* Arena for device minors */
123 
124 /*
125  * This is the simple list of lower layers.  For PPPoE, there is one
126  * of these per Ethernet interface.  Lower layers are established by
127  * "plumbing" -- using I_PLINK to connect the tunnel multiplexor to
128  * the physical interface.
129  */
130 static struct qelem tunll_list;
131 static int tunll_index;
132 
133 /* Test value; if all zeroes, then address hasn't been set yet. */
134 static const ether_addr_t zero_mac_addr = { 0, 0, 0, 0, 0, 0 };
135 
#define	MIN_SET_FASTPATH_UNITDATAREQ_SIZE	\
	(sizeof (dl_unitdata_req_t) + 4)

/* Values for the module_info structure below. */
#define	TUN_MI_ID	2104	/* officially allocated module ID */
#define	TUN_MI_MINPSZ	(0)
#define	TUN_MI_MAXPSZ	(PPP_MAXMTU)
#define	TUN_MI_LOWAT	(128)
#define	TUN_MI_HIWAT	(PPP_MTU * 8)
144 
145 static struct module_info sppptun_modinfo = {
146 	TUN_MI_ID,		/* mi_idnum */
147 	PPP_TUN_NAME,		/* mi_idname */
148 	TUN_MI_MINPSZ,		/* mi_minpsz */
149 	TUN_MI_MAXPSZ,		/* mi_maxpsz */
150 	TUN_MI_HIWAT,		/* mi_hiwat */
151 	TUN_MI_LOWAT		/* mi_lowat */
152 };
153 
154 static struct qinit sppptun_urinit = {
155 	(int (*)())sppptun_urput, /* qi_putp */
156 	sppptun_ursrv,		/* qi_srvp */
157 	sppptun_open,		/* qi_qopen */
158 	sppptun_close,		/* qi_qclose */
159 	NULL,			/* qi_qadmin */
160 	&sppptun_modinfo,	/* qi_minfo */
161 	NULL			/* qi_mstat */
162 };
163 
164 static struct qinit sppptun_uwinit = {
165 	(int (*)())sppptun_uwput, /* qi_putp */
166 	sppptun_uwsrv,		/* qi_srvp */
167 	NULL,			/* qi_qopen */
168 	NULL,			/* qi_qclose */
169 	NULL,			/* qi_qadmin */
170 	&sppptun_modinfo,	/* qi_minfo */
171 	NULL			/* qi_mstat */
172 };
173 
174 static struct qinit sppptun_lrinit = {
175 	(int (*)())sppptun_lrput, /* qi_putp */
176 	NULL,			/* qi_srvp */
177 	NULL,			/* qi_qopen */
178 	NULL,			/* qi_qclose */
179 	NULL,			/* qi_qadmin */
180 	&sppptun_modinfo,	/* qi_minfo */
181 	NULL			/* qi_mstat */
182 };
183 
184 static struct qinit sppptun_lwinit = {
185 	(int (*)())sppptun_lwput, /* qi_putp */
186 	NULL,			/* qi_srvp */
187 	NULL,			/* qi_qopen */
188 	NULL,			/* qi_qclose */
189 	NULL,			/* qi_qadmin */
190 	&sppptun_modinfo,	/* qi_minfo */
191 	NULL			/* qi_mstat */
192 };
193 
194 /*
195  * This is referenced in sppptun_mod.c.
196  */
197 struct streamtab sppptun_tab = {
198 	&sppptun_urinit,	/* st_rdinit */
199 	&sppptun_uwinit,	/* st_wrinit */
200 	&sppptun_lrinit,	/* st_muxrinit */
201 	&sppptun_lwinit		/* st_muxwrinit */
202 };
203 
204 /*
205  * Allocate another slot table twice as large as the original one
206  * (limited to global maximum).  Migrate all tunnels to the new slot
207  * table and free the original one.  Assumes we're exclusive on both
208  * inner and outer perimeters, and thus there are no other users of
209  * the tcl_slots array.
210  */
211 static minor_t
212 tcl_grow(void)
213 {
214 	minor_t old_size = tcl_nslots;
215 	minor_t new_size = 2 * old_size;
216 	tuncl_t **tcl_old = tcl_slots;
217 	tuncl_t **tcl_new;
218 	void  *vaddr;			/* vmem_add return value */
219 
220 	ASSERT(RW_LOCK_HELD(&tcl_rwlock));
221 
222 	/* Allocate new ptms array */
223 	tcl_new = kmem_zalloc(new_size * sizeof (tuncl_t *), KM_NOSLEEP);
224 	if (tcl_new == NULL)
225 		return ((minor_t)0);
226 
227 	/* Increase clone index space */
228 	vaddr = vmem_add(tcl_minor_arena, (void*)((uintptr_t)old_size + 1),
229 	    new_size - old_size, VM_NOSLEEP);
230 
231 	if (vaddr == NULL) {
232 		kmem_free(tcl_new, new_size * sizeof (tuncl_t *));
233 		return ((minor_t)0);
234 	}
235 
236 	/* Migrate tuncl_t entries to a new location */
237 	tcl_nslots = new_size;
238 	bcopy(tcl_old, tcl_new, old_size * sizeof (tuncl_t *));
239 	tcl_slots = tcl_new;
240 	kmem_free(tcl_old, old_size * sizeof (tuncl_t *));
241 
242 	/* Allocate minor number and return it */
243 	return ((minor_t)(uintptr_t)vmem_alloc(tcl_minor_arena, 1, VM_NOSLEEP));
244 }
245 
246 /*
247  * Allocate new minor number and tunnel client entry.  Returns the new
248  * entry or NULL if no memory or maximum number of entries reached.
249  * Assumes we're exclusive on both inner and outer perimeters, and
250  * thus there are no other users of the tcl_slots array.
251  */
252 static tuncl_t *
253 tuncl_alloc(int wantminor)
254 {
255 	minor_t dminor;
256 	tuncl_t *tcl = NULL;
257 
258 	rw_enter(&tcl_rwlock, RW_WRITER);
259 
260 	ASSERT(tcl_slots != NULL);
261 
262 	/*
263 	 * Always try to allocate new pty when sppptun_cnt minimum
264 	 * limit is not achieved. If it is achieved, the maximum is
265 	 * determined by either user-specified value (if it is
266 	 * non-zero) or our memory estimations - whatever is less.
267 	 */
268 	if (tcl_inuse >= sppptun_cnt) {
269 		/*
270 		 * When system achieved required minimum of tunnels,
271 		 * check for the denial of service limits.
272 		 *
273 		 * Get user-imposed maximum, if configured, or
274 		 * calculated memory constraint.
275 		 */
276 		size_t user_max = (sppptun_max_pty == 0 ? tcl_minormax :
277 		    min(sppptun_max_pty, tcl_minormax));
278 
279 		/* Do not try to allocate more than allowed */
280 		if (tcl_inuse >= user_max) {
281 			rw_exit(&tcl_rwlock);
282 			return (NULL);
283 		}
284 	}
285 	tcl_inuse++;
286 
287 	/*
288 	 * Allocate new minor number. If this fails, all slots are
289 	 * busy and we need to grow the hash.
290 	 */
291 	if (wantminor <= 0) {
292 		dminor = (minor_t)(uintptr_t)vmem_alloc(tcl_minor_arena, 1,
293 		    VM_NOSLEEP);
294 		if (dminor == 0) {
295 			/* Grow the cache and retry allocation */
296 			dminor = tcl_grow();
297 		}
298 	} else {
299 		dminor = (minor_t)(uintptr_t)vmem_xalloc(tcl_minor_arena, 1,
300 		    0, 0, 0, (void *)(uintptr_t)wantminor,
301 		    (void *)((uintptr_t)wantminor+1), VM_NOSLEEP);
302 		if (dminor != 0 && dminor != wantminor) {
303 			vmem_free(tcl_minor_arena, (void *)(uintptr_t)dminor,
304 			    1);
305 			dminor = 0;
306 		}
307 	}
308 
309 	if (dminor == 0) {
310 		/* Not enough memory now */
311 		tcl_inuse--;
312 		rw_exit(&tcl_rwlock);
313 		return (NULL);
314 	}
315 
316 	tcl = kmem_cache_alloc(tcl_cache, KM_NOSLEEP);
317 	if (tcl == NULL) {
318 		/* Not enough memory - this entry can't be used now. */
319 		vmem_free(tcl_minor_arena, (void *)(uintptr_t)dminor, 1);
320 		tcl_inuse--;
321 	} else {
322 		bzero(tcl, sizeof (*tcl));
323 		tcl->tcl_lsessid = dminor;
324 		ASSERT(tcl_slots[dminor - 1] == NULL);
325 		tcl_slots[dminor - 1] = tcl;
326 	}
327 
328 	rw_exit(&tcl_rwlock);
329 	return (tcl);
330 }
331 
332 /*
333  * This routine frees an upper level (client) stream by removing it
334  * from the minor number pool and freeing the state structure storage.
335  * Assumes we're exclusive on both inner and outer perimeters, and
336  * thus there are no other concurrent users of the tcl_slots array or
337  * of any entry in that array.
338  */
339 static void
340 tuncl_free(tuncl_t *tcl)
341 {
342 	rw_enter(&tcl_rwlock, RW_WRITER);
343 	ASSERT(tcl->tcl_lsessid <= tcl_nslots);
344 	ASSERT(tcl_slots[tcl->tcl_lsessid - 1] == tcl);
345 	ASSERT(tcl_inuse > 0);
346 	tcl_inuse--;
347 	tcl_slots[tcl->tcl_lsessid - 1] = NULL;
348 
349 	if (tcl->tcl_ksp != NULL) {
350 		kstat_delete(tcl->tcl_ksp);
351 		tcl->tcl_ksp = NULL;
352 	}
353 
354 	/* Return minor number to the pool of minors */
355 	vmem_free(tcl_minor_arena, (void *)(uintptr_t)tcl->tcl_lsessid, 1);
356 
357 	/* Return tuncl_t to the cache */
358 	kmem_cache_free(tcl_cache, tcl);
359 	rw_exit(&tcl_rwlock);
360 }
361 
362 /*
363  * Get tuncl_t structure by minor number.  Returns NULL when minor is
364  * out of range.  Note that lookup of tcl pointers (and use of those
365  * pointers) is safe because modification is done only when exclusive
366  * on both inner and outer perimeters.
367  */
368 static tuncl_t *
369 tcl_by_minor(minor_t dminor)
370 {
371 	tuncl_t *tcl = NULL;
372 
373 	if ((dminor >= 1) && (dminor <= tcl_nslots) && tcl_slots != NULL) {
374 		tcl = tcl_slots[dminor - 1];
375 	}
376 
377 	return (tcl);
378 }
379 
380 /*
381  * Set up kstats for upper or lower stream.
382  */
383 static kstat_t *
384 kstat_setup(kstat_named_t *knt, const char **names, int nstat,
385     const char *modname, int unitnum)
386 {
387 	kstat_t *ksp;
388 	char unitname[KSTAT_STRLEN];
389 	int i;
390 
391 	for (i = 0; i < nstat; i++) {
392 		kstat_set_string(knt[i].name, names[i]);
393 		knt[i].data_type = KSTAT_DATA_UINT64;
394 	}
395 	(void) sprintf(unitname, "%s" "%d", modname, unitnum);
396 	ksp = kstat_create(modname, unitnum, unitname, "net",
397 	    KSTAT_TYPE_NAMED, nstat, KSTAT_FLAG_VIRTUAL);
398 	if (ksp != NULL) {
399 		ksp->ks_data = (void *)knt;
400 		kstat_install(ksp);
401 	}
402 	return (ksp);
403 }
404 
405 /*
406  * sppptun_open()
407  *
408  * MT-Perimeters:
409  *    exclusive inner, exclusive outer.
410  *
411  * Description:
412  *    Common open procedure for module and driver.
413  */
414 static int
415 sppptun_open(queue_t *q, dev_t *devp, int oflag, int sflag, cred_t *credp)
416 {
417 	_NOTE(ARGUNUSED(oflag))
418 
419 	/* Allow a re-open */
420 	if (q->q_ptr != NULL)
421 		return (0);
422 
423 	/* In the off chance that we're on our way out, just return error */
424 	if (tcl_slots == NULL)
425 		return (EINVAL);
426 
427 	if (sflag & MODOPEN) {
428 		tunll_t *tll;
429 		char *cp;
430 
431 		/* ordinary users have no need to push this module */
432 		if (secpolicy_net_config(credp, B_FALSE) != 0)
433 			return (EPERM);
434 
435 		tll = kmem_zalloc(sizeof (tunll_t), KM_SLEEP);
436 
437 		tll->tll_index = tunll_index++;
438 
439 		tll->tll_wq = WR(q);
440 
441 		/* Insert at end of list */
442 		insque(&tll->tll_next, tunll_list.q_back);
443 		q->q_ptr = WR(q)->q_ptr = tll;
444 
445 		tll->tll_style = PTS_PPPOE;
446 		tll->tll_alen = sizeof (tll->tll_lcladdr.pta_pppoe);
447 
448 		tll->tll_ksp = kstat_setup((kstat_named_t *)&tll->tll_kstats,
449 		    tll_kstats_list, Dim(tll_kstats_list), "tll",
450 		    tll->tll_index);
451 
452 		/*
453 		 * Find the name of the driver somewhere beneath us.
454 		 * Note that we have no driver under us until after
455 		 * qprocson().
456 		 */
457 		qprocson(q);
458 		for (q = WR(q); q->q_next != NULL; q = q->q_next)
459 			;
460 		cp = NULL;
461 		if (q->q_qinfo != NULL && q->q_qinfo->qi_minfo != NULL)
462 			cp = q->q_qinfo->qi_minfo->mi_idname;
463 		if (cp != NULL && *cp == '\0')
464 			cp = NULL;
465 
466 		/* Set initial name; user should overwrite. */
467 		if (cp == NULL)
468 			(void) snprintf(tll->tll_name, sizeof (tll->tll_name),
469 			    PPP_TUN_NAME "%d", tll->tll_index);
470 		else
471 			(void) snprintf(tll->tll_name, sizeof (tll->tll_name),
472 			    "%s:tun%d", cp, tll->tll_index);
473 	} else {
474 		tuncl_t	*tcl;
475 
476 		ASSERT(devp != NULL);
477 		if (sflag & CLONEOPEN) {
478 			tcl = tuncl_alloc(-1);
479 		} else {
480 			minor_t mn;
481 
482 			/*
483 			 * Support of non-clone open (ie, mknod with
484 			 * defined minor number) is supported for
485 			 * testing purposes so that 'arbitrary' minor
486 			 * numbers can be used.
487 			 */
488 			mn = getminor(*devp);
489 			if (mn == 0 || (tcl = tcl_by_minor(mn)) != NULL) {
490 				return (EPERM);
491 			}
492 			tcl = tuncl_alloc(mn);
493 		}
494 		if (tcl == NULL)
495 			return (ENOSR);
496 		tcl->tcl_rq = q;		/* save read queue pointer */
497 		tcl->tcl_flags |= TCLF_ISCLIENT;	/* sanity check */
498 
499 		q->q_ptr = WR(q)->q_ptr = (caddr_t)tcl;
500 		*devp = makedevice(getmajor(*devp), tcl->tcl_lsessid);
501 
502 		tcl->tcl_ksp = kstat_setup((kstat_named_t *)&tcl->tcl_kstats,
503 		    tcl_kstats_list, Dim(tcl_kstats_list), "tcl",
504 		    tcl->tcl_lsessid);
505 
506 		qprocson(q);
507 	}
508 	return (0);
509 }
510 
511 /*
512  * Create an appropriate control message for this client event.
513  */
514 static mblk_t *
515 make_control(tuncl_t *tclabout, tunll_t *tllabout, int action, tuncl_t *tclto)
516 {
517 	struct ppptun_control *ptc;
518 	mblk_t *mp = allocb(sizeof (*ptc), BPRI_HI);
519 
520 	if (mp != NULL) {
521 		MTYPE(mp) = M_PROTO;
522 		ptc = (struct ppptun_control *)mp->b_wptr;
523 		mp->b_wptr += sizeof (*ptc);
524 		if (tclabout != NULL) {
525 			ptc->ptc_rsessid = tclabout->tcl_rsessid;
526 			ptc->ptc_address = tclabout->tcl_address;
527 		} else {
528 			bzero(ptc, sizeof (*ptc));
529 		}
530 		ptc->ptc_discrim = tclto->tcl_ctlval;
531 		ptc->ptc_action = action;
532 		(void) strncpy(ptc->ptc_name, tllabout->tll_name,
533 		    sizeof (ptc->ptc_name));
534 	}
535 	return (mp);
536 }
537 
538 /*
539  * Send an appropriate control message up this client session.
540  */
541 static void
542 send_control(tuncl_t *tclabout, tunll_t *tllabout, int action, tuncl_t *tcl)
543 {
544 	mblk_t *mp;
545 
546 	if (tcl->tcl_rq != NULL) {
547 		mp = make_control(tclabout, tllabout, action, tcl);
548 		if (mp != NULL) {
549 			KCINCR(cks_octrl_spec);
550 			putnext(tcl->tcl_rq, mp);
551 		}
552 	}
553 }
554 
555 /*
556  * If a lower stream is being unplumbed, then the upper streams
557  * connected to this lower stream must be disconnected.  This routine
558  * accomplishes this by sending M_HANGUP to data streams and M_PROTO
559  * messages to control streams.  This is called by vmem_walk, and
560  * handles a span of minor node numbers.
561  *
562  * No need to update lks_clients here; the lower stream is on its way
563  * out.
564  */
565 static void
566 tclvm_remove_tll(void *arg, void *firstv, size_t numv)
567 {
568 	tunll_t *tll = (tunll_t *)arg;
569 	int minorn = (int)(uintptr_t)firstv;
570 	int minormax = minorn + numv;
571 	tuncl_t *tcl;
572 	mblk_t *mp;
573 
574 	while (minorn < minormax) {
575 		tcl = tcl_slots[minorn - 1];
576 		ASSERT(tcl != NULL);
577 		if (tcl->tcl_data_tll == tll && tcl->tcl_rq != NULL) {
578 			tcl->tcl_data_tll = NULL;
579 			mp = allocb(0, BPRI_HI);
580 			if (mp != NULL) {
581 				MTYPE(mp) = M_HANGUP;
582 				putnext(tcl->tcl_rq, mp);
583 				if (tcl->tcl_ctrl_tll == tll)
584 					tcl->tcl_ctrl_tll = NULL;
585 			}
586 		}
587 		if (tcl->tcl_ctrl_tll == tll) {
588 			send_control(tcl, tll, PTCA_UNPLUMB, tcl);
589 			tcl->tcl_ctrl_tll = NULL;
590 		}
591 		minorn++;
592 	}
593 }
594 
595 /*
596  * sppptun_close()
597  *
598  * MT-Perimeters:
599  *    exclusive inner, exclusive outer.
600  *
601  * Description:
602  *    Common close procedure for module and driver.
603  */
604 static int
605 sppptun_close(queue_t *q)
606 {
607 	int err;
608 	void *qptr;
609 	tunll_t *tll;
610 	tuncl_t *tcl;
611 
612 	qptr = q->q_ptr;
613 
614 	err = 0;
615 	tll = qptr;
616 	if (!(tll->tll_flags & TLLF_NOTLOWER)) {
617 		/* q_next is set on modules */
618 		ASSERT(WR(q)->q_next != NULL);
619 
620 		/* unlink any clients using this lower layer. */
621 		vmem_walk(tcl_minor_arena, VMEM_ALLOC, tclvm_remove_tll, tll);
622 
623 		/* tell daemon that this has been removed. */
624 		if ((tcl = tll->tll_defcl) != NULL)
625 			send_control(NULL, tll, PTCA_UNPLUMB, tcl);
626 
627 		tll->tll_flags |= TLLF_CLOSING;
628 		while (!(tll->tll_flags & TLLF_CLOSE_DONE)) {
629 			qenable(tll->tll_wq);
630 			qwait(tll->tll_wq);
631 		}
632 		tll->tll_error = 0;
633 		while (!(tll->tll_flags & TLLF_SHUTDOWN_DONE)) {
634 			if (!qwait_sig(tll->tll_wq))
635 				break;
636 		}
637 
638 		qprocsoff(q);
639 		q->q_ptr = WR(q)->q_ptr = NULL;
640 		tll->tll_wq = NULL;
641 		remque(&tll->tll_next);
642 		err = tll->tll_error;
643 		if (tll->tll_ksp != NULL)
644 			kstat_delete(tll->tll_ksp);
645 		kmem_free(tll, sizeof (*tll));
646 	} else {
647 		tcl = qptr;
648 
649 		/* devices are end of line; no q_next. */
650 		ASSERT(WR(q)->q_next == NULL);
651 
652 		qprocsoff(q);
653 		DTRACE_PROBE1(sppptun__client__close, tuncl_t *, tcl);
654 		tcl->tcl_rq = NULL;
655 		q->q_ptr = WR(q)->q_ptr = NULL;
656 
657 		tll = TO_TLL(tunll_list.q_forw);
658 		while (tll != TO_TLL(&tunll_list)) {
659 			if (tll->tll_defcl == tcl)
660 				tll->tll_defcl = NULL;
661 			if (tll->tll_lastcl == tcl)
662 				tll->tll_lastcl = NULL;
663 			tll = TO_TLL(tll->tll_next);
664 		}
665 		/*
666 		 * If this was a normal session, then tell the daemon.
667 		 */
668 		if (!(tcl->tcl_flags & TCLF_DAEMON) &&
669 		    (tll = tcl->tcl_ctrl_tll) != NULL &&
670 		    tll->tll_defcl != NULL) {
671 			send_control(tcl, tll, PTCA_DISCONNECT,
672 			    tll->tll_defcl);
673 		}
674 
675 		/* Update statistics for references being dropped. */
676 		if ((tll = tcl->tcl_data_tll) != NULL) {
677 			KLDECR(lks_clients);
678 		}
679 		if ((tll = tcl->tcl_ctrl_tll) != NULL) {
680 			KLDECR(lks_clients);
681 		}
682 
683 		tuncl_free(tcl);
684 	}
685 
686 	return (err);
687 }
688 
689 /*
690  * Allocate and initialize a DLPI or TPI template of the specified
691  * length.
692  */
693 static mblk_t *
694 pi_alloc(size_t len, int prim)
695 {
696 	mblk_t	*mp;
697 
698 	mp = allocb(len, BPRI_MED);
699 	if (mp != NULL) {
700 		MTYPE(mp) = M_PROTO;
701 		mp->b_wptr = mp->b_rptr + len;
702 		bzero(mp->b_rptr, len);
703 		*(int *)mp->b_rptr = prim;
704 	}
705 	return (mp);
706 }
707 
708 #define	dlpi_alloc(l, p)	pi_alloc((l), (p))
709 
710 /*
711  * Prepend some room to an mblk.  Try to reuse the existing buffer, if
712  * at all possible, rather than allocating a new one.  (Fast-path
713  * output should be able to use this.)
714  *
715  * (XXX why isn't this a library function ...?)
716  */
717 static mblk_t *
718 prependb(mblk_t *mp, size_t len, size_t align)
719 {
720 	mblk_t *newmp;
721 
722 
723 	if (align == 0)
724 		align = 8;
725 	if (DB_REF(mp) > 1 || mp->b_datap->db_base+len > mp->b_rptr ||
726 	    ((uint_t)((uintptr_t)mp->b_rptr - len) % align) != 0) {
727 		if ((newmp = allocb(len, BPRI_LO)) == NULL) {
728 			freemsg(mp);
729 			return (NULL);
730 		}
731 		newmp->b_wptr = newmp->b_rptr + len;
732 		newmp->b_cont = mp;
733 		return (newmp);
734 	}
735 	mp->b_rptr -= len;
736 	return (mp);
737 }
738 
739 /*
740  * sppptun_outpkt()
741  *
742  * MT-Perimeters:
743  *	shared inner, shared outer (if called from sppptun_uwput),
744  *	exclusive inner, shared outer (if called from sppptun_uwsrv).
745  *
746  * Description:
747  *    Called from sppptun_uwput or sppptun_uwsrv when processing a
748  *    M_DATA, M_PROTO, or M_PCPROTO message.  For all cases, it tries
749  *    to prepare the data to be sent to the module below this driver
750  *    if there is a lower stream linked underneath.  If no lower
751  *    stream exists, then the data will be discarded and an ENXIO
752  *    error returned.
753  *
754  * Returns:
755  *	pointer to queue if caller should do putnext, otherwise
756  *	*mpp != NULL if message should be enqueued, otherwise
757  *	*mpp == NULL if message is gone.
758  */
759 static queue_t *
760 sppptun_outpkt(queue_t *q, mblk_t **mpp)
761 {
762 	mblk_t *mp;
763 	tuncl_t *tcl;
764 	tunll_t *tll;
765 	mblk_t *encmb;
766 	mblk_t *datamb;
767 	dl_unitdata_req_t *dur;
768 	queue_t *lowerq;
769 	poep_t *poep;
770 	int len;
771 	ether_dest_t *edestp;
772 	enum { luNone, luCopy, luSend } loopup;
773 	boolean_t isdata;
774 	struct ppptun_control *ptc;
775 
776 	mp = *mpp;
777 	tcl = q->q_ptr;
778 
779 	*mpp = NULL;
780 	if (!(tcl->tcl_flags & TCLF_ISCLIENT)) {
781 		merror(q, mp, EINVAL);
782 		return (NULL);
783 	}
784 
785 	isdata = (MTYPE(mp) == M_DATA);
786 	if (isdata) {
787 		tll = tcl->tcl_data_tll;
788 		ptc = NULL;
789 	} else {
790 		/*
791 		 * If data are unaligned or otherwise unsuitable, then
792 		 * discard.
793 		 */
794 		if (MBLKL(mp) != sizeof (*ptc) || DB_REF(mp) > 1 ||
795 		    !IS_P2ALIGNED(mp->b_rptr, sizeof (ptc))) {
796 			KCINCR(cks_octrl_drop);
797 			DTRACE_PROBE2(sppptun__bad__control, tuncl_t *, tcl,
798 			    mblk_t *, mp);
799 			merror(q, mp, EINVAL);
800 			return (NULL);
801 		}
802 		ptc = (struct ppptun_control *)mp->b_rptr;
803 
804 		/* Set stream discriminator value if not yet set. */
805 		if (tcl->tcl_ctlval == 0)
806 			tcl->tcl_ctlval = ptc->ptc_discrim;
807 
808 		/* If this is a test message, then reply to caller. */
809 		if (ptc->ptc_action == PTCA_TEST) {
810 			DTRACE_PROBE2(sppptun__test, tuncl_t *, tcl,
811 			    struct ppptun_control *, ptc);
812 			if (mp->b_cont != NULL) {
813 				freemsg(mp->b_cont);
814 				mp->b_cont = NULL;
815 			}
816 			ptc->ptc_discrim = tcl->tcl_ctlval;
817 			putnext(RD(q), mp);
818 			return (NULL);
819 		}
820 
821 		/* If this one isn't for us, then discard it */
822 		if (tcl->tcl_ctlval != ptc->ptc_discrim) {
823 			DTRACE_PROBE2(sppptun__bad__discrim, tuncl_t *, tcl,
824 			    struct ppptun_control *, ptc);
825 			freemsg(mp);
826 			return (NULL);
827 		}
828 
829 		/* Don't allow empty control packets. */
830 		if (mp->b_cont == NULL) {
831 			KCINCR(cks_octrl_drop);
832 			merror(q, mp, EINVAL);
833 			return (NULL);
834 		}
835 		tll = tcl->tcl_ctrl_tll;
836 	}
837 
838 	if (tll == NULL || (lowerq = tll->tll_wq) == NULL) {
839 		DTRACE_PROBE3(sppptun__cannot__send, tuncl_t *, tcl,
840 		    tunll_t *, tll, mblk_t *, mp);
841 		merror(q, mp, ENXIO);
842 		if (isdata) {
843 			tcl->tcl_stats.ppp_oerrors++;
844 		} else {
845 			KCINCR(cks_octrl_drop);
846 		}
847 		return (NULL);
848 	}
849 
850 	/*
851 	 * If so, then try to send it down.  The lower queue is only
852 	 * ever detached while holding an exclusive lock on the whole
853 	 * driver, so we can be confident that the lower queue is
854 	 * still there.
855 	 */
856 	if (!bcanputnext(lowerq, mp->b_band)) {
857 		DTRACE_PROBE3(sppptun__flow__control, tuncl_t *, tcl,
858 		    tunll_t *, tll, mblk_t *, mp);
859 		*mpp = mp;
860 		return (NULL);
861 	}
862 
863 	/*
864 	 * Note: DLPI and TPI expect that the first buffer contains
865 	 * the control (unitdata-req) header, destination address, and
866 	 * nothing else.  Any protocol headers must go in the next
867 	 * buffer.
868 	 */
869 	loopup = luNone;
870 	encmb = NULL;
871 	if (isdata) {
872 		if (tll->tll_alen != 0 &&
873 		    bcmp(&tcl->tcl_address, &tll->tll_lcladdr,
874 		    tll->tll_alen) == 0)
875 			loopup = luSend;
876 		switch (tll->tll_style) {
877 		case PTS_PPPOE:
878 			/* Strip address and control fields if present. */
879 			if (mp->b_rptr[0] == 0xFF) {
880 				if (MBLKL(mp) < 3) {
881 					encmb = msgpullup(mp, 3);
882 					freemsg(mp);
883 					if ((mp = encmb) == NULL)
884 						break;
885 				}
886 				mp->b_rptr += 2;
887 			}
888 			/* Broadcasting data is probably not a good idea. */
889 			if (tcl->tcl_address.pta_pppoe.ptma_mac[0] & 1)
890 				break;
891 			encmb = dlpi_alloc(sizeof (*dur) + sizeof (*edestp),
892 			    DL_UNITDATA_REQ);
893 			if (encmb == NULL)
894 				break;
895 
896 			dur = (dl_unitdata_req_t *)encmb->b_rptr;
897 			dur->dl_dest_addr_length = sizeof (*edestp);
898 			dur->dl_dest_addr_offset = sizeof (*dur);
899 			edestp = (ether_dest_t *)(dur + 1);
900 			ether_copy(tcl->tcl_address.pta_pppoe.ptma_mac,
901 			    edestp->addr);
902 			/* DLPI SAPs are in host byte order! */
903 			edestp->type = ETHERTYPE_PPPOES;
904 
905 			/* Make sure the protocol field isn't compressed. */
906 			len = (*mp->b_rptr & 1);
907 			mp = prependb(mp, sizeof (*poep) + len, POE_HDR_ALIGN);
908 			if (mp == NULL)
909 				break;
910 			poep = (poep_t *)mp->b_rptr;
911 			poep->poep_version_type = POE_VERSION;
912 			poep->poep_code = POECODE_DATA;
913 			poep->poep_session_id = htons(tcl->tcl_rsessid);
914 			poep->poep_length = htons(msgsize(mp) -
915 			    sizeof (*poep));
916 			if (len > 0)
917 				*(char *)(poep + 1) = '\0';
918 			break;
919 
920 		default:
921 			ASSERT(0);
922 		}
923 	} else {
924 		/*
925 		 * Control side encapsulation.
926 		 */
927 		if (bcmp(&ptc->ptc_address, &tll->tll_lcladdr, tll->tll_alen)
928 		    == 0)
929 			loopup = luSend;
930 		datamb = mp->b_cont;
931 		switch (tll->tll_style) {
932 		case PTS_PPPOE:
933 			/*
934 			 * Don't allow a loopback session to establish
935 			 * itself.  PPPoE is broken; it uses only one
936 			 * session ID for both data directions, so the
937 			 * loopback data path can simply never work.
938 			 */
939 			if (loopup == luSend &&
940 			    ((poep_t *)datamb->b_rptr)->poep_code ==
941 			    POECODE_PADR)
942 				break;
943 			encmb = dlpi_alloc(sizeof (*dur) + sizeof (*edestp),
944 			    DL_UNITDATA_REQ);
945 			if (encmb == NULL)
946 				break;
947 			dur = (dl_unitdata_req_t *)encmb->b_rptr;
948 			dur->dl_dest_addr_length = sizeof (*edestp);
949 			dur->dl_dest_addr_offset = sizeof (*dur);
950 
951 			edestp = (ether_dest_t *)(dur + 1);
952 			/* DLPI SAPs are in host byte order! */
953 			edestp->type = ETHERTYPE_PPPOED;
954 
955 			/*
956 			 * If destination isn't set yet, then we have to
957 			 * allow anything at all.  Otherwise, force use
958 			 * of configured peer address.
959 			 */
960 			if (bcmp(tcl->tcl_address.pta_pppoe.ptma_mac,
961 			    zero_mac_addr, sizeof (zero_mac_addr)) == 0 ||
962 			    (tcl->tcl_flags & TCLF_DAEMON)) {
963 				ether_copy(ptc->ptc_address.pta_pppoe.ptma_mac,
964 				    edestp->addr);
965 			} else {
966 				ether_copy(tcl->tcl_address.pta_pppoe.ptma_mac,
967 				    edestp->addr);
968 			}
969 			/* Reflect multicast/broadcast back up. */
970 			if (edestp->addr[0] & 1)
971 				loopup = luCopy;
972 			break;
973 
974 		case PTS_PPTP:
975 			/*
976 			 * PPTP's control side is actually done over
977 			 * separate TCP connections.
978 			 */
979 		default:
980 			ASSERT(0);
981 		}
982 		freeb(mp);
983 		mp = datamb;
984 	}
985 	if (mp == NULL || encmb == NULL) {
986 		DTRACE_PROBE1(sppptun__output__failure, tuncl_t *, tcl);
987 		freemsg(mp);
988 		freemsg(encmb);
989 		if (isdata) {
990 			tcl->tcl_stats.ppp_oerrors++;
991 		} else {
992 			KCINCR(cks_octrl_drop);
993 			KLINCR(lks_octrl_drop);
994 		}
995 		lowerq = NULL;
996 	} else {
997 		if (isdata) {
998 			tcl->tcl_stats.ppp_obytes += msgsize(mp);
999 			tcl->tcl_stats.ppp_opackets++;
1000 		} else {
1001 			KCINCR(cks_octrls);
1002 			KLINCR(lks_octrls);
1003 		}
1004 		if (encmb != mp)
1005 			encmb->b_cont = mp;
1006 		switch (loopup) {
1007 		case luNone:
1008 			*mpp = encmb;
1009 			break;
1010 		case luCopy:
1011 			mp = copymsg(encmb);
1012 			if (mp != NULL)
1013 				sppptun_urput(RD(lowerq), mp);
1014 			*mpp = encmb;
1015 			break;
1016 		case luSend:
1017 			sppptun_urput(RD(lowerq), encmb);
1018 			lowerq = NULL;
1019 			break;
1020 		}
1021 	}
1022 	return (lowerq);
1023 }
1024 
1025 /*
1026  * Enqueue a message to be sent when the lower stream is closed.  This
1027  * is done so that we're guaranteed that we always have the necessary
1028  * resources to properly detach ourselves from the system.  (If we
1029  * waited until the close was done to allocate these messages, then
1030  * the message allocation could fail, and we'd be unable to properly
1031  * detach.)
1032  */
1033 static void
1034 save_for_close(tunll_t *tll, mblk_t *mp)
1035 {
1036 	mblk_t *onc;
1037 
1038 	if ((onc = tll->tll_onclose) == NULL)
1039 		tll->tll_onclose = mp;
1040 	else {
1041 		while (onc->b_next != NULL)
1042 			onc = onc->b_next;
1043 		onc->b_next = mp;
1044 	}
1045 }
1046 
1047 /*
1048  * Given the lower stream name, locate the state structure.  Note that
1049  * lookup of tcl pointers (and use of those pointers) is safe because
1050  * modification is done only when exclusive on both inner and outer
1051  * perimeters.
1052  */
1053 static tunll_t *
1054 tll_lookup_on_name(char *dname)
1055 {
1056 	tunll_t *tll;
1057 
1058 	tll = TO_TLL(tunll_list.q_forw);
1059 	for (; tll != TO_TLL(&tunll_list); tll = TO_TLL(tll->tll_next))
1060 		if (strcmp(dname, tll->tll_name) == 0)
1061 			return (tll);
1062 	return (NULL);
1063 }
1064 
1065 /*
1066  * sppptun_inner_ioctl()
1067  *
1068  * MT-Perimeters:
1069  *    exclusive inner, shared outer.
1070  *
1071  * Description:
1072  *    Called by qwriter from sppptun_ioctl as the result of receiving
1073  *    a handled ioctl.
1074  */
1075 static void
1076 sppptun_inner_ioctl(queue_t *q, mblk_t *mp)
1077 {
1078 	struct iocblk *iop;
1079 	int rc = 0;
1080 	int len = 0;
1081 	int i;
1082 	tuncl_t *tcl;
1083 	tunll_t *tll;
1084 	union ppptun_name *ptn;
1085 	struct ppptun_info *pti;
1086 	struct ppptun_peer *ptp;
1087 	mblk_t *mptmp;
1088 	ppptun_atype *pap;
1089 	struct ppp_stats64 *psp;
1090 
1091 	iop = (struct iocblk *)mp->b_rptr;
1092 	tcl = NULL;
1093 	tll = q->q_ptr;
1094 	if (tll->tll_flags & TLLF_NOTLOWER) {
1095 		tcl = (tuncl_t *)tll;
1096 		tll = NULL;
1097 	}
1098 
1099 	DTRACE_PROBE3(sppptun__ioctl, tuncl_t *, tcl, tunll_t *, tll,
1100 	    struct iocblk *, iop);
1101 
1102 	switch (iop->ioc_cmd) {
1103 	case PPPIO_DEBUG:
1104 		/*
1105 		 * Debug requests are now ignored; use dtrace or wireshark
1106 		 * instead.
1107 		 */
1108 		break;
1109 
1110 	case PPPIO_GETSTAT:
1111 		rc = EINVAL;
1112 		break;
1113 
1114 	case PPPIO_GETSTAT64:
1115 		/* Client (device) side only */
1116 		if (tcl == NULL) {
1117 			rc = EINVAL;
1118 			break;
1119 		}
1120 		mptmp = allocb(sizeof (*psp), BPRI_HI);
1121 		if (mptmp == NULL) {
1122 			rc = ENOSR;
1123 			break;
1124 		}
1125 		freemsg(mp->b_cont);
1126 		mp->b_cont = mptmp;
1127 
1128 		psp = (struct ppp_stats64 *)mptmp->b_wptr;
1129 		bzero((caddr_t)psp, sizeof (*psp));
1130 		psp->p = tcl->tcl_stats;
1131 
1132 		len = sizeof (*psp);
1133 		break;
1134 
1135 	case PPPTUN_SNAME:
1136 		/* This is done on the *module* (lower level) side. */
1137 		if (tll == NULL || mp->b_cont == NULL ||
1138 		    iop->ioc_count != sizeof (*ptn) ||
1139 		    *mp->b_cont->b_rptr == '\0') {
1140 			rc = EINVAL;
1141 			break;
1142 		}
1143 
1144 		ptn = (union ppptun_name *)mp->b_cont->b_rptr;
1145 		ptn->ptn_name[sizeof (ptn->ptn_name) - 1] = '\0';
1146 
1147 		if ((tll = tll_lookup_on_name(ptn->ptn_name)) != NULL) {
1148 			rc = EEXIST;
1149 			break;
1150 		}
1151 		tll = (tunll_t *)q->q_ptr;
1152 		(void) strcpy(tll->tll_name, ptn->ptn_name);
1153 		break;
1154 
1155 	case PPPTUN_GNAME:
1156 		/* This is done on the *module* (lower level) side. */
1157 		if (tll == NULL) {
1158 			rc = EINVAL;
1159 			break;
1160 		}
1161 		if (mp->b_cont != NULL)
1162 			freemsg(mp->b_cont);
1163 		if ((mp->b_cont = allocb(sizeof (*ptn), BPRI_HI)) == NULL) {
1164 			rc = ENOSR;
1165 			break;
1166 		}
1167 		ptn = (union ppptun_name *)mp->b_cont->b_rptr;
1168 		bcopy(tll->tll_name, ptn->ptn_name, sizeof (ptn->ptn_name));
1169 		len = sizeof (*ptn);
1170 		break;
1171 
1172 	case PPPTUN_SINFO:
1173 	case PPPTUN_GINFO:
1174 		/* Either side */
1175 		if (mp->b_cont == NULL || iop->ioc_count != sizeof (*pti)) {
1176 			rc = EINVAL;
1177 			break;
1178 		}
1179 		pti = (struct ppptun_info *)mp->b_cont->b_rptr;
1180 		if (pti->pti_name[0] != '\0')
1181 			tll = tll_lookup_on_name(pti->pti_name);
1182 		if (tll == NULL) {
1183 			/* Driver (client) side must have name */
1184 			if (tcl != NULL && pti->pti_name[0] == '\0')
1185 				rc = EINVAL;
1186 			else
1187 				rc = ESRCH;
1188 			break;
1189 		}
1190 		if (iop->ioc_cmd == PPPTUN_GINFO) {
1191 			pti->pti_muxid = tll->tll_muxid;
1192 			pti->pti_style = tll->tll_style;
1193 			len = sizeof (*pti);
1194 			break;
1195 		}
1196 		tll->tll_muxid = pti->pti_muxid;
1197 		tll->tll_style = pti->pti_style;
1198 		switch (tll->tll_style) {
1199 		case PTS_PPPOE:		/* DLPI type */
1200 			tll->tll_alen = sizeof (tll->tll_lcladdr.pta_pppoe);
1201 			mptmp = dlpi_alloc(sizeof (dl_unbind_req_t),
1202 			    DL_UNBIND_REQ);
1203 			if (mptmp == NULL) {
1204 				rc = ENOSR;
1205 				break;
1206 			}
1207 			save_for_close(tll, mptmp);
1208 			mptmp = dlpi_alloc(sizeof (dl_detach_req_t),
1209 			    DL_DETACH_REQ);
1210 			if (mptmp == NULL) {
1211 				rc = ENOSR;
1212 				break;
1213 			}
1214 			save_for_close(tll, mptmp);
1215 			break;
1216 		default:
1217 			tll->tll_style = PTS_NONE;
1218 			tll->tll_alen = 0;
1219 			rc = EINVAL;
1220 			break;
1221 		}
1222 		break;
1223 
1224 	case PPPTUN_GNNAME:
1225 		/* This can be done on either side. */
1226 		if (mp->b_cont == NULL || iop->ioc_count < sizeof (uint32_t)) {
1227 			rc = EINVAL;
1228 			break;
1229 		}
1230 		ptn = (union ppptun_name *)mp->b_cont->b_rptr;
1231 		i = ptn->ptn_index;
1232 		tll = TO_TLL(tunll_list.q_forw);
1233 		while (--i >= 0 && tll != TO_TLL(&tunll_list))
1234 			tll = TO_TLL(tll->tll_next);
1235 		if (tll != TO_TLL(&tunll_list)) {
1236 			bcopy(tll->tll_name, ptn->ptn_name,
1237 			    sizeof (ptn->ptn_name));
1238 		} else {
1239 			bzero(ptn, sizeof (*ptn));
1240 		}
1241 		len = sizeof (*ptn);
1242 		break;
1243 
1244 	case PPPTUN_LCLADDR:
1245 		/* This is done on the *module* (lower level) side. */
1246 		if (tll == NULL || mp->b_cont == NULL) {
1247 			rc = EINVAL;
1248 			break;
1249 		}
1250 
1251 		pap = &tll->tll_lcladdr;
1252 		len = tll->tll_alen;
1253 		if (len == 0 || len > iop->ioc_count) {
1254 			rc = EINVAL;
1255 			break;
1256 		}
1257 		bcopy(mp->b_cont->b_rptr, pap, len);
1258 		len = 0;
1259 		break;
1260 
1261 	case PPPTUN_SPEER:
1262 		/* Client (device) side only; before SDATA */
1263 		if (tcl == NULL || mp->b_cont == NULL ||
1264 		    iop->ioc_count != sizeof (*ptp)) {
1265 			rc = EINVAL;
1266 			break;
1267 		}
1268 		if (tcl->tcl_data_tll != NULL) {
1269 			rc = EINVAL;
1270 			break;
1271 		}
1272 		ptp = (struct ppptun_peer *)mp->b_cont->b_rptr;
1273 		DTRACE_PROBE2(sppptun__speer, tuncl_t *, tcl,
1274 		    struct ppptun_peer *, ptp);
1275 		/* Once set, the style cannot change. */
1276 		if (tcl->tcl_style != PTS_NONE &&
1277 		    tcl->tcl_style != ptp->ptp_style) {
1278 			rc = EINVAL;
1279 			break;
1280 		}
1281 		if (ptp->ptp_flags & PTPF_DAEMON) {
1282 			/* User requests registration for tunnel 0 */
1283 			if ((tcl->tcl_flags & TCLF_SPEER_DONE) ||
1284 			    ptp->ptp_ltunid != 0 || ptp->ptp_rtunid != 0 ||
1285 			    ptp->ptp_lsessid != 0 || ptp->ptp_rsessid != 0) {
1286 				rc = EINVAL;
1287 				break;
1288 			}
1289 			tcl->tcl_flags |= TCLF_DAEMON;
1290 		} else {
1291 			/* Normal client connection */
1292 			if (tcl->tcl_flags & TCLF_DAEMON) {
1293 				rc = EINVAL;
1294 				break;
1295 			}
1296 			if (ptp->ptp_lsessid != 0 &&
1297 			    ptp->ptp_lsessid != tcl->tcl_lsessid) {
1298 				rc = EINVAL;
1299 				break;
1300 			}
1301 			/*
1302 			 * If we're reassigning the peer data, then
1303 			 * the previous assignment must have been for
1304 			 * a client control connection.  Check that.
1305 			 */
1306 			if ((tcl->tcl_flags & TCLF_SPEER_DONE) &&
1307 			    ((tcl->tcl_ltunid != 0 &&
1308 			    tcl->tcl_ltunid != ptp->ptp_ltunid) ||
1309 			    (tcl->tcl_rtunid != 0 &&
1310 			    tcl->tcl_rtunid != ptp->ptp_rtunid) ||
1311 			    (tcl->tcl_rsessid != 0 &&
1312 			    tcl->tcl_rsessid != ptp->ptp_rsessid))) {
1313 				rc = EINVAL;
1314 				break;
1315 			}
1316 			if ((tcl->tcl_ltunid = ptp->ptp_ltunid) == 0 &&
1317 			    tcl->tcl_style == PTS_L2FTP)
1318 				tcl->tcl_ltunid = ptp->ptp_lsessid;
1319 			tcl->tcl_rtunid = ptp->ptp_rtunid;
1320 			tcl->tcl_rsessid = ptp->ptp_rsessid;
1321 		}
1322 		tcl->tcl_flags |= TCLF_SPEER_DONE;
1323 		tcl->tcl_style = ptp->ptp_style;
1324 		tcl->tcl_address = ptp->ptp_address;
1325 		goto fill_in_peer;
1326 
1327 	case PPPTUN_GPEER:
1328 		/* Client (device) side only */
1329 		if (tcl == NULL) {
1330 			rc = EINVAL;
1331 			break;
1332 		}
1333 		if (mp->b_cont != NULL)
1334 			freemsg(mp->b_cont);
1335 		mp->b_cont = allocb(sizeof (*ptp), BPRI_HI);
1336 		if (mp->b_cont == NULL) {
1337 			rc = ENOSR;
1338 			break;
1339 		}
1340 		ptp = (struct ppptun_peer *)mp->b_cont->b_rptr;
1341 	fill_in_peer:
1342 		ptp->ptp_style = tcl->tcl_style;
1343 		ptp->ptp_flags = (tcl->tcl_flags & TCLF_DAEMON) ? PTPF_DAEMON :
1344 		    0;
1345 		ptp->ptp_ltunid = tcl->tcl_ltunid;
1346 		ptp->ptp_rtunid = tcl->tcl_rtunid;
1347 		ptp->ptp_lsessid = tcl->tcl_lsessid;
1348 		ptp->ptp_rsessid = tcl->tcl_rsessid;
1349 		ptp->ptp_address = tcl->tcl_address;
1350 		len = sizeof (*ptp);
1351 		break;
1352 
1353 	case PPPTUN_SDATA:
1354 	case PPPTUN_SCTL:
1355 		/* Client (device) side only; must do SPEER first */
1356 		if (tcl == NULL || mp->b_cont == NULL ||
1357 		    iop->ioc_count != sizeof (*ptn) ||
1358 		    *mp->b_cont->b_rptr == '\0') {
1359 			rc = EINVAL;
1360 			break;
1361 		}
1362 		if (!(tcl->tcl_flags & TCLF_SPEER_DONE)) {
1363 			rc = EINVAL;
1364 			break;
1365 		}
1366 		ptn = (union ppptun_name *)mp->b_cont->b_rptr;
1367 		ptn->ptn_name[sizeof (ptn->ptn_name) - 1] = '\0';
1368 		tll = tll_lookup_on_name(ptn->ptn_name);
1369 		if (tll == NULL) {
1370 			rc = ESRCH;
1371 			break;
1372 		}
1373 		if (tll->tll_style != tcl->tcl_style) {
1374 			rc = ENXIO;
1375 			break;
1376 		}
1377 		if (iop->ioc_cmd == PPPTUN_SDATA) {
1378 			if (tcl->tcl_data_tll != NULL) {
1379 				rc = EEXIST;
1380 				break;
1381 			}
1382 			/* server daemons cannot use regular data */
1383 			if (tcl->tcl_flags & TCLF_DAEMON) {
1384 				rc = EINVAL;
1385 				break;
1386 			}
1387 			tcl->tcl_data_tll = tll;
1388 		} else if (tcl->tcl_flags & TCLF_DAEMON) {
1389 			if (tll->tll_defcl != NULL && tll->tll_defcl != tcl) {
1390 				rc = EEXIST;
1391 				break;
1392 			}
1393 			tll->tll_defcl = tcl;
1394 			if (tcl->tcl_ctrl_tll != NULL) {
1395 				KDECR(tcl->tcl_ctrl_tll, tll_kstats,
1396 				    lks_clients);
1397 			}
1398 			tcl->tcl_ctrl_tll = tll;
1399 		} else {
1400 			if (tcl->tcl_ctrl_tll != NULL) {
1401 				rc = EEXIST;
1402 				break;
1403 			}
1404 			tcl->tcl_ctrl_tll = tll;
1405 		}
1406 		KLINCR(lks_clients);
1407 		break;
1408 
1409 	case PPPTUN_GDATA:
1410 	case PPPTUN_GCTL:
1411 		/* Client (device) side only */
1412 		if (tcl == NULL) {
1413 			rc = EINVAL;
1414 			break;
1415 		}
1416 		if (mp->b_cont != NULL)
1417 			freemsg(mp->b_cont);
1418 		mp->b_cont = allocb(sizeof (*ptn), BPRI_HI);
1419 		if (mp->b_cont == NULL) {
1420 			rc = ENOSR;
1421 			break;
1422 		}
1423 		ptn = (union ppptun_name *)mp->b_cont->b_rptr;
1424 		if (iop->ioc_cmd == PPPTUN_GDATA)
1425 			tll = tcl->tcl_data_tll;
1426 		else
1427 			tll = tcl->tcl_ctrl_tll;
1428 		if (tll == NULL)
1429 			bzero(ptn, sizeof (*ptn));
1430 		else
1431 			bcopy(tll->tll_name, ptn->ptn_name,
1432 			    sizeof (ptn->ptn_name));
1433 		len = sizeof (*ptn);
1434 		break;
1435 
1436 	case PPPTUN_DCTL:
1437 		/* Client (device) side daemon mode only */
1438 		if (tcl == NULL || mp->b_cont == NULL ||
1439 		    iop->ioc_count != sizeof (*ptn) ||
1440 		    !(tcl->tcl_flags & TCLF_DAEMON)) {
1441 			rc = EINVAL;
1442 			break;
1443 		}
1444 		ptn = (union ppptun_name *)mp->b_cont->b_rptr;
1445 		ptn->ptn_name[sizeof (ptn->ptn_name) - 1] = '\0';
1446 		tll = tll_lookup_on_name(ptn->ptn_name);
1447 		if (tll == NULL || tll->tll_defcl != tcl) {
1448 			rc = ESRCH;
1449 			break;
1450 		}
1451 		tll->tll_defcl = NULL;
1452 		break;
1453 
1454 	default:
1455 		/* Caller should already have checked command value */
1456 		ASSERT(0);
1457 	}
1458 	if (rc != 0) {
1459 		miocnak(q, mp, 0, rc);
1460 	} else {
1461 		if (len > 0)
1462 			mp->b_cont->b_wptr = mp->b_cont->b_rptr + len;
1463 		miocack(q, mp, len, 0);
1464 	}
1465 }
1466 
1467 /*
1468  * sppptun_ioctl()
1469  *
1470  * MT-Perimeters:
1471  *    shared inner, shared outer.
1472  *
1473  * Description:
1474  *    Called by sppptun_uwput as the result of receiving a M_IOCTL command.
1475  */
1476 static void
1477 sppptun_ioctl(queue_t *q, mblk_t *mp)
1478 {
1479 	struct iocblk *iop;
1480 	int rc = 0;
1481 	int len = 0;
1482 	uint32_t val = 0;
1483 	tunll_t *tll;
1484 
1485 	iop = (struct iocblk *)mp->b_rptr;
1486 
1487 	switch (iop->ioc_cmd) {
1488 	case PPPIO_DEBUG:
1489 	case PPPIO_GETSTAT:
1490 	case PPPIO_GETSTAT64:
1491 	case PPPTUN_SNAME:
1492 	case PPPTUN_GNAME:
1493 	case PPPTUN_SINFO:
1494 	case PPPTUN_GINFO:
1495 	case PPPTUN_GNNAME:
1496 	case PPPTUN_LCLADDR:
1497 	case PPPTUN_SPEER:
1498 	case PPPTUN_GPEER:
1499 	case PPPTUN_SDATA:
1500 	case PPPTUN_GDATA:
1501 	case PPPTUN_SCTL:
1502 	case PPPTUN_GCTL:
1503 	case PPPTUN_DCTL:
1504 		qwriter(q, mp, sppptun_inner_ioctl, PERIM_INNER);
1505 		return;
1506 
1507 	case PPPIO_GCLEAN:	/* always clean */
1508 		val = RCV_B7_1 | RCV_B7_0 | RCV_ODDP | RCV_EVNP;
1509 		len = sizeof (uint32_t);
1510 		break;
1511 
1512 	case PPPIO_GTYPE:	/* we look like an async driver. */
1513 		val = PPPTYP_AHDLC;
1514 		len = sizeof (uint32_t);
1515 		break;
1516 
1517 	case PPPIO_CFLAGS:	/* never compress headers */
1518 		val = 0;
1519 		len = sizeof (uint32_t);
1520 		break;
1521 
1522 		/* quietly ack PPP things we don't need to do. */
1523 	case PPPIO_XFCS:
1524 	case PPPIO_RFCS:
1525 	case PPPIO_XACCM:
1526 	case PPPIO_RACCM:
1527 	case PPPIO_LASTMOD:
1528 	case PPPIO_MUX:
1529 	case I_PLINK:
1530 	case I_PUNLINK:
1531 	case I_LINK:
1532 	case I_UNLINK:
1533 		break;
1534 
1535 	default:
1536 		tll = (tunll_t *)q->q_ptr;
1537 		if (!(tll->tll_flags & TLLF_NOTLOWER)) {
1538 			/* module side; pass this through. */
1539 			putnext(q, mp);
1540 			return;
1541 		}
1542 		rc = EINVAL;
1543 		break;
1544 	}
1545 	if (rc == 0 && len == sizeof (uint32_t)) {
1546 		if (mp->b_cont != NULL)
1547 			freemsg(mp->b_cont);
1548 		mp->b_cont = allocb(sizeof (uint32_t), BPRI_HI);
1549 		if (mp->b_cont == NULL) {
1550 			rc = ENOSR;
1551 		} else {
1552 			*(uint32_t *)mp->b_cont->b_wptr = val;
1553 			mp->b_cont->b_wptr += sizeof (uint32_t);
1554 		}
1555 	}
1556 	if (rc == 0) {
1557 		miocack(q, mp, len, 0);
1558 	} else {
1559 		miocnak(q, mp, 0, rc);
1560 	}
1561 }
1562 
1563 /*
1564  * sppptun_inner_mctl()
1565  *
1566  * MT-Perimeters:
1567  *    exclusive inner, shared outer.
1568  *
1569  * Description:
1570  *    Called by qwriter (via sppptun_uwput) as the result of receiving
1571  *    an M_CTL.  Called only on the client (driver) side.
1572  */
1573 static void
1574 sppptun_inner_mctl(queue_t *q, mblk_t *mp)
1575 {
1576 	int msglen;
1577 	tuncl_t *tcl;
1578 
1579 	tcl = q->q_ptr;
1580 
1581 	if (!(tcl->tcl_flags & TCLF_ISCLIENT)) {
1582 		freemsg(mp);
1583 		return;
1584 	}
1585 
1586 	msglen = MBLKL(mp);
1587 	switch (*mp->b_rptr) {
1588 	case PPPCTL_UNIT:
1589 		if (msglen == 2)
1590 			tcl->tcl_unit = mp->b_rptr[1];
1591 		else if (msglen == 8)
1592 			tcl->tcl_unit = ((uint32_t *)mp->b_rptr)[1];
1593 		break;
1594 	}
1595 	freemsg(mp);
1596 }
1597 
1598 /*
1599  * sppptun_uwput()
1600  *
1601  * MT-Perimeters:
1602  *    shared inner, shared outer.
1603  *
1604  * Description:
1605  *	Regular output data and controls pass through here.
1606  */
1607 static void
1608 sppptun_uwput(queue_t *q, mblk_t *mp)
1609 {
1610 	queue_t *nextq;
1611 	tuncl_t *tcl;
1612 
1613 	ASSERT(q->q_ptr != NULL);
1614 
1615 	switch (MTYPE(mp)) {
1616 	case M_DATA:
1617 	case M_PROTO:
1618 	case M_PCPROTO:
1619 		if (q->q_first == NULL &&
1620 		    (nextq = sppptun_outpkt(q, &mp)) != NULL) {
1621 			putnext(nextq, mp);
1622 		} else if (mp != NULL && !putq(q, mp)) {
1623 			freemsg(mp);
1624 		}
1625 		break;
1626 	case M_IOCTL:
1627 		sppptun_ioctl(q, mp);
1628 		break;
1629 	case M_CTL:
1630 		qwriter(q, mp, sppptun_inner_mctl, PERIM_INNER);
1631 		break;
1632 	default:
1633 		tcl = (tuncl_t *)q->q_ptr;
1634 		/*
1635 		 * If we're the driver, then discard unknown junk.
1636 		 * Otherwise, if we're the module, then forward along.
1637 		 */
1638 		if (tcl->tcl_flags & TCLF_ISCLIENT)
1639 			freemsg(mp);
1640 		else
1641 			putnext(q, mp);
1642 		break;
1643 	}
1644 }
1645 
1646 /*
1647  * Send a DLPI/TPI control message to the driver but make sure there
1648  * is only one outstanding message.  Uses tll_msg_pending to tell when
1649  * it must queue.  sppptun_urput calls message_done() when an ACK or a
1650  * NAK is received to process the next queued message.
1651  */
1652 static void
1653 message_send(tunll_t *tll, mblk_t *mp)
1654 {
1655 	mblk_t **mpp;
1656 
1657 	if (tll->tll_msg_pending) {
1658 		/* Must queue message. Tail insertion */
1659 		mpp = &tll->tll_msg_deferred;
1660 		while (*mpp != NULL)
1661 			mpp = &((*mpp)->b_next);
1662 		*mpp = mp;
1663 		return;
1664 	}
1665 	tll->tll_msg_pending = 1;
1666 	putnext(tll->tll_wq, mp);
1667 }
1668 
1669 /*
1670  * Called when an DLPI/TPI control message has been acked or nacked to
1671  * send down the next queued message (if any).
1672  */
1673 static void
1674 message_done(tunll_t *tll)
1675 {
1676 	mblk_t *mp;
1677 
1678 	ASSERT(tll->tll_msg_pending);
1679 	tll->tll_msg_pending = 0;
1680 	mp = tll->tll_msg_deferred;
1681 	if (mp != NULL) {
1682 		tll->tll_msg_deferred = mp->b_next;
1683 		mp->b_next = NULL;
1684 		tll->tll_msg_pending = 1;
1685 		putnext(tll->tll_wq, mp);
1686 	}
1687 }
1688 
1689 /*
1690  * Send down queued "close" messages to lower stream.  These were
1691  * enqueued right after the stream was originally allocated, when the
1692  * tll_style was set by PPPTUN_SINFO.
1693  */
1694 static int
1695 tll_close_req(tunll_t *tll)
1696 {
1697 	mblk_t *mb, *mbnext;
1698 
1699 	if ((mb = tll->tll_onclose) == NULL)
1700 		tll->tll_flags |= TLLF_SHUTDOWN_DONE;
1701 	else {
1702 		tll->tll_onclose = NULL;
1703 		while (mb != NULL) {
1704 			mbnext = mb->b_next;
1705 			mb->b_next = NULL;
1706 			message_send(tll, mb);
1707 			mb = mbnext;
1708 		}
1709 	}
1710 	return (0);
1711 }
1712 
1713 /*
1714  * This function is called when a backenable occurs on the write side of a
1715  * lower stream.  It walks over the client streams, looking for ones that use
1716  * the given tunll_t lower stream.  Each client is then backenabled.
1717  */
1718 static void
1719 tclvm_backenable(void *arg, void *firstv, size_t numv)
1720 {
1721 	tunll_t *tll = arg;
1722 	int minorn = (int)(uintptr_t)firstv;
1723 	int minormax = minorn + numv;
1724 	tuncl_t *tcl;
1725 	queue_t *q;
1726 
1727 	while (minorn < minormax) {
1728 		tcl = tcl_slots[minorn - 1];
1729 		if ((tcl->tcl_data_tll == tll ||
1730 		    tcl->tcl_ctrl_tll == tll) &&
1731 		    (q = tcl->tcl_rq) != NULL) {
1732 			qenable(OTHERQ(q));
1733 		}
1734 		minorn++;
1735 	}
1736 }
1737 
1738 /*
1739  * sppptun_uwsrv()
1740  *
1741  * MT-Perimeters:
1742  *    exclusive inner, shared outer.
1743  *
1744  * Description:
1745  *    Upper write-side service procedure.  In addition to the usual
1746  *    STREAMS queue service handling, this routine also handles the
1747  *    transmission of the unbind/detach messages to the lower stream
1748  *    driver when a lower stream is being closed.  (See the use of
1749  *    qenable/qwait in sppptun_close().)
1750  */
1751 static int
1752 sppptun_uwsrv(queue_t *q)
1753 {
1754 	tuncl_t	*tcl;
1755 	mblk_t *mp;
1756 	queue_t *nextq;
1757 
1758 	tcl = q->q_ptr;
1759 	if (!(tcl->tcl_flags & TCLF_ISCLIENT)) {
1760 		tunll_t *tll = (tunll_t *)tcl;
1761 
1762 		if ((tll->tll_flags & (TLLF_CLOSING|TLLF_CLOSE_DONE)) ==
1763 		    TLLF_CLOSING) {
1764 			tll->tll_error = tll_close_req(tll);
1765 			tll->tll_flags |= TLLF_CLOSE_DONE;
1766 		} else {
1767 			/*
1768 			 * We've been enabled here because of a backenable on
1769 			 * output flow control.  Backenable clients using this
1770 			 * lower layer.
1771 			 */
1772 			vmem_walk(tcl_minor_arena, VMEM_ALLOC, tclvm_backenable,
1773 			    tll);
1774 		}
1775 		return (0);
1776 	}
1777 
1778 	while ((mp = getq(q)) != NULL) {
1779 		if ((nextq = sppptun_outpkt(q, &mp)) != NULL) {
1780 			putnext(nextq, mp);
1781 		} else if (mp != NULL) {
1782 			(void) putbq(q, mp);
1783 			break;
1784 		}
1785 	}
1786 	return (0);
1787 }
1788 
1789 /*
1790  * sppptun_lwput()
1791  *
1792  * MT-Perimeters:
1793  *    shared inner, shared outer.
1794  *
1795  * Description:
1796  *    Lower write-side put procedure.  Nothing should be sending
1797  *    packets down this stream.
1798  */
1799 static void
1800 sppptun_lwput(queue_t *q, mblk_t *mp)
1801 {
1802 	switch (MTYPE(mp)) {
1803 	case M_PROTO:
1804 		putnext(q, mp);
1805 		break;
1806 	default:
1807 		freemsg(mp);
1808 		break;
1809 	}
1810 }
1811 
1812 /*
1813  * sppptun_lrput()
1814  *
1815  * MT-Perimeters:
1816  *    shared inner, shared outer.
1817  *
1818  * Description:
1819  *    Lower read-side put procedure.  Nothing should arrive here.
1820  */
1821 static void
1822 sppptun_lrput(queue_t *q, mblk_t *mp)
1823 {
1824 	tuncl_t *tcl;
1825 
1826 	switch (MTYPE(mp)) {
1827 	case M_IOCTL:
1828 		miocnak(q, mp, 0, EINVAL);
1829 		return;
1830 	case M_FLUSH:
1831 		if (*mp->b_rptr & FLUSHR) {
1832 			flushq(q, FLUSHDATA);
1833 		}
1834 		if (*mp->b_rptr & FLUSHW) {
1835 			*mp->b_rptr &= ~FLUSHR;
1836 			qreply(q, mp);
1837 		} else {
1838 			freemsg(mp);
1839 		}
1840 		return;
1841 	}
1842 	/*
1843 	 * Try to forward the message to the put procedure for the upper
1844 	 * control stream for this lower stream. If there are already messages
1845 	 * queued here, queue this one up to preserve message ordering.
1846 	 */
1847 	if ((tcl = (tuncl_t *)q->q_ptr) == NULL || tcl->tcl_rq == NULL) {
1848 		freemsg(mp);
1849 		return;
1850 	}
1851 	if (queclass(mp) == QPCTL ||
1852 	    (q->q_first == NULL && canput(tcl->tcl_rq))) {
1853 		put(tcl->tcl_rq, mp);
1854 	} else {
1855 		if (!putq(q, mp))
1856 			freemsg(mp);
1857 	}
1858 }
1859 
1860 /*
1861  * MT-Perimeters:
1862  *    shared inner, shared outer.
1863  *
1864  *    Handle non-data DLPI messages.  Used with PPPoE, which runs over
1865  *    Ethernet only.
1866  */
1867 static void
1868 urput_dlpi(queue_t *q, mblk_t *mp)
1869 {
1870 	int err;
1871 	union DL_primitives *dlp = (union DL_primitives *)mp->b_rptr;
1872 	tunll_t *tll = q->q_ptr;
1873 	size_t mlen = MBLKL(mp);
1874 
1875 	switch (dlp->dl_primitive) {
1876 	case DL_UDERROR_IND:
1877 		break;
1878 
1879 	case DL_ERROR_ACK:
1880 		if (mlen < DL_ERROR_ACK_SIZE)
1881 			break;
1882 		err = dlp->error_ack.dl_unix_errno ?
1883 		    dlp->error_ack.dl_unix_errno : ENXIO;
1884 		switch (dlp->error_ack.dl_error_primitive) {
1885 		case DL_UNBIND_REQ:
1886 			message_done(tll);
1887 			break;
1888 		case DL_DETACH_REQ:
1889 			message_done(tll);
1890 			tll->tll_error = err;
1891 			tll->tll_flags |= TLLF_SHUTDOWN_DONE;
1892 			break;
1893 		case DL_PHYS_ADDR_REQ:
1894 			message_done(tll);
1895 			break;
1896 		case DL_INFO_REQ:
1897 		case DL_ATTACH_REQ:
1898 		case DL_BIND_REQ:
1899 			message_done(tll);
1900 			tll->tll_error = err;
1901 			break;
1902 		}
1903 		break;
1904 
1905 	case DL_INFO_ACK:
1906 		message_done(tll);
1907 		break;
1908 
1909 	case DL_BIND_ACK:
1910 		message_done(tll);
1911 		break;
1912 
1913 	case DL_PHYS_ADDR_ACK:
1914 		break;
1915 
1916 	case DL_OK_ACK:
1917 		if (mlen < DL_OK_ACK_SIZE)
1918 			break;
1919 		switch (dlp->ok_ack.dl_correct_primitive) {
1920 		case DL_UNBIND_REQ:
1921 			message_done(tll);
1922 			break;
1923 		case DL_DETACH_REQ:
1924 			tll->tll_flags |= TLLF_SHUTDOWN_DONE;
1925 			break;
1926 		case DL_ATTACH_REQ:
1927 			message_done(tll);
1928 			break;
1929 		}
1930 		break;
1931 	}
1932 	freemsg(mp);
1933 }
1934 
1935 /* Search structure used with PPPoE only; see tclvm_pppoe_search(). */
1936 struct poedat {
1937 	uint_t sessid;
1938 	tunll_t *tll;
1939 	const void *srcaddr;
1940 	int isdata;
1941 	tuncl_t *tcl;
1942 };
1943 
1944 /*
1945  * This function is called by vmem_walk from within sppptun_recv.  It
1946  * iterates over a span of allocated minor node numbers to search for
1947  * the appropriate lower stream, session ID, and peer MAC address.
1948  *
1949  * (This is necessary due to a design flaw in the PPPoE protocol
1950  * itself.  The protocol assigns session IDs from the server side
1951  * only.  Both server and client use the same number.  Thus, if there
1952  * are multiple clients on a single host, there can be session ID
1953  * conflicts between servers and there's no way to detangle them
1954  * except by looking at the remote MAC address.)
1955  *
1956  * (This could have been handled by linking together sessions that
1957  * differ only in the remote MAC address.  This isn't done because it
1958  * would involve extra per-session storage and it's very unlikely that
1959  * PPPoE would be used this way.)
1960  */
1961 static void
1962 tclvm_pppoe_search(void *arg, void *firstv, size_t numv)
1963 {
1964 	struct poedat *poedat = (struct poedat *)arg;
1965 	int minorn = (int)(uintptr_t)firstv;
1966 	int minormax = minorn + numv;
1967 	tuncl_t *tcl;
1968 
1969 	if (poedat->tcl != NULL)
1970 		return;
1971 	while (minorn < minormax) {
1972 		tcl = tcl_slots[minorn - 1];
1973 		ASSERT(tcl != NULL);
1974 		if (tcl->tcl_rsessid == poedat->sessid &&
1975 		    ((!poedat->isdata && tcl->tcl_ctrl_tll == poedat->tll) ||
1976 		    (poedat->isdata && tcl->tcl_data_tll == poedat->tll)) &&
1977 		    bcmp(tcl->tcl_address.pta_pppoe.ptma_mac,
1978 		    poedat->srcaddr,
1979 		    sizeof (tcl->tcl_address.pta_pppoe.ptma_mac)) == 0) {
1980 			poedat->tcl = tcl;
1981 			break;
1982 		}
1983 		minorn++;
1984 	}
1985 }
1986 
1987 /*
1988  * sppptun_recv()
1989  *
1990  * MT-Perimeters:
1991  *    shared inner, shared outer.
1992  *
1993  * Description:
1994  *    Receive function called by sppptun_urput, which is called when
1995  *    the lower read-side put or service procedure sends a message
1996  *    upstream to the a device user (PPP).  It attempts to find an
1997  *    appropriate queue on the module above us (depending on what the
1998  *    associated upper stream for the protocol would be), and if not
1999  *    possible, it will find an upper control stream for the protocol.
2000  *    Returns a pointer to the upper queue_t, or NULL if the message
2001  *    has been discarded.
2002  *
2003  * About demultiplexing:
2004  *
2005  *	All four protocols (L2F, PPTP, L2TP, and PPPoE) support a
2006  *	locally assigned ID for demultiplexing incoming traffic.  For
2007  *	L2F, this is called the Client ID, for PPTP the Call ID, for
2008  *	L2TP the Session ID, and for PPPoE the SESSION_ID.  This is a
2009  *	16 bit number for all four protocols, and is used to directly
2010  *	index into a list of upper streams.  With the upper stream in
2011  *	hand, we verify that this is the right stream and deliver the
2012  *	data.
2013  *
2014  *	L2TP has a Tunnel ID, which represents a bundle of PPP
2015  *	sessions between the peers.  Because we always assign unique
2016  *	session ID numbers, we merely check that the given ID matches
2017  *	the assigned ID for the upper stream.
2018  *
2019  *	L2F has a Multiplex ID, which is unique per connection.  It
2020  *	does not have L2TP's concept of multiple-connections-within-
2021  *	a-tunnel.  The same checking is done.
2022  *
2023  *	PPPoE is a horribly broken protocol.  Only one ID is assigned
2024  *	per connection.  The client must somehow demultiplex based on
2025  *	an ID number assigned by the server.  It's not necessarily
2026  *	unique.  The search is done based on {ID,peerEthernet} (using
2027  *	tcl_rsessid) for all packet types except PADI and PADS.
2028  *
2029  *	Neither PPPoE nor PPTP supports additional ID numbers.
2030  *
2031  *	Both L2F and L2TP come in over UDP.  They are distinguished by
2032  *	looking at the GRE version field -- 001 for L2F and 010 for
2033  *	L2TP.
2034  */
2035 static queue_t *
2036 sppptun_recv(queue_t *q, mblk_t **mpp, const void *srcaddr)
2037 {
2038 	mblk_t *mp;
2039 	tunll_t *tll;
2040 	tuncl_t *tcl;
2041 	int sessid;
2042 	int remlen;
2043 	int msglen;
2044 	int isdata;
2045 	int i;
2046 	const uchar_t *ucp;
2047 	const poep_t *poep;
2048 	mblk_t *mnew;
2049 	ppptun_atype *pap;
2050 
2051 	mp = *mpp;
2052 
2053 	tll = q->q_ptr;
2054 	ASSERT(!(tll->tll_flags & TLLF_NOTLOWER));
2055 
2056 	tcl = NULL;
2057 	switch (tll->tll_style) {
2058 	case PTS_PPPOE:
2059 		/* Note that poep_t alignment is uint16_t */
2060 		if ((!IS_P2ALIGNED(mp->b_rptr, sizeof (uint16_t)) ||
2061 		    MBLKL(mp) < sizeof (poep_t)) &&
2062 		    !pullupmsg(mp, sizeof (poep_t)))
2063 			break;
2064 		poep = (const poep_t *)mp->b_rptr;
2065 		if (poep->poep_version_type != POE_VERSION)
2066 			break;
2067 		/*
2068 		 * First, extract a session ID number.  All protocols have
2069 		 * this.
2070 		 */
2071 		isdata = (poep->poep_code == POECODE_DATA);
2072 		sessid = ntohs(poep->poep_session_id);
2073 		remlen = sizeof (*poep);
2074 		msglen = ntohs(poep->poep_length);
2075 		i = poep->poep_code;
2076 		if (i == POECODE_PADI || i == POECODE_PADR) {
2077 			/* These go to the server daemon only. */
2078 			tcl = tll->tll_defcl;
2079 		} else if (i == POECODE_PADO || i == POECODE_PADS) {
2080 			/*
2081 			 * These go to a client only, and are demuxed
2082 			 * by the Host-Uniq field (into which we stuff
2083 			 * our local ID number when generating
2084 			 * PADI/PADR).
2085 			 */
2086 			ucp = (const uchar_t *)(poep + 1);
2087 			i = msglen;
2088 			while (i > POET_HDRLEN) {
2089 				if (POET_GET_TYPE(ucp) == POETT_END) {
2090 					i = 0;
2091 					break;
2092 				}
2093 				if (POET_GET_TYPE(ucp) == POETT_UNIQ &&
2094 				    POET_GET_LENG(ucp) >= sizeof (uint32_t))
2095 					break;
2096 				i -= POET_GET_LENG(ucp) + POET_HDRLEN;
2097 				ucp = POET_NEXT(ucp);
2098 			}
2099 			if (i >= POET_HDRLEN + 4)
2100 				sessid = GETLONG(ucp + POET_HDRLEN);
2101 			tcl = tcl_by_minor((minor_t)sessid);
2102 		} else {
2103 			/*
2104 			 * Try minor number as session ID first, since
2105 			 * it's used that way on server side.  It's
2106 			 * not used that way on the client, though, so
2107 			 * this might not work.  If this isn't the
2108 			 * right one, then try the tll cache.  If
2109 			 * neither is right, then search all open
2110 			 * clients.  Did I mention that the PPPoE
2111 			 * protocol is badly designed?
2112 			 */
2113 			tcl = tcl_by_minor((minor_t)sessid);
2114 			if (tcl == NULL ||
2115 			    (!isdata && tcl->tcl_ctrl_tll != tll) ||
2116 			    (isdata && tcl->tcl_data_tll != tll) ||
2117 			    sessid != tcl->tcl_rsessid ||
2118 			    bcmp(srcaddr, tcl->tcl_address.pta_pppoe.ptma_mac,
2119 			    sizeof (tcl->tcl_address.pta_pppoe.ptma_mac)) != 0)
2120 				tcl = tll->tll_lastcl;
2121 			if (tcl == NULL ||
2122 			    (!isdata && tcl->tcl_ctrl_tll != tll) ||
2123 			    (isdata && tcl->tcl_data_tll != tll) ||
2124 			    sessid != tcl->tcl_rsessid ||
2125 			    bcmp(srcaddr, tcl->tcl_address.pta_pppoe.ptma_mac,
2126 			    sizeof (tcl->tcl_address.pta_pppoe.ptma_mac)) != 0)
2127 				tcl = NULL;
2128 			if (tcl == NULL && sessid != 0) {
2129 				struct poedat poedat;
2130 
2131 				/*
2132 				 * Slow mode.  Too bad.  If you don't like it,
2133 				 * you can always choose a better protocol.
2134 				 */
2135 				poedat.sessid = sessid;
2136 				poedat.tll = tll;
2137 				poedat.srcaddr = srcaddr;
2138 				poedat.tcl = NULL;
2139 				poedat.isdata = isdata;
2140 				vmem_walk(tcl_minor_arena, VMEM_ALLOC,
2141 				    tclvm_pppoe_search, &poedat);
2142 				KLINCR(lks_walks);
2143 				if ((tcl = poedat.tcl) != NULL) {
2144 					tll->tll_lastcl = tcl;
2145 					KCINCR(cks_walks);
2146 				}
2147 			}
2148 		}
2149 		break;
2150 	}
2151 
2152 	if (tcl == NULL || tcl->tcl_rq == NULL) {
2153 		DTRACE_PROBE3(sppptun__recv__discard, int, sessid,
2154 		    tuncl_t *, tcl, mblk_t *, mp);
2155 		if (tcl == NULL) {
2156 			KLINCR(lks_in_nomatch);
2157 		}
2158 		if (isdata) {
2159 			KLINCR(lks_indata_drops);
2160 			if (tcl != NULL)
2161 				tcl->tcl_stats.ppp_ierrors++;
2162 		} else {
2163 			KLINCR(lks_inctrl_drops);
2164 			if (tcl != NULL) {
2165 				KCINCR(cks_inctrl_drops);
2166 			}
2167 		}
2168 		freemsg(mp);
2169 		return (NULL);
2170 	}
2171 
2172 	if (tcl->tcl_data_tll == tll && isdata) {
2173 		if (!adjmsg(mp, remlen) ||
2174 		    (i = msgsize(mp)) < msglen ||
2175 		    (i > msglen && !adjmsg(mp, msglen - i))) {
2176 			KLINCR(lks_indata_drops);
2177 			tcl->tcl_stats.ppp_ierrors++;
2178 			freemsg(mp);
2179 			return (NULL);
2180 		}
2181 		/* XXX -- address/control handling in pppd needs help. */
2182 		if (*mp->b_rptr != 0xFF) {
2183 			if ((mp = prependb(mp, 2, 1)) == NULL) {
2184 				KLINCR(lks_indata_drops);
2185 				tcl->tcl_stats.ppp_ierrors++;
2186 				return (NULL);
2187 			}
2188 			mp->b_rptr[0] = 0xFF;
2189 			mp->b_rptr[1] = 0x03;
2190 		}
2191 		MTYPE(mp) = M_DATA;
2192 		tcl->tcl_stats.ppp_ibytes += msgsize(mp);
2193 		tcl->tcl_stats.ppp_ipackets++;
2194 		KLINCR(lks_indata);
2195 	} else {
2196 		if (isdata || tcl->tcl_ctrl_tll != tll ||
2197 		    (mnew = make_control(tcl, tll, PTCA_CONTROL, tcl)) ==
2198 		    NULL) {
2199 			KLINCR(lks_inctrl_drops);
2200 			KCINCR(cks_inctrl_drops);
2201 			freemsg(mp);
2202 			return (NULL);
2203 		}
2204 		/* Fix up source address; peer might not be set yet. */
2205 		pap = &((struct ppptun_control *)mnew->b_rptr)->ptc_address;
2206 		bcopy(srcaddr, pap->pta_pppoe.ptma_mac,
2207 		    sizeof (pap->pta_pppoe.ptma_mac));
2208 		mnew->b_cont = mp;
2209 		mp = mnew;
2210 		KLINCR(lks_inctrls);
2211 		KCINCR(cks_inctrls);
2212 	}
2213 	*mpp = mp;
2214 	return (tcl->tcl_rq);
2215 }
2216 
2217 /*
2218  * sppptun_urput()
2219  *
2220  * MT-Perimeters:
2221  *    shared inner, shared outer.
2222  *
2223  * Description:
2224  *    Upper read-side put procedure.  Messages from the underlying
2225  *    lower stream driver arrive here.  See sppptun_recv for the
2226  *    demultiplexing logic.
2227  */
2228 static void
2229 sppptun_urput(queue_t *q, mblk_t *mp)
2230 {
2231 	union DL_primitives *dlprim;
2232 	mblk_t *mpnext;
2233 	tunll_t *tll;
2234 	queue_t *nextq;
2235 
2236 	tll = q->q_ptr;
2237 	ASSERT(!(tll->tll_flags & TLLF_NOTLOWER));
2238 
2239 	switch (MTYPE(mp)) {
2240 	case M_DATA:
2241 		/*
2242 		 * When we're bound over IP, data arrives here.  The
2243 		 * packet starts with the IP header itself.
2244 		 */
2245 		if ((nextq = sppptun_recv(q, &mp, NULL)) != NULL)
2246 			putnext(nextq, mp);
2247 		break;
2248 
2249 	case M_PROTO:
2250 	case M_PCPROTO:
2251 		/* Data arrives here for UDP or raw Ethernet, not IP. */
2252 		switch (tll->tll_style) {
2253 			/* PPTP control messages are over TCP only. */
2254 		case PTS_PPTP:
2255 		default:
2256 			ASSERT(0);	/* how'd that happen? */
2257 			break;
2258 
2259 		case PTS_PPPOE:		/* DLPI message */
2260 			if (MBLKL(mp) < sizeof (t_uscalar_t))
2261 				break;
2262 			dlprim = (union DL_primitives *)mp->b_rptr;
2263 			switch (dlprim->dl_primitive) {
2264 			case DL_UNITDATA_IND: {
2265 				size_t mlen = MBLKL(mp);
2266 
2267 				if (mlen < DL_UNITDATA_IND_SIZE)
2268 					break;
2269 				if (dlprim->unitdata_ind.dl_src_addr_offset <
2270 				    DL_UNITDATA_IND_SIZE ||
2271 				    dlprim->unitdata_ind.dl_src_addr_offset +
2272 				    dlprim->unitdata_ind.dl_src_addr_length >
2273 				    mlen)
2274 					break;
2275 			}
2276 				/* FALLTHROUGH */
2277 			case DL_UNITDATA_REQ:	/* For loopback support. */
2278 				if (dlprim->dl_primitive == DL_UNITDATA_REQ &&
2279 				    MBLKL(mp) < DL_UNITDATA_REQ_SIZE)
2280 					break;
2281 				if ((mpnext = mp->b_cont) == NULL)
2282 					break;
2283 				MTYPE(mpnext) = M_DATA;
2284 				nextq = sppptun_recv(q, &mpnext,
2285 				    dlprim->dl_primitive == DL_UNITDATA_IND ?
2286 				    mp->b_rptr +
2287 				    dlprim->unitdata_ind.dl_src_addr_offset :
2288 				    tll->tll_lcladdr.pta_pppoe.ptma_mac);
2289 				if (nextq != NULL)
2290 					putnext(nextq, mpnext);
2291 				freeb(mp);
2292 				return;
2293 
2294 			default:
2295 				urput_dlpi(q, mp);
2296 				return;
2297 			}
2298 			break;
2299 		}
2300 		freemsg(mp);
2301 		break;
2302 
2303 	default:
2304 		freemsg(mp);
2305 		break;
2306 	}
2307 }
2308 
2309 /*
2310  * sppptun_ursrv()
2311  *
2312  * MT-Perimeters:
2313  *    exclusive inner, shared outer.
2314  *
2315  * Description:
2316  *    Upper read-side service procedure.  This procedure services the
2317  *    client streams.  We get here because the client (PPP) asserts
2318  *    flow control down to us.
2319  */
2320 static int
2321 sppptun_ursrv(queue_t *q)
2322 {
2323 	mblk_t		*mp;
2324 
2325 	ASSERT(q->q_ptr != NULL);
2326 
2327 	while ((mp = getq(q)) != NULL) {
2328 		if (canputnext(q)) {
2329 			putnext(q, mp);
2330 		} else {
2331 			(void) putbq(q, mp);
2332 			break;
2333 		}
2334 	}
2335 	return (0);
2336 }
2337 
2338 /*
2339  * Dummy constructor/destructor functions for kmem_cache_create.
2340  * We're just using kmem as an allocator of integers, not real
2341  * storage.
2342  */
2343 
/*
 * Object constructor handed to kmem_cache_create().  Performs no
 * per-object setup and always reports success (0); all three
 * arguments are ignored.
 */
/*ARGSUSED*/
static int
tcl_constructor(void *maddr, void *arg, int kmflags)
{
	/* Nothing to construct. */
	return (0);
}
2350 
/*
 * Object destructor handed to kmem_cache_create().  There is nothing
 * to tear down, so this is a no-op; both arguments are ignored.
 */
/*ARGSUSED*/
static void
tcl_destructor(void *maddr, void *arg)
{
	/* Nothing to destroy. */
}
2356 
/*
 * Memory charged to a single tunnel client when sizing the driver:
 * one pointer in the tcl_slots array, the tuncl_t structure itself,
 * and the two messages preallocated for close.
 */
#define	TUNCL_SIZE (sizeof (tuncl_t *) + sizeof (tuncl_t) + \
			2 * sizeof (dblk_t))
2364 
/*
 * Clear all bits of x except the highest bit.  Values of 2 or less
 * are returned unchanged.  The shift is done on an unsigned constant
 * so that a value whose top (32nd) bit is set does not shift a signed
 * 1 into the sign bit, which is undefined behaviour.  Note that x is
 * evaluated more than once.
 */
#define	truncate(x)	((x) <= 2 ? (x) : (1U << (highbit(x) - 1)))
2369 
2370 /*
2371  * This function initializes some well-known global variables inside
2372  * the module.
2373  *
2374  * Called by sppptun_mod.c:_init() before installing the module.
2375  */
2376 void
2377 sppptun_init(void)
2378 {
2379 	tunll_list.q_forw = tunll_list.q_back = &tunll_list;
2380 }
2381 
2382 /*
2383  * This function allocates the initial internal storage for the
2384  * sppptun driver.
2385  *
2386  * Called by sppptun_mod.c:_init() after installing module.
2387  */
2388 void
2389 sppptun_tcl_init(void)
2390 {
2391 	uint_t i, j;
2392 
2393 	rw_init(&tcl_rwlock, NULL, RW_DRIVER, NULL);
2394 	rw_enter(&tcl_rwlock, RW_WRITER);
2395 	tcl_nslots = sppptun_init_cnt;
2396 	tcl_slots = kmem_zalloc(tcl_nslots * sizeof (tuncl_t *), KM_SLEEP);
2397 
2398 	tcl_cache = kmem_cache_create("sppptun_map", sizeof (tuncl_t), 0,
2399 	    tcl_constructor, tcl_destructor, NULL, NULL, NULL, 0);
2400 
2401 	/* Allocate integer space for minor numbers */
2402 	tcl_minor_arena = vmem_create("sppptun_minor", (void *)1, tcl_nslots,
2403 	    1, NULL, NULL, NULL, 0, VM_SLEEP | VMC_IDENTIFIER);
2404 
2405 	/*
2406 	 * Calculate available number of tunnels - how many tunnels
2407 	 * can we allocate in sppptun_pctofmem % of available
2408 	 * memory.  The value is rounded up to the nearest power of 2.
2409 	 */
2410 	i = (sppptun_pctofmem * kmem_maxavail()) / (100 * TUNCL_SIZE);
2411 	j = truncate(i);	/* i with non-high bits stripped */
2412 	if (i != j)
2413 		j *= 2;
2414 	tcl_minormax = j;
2415 	rw_exit(&tcl_rwlock);
2416 }
2417 
2418 /*
2419  * This function checks that there are no plumbed streams or other users.
2420  *
2421  * Called by sppptun_mod.c:_fini().  Assumes that we're exclusive on
2422  * both perimeters.
2423  */
2424 int
2425 sppptun_tcl_fintest(void)
2426 {
2427 	if (tunll_list.q_forw != &tunll_list || tcl_inuse > 0)
2428 		return (EBUSY);
2429 	else
2430 		return (0);
2431 }
2432 
2433 /*
2434  * If no lower streams are plumbed, then this function deallocates all
2435  * internal storage in preparation for unload.
2436  *
2437  * Called by sppptun_mod.c:_fini().  Assumes that we're exclusive on
2438  * both perimeters.
2439  */
2440 void
2441 sppptun_tcl_fini(void)
2442 {
2443 	if (tcl_minor_arena != NULL) {
2444 		vmem_destroy(tcl_minor_arena);
2445 		tcl_minor_arena = NULL;
2446 	}
2447 	if (tcl_cache != NULL) {
2448 		kmem_cache_destroy(tcl_cache);
2449 		tcl_cache = NULL;
2450 	}
2451 	kmem_free(tcl_slots, tcl_nslots * sizeof (tuncl_t *));
2452 	tcl_slots = NULL;
2453 	rw_destroy(&tcl_rwlock);
2454 	ASSERT(tcl_slots == NULL);
2455 	ASSERT(tcl_cache == NULL);
2456 	ASSERT(tcl_minor_arena == NULL);
2457 }
2458