xref: /illumos-gate/usr/src/uts/common/io/ppp/sppptun/sppptun.c (revision 8b80e8cb6855118d46f605e91b5ed4ce83417395)
1 /*
2  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
3  * Use is subject to license terms.
4  */
5 
6 #include <sys/types.h>
7 #include <sys/debug.h>
8 #include <sys/param.h>
9 #include <sys/stat.h>
10 #include <sys/systm.h>
11 #include <sys/socket.h>
12 #include <sys/stream.h>
13 #include <sys/stropts.h>
14 #include <sys/errno.h>
15 #include <sys/time.h>
16 #include <sys/cmn_err.h>
17 #include <sys/sdt.h>
18 #include <sys/conf.h>
19 #include <sys/dlpi.h>
20 #include <sys/ddi.h>
21 #include <sys/kstat.h>
22 #include <sys/strsun.h>
23 #include <sys/bitmap.h>
24 #include <sys/sysmacros.h>
25 #include <sys/note.h>
26 #include <sys/policy.h>
27 #include <net/ppp_defs.h>
28 #include <net/pppio.h>
29 #include <net/sppptun.h>
30 #include <net/pppoe.h>
31 #include <netinet/in.h>
32 
33 #include "s_common.h"
34 #include "sppptun_mod.h"
35 #include "sppptun_impl.h"
36 
37 #define	NTUN_INITIAL 16			/* Initial number of sppptun slots */
38 #define	NTUN_PERCENT 5			/* Percent of memory to use */
39 
40 /*
41  * This is used to tag official Solaris sources.  Please do not define
42  * "INTERNAL_BUILD" when building this software outside of Sun
43  * Microsystems.
44  */
45 #ifdef INTERNAL_BUILD
46 /* MODINFO is limited to 32 characters. */
47 const char sppptun_driver_description[] = "PPP 4.0 tunnel driver";
48 const char sppptun_module_description[] = "PPP 4.0 tunnel module";
49 #else
50 const char sppptun_driver_description[] = "ANU PPP tundrv";
51 const char sppptun_module_description[] = "ANU PPP tunmod";
52 
53 /* LINTED */
54 static const char buildtime[] = "Built " __DATE__ " at " __TIME__
55 #ifdef DEBUG
56 " DEBUG"
57 #endif
58 "\n";
59 #endif
60 
61 /*
62  * Tunable values; these are similar to the values used in ptms_conf.c.
63  * Override these settings via /etc/system.
64  */
65 uint_t	sppptun_cnt = 0;		/* Minimum number of tunnels */
66 size_t	sppptun_max_pty = 0;		/* Maximum number of tunnels */
67 uint_t	sppptun_init_cnt = NTUN_INITIAL; /* Initial number of tunnel slots */
68 uint_t	sppptun_pctofmem = NTUN_PERCENT; /* Percent of memory to use */
69 
70 typedef struct ether_dest_s {
71 	ether_addr_t addr;
72 	ushort_t type;
73 } ether_dest_t;
74 
75 /* Allows unaligned access. */
76 #define	GETLONG(x)	(((x)[0]<<24)|((x)[1]<<16)|((x)[2]<<8)|(x)[3])
77 
78 static const char *tll_kstats_list[] = { TLL_KSTATS_NAMES };
79 static const char *tcl_kstats_list[] = { TCL_KSTATS_NAMES };
80 
81 #define	KREF(p, m, vn)	p->m.vn.value.ui64
82 #define	KINCR(p, m, vn)	++KREF(p, m, vn)
83 #define	KDECR(p, m, vn)	--KREF(p, m, vn)
84 
85 #define	KLINCR(vn)	KINCR(tll, tll_kstats, vn)
86 #define	KLDECR(vn)	KDECR(tll, tll_kstats, vn)
87 
88 #define	KCINCR(vn)	KINCR(tcl, tcl_kstats, vn)
89 #define	KCDECR(vn)	KDECR(tcl, tcl_kstats, vn)
90 
91 static int	sppptun_open(queue_t *, dev_t *, int, int, cred_t *);
92 static int	sppptun_close(queue_t *);
93 static void	sppptun_urput(queue_t *, mblk_t *);
94 static void	sppptun_uwput(queue_t *, mblk_t *);
95 static int	sppptun_ursrv(queue_t *);
96 static int	sppptun_uwsrv(queue_t *);
97 static void	sppptun_lrput(queue_t *, mblk_t *);
98 static void	sppptun_lwput(queue_t *, mblk_t *);
99 
100 /*
101  * This is the hash table of clients.  Clients are the programs that
102  * open /dev/sppptun as a device.  There may be a large number of
103  * these; one per tunneled PPP session.
104  *
105  * Note: slots are offset from minor node value by 1 because
106  * vmem_alloc returns 0 for failure.
107  *
108  * The tcl_slots array entries are modified only when exclusive on
109  * both inner and outer perimeters.  This ensures that threads on
110  * shared perimeters always view this as unchanging memory with no
111  * need to lock around accesses.  (Specifically, the tcl_slots array
112  * is modified by entry to sppptun_open, sppptun_close, and _fini.)
113  */
114 static tuncl_t **tcl_slots = NULL;	/* Slots for tuncl_t */
115 static size_t tcl_nslots = 0;		/* Size of slot array */
116 static size_t tcl_minormax = 0;		/* Maximum number of tunnels */
117 static size_t tcl_inuse = 0;		/* # of tunnels currently allocated */
118 static krwlock_t tcl_rwlock;
119 static struct kmem_cache *tcl_cache = NULL;	/* tunnel cache */
120 static vmem_t *tcl_minor_arena = NULL; /* Arena for device minors */
121 
122 /*
123  * This is the simple list of lower layers.  For PPPoE, there is one
124  * of these per Ethernet interface.  Lower layers are established by
125  * "plumbing" -- using I_PLINK to connect the tunnel multiplexor to
126  * the physical interface.
127  */
128 static struct qelem tunll_list;
129 static int tunll_index;
130 
131 /* Test value; if all zeroes, then address hasn't been set yet. */
132 static const ether_addr_t zero_mac_addr = { 0, 0, 0, 0, 0, 0 };
133 
134 #define	MIN_SET_FASTPATH_UNITDATAREQ_SIZE	\
135 	(sizeof (dl_unitdata_req_t) + 4)
136 
137 #define	TUN_MI_ID	2104	/* officially allocated module ID */
138 #define	TUN_MI_MINPSZ	(0)
139 #define	TUN_MI_MAXPSZ	(PPP_MAXMTU)
140 #define	TUN_MI_HIWAT	(PPP_MTU * 8)
141 #define	TUN_MI_LOWAT	(128)
142 
/* One module_info shared by all four queue sides. */
static struct module_info sppptun_modinfo = {
	TUN_MI_ID,		/* mi_idnum */
	PPP_TUN_NAME,		/* mi_idname */
	TUN_MI_MINPSZ,		/* mi_minpsz */
	TUN_MI_MAXPSZ,		/* mi_maxpsz */
	TUN_MI_HIWAT,		/* mi_hiwat */
	TUN_MI_LOWAT		/* mi_lowat */
};

/* Upper (client/device) read side. */
static struct qinit sppptun_urinit = {
	(int (*)())sppptun_urput, /* qi_putp */
	sppptun_ursrv,		/* qi_srvp */
	sppptun_open,		/* qi_qopen */
	sppptun_close,		/* qi_qclose */
	NULL,			/* qi_qadmin */
	&sppptun_modinfo,	/* qi_minfo */
	NULL			/* qi_mstat */
};

/* Upper (client/device) write side. */
static struct qinit sppptun_uwinit = {
	(int (*)())sppptun_uwput, /* qi_putp */
	sppptun_uwsrv,		/* qi_srvp */
	NULL,			/* qi_qopen */
	NULL,			/* qi_qclose */
	NULL,			/* qi_qadmin */
	&sppptun_modinfo,	/* qi_minfo */
	NULL			/* qi_mstat */
};

/* Lower (linked multiplexor) read side. */
static struct qinit sppptun_lrinit = {
	(int (*)())sppptun_lrput, /* qi_putp */
	NULL,			/* qi_srvp */
	NULL,			/* qi_qopen */
	NULL,			/* qi_qclose */
	NULL,			/* qi_qadmin */
	&sppptun_modinfo,	/* qi_minfo */
	NULL			/* qi_mstat */
};

/* Lower (linked multiplexor) write side. */
static struct qinit sppptun_lwinit = {
	(int (*)())sppptun_lwput, /* qi_putp */
	NULL,			/* qi_srvp */
	NULL,			/* qi_qopen */
	NULL,			/* qi_qclose */
	NULL,			/* qi_qadmin */
	&sppptun_modinfo,	/* qi_minfo */
	NULL			/* qi_mstat */
};

/*
 * This is referenced in sppptun_mod.c.
 */
struct streamtab sppptun_tab = {
	&sppptun_urinit,	/* st_rdinit */
	&sppptun_uwinit,	/* st_wrinit */
	&sppptun_lrinit,	/* st_muxrinit */
	&sppptun_lwinit		/* st_muxwrinit */
};
201 
/*
 * Allocate another slot table twice as large as the original one
 * (limited to global maximum).  Migrate all tunnels to the new slot
 * table and free the original one.  Assumes we're exclusive on both
 * inner and outer perimeters, and thus there are no other users of
 * the tcl_slots array.
 *
 * Returns a minor number freshly allocated from the enlarged arena,
 * or 0 on failure (0 is never a valid minor; slots are offset by 1).
 */
static minor_t
tcl_grow(void)
{
	minor_t old_size = tcl_nslots;
	minor_t new_size = 2 * old_size;
	tuncl_t **tcl_old = tcl_slots;
	tuncl_t **tcl_new;
	void  *vaddr;			/* vmem_add return value */

	ASSERT(RW_LOCK_HELD(&tcl_rwlock));

	/* Allocate new ptms array */
	tcl_new = kmem_zalloc(new_size * sizeof (tuncl_t *), KM_NOSLEEP);
	if (tcl_new == NULL)
		return ((minor_t)0);

	/*
	 * Increase clone index space.  The new span starts one past the
	 * old size because minor numbers are offset by 1 (vmem_alloc
	 * uses 0 as its failure return).
	 */
	vaddr = vmem_add(tcl_minor_arena, (void*)((uintptr_t)old_size + 1),
	    new_size - old_size, VM_NOSLEEP);

	if (vaddr == NULL) {
		/* Arena growth failed; undo the table allocation. */
		kmem_free(tcl_new, new_size * sizeof (tuncl_t *));
		return ((minor_t)0);
	}

	/* Migrate tuncl_t entries to a new location */
	tcl_nslots = new_size;
	bcopy(tcl_old, tcl_new, old_size * sizeof (tuncl_t *));
	tcl_slots = tcl_new;
	kmem_free(tcl_old, old_size * sizeof (tuncl_t *));

	/* Allocate minor number and return it */
	return ((minor_t)(uintptr_t)vmem_alloc(tcl_minor_arena, 1, VM_NOSLEEP));
}
243 
/*
 * Allocate new minor number and tunnel client entry.  Returns the new
 * entry or NULL if no memory or maximum number of entries reached.
 * Assumes we're exclusive on both inner and outer perimeters, and
 * thus there are no other users of the tcl_slots array.
 *
 * wantminor <= 0 means "any minor" (normal clone open); a positive
 * value requests that exact minor (non-clone open, used for testing).
 */
static tuncl_t *
tuncl_alloc(int wantminor)
{
	minor_t dminor;
	tuncl_t *tcl = NULL;

	rw_enter(&tcl_rwlock, RW_WRITER);

	ASSERT(tcl_slots != NULL);

	/*
	 * Always try to allocate new pty when sppptun_cnt minimum
	 * limit is not achieved. If it is achieved, the maximum is
	 * determined by either user-specified value (if it is
	 * non-zero) or our memory estimations - whatever is less.
	 */
	if (tcl_inuse >= sppptun_cnt) {
		/*
		 * When system achieved required minimum of tunnels,
		 * check for the denial of service limits.
		 *
		 * Get user-imposed maximum, if configured, or
		 * calculated memory constraint.
		 */
		size_t user_max = (sppptun_max_pty == 0 ? tcl_minormax :
		    min(sppptun_max_pty, tcl_minormax));

		/* Do not try to allocate more than allowed */
		if (tcl_inuse >= user_max) {
			rw_exit(&tcl_rwlock);
			return (NULL);
		}
	}
	/* Reserve our slot up front; rolled back below on any failure. */
	tcl_inuse++;

	/*
	 * Allocate new minor number. If this fails, all slots are
	 * busy and we need to grow the hash.
	 */
	if (wantminor <= 0) {
		dminor = (minor_t)(uintptr_t)vmem_alloc(tcl_minor_arena, 1,
		    VM_NOSLEEP);
		if (dminor == 0) {
			/* Grow the cache and retry allocation */
			dminor = tcl_grow();
		}
	} else {
		/*
		 * Ask the arena for exactly [wantminor, wantminor+1).
		 * If vmem hands back anything else, release it and
		 * treat the request as failed.
		 */
		dminor = (minor_t)(uintptr_t)vmem_xalloc(tcl_minor_arena, 1,
		    0, 0, 0, (void *)(uintptr_t)wantminor,
		    (void *)((uintptr_t)wantminor+1), VM_NOSLEEP);
		if (dminor != 0 && dminor != wantminor) {
			vmem_free(tcl_minor_arena, (void *)(uintptr_t)dminor,
			    1);
			dminor = 0;
		}
	}

	if (dminor == 0) {
		/* Not enough memory now */
		tcl_inuse--;
		rw_exit(&tcl_rwlock);
		return (NULL);
	}

	tcl = kmem_cache_alloc(tcl_cache, KM_NOSLEEP);
	if (tcl == NULL) {
		/* Not enough memory - this entry can't be used now. */
		vmem_free(tcl_minor_arena, (void *)(uintptr_t)dminor, 1);
		tcl_inuse--;
	} else {
		bzero(tcl, sizeof (*tcl));
		tcl->tcl_lsessid = dminor;
		ASSERT(tcl_slots[dminor - 1] == NULL);
		tcl_slots[dminor - 1] = tcl;
	}

	rw_exit(&tcl_rwlock);
	return (tcl);
}
329 
/*
 * This routine frees an upper level (client) stream by removing it
 * from the minor number pool and freeing the state structure storage.
 * Assumes we're exclusive on both inner and outer perimeters, and
 * thus there are no other concurrent users of the tcl_slots array or
 * of any entry in that array.
 */
static void
tuncl_free(tuncl_t *tcl)
{
	rw_enter(&tcl_rwlock, RW_WRITER);
	ASSERT(tcl->tcl_lsessid <= tcl_nslots);
	ASSERT(tcl_slots[tcl->tcl_lsessid - 1] == tcl);
	ASSERT(tcl_inuse > 0);
	tcl_inuse--;
	tcl_slots[tcl->tcl_lsessid - 1] = NULL;

	/* Tear down this client's kstats, if they were ever created. */
	if (tcl->tcl_ksp != NULL) {
		kstat_delete(tcl->tcl_ksp);
		tcl->tcl_ksp = NULL;
	}

	/* Return minor number to the pool of minors */
	vmem_free(tcl_minor_arena, (void *)(uintptr_t)tcl->tcl_lsessid, 1);

	/* Return tuncl_t to the cache */
	kmem_cache_free(tcl_cache, tcl);
	rw_exit(&tcl_rwlock);
}
359 
360 /*
361  * Get tuncl_t structure by minor number.  Returns NULL when minor is
362  * out of range.  Note that lookup of tcl pointers (and use of those
363  * pointers) is safe because modification is done only when exclusive
364  * on both inner and outer perimeters.
365  */
366 static tuncl_t *
367 tcl_by_minor(minor_t dminor)
368 {
369 	tuncl_t *tcl = NULL;
370 
371 	if ((dminor >= 1) && (dminor <= tcl_nslots) && tcl_slots != NULL) {
372 		tcl = tcl_slots[dminor - 1];
373 	}
374 
375 	return (tcl);
376 }
377 
378 /*
379  * Set up kstats for upper or lower stream.
380  */
381 static kstat_t *
382 kstat_setup(kstat_named_t *knt, const char **names, int nstat,
383     const char *modname, int unitnum)
384 {
385 	kstat_t *ksp;
386 	char unitname[KSTAT_STRLEN];
387 	int i;
388 
389 	for (i = 0; i < nstat; i++) {
390 		kstat_set_string(knt[i].name, names[i]);
391 		knt[i].data_type = KSTAT_DATA_UINT64;
392 	}
393 	(void) sprintf(unitname, "%s" "%d", modname, unitnum);
394 	ksp = kstat_create(modname, unitnum, unitname, "net",
395 	    KSTAT_TYPE_NAMED, nstat, KSTAT_FLAG_VIRTUAL);
396 	if (ksp != NULL) {
397 		ksp->ks_data = (void *)knt;
398 		kstat_install(ksp);
399 	}
400 	return (ksp);
401 }
402 
/*
 * sppptun_open()
 *
 * MT-Perimeters:
 *    exclusive inner, exclusive outer.
 *
 * Description:
 *    Common open procedure for module and driver.  A MODOPEN means
 *    we're being pushed as a module over a physical interface stream
 *    (lower layer; allocates a tunll_t).  Otherwise this is a device
 *    open by a client (upper layer; allocates a tuncl_t and minor).
 */
static int
sppptun_open(queue_t *q, dev_t *devp, int oflag, int sflag, cred_t *credp)
{
	_NOTE(ARGUNUSED(oflag))

	/* Allow a re-open */
	if (q->q_ptr != NULL)
		return (0);

	/* In the off chance that we're on our way out, just return error */
	if (tcl_slots == NULL)
		return (EINVAL);

	if (sflag & MODOPEN) {
		tunll_t *tll;
		char *cp;

		/* ordinary users have no need to push this module */
		if (secpolicy_net_config(credp, B_FALSE) != 0)
			return (EPERM);

		tll = kmem_zalloc(sizeof (tunll_t), KM_SLEEP);

		tll->tll_index = tunll_index++;

		tll->tll_wq = WR(q);

		/* Insert at end of list */
		insque(&tll->tll_next, tunll_list.q_back);
		q->q_ptr = WR(q)->q_ptr = tll;

		/* Only PPPoE is implemented as a lower-stream style. */
		tll->tll_style = PTS_PPPOE;
		tll->tll_alen = sizeof (tll->tll_lcladdr.pta_pppoe);

		tll->tll_ksp = kstat_setup((kstat_named_t *)&tll->tll_kstats,
		    tll_kstats_list, Dim(tll_kstats_list), "tll",
		    tll->tll_index);

		/*
		 * Find the name of the driver somewhere beneath us.
		 * Note that we have no driver under us until after
		 * qprocson().
		 */
		qprocson(q);
		for (q = WR(q); q->q_next != NULL; q = q->q_next)
			;
		cp = NULL;
		if (q->q_qinfo != NULL && q->q_qinfo->qi_minfo != NULL)
			cp = q->q_qinfo->qi_minfo->mi_idname;
		if (cp != NULL && *cp == '\0')
			cp = NULL;

		/* Set initial name; user should overwrite. */
		if (cp == NULL)
			(void) snprintf(tll->tll_name, sizeof (tll->tll_name),
			    PPP_TUN_NAME "%d", tll->tll_index);
		else
			(void) snprintf(tll->tll_name, sizeof (tll->tll_name),
			    "%s:tun%d", cp, tll->tll_index);
	} else {
		tuncl_t	*tcl;

		ASSERT(devp != NULL);
		if (sflag & CLONEOPEN) {
			/* -1 means "pick any free minor number". */
			tcl = tuncl_alloc(-1);
		} else {
			minor_t mn;

			/*
			 * Support of non-clone open (ie, mknod with
			 * defined minor number) is supported for
			 * testing purposes so that 'arbitrary' minor
			 * numbers can be used.
			 */
			mn = getminor(*devp);
			/* Reject minor 0 and minors already in use. */
			if (mn == 0 || (tcl = tcl_by_minor(mn)) != NULL) {
				return (EPERM);
			}
			tcl = tuncl_alloc(mn);
		}
		if (tcl == NULL)
			return (ENOSR);
		tcl->tcl_rq = q;		/* save read queue pointer */
		tcl->tcl_flags |= TCLF_ISCLIENT;	/* sanity check */

		q->q_ptr = WR(q)->q_ptr = (caddr_t)tcl;
		*devp = makedevice(getmajor(*devp), tcl->tcl_lsessid);

		tcl->tcl_ksp = kstat_setup((kstat_named_t *)&tcl->tcl_kstats,
		    tcl_kstats_list, Dim(tcl_kstats_list), "tcl",
		    tcl->tcl_lsessid);

		qprocson(q);
	}
	return (0);
}
508 
509 /*
510  * Create an appropriate control message for this client event.
511  */
512 static mblk_t *
513 make_control(tuncl_t *tclabout, tunll_t *tllabout, int action, tuncl_t *tclto)
514 {
515 	struct ppptun_control *ptc;
516 	mblk_t *mp = allocb(sizeof (*ptc), BPRI_HI);
517 
518 	if (mp != NULL) {
519 		MTYPE(mp) = M_PROTO;
520 		ptc = (struct ppptun_control *)mp->b_wptr;
521 		mp->b_wptr += sizeof (*ptc);
522 		if (tclabout != NULL) {
523 			ptc->ptc_rsessid = tclabout->tcl_rsessid;
524 			ptc->ptc_address = tclabout->tcl_address;
525 		} else {
526 			bzero(ptc, sizeof (*ptc));
527 		}
528 		ptc->ptc_discrim = tclto->tcl_ctlval;
529 		ptc->ptc_action = action;
530 		(void) strncpy(ptc->ptc_name, tllabout->tll_name,
531 		    sizeof (ptc->ptc_name));
532 	}
533 	return (mp);
534 }
535 
536 /*
537  * Send an appropriate control message up this client session.
538  */
539 static void
540 send_control(tuncl_t *tclabout, tunll_t *tllabout, int action, tuncl_t *tcl)
541 {
542 	mblk_t *mp;
543 
544 	if (tcl->tcl_rq != NULL) {
545 		mp = make_control(tclabout, tllabout, action, tcl);
546 		if (mp != NULL) {
547 			KCINCR(cks_octrl_spec);
548 			putnext(tcl->tcl_rq, mp);
549 		}
550 	}
551 }
552 
/*
 * If a lower stream is being unplumbed, then the upper streams
 * connected to this lower stream must be disconnected.  This routine
 * accomplishes this by sending M_HANGUP to data streams and M_PROTO
 * messages to control streams.  This is called by vmem_walk, and
 * handles a span of minor node numbers.
 *
 * No need to update lks_clients here; the lower stream is on its way
 * out.
 */
static void
tclvm_remove_tll(void *arg, void *firstv, size_t numv)
{
	tunll_t *tll = (tunll_t *)arg;
	int minorn = (int)(uintptr_t)firstv;
	int minormax = minorn + numv;
	tuncl_t *tcl;
	mblk_t *mp;

	while (minorn < minormax) {
		/* vmem_walk only visits allocated minors, so slot is set. */
		tcl = tcl_slots[minorn - 1];
		ASSERT(tcl != NULL);
		if (tcl->tcl_data_tll == tll && tcl->tcl_rq != NULL) {
			tcl->tcl_data_tll = NULL;
			mp = allocb(0, BPRI_HI);
			if (mp != NULL) {
				MTYPE(mp) = M_HANGUP;
				putnext(tcl->tcl_rq, mp);
				/*
				 * Hangup delivered; if this was also
				 * the control stream, no separate
				 * control message is needed.
				 */
				if (tcl->tcl_ctrl_tll == tll)
					tcl->tcl_ctrl_tll = NULL;
			}
		}
		/*
		 * Reached when this tll is control-only, or when the
		 * M_HANGUP above couldn't be allocated (fall back to
		 * an unplumb control message).
		 */
		if (tcl->tcl_ctrl_tll == tll) {
			send_control(tcl, tll, PTCA_UNPLUMB, tcl);
			tcl->tcl_ctrl_tll = NULL;
		}
		minorn++;
	}
}
592 
/*
 * sppptun_close()
 *
 * MT-Perimeters:
 *    exclusive inner, exclusive outer.
 *
 * Description:
 *    Common close procedure for module and driver.  The lower-stream
 *    (module) path disconnects all clients and waits for the write
 *    service routine to complete shutdown; the client (device) path
 *    detaches the client from every lower stream and frees it.
 */
static int
sppptun_close(queue_t *q)
{
	int err;
	void *qptr;
	tunll_t *tll;
	tuncl_t *tcl;

	qptr = q->q_ptr;

	err = 0;
	tll = qptr;
	if (!(tll->tll_flags & TLLF_NOTLOWER)) {
		/* q_next is set on modules */
		ASSERT(WR(q)->q_next != NULL);

		/* unlink any clients using this lower layer. */
		vmem_walk(tcl_minor_arena, VMEM_ALLOC, tclvm_remove_tll, tll);

		/* tell daemon that this has been removed. */
		if ((tcl = tll->tll_defcl) != NULL)
			send_control(NULL, tll, PTCA_UNPLUMB, tcl);

		/*
		 * Hand shutdown off to the write service routine:
		 * qwait() lets it run while we're still exclusive.
		 */
		tll->tll_flags |= TLLF_CLOSING;
		while (!(tll->tll_flags & TLLF_CLOSE_DONE)) {
			qenable(tll->tll_wq);
			qwait(tll->tll_wq);
		}
		tll->tll_error = 0;
		/* Second phase is interruptible by a signal. */
		while (!(tll->tll_flags & TLLF_SHUTDOWN_DONE)) {
			if (!qwait_sig(tll->tll_wq))
				break;
		}

		qprocsoff(q);
		q->q_ptr = WR(q)->q_ptr = NULL;
		tll->tll_wq = NULL;
		remque(&tll->tll_next);
		err = tll->tll_error;
		if (tll->tll_ksp != NULL)
			kstat_delete(tll->tll_ksp);
		kmem_free(tll, sizeof (*tll));
	} else {
		tcl = qptr;

		/* devices are end of line; no q_next. */
		ASSERT(WR(q)->q_next == NULL);

		qprocsoff(q);
		DTRACE_PROBE1(sppptun__client__close, tuncl_t *, tcl);
		tcl->tcl_rq = NULL;
		q->q_ptr = WR(q)->q_ptr = NULL;

		/* Drop any lower-stream references to this client. */
		tll = TO_TLL(tunll_list.q_forw);
		while (tll != TO_TLL(&tunll_list)) {
			if (tll->tll_defcl == tcl)
				tll->tll_defcl = NULL;
			if (tll->tll_lastcl == tcl)
				tll->tll_lastcl = NULL;
			tll = TO_TLL(tll->tll_next);
		}
		/*
		 * If this was a normal session, then tell the daemon.
		 */
		if (!(tcl->tcl_flags & TCLF_DAEMON) &&
		    (tll = tcl->tcl_ctrl_tll) != NULL &&
		    tll->tll_defcl != NULL) {
			send_control(tcl, tll, PTCA_DISCONNECT,
			    tll->tll_defcl);
		}

		/* Update statistics for references being dropped. */
		if ((tll = tcl->tcl_data_tll) != NULL) {
			KLDECR(lks_clients);
		}
		if ((tll = tcl->tcl_ctrl_tll) != NULL) {
			KLDECR(lks_clients);
		}

		tuncl_free(tcl);
	}

	return (err);
}
686 
687 /*
688  * Allocate and initialize a DLPI or TPI template of the specified
689  * length.
690  */
691 static mblk_t *
692 pi_alloc(size_t len, int prim)
693 {
694 	mblk_t	*mp;
695 
696 	mp = allocb(len, BPRI_MED);
697 	if (mp != NULL) {
698 		MTYPE(mp) = M_PROTO;
699 		mp->b_wptr = mp->b_rptr + len;
700 		bzero(mp->b_rptr, len);
701 		*(int *)mp->b_rptr = prim;
702 	}
703 	return (mp);
704 }
705 
706 #define	dlpi_alloc(l, p)	pi_alloc((l), (p))
707 
708 /*
709  * Prepend some room to an mblk.  Try to reuse the existing buffer, if
710  * at all possible, rather than allocating a new one.  (Fast-path
711  * output should be able to use this.)
712  *
713  * (XXX why isn't this a library function ...?)
714  */
715 static mblk_t *
716 prependb(mblk_t *mp, size_t len, size_t align)
717 {
718 	mblk_t *newmp;
719 
720 
721 	if (align == 0)
722 		align = 8;
723 	if (DB_REF(mp) > 1 || mp->b_datap->db_base+len > mp->b_rptr ||
724 	    ((uint_t)((uintptr_t)mp->b_rptr - len) % align) != 0) {
725 		if ((newmp = allocb(len, BPRI_LO)) == NULL) {
726 			freemsg(mp);
727 			return (NULL);
728 		}
729 		newmp->b_wptr = newmp->b_rptr + len;
730 		newmp->b_cont = mp;
731 		return (newmp);
732 	}
733 	mp->b_rptr -= len;
734 	return (mp);
735 }
736 
/*
 * sppptun_outpkt()
 *
 * MT-Perimeters:
 *	shared inner, shared outer (if called from sppptun_uwput),
 *	exclusive inner, shared outer (if called from sppptun_uwsrv).
 *
 * Description:
 *    Called from sppptun_uwput or sppptun_uwsrv when processing a
 *    M_DATA, M_PROTO, or M_PCPROTO message.  For all cases, it tries
 *    to prepare the data to be sent to the module below this driver
 *    if there is a lower stream linked underneath.  If no lower
 *    stream exists, then the data will be discarded and an ENXIO
 *    error returned.
 *
 * Returns:
 *	pointer to queue if caller should do putnext, otherwise
 *	*mpp != NULL if message should be enqueued, otherwise
 *	*mpp == NULL if message is gone.
 */
static queue_t *
sppptun_outpkt(queue_t *q, mblk_t **mpp)
{
	mblk_t *mp;
	tuncl_t *tcl;
	tunll_t *tll;
	mblk_t *encmb;
	mblk_t *datamb;
	dl_unitdata_req_t *dur;
	queue_t *lowerq;
	poep_t *poep;
	int len;
	ether_dest_t *edestp;
	/*
	 * Loopback disposition: luNone - normal send down; luCopy -
	 * send down and loop a copy back up; luSend - loop the whole
	 * message back up instead of sending.
	 */
	enum { luNone, luCopy, luSend } loopup;
	boolean_t isdata;
	struct ppptun_control *ptc;

	mp = *mpp;
	tcl = q->q_ptr;

	*mpp = NULL;
	/* Only client (upper/device) streams may originate packets. */
	if (!(tcl->tcl_flags & TCLF_ISCLIENT)) {
		merror(q, mp, EINVAL);
		return (NULL);
	}

	isdata = (MTYPE(mp) == M_DATA);
	if (isdata) {
		tll = tcl->tcl_data_tll;
		ptc = NULL;
	} else {
		/*
		 * If data are unaligned or otherwise unsuitable, then
		 * discard.
		 */
		if (MBLKL(mp) != sizeof (*ptc) || DB_REF(mp) > 1 ||
		    !IS_P2ALIGNED(mp->b_rptr, sizeof (ptc))) {
			KCINCR(cks_octrl_drop);
			DTRACE_PROBE2(sppptun__bad__control, tuncl_t *, tcl,
			    mblk_t *, mp);
			merror(q, mp, EINVAL);
			return (NULL);
		}
		ptc = (struct ppptun_control *)mp->b_rptr;

		/* Set stream discriminator value if not yet set. */
		if (tcl->tcl_ctlval == 0)
			tcl->tcl_ctlval = ptc->ptc_discrim;

		/* If this is a test message, then reply to caller. */
		if (ptc->ptc_action == PTCA_TEST) {
			DTRACE_PROBE2(sppptun__test, tuncl_t *, tcl,
			    struct ppptun_control *, ptc);
			if (mp->b_cont != NULL) {
				freemsg(mp->b_cont);
				mp->b_cont = NULL;
			}
			ptc->ptc_discrim = tcl->tcl_ctlval;
			putnext(RD(q), mp);
			return (NULL);
		}

		/* If this one isn't for us, then discard it */
		if (tcl->tcl_ctlval != ptc->ptc_discrim) {
			DTRACE_PROBE2(sppptun__bad__discrim, tuncl_t *, tcl,
			    struct ppptun_control *, ptc);
			freemsg(mp);
			return (NULL);
		}

		/* Don't allow empty control packets. */
		if (mp->b_cont == NULL) {
			KCINCR(cks_octrl_drop);
			merror(q, mp, EINVAL);
			return (NULL);
		}
		tll = tcl->tcl_ctrl_tll;
	}

	/* No lower stream attached for this direction: drop with ENXIO. */
	if (tll == NULL || (lowerq = tll->tll_wq) == NULL) {
		DTRACE_PROBE3(sppptun__cannot__send, tuncl_t *, tcl,
		    tunll_t *, tll, mblk_t *, mp);
		merror(q, mp, ENXIO);
		if (isdata) {
			tcl->tcl_stats.ppp_oerrors++;
		} else {
			KCINCR(cks_octrl_drop);
		}
		return (NULL);
	}

	/*
	 * If so, then try to send it down.  The lower queue is only
	 * ever detached while holding an exclusive lock on the whole
	 * driver, so we can be confident that the lower queue is
	 * still there.
	 */
	if (!bcanputnext(lowerq, mp->b_band)) {
		DTRACE_PROBE3(sppptun__flow__control, tuncl_t *, tcl,
		    tunll_t *, tll, mblk_t *, mp);
		/* Flow controlled: hand back to caller for queueing. */
		*mpp = mp;
		return (NULL);
	}

	/*
	 * Note: DLPI and TPI expect that the first buffer contains
	 * the control (unitdata-req) header, destination address, and
	 * nothing else.  Any protocol headers must go in the next
	 * buffer.
	 */
	loopup = luNone;
	encmb = NULL;
	if (isdata) {
		/* Destination == our local address: loop it back up. */
		if (tll->tll_alen != 0 &&
		    bcmp(&tcl->tcl_address, &tll->tll_lcladdr,
		    tll->tll_alen) == 0)
			loopup = luSend;
		switch (tll->tll_style) {
		case PTS_PPPOE:
			/* Strip address and control fields if present. */
			if (mp->b_rptr[0] == 0xFF) {
				if (MBLKL(mp) < 3) {
					encmb = msgpullup(mp, 3);
					freemsg(mp);
					if ((mp = encmb) == NULL)
						break;
				}
				mp->b_rptr += 2;
			}
			/* Broadcasting data is probably not a good idea. */
			if (tcl->tcl_address.pta_pppoe.ptma_mac[0] & 1)
				break;
			encmb = dlpi_alloc(sizeof (*dur) + sizeof (*edestp),
			    DL_UNITDATA_REQ);
			if (encmb == NULL)
				break;

			dur = (dl_unitdata_req_t *)encmb->b_rptr;
			dur->dl_dest_addr_length = sizeof (*edestp);
			dur->dl_dest_addr_offset = sizeof (*dur);
			edestp = (ether_dest_t *)(dur + 1);
			ether_copy(tcl->tcl_address.pta_pppoe.ptma_mac,
			    edestp->addr);
			/* DLPI SAPs are in host byte order! */
			edestp->type = ETHERTYPE_PPPOES;

			/* Make sure the protocol field isn't compressed. */
			len = (*mp->b_rptr & 1);
			mp = prependb(mp, sizeof (*poep) + len, POE_HDR_ALIGN);
			if (mp == NULL)
				break;
			poep = (poep_t *)mp->b_rptr;
			poep->poep_version_type = POE_VERSION;
			poep->poep_code = POECODE_DATA;
			poep->poep_session_id = htons(tcl->tcl_rsessid);
			poep->poep_length = htons(msgsize(mp) -
			    sizeof (*poep));
			/* Extra byte reserved above holds protocol 0x00. */
			if (len > 0)
				*(char *)(poep + 1) = '\0';
			break;

		default:
			ASSERT(0);
		}
	} else {
		/*
		 * Control side encapsulation.
		 */
		if (bcmp(&ptc->ptc_address, &tll->tll_lcladdr, tll->tll_alen)
		    == 0)
			loopup = luSend;
		datamb = mp->b_cont;
		switch (tll->tll_style) {
		case PTS_PPPOE:
			/*
			 * Don't allow a loopback session to establish
			 * itself.  PPPoE is broken; it uses only one
			 * session ID for both data directions, so the
			 * loopback data path can simply never work.
			 */
			if (loopup == luSend &&
			    ((poep_t *)datamb->b_rptr)->poep_code ==
			    POECODE_PADR)
				break;
			encmb = dlpi_alloc(sizeof (*dur) + sizeof (*edestp),
			    DL_UNITDATA_REQ);
			if (encmb == NULL)
				break;
			dur = (dl_unitdata_req_t *)encmb->b_rptr;
			dur->dl_dest_addr_length = sizeof (*edestp);
			dur->dl_dest_addr_offset = sizeof (*dur);

			edestp = (ether_dest_t *)(dur + 1);
			/* DLPI SAPs are in host byte order! */
			edestp->type = ETHERTYPE_PPPOED;

			/*
			 * If destination isn't set yet, then we have to
			 * allow anything at all.  Otherwise, force use
			 * of configured peer address.
			 */
			if (bcmp(tcl->tcl_address.pta_pppoe.ptma_mac,
			    zero_mac_addr, sizeof (zero_mac_addr)) == 0 ||
			    (tcl->tcl_flags & TCLF_DAEMON)) {
				ether_copy(ptc->ptc_address.pta_pppoe.ptma_mac,
				    edestp->addr);
			} else {
				ether_copy(tcl->tcl_address.pta_pppoe.ptma_mac,
				    edestp->addr);
			}
			/* Reflect multicast/broadcast back up. */
			if (edestp->addr[0] & 1)
				loopup = luCopy;
			break;

		case PTS_PPTP:
			/*
			 * PPTP's control side is actually done over
			 * separate TCP connections.
			 */
		default:
			ASSERT(0);
		}
		/* Header mblk consumed; payload continues without it. */
		freeb(mp);
		mp = datamb;
	}
	/* Either the payload or the encapsulation failed to materialize. */
	if (mp == NULL || encmb == NULL) {
		DTRACE_PROBE1(sppptun__output__failure, tuncl_t *, tcl);
		freemsg(mp);
		freemsg(encmb);
		if (isdata) {
			tcl->tcl_stats.ppp_oerrors++;
		} else {
			KCINCR(cks_octrl_drop);
			KLINCR(lks_octrl_drop);
		}
		lowerq = NULL;
	} else {
		if (isdata) {
			tcl->tcl_stats.ppp_obytes += msgsize(mp);
			tcl->tcl_stats.ppp_opackets++;
		} else {
			KCINCR(cks_octrls);
			KLINCR(lks_octrls);
		}
		if (encmb != mp)
			encmb->b_cont = mp;
		switch (loopup) {
		case luNone:
			*mpp = encmb;
			break;
		case luCopy:
			mp = copymsg(encmb);
			if (mp != NULL)
				sppptun_urput(RD(lowerq), mp);
			*mpp = encmb;
			break;
		case luSend:
			sppptun_urput(RD(lowerq), encmb);
			lowerq = NULL;
			break;
		}
	}
	return (lowerq);
}
1022 
1023 /*
1024  * Enqueue a message to be sent when the lower stream is closed.  This
1025  * is done so that we're guaranteed that we always have the necessary
1026  * resources to properly detach ourselves from the system.  (If we
1027  * waited until the close was done to allocate these messages, then
1028  * the message allocation could fail, and we'd be unable to properly
1029  * detach.)
1030  */
1031 static void
1032 save_for_close(tunll_t *tll, mblk_t *mp)
1033 {
1034 	mblk_t *onc;
1035 
1036 	if ((onc = tll->tll_onclose) == NULL)
1037 		tll->tll_onclose = mp;
1038 	else {
1039 		while (onc->b_next != NULL)
1040 			onc = onc->b_next;
1041 		onc->b_next = mp;
1042 	}
1043 }
1044 
1045 /*
1046  * Given the lower stream name, locate the state structure.  Note that
1047  * lookup of tcl pointers (and use of those pointers) is safe because
1048  * modification is done only when exclusive on both inner and outer
1049  * perimeters.
1050  */
1051 static tunll_t *
1052 tll_lookup_on_name(char *dname)
1053 {
1054 	tunll_t *tll;
1055 
1056 	tll = TO_TLL(tunll_list.q_forw);
1057 	for (; tll != TO_TLL(&tunll_list); tll = TO_TLL(tll->tll_next))
1058 		if (strcmp(dname, tll->tll_name) == 0)
1059 			return (tll);
1060 	return (NULL);
1061 }
1062 
1063 /*
1064  * sppptun_inner_ioctl()
1065  *
1066  * MT-Perimeters:
1067  *    exclusive inner, shared outer.
1068  *
1069  * Description:
1070  *    Called by qwriter from sppptun_ioctl as the result of receiving
1071  *    a handled ioctl.
1072  */
static void
sppptun_inner_ioctl(queue_t *q, mblk_t *mp)
{
	struct iocblk *iop;
	int rc = 0;		/* ioctl result; non-zero causes miocnak */
	int len = 0;		/* length of reply data in mp->b_cont */
	int i;
	tuncl_t *tcl;
	tunll_t *tll;
	union ppptun_name *ptn;
	struct ppptun_info *pti;
	struct ppptun_peer *ptp;
	mblk_t *mptmp;
	ppptun_atype *pap;
	struct ppp_stats64 *psp;

	/*
	 * q_ptr is either a tunll_t (module/lower side) or a tuncl_t
	 * (driver/client side; marked by TLLF_NOTLOWER in the shared
	 * leading flags word).  Exactly one of tcl/tll is non-NULL below,
	 * and the cases use that to enforce which side may issue them.
	 */
	iop = (struct iocblk *)mp->b_rptr;
	tcl = NULL;
	tll = q->q_ptr;
	if (tll->tll_flags & TLLF_NOTLOWER) {
		tcl = (tuncl_t *)tll;
		tll = NULL;
	}

	DTRACE_PROBE3(sppptun__ioctl, tuncl_t *, tcl, tunll_t *, tll,
	    struct iocblk *, iop);

	switch (iop->ioc_cmd) {
	case PPPIO_DEBUG:
		/*
		 * Debug requests are now ignored; use dtrace or wireshark
		 * instead.
		 */
		break;

	case PPPIO_GETSTAT:
		/* Only the 64-bit statistics interface is supported. */
		rc = EINVAL;
		break;

	case PPPIO_GETSTAT64:
		/* Client (device) side only */
		if (tcl == NULL) {
			rc = EINVAL;
			break;
		}
		/* Replace any ioctl payload with a zeroed stats snapshot. */
		mptmp = allocb(sizeof (*psp), BPRI_HI);
		if (mptmp == NULL) {
			rc = ENOSR;
			break;
		}
		freemsg(mp->b_cont);
		mp->b_cont = mptmp;

		psp = (struct ppp_stats64 *)mptmp->b_wptr;
		bzero((caddr_t)psp, sizeof (*psp));
		psp->p = tcl->tcl_stats;

		len = sizeof (*psp);
		break;

	case PPPTUN_SNAME:
		/* This is done on the *module* (lower level) side. */
		if (tll == NULL || mp->b_cont == NULL ||
		    iop->ioc_count != sizeof (*ptn) ||
		    *mp->b_cont->b_rptr == '\0') {
			rc = EINVAL;
			break;
		}

		/* Force NUL termination before using the name. */
		ptn = (union ppptun_name *)mp->b_cont->b_rptr;
		ptn->ptn_name[sizeof (ptn->ptn_name) - 1] = '\0';

		/* Reject a name already assigned to another lower stream. */
		if ((tll = tll_lookup_on_name(ptn->ptn_name)) != NULL) {
			rc = EEXIST;
			break;
		}
		tll = (tunll_t *)q->q_ptr;
		(void) strcpy(tll->tll_name, ptn->ptn_name);
		break;

	case PPPTUN_GNAME:
		/* This is done on the *module* (lower level) side. */
		if (tll == NULL) {
			rc = EINVAL;
			break;
		}
		if (mp->b_cont != NULL)
			freemsg(mp->b_cont);
		if ((mp->b_cont = allocb(sizeof (*ptn), BPRI_HI)) == NULL) {
			rc = ENOSR;
			break;
		}
		ptn = (union ppptun_name *)mp->b_cont->b_rptr;
		bcopy(tll->tll_name, ptn->ptn_name, sizeof (ptn->ptn_name));
		len = sizeof (*ptn);
		break;

	case PPPTUN_SINFO:
	case PPPTUN_GINFO:
		/* Either side */
		if (mp->b_cont == NULL || iop->ioc_count != sizeof (*pti)) {
			rc = EINVAL;
			break;
		}
		pti = (struct ppptun_info *)mp->b_cont->b_rptr;
		if (pti->pti_name[0] != '\0')
			tll = tll_lookup_on_name(pti->pti_name);
		if (tll == NULL) {
			/* Driver (client) side must have name */
			if (tcl != NULL && pti->pti_name[0] == '\0')
				rc = EINVAL;
			else
				rc = ESRCH;
			break;
		}
		if (iop->ioc_cmd == PPPTUN_GINFO) {
			pti->pti_muxid = tll->tll_muxid;
			pti->pti_style = tll->tll_style;
			len = sizeof (*pti);
			break;
		}
		tll->tll_muxid = pti->pti_muxid;
		tll->tll_style = pti->pti_style;
		switch (tll->tll_style) {
		case PTS_PPPOE:		/* DLPI type */
			tll->tll_alen = sizeof (tll->tll_lcladdr.pta_pppoe);
			/*
			 * Preallocate the DLPI unbind and detach messages
			 * now and queue them on the lower stream, so that
			 * close cannot later fail for lack of memory (see
			 * save_for_close()).
			 */
			mptmp = dlpi_alloc(sizeof (dl_unbind_req_t),
			    DL_UNBIND_REQ);
			if (mptmp == NULL) {
				rc = ENOSR;
				break;
			}
			save_for_close(tll, mptmp);
			mptmp = dlpi_alloc(sizeof (dl_detach_req_t),
			    DL_DETACH_REQ);
			if (mptmp == NULL) {
				rc = ENOSR;
				break;
			}
			save_for_close(tll, mptmp);
			break;
		default:
			tll->tll_style = PTS_NONE;
			tll->tll_alen = 0;
			rc = EINVAL;
			break;
		}
		break;

	case PPPTUN_GNNAME:
		/* This can be done on either side. */
		if (mp->b_cont == NULL || iop->ioc_count < sizeof (uint32_t)) {
			rc = EINVAL;
			break;
		}
		/*
		 * NOTE(review): the reply overwrites b_cont with a full
		 * union ppptun_name even though only sizeof (uint32_t)
		 * input bytes were required; assumes callers always pass
		 * a struct-sized buffer -- confirm against user code.
		 */
		ptn = (union ppptun_name *)mp->b_cont->b_rptr;
		i = ptn->ptn_index;
		/* Walk i entries into the list of lower streams. */
		tll = TO_TLL(tunll_list.q_forw);
		while (--i >= 0 && tll != TO_TLL(&tunll_list))
			tll = TO_TLL(tll->tll_next);
		if (tll != TO_TLL(&tunll_list)) {
			bcopy(tll->tll_name, ptn->ptn_name,
			    sizeof (ptn->ptn_name));
		} else {
			/* Index out of range: return an all-zero entry. */
			bzero(ptn, sizeof (*ptn));
		}
		len = sizeof (*ptn);
		break;

	case PPPTUN_LCLADDR:
		/* This is done on the *module* (lower level) side. */
		if (tll == NULL || mp->b_cont == NULL) {
			rc = EINVAL;
			break;
		}

		/* Address length must have been set by PPPTUN_SINFO. */
		pap = &tll->tll_lcladdr;
		len = tll->tll_alen;
		if (len == 0 || len > iop->ioc_count) {
			rc = EINVAL;
			break;
		}
		bcopy(mp->b_cont->b_rptr, pap, len);
		len = 0;
		break;

	case PPPTUN_SPEER:
		/* Client (device) side only; before SDATA */
		if (tcl == NULL || mp->b_cont == NULL ||
		    iop->ioc_count != sizeof (*ptp)) {
			rc = EINVAL;
			break;
		}
		if (tcl->tcl_data_tll != NULL) {
			rc = EINVAL;
			break;
		}
		ptp = (struct ppptun_peer *)mp->b_cont->b_rptr;
		DTRACE_PROBE2(sppptun__speer, tuncl_t *, tcl,
		    struct ppptun_peer *, ptp);
		/* Once set, the style cannot change. */
		if (tcl->tcl_style != PTS_NONE &&
		    tcl->tcl_style != ptp->ptp_style) {
			rc = EINVAL;
			break;
		}
		if (ptp->ptp_flags & PTPF_DAEMON) {
			/* User requests registration for tunnel 0 */
			if ((tcl->tcl_flags & TCLF_SPEER_DONE) ||
			    ptp->ptp_ltunid != 0 || ptp->ptp_rtunid != 0 ||
			    ptp->ptp_lsessid != 0 || ptp->ptp_rsessid != 0) {
				rc = EINVAL;
				break;
			}
			tcl->tcl_flags |= TCLF_DAEMON;
		} else {
			/* Normal client connection */
			if (tcl->tcl_flags & TCLF_DAEMON) {
				rc = EINVAL;
				break;
			}
			if (ptp->ptp_lsessid != 0 &&
			    ptp->ptp_lsessid != tcl->tcl_lsessid) {
				rc = EINVAL;
				break;
			}
			/*
			 * If we're reassigning the peer data, then
			 * the previous assignment must have been for
			 * a client control connection.  Check that.
			 */
			if ((tcl->tcl_flags & TCLF_SPEER_DONE) &&
			    ((tcl->tcl_ltunid != 0 &&
			    tcl->tcl_ltunid != ptp->ptp_ltunid) ||
			    (tcl->tcl_rtunid != 0 &&
			    tcl->tcl_rtunid != ptp->ptp_rtunid) ||
			    (tcl->tcl_rsessid != 0 &&
			    tcl->tcl_rsessid != ptp->ptp_rsessid))) {
				rc = EINVAL;
				break;
			}
			if ((tcl->tcl_ltunid = ptp->ptp_ltunid) == 0 &&
			    tcl->tcl_style == PTS_L2FTP)
				tcl->tcl_ltunid = ptp->ptp_lsessid;
			tcl->tcl_rtunid = ptp->ptp_rtunid;
			tcl->tcl_rsessid = ptp->ptp_rsessid;
		}
		tcl->tcl_flags |= TCLF_SPEER_DONE;
		tcl->tcl_style = ptp->ptp_style;
		tcl->tcl_address = ptp->ptp_address;
		/* Reply with the now-current peer data (shared with GPEER). */
		goto fill_in_peer;

	case PPPTUN_GPEER:
		/* Client (device) side only */
		if (tcl == NULL) {
			rc = EINVAL;
			break;
		}
		if (mp->b_cont != NULL)
			freemsg(mp->b_cont);
		mp->b_cont = allocb(sizeof (*ptp), BPRI_HI);
		if (mp->b_cont == NULL) {
			rc = ENOSR;
			break;
		}
		ptp = (struct ppptun_peer *)mp->b_cont->b_rptr;
	fill_in_peer:
		ptp->ptp_style = tcl->tcl_style;
		ptp->ptp_flags = (tcl->tcl_flags & TCLF_DAEMON) ? PTPF_DAEMON :
		    0;
		ptp->ptp_ltunid = tcl->tcl_ltunid;
		ptp->ptp_rtunid = tcl->tcl_rtunid;
		ptp->ptp_lsessid = tcl->tcl_lsessid;
		ptp->ptp_rsessid = tcl->tcl_rsessid;
		ptp->ptp_address = tcl->tcl_address;
		len = sizeof (*ptp);
		break;

	case PPPTUN_SDATA:
	case PPPTUN_SCTL:
		/* Client (device) side only; must do SPEER first */
		if (tcl == NULL || mp->b_cont == NULL ||
		    iop->ioc_count != sizeof (*ptn) ||
		    *mp->b_cont->b_rptr == '\0') {
			rc = EINVAL;
			break;
		}
		if (!(tcl->tcl_flags & TCLF_SPEER_DONE)) {
			rc = EINVAL;
			break;
		}
		/* Attach this client to the named lower stream. */
		ptn = (union ppptun_name *)mp->b_cont->b_rptr;
		ptn->ptn_name[sizeof (ptn->ptn_name) - 1] = '\0';
		tll = tll_lookup_on_name(ptn->ptn_name);
		if (tll == NULL) {
			rc = ESRCH;
			break;
		}
		if (tll->tll_style != tcl->tcl_style) {
			rc = ENXIO;
			break;
		}
		if (iop->ioc_cmd == PPPTUN_SDATA) {
			if (tcl->tcl_data_tll != NULL) {
				rc = EEXIST;
				break;
			}
			/* server daemons cannot use regular data */
			if (tcl->tcl_flags & TCLF_DAEMON) {
				rc = EINVAL;
				break;
			}
			tcl->tcl_data_tll = tll;
		} else if (tcl->tcl_flags & TCLF_DAEMON) {
			/* Daemon becomes the default client for this tll. */
			if (tll->tll_defcl != NULL && tll->tll_defcl != tcl) {
				rc = EEXIST;
				break;
			}
			tll->tll_defcl = tcl;
			if (tcl->tcl_ctrl_tll != NULL) {
				KDECR(tcl->tcl_ctrl_tll, tll_kstats,
				    lks_clients);
			}
			tcl->tcl_ctrl_tll = tll;
		} else {
			if (tcl->tcl_ctrl_tll != NULL) {
				rc = EEXIST;
				break;
			}
			tcl->tcl_ctrl_tll = tll;
		}
		KLINCR(lks_clients);
		break;

	case PPPTUN_GDATA:
	case PPPTUN_GCTL:
		/* Client (device) side only */
		if (tcl == NULL) {
			rc = EINVAL;
			break;
		}
		if (mp->b_cont != NULL)
			freemsg(mp->b_cont);
		mp->b_cont = allocb(sizeof (*ptn), BPRI_HI);
		if (mp->b_cont == NULL) {
			rc = ENOSR;
			break;
		}
		ptn = (union ppptun_name *)mp->b_cont->b_rptr;
		if (iop->ioc_cmd == PPPTUN_GDATA)
			tll = tcl->tcl_data_tll;
		else
			tll = tcl->tcl_ctrl_tll;
		/* Unattached: return an all-zero name. */
		if (tll == NULL)
			bzero(ptn, sizeof (*ptn));
		else
			bcopy(tll->tll_name, ptn->ptn_name,
			    sizeof (ptn->ptn_name));
		len = sizeof (*ptn);
		break;

	case PPPTUN_DCTL:
		/* Client (device) side daemon mode only */
		if (tcl == NULL || mp->b_cont == NULL ||
		    iop->ioc_count != sizeof (*ptn) ||
		    !(tcl->tcl_flags & TCLF_DAEMON)) {
			rc = EINVAL;
			break;
		}
		ptn = (union ppptun_name *)mp->b_cont->b_rptr;
		ptn->ptn_name[sizeof (ptn->ptn_name) - 1] = '\0';
		tll = tll_lookup_on_name(ptn->ptn_name);
		/* Only the registered daemon may deregister itself. */
		if (tll == NULL || tll->tll_defcl != tcl) {
			rc = ESRCH;
			break;
		}
		tll->tll_defcl = NULL;
		break;

	default:
		/* Caller should already have checked command value */
		ASSERT(0);
	}
	if (rc != 0) {
		miocnak(q, mp, 0, rc);
	} else {
		/* Non-zero len means b_cont holds reply data to return. */
		if (len > 0)
			mp->b_cont->b_wptr = mp->b_cont->b_rptr + len;
		miocack(q, mp, len, 0);
	}
}
1464 
1465 /*
1466  * sppptun_ioctl()
1467  *
1468  * MT-Perimeters:
1469  *    shared inner, shared outer.
1470  *
1471  * Description:
1472  *    Called by sppptun_uwput as the result of receiving a M_IOCTL command.
1473  */
static void
sppptun_ioctl(queue_t *q, mblk_t *mp)
{
	struct iocblk *iop;
	int rc = 0;		/* non-zero causes miocnak */
	int len = 0;		/* reply length; sizeof (uint32_t) when val used */
	uint32_t val = 0;	/* 32-bit reply value for the simple cases */
	tunll_t *tll;

	iop = (struct iocblk *)mp->b_rptr;

	switch (iop->ioc_cmd) {
	/*
	 * Commands that touch shared tunnel state are deferred to
	 * sppptun_inner_ioctl() via qwriter so they run exclusive on
	 * the inner perimeter.
	 */
	case PPPIO_DEBUG:
	case PPPIO_GETSTAT:
	case PPPIO_GETSTAT64:
	case PPPTUN_SNAME:
	case PPPTUN_GNAME:
	case PPPTUN_SINFO:
	case PPPTUN_GINFO:
	case PPPTUN_GNNAME:
	case PPPTUN_LCLADDR:
	case PPPTUN_SPEER:
	case PPPTUN_GPEER:
	case PPPTUN_SDATA:
	case PPPTUN_GDATA:
	case PPPTUN_SCTL:
	case PPPTUN_GCTL:
	case PPPTUN_DCTL:
		qwriter(q, mp, sppptun_inner_ioctl, PERIM_INNER);
		return;

	case PPPIO_GCLEAN:	/* always clean */
		val = RCV_B7_1 | RCV_B7_0 | RCV_ODDP | RCV_EVNP;
		len = sizeof (uint32_t);
		break;

	case PPPIO_GTYPE:	/* we look like an async driver. */
		val = PPPTYP_AHDLC;
		len = sizeof (uint32_t);
		break;

	case PPPIO_CFLAGS:	/* never compress headers */
		val = 0;
		len = sizeof (uint32_t);
		break;

		/* quietly ack PPP things we don't need to do. */
	case PPPIO_XFCS:
	case PPPIO_RFCS:
	case PPPIO_XACCM:
	case PPPIO_RACCM:
	case PPPIO_LASTMOD:
	case PPPIO_MUX:
	case I_PLINK:
	case I_PUNLINK:
	case I_LINK:
	case I_UNLINK:
		break;

	default:
		tll = (tunll_t *)q->q_ptr;
		if (!(tll->tll_flags & TLLF_NOTLOWER)) {
			/* module side; pass this through. */
			putnext(q, mp);
			return;
		}
		/* driver (client) side: unknown ioctls are rejected. */
		rc = EINVAL;
		break;
	}
	/* Build the 32-bit reply block for the val/len cases above. */
	if (rc == 0 && len == sizeof (uint32_t)) {
		if (mp->b_cont != NULL)
			freemsg(mp->b_cont);
		mp->b_cont = allocb(sizeof (uint32_t), BPRI_HI);
		if (mp->b_cont == NULL) {
			rc = ENOSR;
		} else {
			*(uint32_t *)mp->b_cont->b_wptr = val;
			mp->b_cont->b_wptr += sizeof (uint32_t);
		}
	}
	if (rc == 0) {
		miocack(q, mp, len, 0);
	} else {
		miocnak(q, mp, 0, rc);
	}
}
1560 
1561 /*
1562  * sppptun_inner_mctl()
1563  *
1564  * MT-Perimeters:
1565  *    exclusive inner, shared outer.
1566  *
1567  * Description:
1568  *    Called by qwriter (via sppptun_uwput) as the result of receiving
1569  *    an M_CTL.  Called only on the client (driver) side.
1570  */
1571 static void
1572 sppptun_inner_mctl(queue_t *q, mblk_t *mp)
1573 {
1574 	int msglen;
1575 	tuncl_t *tcl;
1576 
1577 	tcl = q->q_ptr;
1578 
1579 	if (!(tcl->tcl_flags & TCLF_ISCLIENT)) {
1580 		freemsg(mp);
1581 		return;
1582 	}
1583 
1584 	msglen = MBLKL(mp);
1585 	switch (*mp->b_rptr) {
1586 	case PPPCTL_UNIT:
1587 		if (msglen == 2)
1588 			tcl->tcl_unit = mp->b_rptr[1];
1589 		else if (msglen == 8)
1590 			tcl->tcl_unit = ((uint32_t *)mp->b_rptr)[1];
1591 		break;
1592 	}
1593 	freemsg(mp);
1594 }
1595 
1596 /*
1597  * sppptun_uwput()
1598  *
1599  * MT-Perimeters:
1600  *    shared inner, shared outer.
1601  *
1602  * Description:
1603  *	Regular output data and controls pass through here.
1604  */
static void
sppptun_uwput(queue_t *q, mblk_t *mp)
{
	queue_t *nextq;
	tuncl_t *tcl;

	ASSERT(q->q_ptr != NULL);

	switch (MTYPE(mp)) {
	case M_DATA:
	case M_PROTO:
	case M_PCPROTO:
		/*
		 * Fast path: if nothing is queued, try to encapsulate and
		 * transmit directly.  sppptun_outpkt() returns the lower
		 * queue to send on, or NULL; in the latter case mp is
		 * either consumed (set to NULL) or must be queued for the
		 * service procedure to retry.
		 */
		if (q->q_first == NULL &&
		    (nextq = sppptun_outpkt(q, &mp)) != NULL) {
			putnext(nextq, mp);
		} else if (mp != NULL && !putq(q, mp)) {
			freemsg(mp);
		}
		break;
	case M_IOCTL:
		sppptun_ioctl(q, mp);
		break;
	case M_CTL:
		/* Needs exclusive inner perimeter; defer via qwriter. */
		qwriter(q, mp, sppptun_inner_mctl, PERIM_INNER);
		break;
	default:
		tcl = (tuncl_t *)q->q_ptr;
		/*
		 * If we're the driver, then discard unknown junk.
		 * Otherwise, if we're the module, then forward along.
		 */
		if (tcl->tcl_flags & TCLF_ISCLIENT)
			freemsg(mp);
		else
			putnext(q, mp);
		break;
	}
}
1643 
1644 /*
1645  * Send a DLPI/TPI control message to the driver but make sure there
1646  * is only one outstanding message.  Uses tll_msg_pending to tell when
1647  * it must queue.  sppptun_urput calls message_done() when an ACK or a
1648  * NAK is received to process the next queued message.
1649  */
1650 static void
1651 message_send(tunll_t *tll, mblk_t *mp)
1652 {
1653 	mblk_t **mpp;
1654 
1655 	if (tll->tll_msg_pending) {
1656 		/* Must queue message. Tail insertion */
1657 		mpp = &tll->tll_msg_deferred;
1658 		while (*mpp != NULL)
1659 			mpp = &((*mpp)->b_next);
1660 		*mpp = mp;
1661 		return;
1662 	}
1663 	tll->tll_msg_pending = 1;
1664 	putnext(tll->tll_wq, mp);
1665 }
1666 
1667 /*
1668  * Called when an DLPI/TPI control message has been acked or nacked to
1669  * send down the next queued message (if any).
1670  */
1671 static void
1672 message_done(tunll_t *tll)
1673 {
1674 	mblk_t *mp;
1675 
1676 	ASSERT(tll->tll_msg_pending);
1677 	tll->tll_msg_pending = 0;
1678 	mp = tll->tll_msg_deferred;
1679 	if (mp != NULL) {
1680 		tll->tll_msg_deferred = mp->b_next;
1681 		mp->b_next = NULL;
1682 		tll->tll_msg_pending = 1;
1683 		putnext(tll->tll_wq, mp);
1684 	}
1685 }
1686 
1687 /*
1688  * Send down queued "close" messages to lower stream.  These were
1689  * enqueued right after the stream was originally allocated, when the
1690  * tll_style was set by PPPTUN_SINFO.
1691  */
1692 static int
1693 tll_close_req(tunll_t *tll)
1694 {
1695 	mblk_t *mb, *mbnext;
1696 
1697 	if ((mb = tll->tll_onclose) == NULL)
1698 		tll->tll_flags |= TLLF_SHUTDOWN_DONE;
1699 	else {
1700 		tll->tll_onclose = NULL;
1701 		while (mb != NULL) {
1702 			mbnext = mb->b_next;
1703 			mb->b_next = NULL;
1704 			message_send(tll, mb);
1705 			mb = mbnext;
1706 		}
1707 	}
1708 	return (0);
1709 }
1710 
1711 /*
1712  * This function is called when a backenable occurs on the write side of a
1713  * lower stream.  It walks over the client streams, looking for ones that use
1714  * the given tunll_t lower stream.  Each client is then backenabled.
1715  */
1716 static void
1717 tclvm_backenable(void *arg, void *firstv, size_t numv)
1718 {
1719 	tunll_t *tll = arg;
1720 	int minorn = (int)(uintptr_t)firstv;
1721 	int minormax = minorn + numv;
1722 	tuncl_t *tcl;
1723 	queue_t *q;
1724 
1725 	while (minorn < minormax) {
1726 		tcl = tcl_slots[minorn - 1];
1727 		if ((tcl->tcl_data_tll == tll ||
1728 		    tcl->tcl_ctrl_tll == tll) &&
1729 		    (q = tcl->tcl_rq) != NULL) {
1730 			qenable(OTHERQ(q));
1731 		}
1732 		minorn++;
1733 	}
1734 }
1735 
1736 /*
1737  * sppptun_uwsrv()
1738  *
1739  * MT-Perimeters:
1740  *    exclusive inner, shared outer.
1741  *
1742  * Description:
1743  *    Upper write-side service procedure.  In addition to the usual
1744  *    STREAMS queue service handling, this routine also handles the
1745  *    transmission of the unbind/detach messages to the lower stream
1746  *    driver when a lower stream is being closed.  (See the use of
1747  *    qenable/qwait in sppptun_close().)
1748  */
static int
sppptun_uwsrv(queue_t *q)
{
	tuncl_t	*tcl;
	mblk_t *mp;
	queue_t *nextq;

	tcl = q->q_ptr;
	if (!(tcl->tcl_flags & TCLF_ISCLIENT)) {
		/* Module (lower) side: q_ptr is really a tunll_t. */
		tunll_t *tll = (tunll_t *)tcl;

		if ((tll->tll_flags & (TLLF_CLOSING|TLLF_CLOSE_DONE)) ==
		    TLLF_CLOSING) {
			/*
			 * Close in progress and not yet handled: send the
			 * queued unbind/detach messages down exactly once.
			 */
			tll->tll_error = tll_close_req(tll);
			tll->tll_flags |= TLLF_CLOSE_DONE;
		} else {
			/*
			 * We've been enabled here because of a backenable on
			 * output flow control.  Backenable clients using this
			 * lower layer.
			 */
			vmem_walk(tcl_minor_arena, VMEM_ALLOC, tclvm_backenable,
			    tll);
		}
		return (0);
	}

	/*
	 * Client side: drain queued packets.  sppptun_outpkt() returning
	 * NULL with mp still set means downstream is flow-controlled; put
	 * the message back and stop until the next backenable.
	 */
	while ((mp = getq(q)) != NULL) {
		if ((nextq = sppptun_outpkt(q, &mp)) != NULL) {
			putnext(nextq, mp);
		} else if (mp != NULL) {
			(void) putbq(q, mp);
			break;
		}
	}
	return (0);
}
1786 
1787 /*
1788  * sppptun_lwput()
1789  *
1790  * MT-Perimeters:
1791  *    shared inner, shared outer.
1792  *
1793  * Description:
1794  *    Lower write-side put procedure.  Nothing should be sending
1795  *    packets down this stream.
1796  */
1797 static void
1798 sppptun_lwput(queue_t *q, mblk_t *mp)
1799 {
1800 	switch (MTYPE(mp)) {
1801 	case M_PROTO:
1802 		putnext(q, mp);
1803 		break;
1804 	default:
1805 		freemsg(mp);
1806 		break;
1807 	}
1808 }
1809 
1810 /*
1811  * sppptun_lrput()
1812  *
1813  * MT-Perimeters:
1814  *    shared inner, shared outer.
1815  *
1816  * Description:
1817  *    Lower read-side put procedure.  Nothing should arrive here.
1818  */
static void
sppptun_lrput(queue_t *q, mblk_t *mp)
{
	tuncl_t *tcl;

	switch (MTYPE(mp)) {
	case M_IOCTL:
		/* No ioctls are expected from below; reject outright. */
		miocnak(q, mp, 0, EINVAL);
		return;
	case M_FLUSH:
		/* Standard STREAMS flush handling: flush read side, ... */
		if (*mp->b_rptr & FLUSHR) {
			flushq(q, FLUSHDATA);
		}
		/* ... then reflect a write-side flush back downstream. */
		if (*mp->b_rptr & FLUSHW) {
			*mp->b_rptr &= ~FLUSHR;
			qreply(q, mp);
		} else {
			freemsg(mp);
		}
		return;
	}
	/*
	 * Try to forward the message to the put procedure for the upper
	 * control stream for this lower stream. If there are already messages
	 * queued here, queue this one up to preserve message ordering.
	 */
	if ((tcl = (tuncl_t *)q->q_ptr) == NULL || tcl->tcl_rq == NULL) {
		freemsg(mp);
		return;
	}
	/* Priority messages bypass flow control; others honor canput(). */
	if (queclass(mp) == QPCTL ||
	    (q->q_first == NULL && canput(tcl->tcl_rq))) {
		put(tcl->tcl_rq, mp);
	} else {
		if (!putq(q, mp))
			freemsg(mp);
	}
}
1857 
1858 /*
1859  * MT-Perimeters:
1860  *    shared inner, shared outer.
1861  *
1862  *    Handle non-data DLPI messages.  Used with PPPoE, which runs over
1863  *    Ethernet only.
1864  */
static void
urput_dlpi(queue_t *q, mblk_t *mp)
{
	int err;
	union DL_primitives *dlp = (union DL_primitives *)mp->b_rptr;
	tunll_t *tll = q->q_ptr;
	size_t mlen = MBLKL(mp);

	switch (dlp->dl_primitive) {
	case DL_UDERROR_IND:
		break;

	case DL_ERROR_ACK:
		/* Ignore truncated acks rather than read past the data. */
		if (mlen < DL_ERROR_ACK_SIZE)
			break;
		err = dlp->error_ack.dl_unix_errno ?
		    dlp->error_ack.dl_unix_errno : ENXIO;
		switch (dlp->error_ack.dl_error_primitive) {
		case DL_UNBIND_REQ:
			message_done(tll);
			break;
		case DL_DETACH_REQ:
			/* Detach failed: record error, but shutdown ends. */
			message_done(tll);
			tll->tll_error = err;
			tll->tll_flags |= TLLF_SHUTDOWN_DONE;
			break;
		case DL_PHYS_ADDR_REQ:
			message_done(tll);
			break;
		case DL_INFO_REQ:
		case DL_ATTACH_REQ:
		case DL_BIND_REQ:
			message_done(tll);
			tll->tll_error = err;
			break;
		}
		break;

	case DL_INFO_ACK:
		message_done(tll);
		break;

	case DL_BIND_ACK:
		message_done(tll);
		break;

	case DL_PHYS_ADDR_ACK:
		break;

	case DL_OK_ACK:
		if (mlen < DL_OK_ACK_SIZE)
			break;
		switch (dlp->ok_ack.dl_correct_primitive) {
		case DL_UNBIND_REQ:
			message_done(tll);
			break;
		case DL_DETACH_REQ:
			/*
			 * NOTE(review): no message_done() here, unlike the
			 * other acks -- presumably detach is the final
			 * queued message and the stream is closing; confirm.
			 */
			tll->tll_flags |= TLLF_SHUTDOWN_DONE;
			break;
		case DL_ATTACH_REQ:
			message_done(tll);
			break;
		}
		break;
	}
	/* All DLPI acks are consumed here; none propagate upward. */
	freemsg(mp);
}
1932 
/* Search structure used with PPPoE only; see tclvm_pppoe_search(). */
struct poedat {
	uint_t sessid;		/* remote session ID to match */
	tunll_t *tll;		/* lower stream the packet arrived on */
	const void *srcaddr;	/* peer Ethernet (MAC) address */
	int isdata;		/* non-zero: match data stream, else control */
	tuncl_t *tcl;		/* output: matching client, or NULL */
};
1941 
1942 /*
1943  * This function is called by vmem_walk from within sppptun_recv.  It
1944  * iterates over a span of allocated minor node numbers to search for
1945  * the appropriate lower stream, session ID, and peer MAC address.
1946  *
1947  * (This is necessary due to a design flaw in the PPPoE protocol
1948  * itself.  The protocol assigns session IDs from the server side
1949  * only.  Both server and client use the same number.  Thus, if there
1950  * are multiple clients on a single host, there can be session ID
1951  * conflicts between servers and there's no way to detangle them
1952  * except by looking at the remote MAC address.)
1953  *
1954  * (This could have been handled by linking together sessions that
1955  * differ only in the remote MAC address.  This isn't done because it
1956  * would involve extra per-session storage and it's very unlikely that
1957  * PPPoE would be used this way.)
1958  */
1959 static void
1960 tclvm_pppoe_search(void *arg, void *firstv, size_t numv)
1961 {
1962 	struct poedat *poedat = (struct poedat *)arg;
1963 	int minorn = (int)(uintptr_t)firstv;
1964 	int minormax = minorn + numv;
1965 	tuncl_t *tcl;
1966 
1967 	if (poedat->tcl != NULL)
1968 		return;
1969 	while (minorn < minormax) {
1970 		tcl = tcl_slots[minorn - 1];
1971 		ASSERT(tcl != NULL);
1972 		if (tcl->tcl_rsessid == poedat->sessid &&
1973 		    ((!poedat->isdata && tcl->tcl_ctrl_tll == poedat->tll) ||
1974 		    (poedat->isdata && tcl->tcl_data_tll == poedat->tll)) &&
1975 		    bcmp(tcl->tcl_address.pta_pppoe.ptma_mac,
1976 		    poedat->srcaddr,
1977 		    sizeof (tcl->tcl_address.pta_pppoe.ptma_mac)) == 0) {
1978 			poedat->tcl = tcl;
1979 			break;
1980 		}
1981 		minorn++;
1982 	}
1983 }
1984 
1985 /*
1986  * sppptun_recv()
1987  *
1988  * MT-Perimeters:
1989  *    shared inner, shared outer.
1990  *
1991  * Description:
1992  *    Receive function called by sppptun_urput, which is called when
1993  *    the lower read-side put or service procedure sends a message
1994  *    upstream to the a device user (PPP).  It attempts to find an
1995  *    appropriate queue on the module above us (depending on what the
1996  *    associated upper stream for the protocol would be), and if not
1997  *    possible, it will find an upper control stream for the protocol.
1998  *    Returns a pointer to the upper queue_t, or NULL if the message
1999  *    has been discarded.
2000  *
2001  * About demultiplexing:
2002  *
2003  *	All four protocols (L2F, PPTP, L2TP, and PPPoE) support a
2004  *	locally assigned ID for demultiplexing incoming traffic.  For
2005  *	L2F, this is called the Client ID, for PPTP the Call ID, for
2006  *	L2TP the Session ID, and for PPPoE the SESSION_ID.  This is a
2007  *	16 bit number for all four protocols, and is used to directly
2008  *	index into a list of upper streams.  With the upper stream in
2009  *	hand, we verify that this is the right stream and deliver the
2010  *	data.
2011  *
2012  *	L2TP has a Tunnel ID, which represents a bundle of PPP
2013  *	sessions between the peers.  Because we always assign unique
2014  *	session ID numbers, we merely check that the given ID matches
2015  *	the assigned ID for the upper stream.
2016  *
2017  *	L2F has a Multiplex ID, which is unique per connection.  It
2018  *	does not have L2TP's concept of multiple-connections-within-
2019  *	a-tunnel.  The same checking is done.
2020  *
2021  *	PPPoE is a horribly broken protocol.  Only one ID is assigned
2022  *	per connection.  The client must somehow demultiplex based on
2023  *	an ID number assigned by the server.  It's not necessarily
2024  *	unique.  The search is done based on {ID,peerEthernet} (using
2025  *	tcl_rsessid) for all packet types except PADI and PADS.
2026  *
2027  *	Neither PPPoE nor PPTP supports additional ID numbers.
2028  *
2029  *	Both L2F and L2TP come in over UDP.  They are distinguished by
2030  *	looking at the GRE version field -- 001 for L2F and 010 for
2031  *	L2TP.
2032  */
2033 static queue_t *
2034 sppptun_recv(queue_t *q, mblk_t **mpp, const void *srcaddr)
2035 {
2036 	mblk_t *mp;
2037 	tunll_t *tll;
2038 	tuncl_t *tcl;
2039 	int sessid;
2040 	int remlen;
2041 	int msglen;
2042 	int isdata;
2043 	int i;
2044 	const uchar_t *ucp;
2045 	const poep_t *poep;
2046 	mblk_t *mnew;
2047 	ppptun_atype *pap;
2048 
2049 	mp = *mpp;
2050 
2051 	tll = q->q_ptr;
2052 	ASSERT(!(tll->tll_flags & TLLF_NOTLOWER));
2053 
2054 	tcl = NULL;
2055 	switch (tll->tll_style) {
2056 	case PTS_PPPOE:
2057 		/* Note that poep_t alignment is uint16_t */
2058 		if ((!IS_P2ALIGNED(mp->b_rptr, sizeof (uint16_t)) ||
2059 		    MBLKL(mp) < sizeof (poep_t)) &&
2060 		    !pullupmsg(mp, sizeof (poep_t)))
2061 			break;
2062 		poep = (const poep_t *)mp->b_rptr;
2063 		if (poep->poep_version_type != POE_VERSION)
2064 			break;
2065 		/*
2066 		 * First, extract a session ID number.  All protocols have
2067 		 * this.
2068 		 */
2069 		isdata = (poep->poep_code == POECODE_DATA);
2070 		sessid = ntohs(poep->poep_session_id);
2071 		remlen = sizeof (*poep);
2072 		msglen = ntohs(poep->poep_length);
2073 		i = poep->poep_code;
2074 		if (i == POECODE_PADI || i == POECODE_PADR) {
2075 			/* These go to the server daemon only. */
2076 			tcl = tll->tll_defcl;
2077 		} else if (i == POECODE_PADO || i == POECODE_PADS) {
2078 			/*
2079 			 * These go to a client only, and are demuxed
2080 			 * by the Host-Uniq field (into which we stuff
2081 			 * our local ID number when generating
2082 			 * PADI/PADR).
2083 			 */
2084 			ucp = (const uchar_t *)(poep + 1);
2085 			i = msglen;
2086 			while (i > POET_HDRLEN) {
2087 				if (POET_GET_TYPE(ucp) == POETT_END) {
2088 					i = 0;
2089 					break;
2090 				}
2091 				if (POET_GET_TYPE(ucp) == POETT_UNIQ &&
2092 				    POET_GET_LENG(ucp) >= sizeof (uint32_t))
2093 					break;
2094 				i -= POET_GET_LENG(ucp) + POET_HDRLEN;
2095 				ucp = POET_NEXT(ucp);
2096 			}
2097 			if (i >= POET_HDRLEN + 4)
2098 				sessid = GETLONG(ucp + POET_HDRLEN);
2099 			tcl = tcl_by_minor((minor_t)sessid);
2100 		} else {
2101 			/*
2102 			 * Try minor number as session ID first, since
2103 			 * it's used that way on server side.  It's
2104 			 * not used that way on the client, though, so
2105 			 * this might not work.  If this isn't the
2106 			 * right one, then try the tll cache.  If
2107 			 * neither is right, then search all open
2108 			 * clients.  Did I mention that the PPPoE
2109 			 * protocol is badly designed?
2110 			 */
2111 			tcl = tcl_by_minor((minor_t)sessid);
2112 			if (tcl == NULL ||
2113 			    (!isdata && tcl->tcl_ctrl_tll != tll) ||
2114 			    (isdata && tcl->tcl_data_tll != tll) ||
2115 			    sessid != tcl->tcl_rsessid ||
2116 			    bcmp(srcaddr, tcl->tcl_address.pta_pppoe.ptma_mac,
2117 			    sizeof (tcl->tcl_address.pta_pppoe.ptma_mac)) != 0)
2118 				tcl = tll->tll_lastcl;
2119 			if (tcl == NULL ||
2120 			    (!isdata && tcl->tcl_ctrl_tll != tll) ||
2121 			    (isdata && tcl->tcl_data_tll != tll) ||
2122 			    sessid != tcl->tcl_rsessid ||
2123 			    bcmp(srcaddr, tcl->tcl_address.pta_pppoe.ptma_mac,
2124 			    sizeof (tcl->tcl_address.pta_pppoe.ptma_mac)) != 0)
2125 				tcl = NULL;
2126 			if (tcl == NULL && sessid != 0) {
2127 				struct poedat poedat;
2128 
2129 				/*
2130 				 * Slow mode.  Too bad.  If you don't like it,
2131 				 * you can always choose a better protocol.
2132 				 */
2133 				poedat.sessid = sessid;
2134 				poedat.tll = tll;
2135 				poedat.srcaddr = srcaddr;
2136 				poedat.tcl = NULL;
2137 				poedat.isdata = isdata;
2138 				vmem_walk(tcl_minor_arena, VMEM_ALLOC,
2139 				    tclvm_pppoe_search, &poedat);
2140 				KLINCR(lks_walks);
2141 				if ((tcl = poedat.tcl) != NULL) {
2142 					tll->tll_lastcl = tcl;
2143 					KCINCR(cks_walks);
2144 				}
2145 			}
2146 		}
2147 		break;
2148 	}
2149 
2150 	if (tcl == NULL || tcl->tcl_rq == NULL) {
2151 		DTRACE_PROBE3(sppptun__recv__discard, int, sessid,
2152 		    tuncl_t *, tcl, mblk_t *, mp);
2153 		if (tcl == NULL) {
2154 			KLINCR(lks_in_nomatch);
2155 		}
2156 		if (isdata) {
2157 			KLINCR(lks_indata_drops);
2158 			if (tcl != NULL)
2159 				tcl->tcl_stats.ppp_ierrors++;
2160 		} else {
2161 			KLINCR(lks_inctrl_drops);
2162 			if (tcl != NULL) {
2163 				KCINCR(cks_inctrl_drops);
2164 			}
2165 		}
2166 		freemsg(mp);
2167 		return (NULL);
2168 	}
2169 
2170 	if (tcl->tcl_data_tll == tll && isdata) {
2171 		if (!adjmsg(mp, remlen) ||
2172 		    (i = msgsize(mp)) < msglen ||
2173 		    (i > msglen && !adjmsg(mp, msglen - i))) {
2174 			KLINCR(lks_indata_drops);
2175 			tcl->tcl_stats.ppp_ierrors++;
2176 			freemsg(mp);
2177 			return (NULL);
2178 		}
2179 		/* XXX -- address/control handling in pppd needs help. */
2180 		if (*mp->b_rptr != 0xFF) {
2181 			if ((mp = prependb(mp, 2, 1)) == NULL) {
2182 				KLINCR(lks_indata_drops);
2183 				tcl->tcl_stats.ppp_ierrors++;
2184 				return (NULL);
2185 			}
2186 			mp->b_rptr[0] = 0xFF;
2187 			mp->b_rptr[1] = 0x03;
2188 		}
2189 		MTYPE(mp) = M_DATA;
2190 		tcl->tcl_stats.ppp_ibytes += msgsize(mp);
2191 		tcl->tcl_stats.ppp_ipackets++;
2192 		KLINCR(lks_indata);
2193 	} else {
2194 		if (isdata || tcl->tcl_ctrl_tll != tll ||
2195 		    (mnew = make_control(tcl, tll, PTCA_CONTROL, tcl)) ==
2196 		    NULL) {
2197 			KLINCR(lks_inctrl_drops);
2198 			KCINCR(cks_inctrl_drops);
2199 			freemsg(mp);
2200 			return (NULL);
2201 		}
2202 		/* Fix up source address; peer might not be set yet. */
2203 		pap = &((struct ppptun_control *)mnew->b_rptr)->ptc_address;
2204 		bcopy(srcaddr, pap->pta_pppoe.ptma_mac,
2205 		    sizeof (pap->pta_pppoe.ptma_mac));
2206 		mnew->b_cont = mp;
2207 		mp = mnew;
2208 		KLINCR(lks_inctrls);
2209 		KCINCR(cks_inctrls);
2210 	}
2211 	*mpp = mp;
2212 	return (tcl->tcl_rq);
2213 }
2214 
2215 /*
2216  * sppptun_urput()
2217  *
2218  * MT-Perimeters:
2219  *    shared inner, shared outer.
2220  *
2221  * Description:
2222  *    Upper read-side put procedure.  Messages from the underlying
2223  *    lower stream driver arrive here.  See sppptun_recv for the
2224  *    demultiplexing logic.
2225  */
static void
sppptun_urput(queue_t *q, mblk_t *mp)
{
	union DL_primitives *dlprim;
	mblk_t *mpnext;
	tunll_t *tll;
	queue_t *nextq;

	/* q is a lower stream; its q_ptr is the per-tunnel tunll_t. */
	tll = q->q_ptr;
	ASSERT(!(tll->tll_flags & TLLF_NOTLOWER));

	switch (MTYPE(mp)) {
	case M_DATA:
		/*
		 * When we're bound over IP, data arrives here.  The
		 * packet starts with the IP header itself.
		 */
		if ((nextq = sppptun_recv(q, &mp, NULL)) != NULL)
			putnext(nextq, mp);
		break;

	case M_PROTO:
	case M_PCPROTO:
		/* Data arrives here for UDP or raw Ethernet, not IP. */
		switch (tll->tll_style) {
			/* PPTP control messages are over TCP only. */
		case PTS_PPTP:
		default:
			ASSERT(0);	/* how'd that happen? */
			break;

		case PTS_PPPOE:		/* DLPI message */
			/* Must hold at least the DLPI primitive code. */
			if (MBLKL(mp) < sizeof (t_uscalar_t))
				break;
			dlprim = (union DL_primitives *)mp->b_rptr;
			switch (dlprim->dl_primitive) {
			case DL_UNITDATA_IND: {
				size_t mlen = MBLKL(mp);

				if (mlen < DL_UNITDATA_IND_SIZE)
					break;
				/*
				 * The advertised source address must lie
				 * entirely within this message block,
				 * beyond the fixed-size indication header;
				 * otherwise discard (break falls through
				 * to freemsg below).
				 */
				if (dlprim->unitdata_ind.dl_src_addr_offset <
				    DL_UNITDATA_IND_SIZE ||
				    dlprim->unitdata_ind.dl_src_addr_offset +
				    dlprim->unitdata_ind.dl_src_addr_length >
				    mlen)
					break;
			}
				/* FALLTHROUGH */
			case DL_UNITDATA_REQ:	/* For loopback support. */
				if (dlprim->dl_primitive == DL_UNITDATA_REQ &&
				    MBLKL(mp) < DL_UNITDATA_REQ_SIZE)
					break;
				if ((mpnext = mp->b_cont) == NULL)
					break;
				MTYPE(mpnext) = M_DATA;
				/*
				 * For an indication, the peer address is in
				 * the DLPI header; for a looped-back
				 * request, use our own local MAC address
				 * as the source.
				 */
				nextq = sppptun_recv(q, &mpnext,
				    dlprim->dl_primitive == DL_UNITDATA_IND ?
				    mp->b_rptr +
				    dlprim->unitdata_ind.dl_src_addr_offset :
				    tll->tll_lcladdr.pta_pppoe.ptma_mac);
				if (nextq != NULL)
					putnext(nextq, mpnext);
				/*
				 * sppptun_recv consumed or forwarded the
				 * payload; free only the DLPI header block.
				 */
				freeb(mp);
				return;

			default:
				/* Other DLPI primitives (acks etc.). */
				urput_dlpi(q, mp);
				return;
			}
			break;
		}
		/* Malformed or unhandled; drop the entire message chain. */
		freemsg(mp);
		break;

	default:
		freemsg(mp);
		break;
	}
}
2306 
2307 /*
2308  * sppptun_ursrv()
2309  *
2310  * MT-Perimeters:
2311  *    exclusive inner, shared outer.
2312  *
2313  * Description:
2314  *    Upper read-side service procedure.  This procedure services the
2315  *    client streams.  We get here because the client (PPP) asserts
2316  *    flow control down to us.
2317  */
2318 static int
2319 sppptun_ursrv(queue_t *q)
2320 {
2321 	mblk_t		*mp;
2322 
2323 	ASSERT(q->q_ptr != NULL);
2324 
2325 	while ((mp = getq(q)) != NULL) {
2326 		if (canputnext(q)) {
2327 			putnext(q, mp);
2328 		} else {
2329 			(void) putbq(q, mp);
2330 			break;
2331 		}
2332 	}
2333 	return (0);
2334 }
2335 
2336 /*
2337  * Dummy constructor/destructor functions for kmem_cache_create.
2338  * We're just using kmem as an allocator of integers, not real
2339  * storage.
2340  */
2341 
/*ARGSUSED*/
static int
tcl_constructor(void *maddr, void *arg, int kmflags)
{
	/* Nothing to initialize; the cache backs integer handles only. */
	return (0);
}
2348 
/*ARGSUSED*/
static void
tcl_destructor(void *maddr, void *arg)
{
	/* Nothing to tear down; see the dummy constructor above. */
}
2354 
2355 /*
2356  * Total size occupied by one tunnel client.  Each tunnel client
2357  * consumes one pointer for tcl_slots array, one tuncl_t structure and
2358  * two messages preallocated for close.
2359  */
2360 #define	TUNCL_SIZE (sizeof (tuncl_t) + sizeof (tuncl_t *) + \
2361 			2 * sizeof (dblk_t))
2362 
2363 /*
2364  * Clear all bits of x except the highest bit
2365  */
2366 #define	truncate(x) 	((x) <= 2 ? (x) : (1 << (highbit(x) - 1)))
2367 
2368 /*
2369  * This function initializes some well-known global variables inside
2370  * the module.
2371  *
2372  * Called by sppptun_mod.c:_init() before installing the module.
2373  */
2374 void
2375 sppptun_init(void)
2376 {
2377 	tunll_list.q_forw = tunll_list.q_back = &tunll_list;
2378 }
2379 
2380 /*
2381  * This function allocates the initial internal storage for the
2382  * sppptun driver.
2383  *
2384  * Called by sppptun_mod.c:_init() after installing module.
2385  */
2386 void
2387 sppptun_tcl_init(void)
2388 {
2389 	uint_t i, j;
2390 
2391 	rw_init(&tcl_rwlock, NULL, RW_DRIVER, NULL);
2392 	rw_enter(&tcl_rwlock, RW_WRITER);
2393 	tcl_nslots = sppptun_init_cnt;
2394 	tcl_slots = kmem_zalloc(tcl_nslots * sizeof (tuncl_t *), KM_SLEEP);
2395 
2396 	tcl_cache = kmem_cache_create("sppptun_map", sizeof (tuncl_t), 0,
2397 	    tcl_constructor, tcl_destructor, NULL, NULL, NULL, 0);
2398 
2399 	/* Allocate integer space for minor numbers */
2400 	tcl_minor_arena = vmem_create("sppptun_minor", (void *)1, tcl_nslots,
2401 	    1, NULL, NULL, NULL, 0, VM_SLEEP | VMC_IDENTIFIER);
2402 
2403 	/*
2404 	 * Calculate available number of tunnels - how many tunnels
2405 	 * can we allocate in sppptun_pctofmem % of available
2406 	 * memory.  The value is rounded up to the nearest power of 2.
2407 	 */
2408 	i = (sppptun_pctofmem * kmem_maxavail()) / (100 * TUNCL_SIZE);
2409 	j = truncate(i);	/* i with non-high bits stripped */
2410 	if (i != j)
2411 		j *= 2;
2412 	tcl_minormax = j;
2413 	rw_exit(&tcl_rwlock);
2414 }
2415 
2416 /*
2417  * This function checks that there are no plumbed streams or other users.
2418  *
2419  * Called by sppptun_mod.c:_fini().  Assumes that we're exclusive on
2420  * both perimeters.
2421  */
2422 int
2423 sppptun_tcl_fintest(void)
2424 {
2425 	if (tunll_list.q_forw != &tunll_list || tcl_inuse > 0)
2426 		return (EBUSY);
2427 	else
2428 		return (0);
2429 }
2430 
2431 /*
2432  * If no lower streams are plumbed, then this function deallocates all
2433  * internal storage in preparation for unload.
2434  *
2435  * Called by sppptun_mod.c:_fini().  Assumes that we're exclusive on
2436  * both perimeters.
2437  */
2438 void
2439 sppptun_tcl_fini(void)
2440 {
2441 	if (tcl_minor_arena != NULL) {
2442 		vmem_destroy(tcl_minor_arena);
2443 		tcl_minor_arena = NULL;
2444 	}
2445 	if (tcl_cache != NULL) {
2446 		kmem_cache_destroy(tcl_cache);
2447 		tcl_cache = NULL;
2448 	}
2449 	kmem_free(tcl_slots, tcl_nslots * sizeof (tuncl_t *));
2450 	tcl_slots = NULL;
2451 	rw_destroy(&tcl_rwlock);
2452 	ASSERT(tcl_slots == NULL);
2453 	ASSERT(tcl_cache == NULL);
2454 	ASSERT(tcl_minor_arena == NULL);
2455 }
2456