xref: /illumos-gate/usr/src/uts/common/io/ppp/sppptun/sppptun.c (revision f18d8787c0ba765f61b003e2aae78db90b48f833)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/types.h>
28 #include <sys/debug.h>
29 #include <sys/param.h>
30 #include <sys/stat.h>
31 #include <sys/systm.h>
32 #include <sys/socket.h>
33 #include <sys/stream.h>
34 #include <sys/stropts.h>
35 #include <sys/errno.h>
36 #include <sys/time.h>
37 #include <sys/cmn_err.h>
38 #include <sys/sdt.h>
39 #include <sys/conf.h>
40 #include <sys/dlpi.h>
41 #include <sys/ddi.h>
42 #include <sys/kstat.h>
43 #include <sys/strsun.h>
44 #include <sys/bitmap.h>
45 #include <sys/sysmacros.h>
46 #include <sys/note.h>
47 #include <sys/policy.h>
48 #include <net/ppp_defs.h>
49 #include <net/pppio.h>
50 #include <net/sppptun.h>
51 #include <net/pppoe.h>
52 #include <netinet/in.h>
53 
54 #include "s_common.h"
55 #include "sppptun_mod.h"
56 #include "sppptun_impl.h"
57 
/*
 * Compile-time defaults for the sppptun_init_cnt and sppptun_pctofmem
 * tunables defined later in this file.
 */
#define	NTUN_INITIAL 16			/* Initial number of sppptun slots */
#define	NTUN_PERCENT 5			/* Percent of memory to use */
60 
/*
 * This is used to tag official Solaris sources.  Please do not define
 * "INTERNAL_BUILD" when building this software outside of Sun
 * Microsystems.
 *
 * Two description strings are exported, one for the driver personality
 * and one for the module personality.  Builds without INTERNAL_BUILD also
 * embed a build-time stamp string (with a " DEBUG" suffix when DEBUG is
 * defined).
 */
#ifdef INTERNAL_BUILD
/* MODINFO is limited to 32 characters. */
const char sppptun_driver_description[] = "PPP 4.0 tunnel driver";
const char sppptun_module_description[] = "PPP 4.0 tunnel module";
#else
const char sppptun_driver_description[] = "ANU PPP tundrv";
const char sppptun_module_description[] = "ANU PPP tunmod";

/* Build stamp; not referenced in the portion of the file reviewed here. */
/* LINTED */
static const char buildtime[] = "Built " __DATE__ " at " __TIME__
#ifdef DEBUG
" DEBUG"
#endif
"\n";
#endif
81 
/*
 * Tunable values; these are similar to the values used in ptms_conf.c.
 * Override these settings via /etc/system.
 *
 * tuncl_alloc() always permits a new tunnel while fewer than sppptun_cnt
 * are in use.  Beyond that it enforces a ceiling: tcl_minormax when
 * sppptun_max_pty is zero, otherwise the smaller of sppptun_max_pty and
 * tcl_minormax.
 *
 * NOTE(review): sppptun_init_cnt and sppptun_pctofmem are not consumed in
 * the part of the file reviewed here; presumably they size the initial
 * slot table and tcl_minormax at load time -- confirm against the
 * initialization code.
 */
uint_t	sppptun_cnt = 0;		/* Minimum number of tunnels */
size_t	sppptun_max_pty = 0;		/* Maximum number of tunnels */
uint_t	sppptun_init_cnt = NTUN_INITIAL; /* Initial number of tunnel slots */
uint_t	sppptun_pctofmem = NTUN_PERCENT; /* Percent of memory to use */
90 
/*
 * Destination address block placed immediately after the
 * dl_unitdata_req_t header in outgoing DL_UNITDATA_REQ messages (see
 * sppptun_outpkt): the peer's Ethernet address followed by the SAP.  The
 * SAP is stored as-is from tll_sap, i.e. in host byte order.
 */
typedef struct ether_dest_s {
	ether_addr_t addr;
	ushort_t type;
} ether_dest_t;
95 
/*
 * Assemble a 32-bit value from the four consecutive bytes at x, treating
 * x[0] as the most significant byte.  Reading a byte at a time allows
 * unaligned access.
 *
 * Each byte is widened to uint32_t before shifting; shifting a promoted
 * int left by 24 would move a byte of 0x80 or above into the sign bit,
 * which is undefined behaviour and yields a negative result in practice.
 *
 * The argument is evaluated four times, so it must not have side effects.
 */
#define	GETLONG(x)	(((uint32_t)(x)[0] << 24) | \
			((uint32_t)(x)[1] << 16) | \
			((uint32_t)(x)[2] << 8) | \
			(uint32_t)(x)[3])
98 
/*
 * Statistic name tables handed to kstat_setup() when a lower stream (tll)
 * or a client stream (tcl) is opened.  The names themselves come from the
 * TLL_KSTATS_NAMES and TCL_KSTATS_NAMES macros, which are defined outside
 * this file.
 */
static const char *tll_kstats_list[] = { TLL_KSTATS_NAMES };
static const char *tcl_kstats_list[] = { TCL_KSTATS_NAMES };
101 
/*
 * Named-kstat counter helpers.
 *
 * KREF(p, m, vn) is the 64-bit counter lvalue for statistic vn inside
 * member m of the structure that p points to; KINCR and KDECR
 * pre-increment and pre-decrement that counter.  The pointer argument and
 * the expansions are fully parenthesised so that the macros behave
 * correctly when given an expression or when used inside a larger one.
 *
 * KLINCR/KLDECR and KCINCR/KCDECR are shorthands that operate on the
 * tll_kstats and tcl_kstats members respectively.  They rely on a
 * variable named "tll" or "tcl" being in scope at the point of use.
 */
#define	KREF(p, m, vn)	((p)->m.vn.value.ui64)
#define	KINCR(p, m, vn)	(++KREF(p, m, vn))
#define	KDECR(p, m, vn)	(--KREF(p, m, vn))

#define	KLINCR(vn)	KINCR(tll, tll_kstats, vn)
#define	KLDECR(vn)	KDECR(tll, tll_kstats, vn)

#define	KCINCR(vn)	KINCR(tcl, tcl_kstats, vn)
#define	KCDECR(vn)	KDECR(tcl, tcl_kstats, vn)
111 
/*
 * STREAMS entry points referenced by the qinit tables below.  The "u"
 * routines serve the upper (client-facing) queues and the "l" routines
 * serve the lower (multiplexed) queues; "r" and "w" denote the read and
 * write sides.
 */
static int	sppptun_open(queue_t *, dev_t *, int, int, cred_t *);
static int	sppptun_close(queue_t *, int, cred_t *);
static void	sppptun_urput(queue_t *, mblk_t *);
static void	sppptun_uwput(queue_t *, mblk_t *);
static int	sppptun_ursrv(queue_t *);
static int	sppptun_uwsrv(queue_t *);
static void	sppptun_lrput(queue_t *, mblk_t *);
static void	sppptun_lwput(queue_t *, mblk_t *);
120 
/*
 * This is the table of clients.  Clients are the programs that open
 * /dev/sppptun as a device.  There may be a large number of these; one
 * per tunneled PPP session.  The table is a plain array indexed directly
 * by (minor number - 1); it is doubled in size by tcl_grow() when the
 * minor-number arena is exhausted.
 *
 * Note: slots are offset from minor node value by 1 because
 * vmem_alloc returns 0 for failure.
 *
 * The tcl_slots array entries are modified only when exclusive on
 * both inner and outer perimeters.  This ensures that threads on
 * shared perimeters always view this as unchanging memory with no
 * need to lock around accesses.  (Specifically, the tcl_slots array
 * is modified by entry to sppptun_open, sppptun_close, and _fini.)
 *
 * tuncl_alloc() and tuncl_free() additionally hold tcl_rwlock as writer
 * while they change the table and the tcl_inuse count.
 */
static tuncl_t **tcl_slots = NULL;	/* Slots for tuncl_t */
static size_t tcl_nslots = 0;		/* Size of slot array */
static size_t tcl_minormax = 0;		/* Maximum number of tunnels */
static size_t tcl_inuse = 0;		/* # of tunnels currently allocated */
static krwlock_t tcl_rwlock;
static struct kmem_cache *tcl_cache = NULL;	/* tunnel cache */
static vmem_t *tcl_minor_arena = NULL; /* Arena for device minors */
142 
/*
 * This is the simple list of lower layers.  For PPPoE, there is one
 * of these per Ethernet interface.  Lower layers are established by
 * "plumbing" -- using I_PLINK to connect the tunnel multiplexor to
 * the physical interface.
 *
 * tunll_list is the head of a circular doubly-linked list (insque/remque);
 * an empty or fully traversed list is detected by arriving back at the
 * head.  tunll_index is a counter that supplies the tll_index of each new
 * lower stream; it is only ever incremented.
 */
static struct qelem tunll_list;
static int tunll_index;
151 
/* Test value; if all zeroes, then address hasn't been set yet. */
static const ether_addr_t zero_mac_addr = { 0, 0, 0, 0, 0, 0 };

/*
 * Size of a dl_unitdata_req_t plus four further bytes.
 * NOTE(review): not referenced in the portion of the file reviewed here;
 * by its name it is a lower bound used when validating fast-path
 * requests -- confirm at the point of use.
 */
#define	MIN_SET_FASTPATH_UNITDATAREQ_SIZE	\
	(sizeof (dl_unitdata_req_t) + 4)
157 
/*
 * STREAMS module_info parameters.  One module_info structure is shared by
 * all four qinit tables below, so the packet-size limits and water marks
 * are common to the upper and lower queues.
 */
#define	TUN_MI_ID	2104	/* officially allocated module ID */
#define	TUN_MI_MINPSZ	(0)
#define	TUN_MI_MAXPSZ	(PPP_MAXMTU)
#define	TUN_MI_HIWAT	(PPP_MTU * 8)
#define	TUN_MI_LOWAT	(128)

static struct module_info sppptun_modinfo = {
	TUN_MI_ID,		/* mi_idnum */
	PPP_TUN_NAME,		/* mi_idname */
	TUN_MI_MINPSZ,		/* mi_minpsz */
	TUN_MI_MAXPSZ,		/* mi_maxpsz */
	TUN_MI_HIWAT,		/* mi_hiwat */
	TUN_MI_LOWAT		/* mi_lowat */
};
172 
/*
 * Upper read-side queue.  This is the only table that carries the open
 * and close routines.  sppptun_urput is declared as returning void, hence
 * the cast to the int-returning pointer type used for the put slot.
 */
static struct qinit sppptun_urinit = {
	(int (*)())sppptun_urput, /* qi_putp */
	sppptun_ursrv,		/* qi_srvp */
	sppptun_open,		/* qi_qopen */
	sppptun_close,		/* qi_qclose */
	NULL,			/* qi_qadmin */
	&sppptun_modinfo,	/* qi_minfo */
	NULL			/* qi_mstat */
};
182 
/*
 * Upper write-side queue: put and service procedures only.
 * sppptun_uwput is declared as returning void, hence the cast.
 */
static struct qinit sppptun_uwinit = {
	(int (*)())sppptun_uwput, /* qi_putp */
	sppptun_uwsrv,		/* qi_srvp */
	NULL,			/* qi_qopen */
	NULL,			/* qi_qclose */
	NULL,			/* qi_qadmin */
	&sppptun_modinfo,	/* qi_minfo */
	NULL			/* qi_mstat */
};
192 
/*
 * Lower (multiplexor) read-side queue: put procedure only, no service
 * routine.  sppptun_lrput is declared as returning void, hence the cast.
 */
static struct qinit sppptun_lrinit = {
	(int (*)())sppptun_lrput, /* qi_putp */
	NULL,			/* qi_srvp */
	NULL,			/* qi_qopen */
	NULL,			/* qi_qclose */
	NULL,			/* qi_qadmin */
	&sppptun_modinfo,	/* qi_minfo */
	NULL			/* qi_mstat */
};
202 
/*
 * Lower (multiplexor) write-side queue: put procedure only, no service
 * routine.  sppptun_lwput is declared as returning void, hence the cast.
 */
static struct qinit sppptun_lwinit = {
	(int (*)())sppptun_lwput, /* qi_putp */
	NULL,			/* qi_srvp */
	NULL,			/* qi_qopen */
	NULL,			/* qi_qclose */
	NULL,			/* qi_qadmin */
	&sppptun_modinfo,	/* qi_minfo */
	NULL			/* qi_mstat */
};
212 
/*
 * This is referenced in sppptun_mod.c.
 *
 * Ties the four qinit tables together: the first pair serves the upper
 * (client) queues and the second pair serves the lower (multiplexed)
 * queues.
 */
struct streamtab sppptun_tab = {
	&sppptun_urinit,	/* st_rdinit */
	&sppptun_uwinit,	/* st_wrinit */
	&sppptun_lrinit,	/* st_muxrinit */
	&sppptun_lwinit		/* st_muxwinit */
};
222 
223 /*
224  * Allocate another slot table twice as large as the original one
225  * (limited to global maximum).  Migrate all tunnels to the new slot
226  * table and free the original one.  Assumes we're exclusive on both
227  * inner and outer perimeters, and thus there are no other users of
228  * the tcl_slots array.
229  */
230 static minor_t
231 tcl_grow(void)
232 {
233 	minor_t old_size = tcl_nslots;
234 	minor_t new_size = 2 * old_size;
235 	tuncl_t **tcl_old = tcl_slots;
236 	tuncl_t **tcl_new;
237 	void  *vaddr;			/* vmem_add return value */
238 
239 	ASSERT(RW_LOCK_HELD(&tcl_rwlock));
240 
241 	/* Allocate new ptms array */
242 	tcl_new = kmem_zalloc(new_size * sizeof (tuncl_t *), KM_NOSLEEP);
243 	if (tcl_new == NULL)
244 		return ((minor_t)0);
245 
246 	/* Increase clone index space */
247 	vaddr = vmem_add(tcl_minor_arena, (void*)((uintptr_t)old_size + 1),
248 	    new_size - old_size, VM_NOSLEEP);
249 
250 	if (vaddr == NULL) {
251 		kmem_free(tcl_new, new_size * sizeof (tuncl_t *));
252 		return ((minor_t)0);
253 	}
254 
255 	/* Migrate tuncl_t entries to a new location */
256 	tcl_nslots = new_size;
257 	bcopy(tcl_old, tcl_new, old_size * sizeof (tuncl_t *));
258 	tcl_slots = tcl_new;
259 	kmem_free(tcl_old, old_size * sizeof (tuncl_t *));
260 
261 	/* Allocate minor number and return it */
262 	return ((minor_t)(uintptr_t)vmem_alloc(tcl_minor_arena, 1, VM_NOSLEEP));
263 }
264 
265 /*
266  * Allocate new minor number and tunnel client entry.  Returns the new
267  * entry or NULL if no memory or maximum number of entries reached.
268  * Assumes we're exclusive on both inner and outer perimeters, and
269  * thus there are no other users of the tcl_slots array.
270  */
271 static tuncl_t *
272 tuncl_alloc(int wantminor)
273 {
274 	minor_t dminor;
275 	tuncl_t *tcl = NULL;
276 
277 	rw_enter(&tcl_rwlock, RW_WRITER);
278 
279 	ASSERT(tcl_slots != NULL);
280 
281 	/*
282 	 * Always try to allocate new pty when sppptun_cnt minimum
283 	 * limit is not achieved. If it is achieved, the maximum is
284 	 * determined by either user-specified value (if it is
285 	 * non-zero) or our memory estimations - whatever is less.
286 	 */
287 	if (tcl_inuse >= sppptun_cnt) {
288 		/*
289 		 * When system achieved required minimum of tunnels,
290 		 * check for the denial of service limits.
291 		 *
292 		 * Get user-imposed maximum, if configured, or
293 		 * calculated memory constraint.
294 		 */
295 		size_t user_max = (sppptun_max_pty == 0 ? tcl_minormax :
296 		    min(sppptun_max_pty, tcl_minormax));
297 
298 		/* Do not try to allocate more than allowed */
299 		if (tcl_inuse >= user_max) {
300 			rw_exit(&tcl_rwlock);
301 			return (NULL);
302 		}
303 	}
304 	tcl_inuse++;
305 
306 	/*
307 	 * Allocate new minor number. If this fails, all slots are
308 	 * busy and we need to grow the hash.
309 	 */
310 	if (wantminor <= 0) {
311 		dminor = (minor_t)(uintptr_t)vmem_alloc(tcl_minor_arena, 1,
312 		    VM_NOSLEEP);
313 		if (dminor == 0) {
314 			/* Grow the cache and retry allocation */
315 			dminor = tcl_grow();
316 		}
317 	} else {
318 		dminor = (minor_t)(uintptr_t)vmem_xalloc(tcl_minor_arena, 1,
319 		    0, 0, 0, (void *)(uintptr_t)wantminor,
320 		    (void *)((uintptr_t)wantminor+1), VM_NOSLEEP);
321 		if (dminor != 0 && dminor != wantminor) {
322 			vmem_free(tcl_minor_arena, (void *)(uintptr_t)dminor,
323 			    1);
324 			dminor = 0;
325 		}
326 	}
327 
328 	if (dminor == 0) {
329 		/* Not enough memory now */
330 		tcl_inuse--;
331 		rw_exit(&tcl_rwlock);
332 		return (NULL);
333 	}
334 
335 	tcl = kmem_cache_alloc(tcl_cache, KM_NOSLEEP);
336 	if (tcl == NULL) {
337 		/* Not enough memory - this entry can't be used now. */
338 		vmem_free(tcl_minor_arena, (void *)(uintptr_t)dminor, 1);
339 		tcl_inuse--;
340 	} else {
341 		bzero(tcl, sizeof (*tcl));
342 		tcl->tcl_lsessid = dminor;
343 		ASSERT(tcl_slots[dminor - 1] == NULL);
344 		tcl_slots[dminor - 1] = tcl;
345 	}
346 
347 	rw_exit(&tcl_rwlock);
348 	return (tcl);
349 }
350 
351 /*
352  * This routine frees an upper level (client) stream by removing it
353  * from the minor number pool and freeing the state structure storage.
354  * Assumes we're exclusive on both inner and outer perimeters, and
355  * thus there are no other concurrent users of the tcl_slots array or
356  * of any entry in that array.
357  */
358 static void
359 tuncl_free(tuncl_t *tcl)
360 {
361 	rw_enter(&tcl_rwlock, RW_WRITER);
362 	ASSERT(tcl->tcl_lsessid <= tcl_nslots);
363 	ASSERT(tcl_slots[tcl->tcl_lsessid - 1] == tcl);
364 	ASSERT(tcl_inuse > 0);
365 	tcl_inuse--;
366 	tcl_slots[tcl->tcl_lsessid - 1] = NULL;
367 
368 	if (tcl->tcl_ksp != NULL) {
369 		kstat_delete(tcl->tcl_ksp);
370 		tcl->tcl_ksp = NULL;
371 	}
372 
373 	/* Return minor number to the pool of minors */
374 	vmem_free(tcl_minor_arena, (void *)(uintptr_t)tcl->tcl_lsessid, 1);
375 
376 	/* Return tuncl_t to the cache */
377 	kmem_cache_free(tcl_cache, tcl);
378 	rw_exit(&tcl_rwlock);
379 }
380 
381 /*
382  * Get tuncl_t structure by minor number.  Returns NULL when minor is
383  * out of range.  Note that lookup of tcl pointers (and use of those
384  * pointers) is safe because modification is done only when exclusive
385  * on both inner and outer perimeters.
386  */
387 static tuncl_t *
388 tcl_by_minor(minor_t dminor)
389 {
390 	tuncl_t *tcl = NULL;
391 
392 	if ((dminor >= 1) && (dminor <= tcl_nslots) && tcl_slots != NULL) {
393 		tcl = tcl_slots[dminor - 1];
394 	}
395 
396 	return (tcl);
397 }
398 
399 /*
400  * Set up kstats for upper or lower stream.
401  */
402 static kstat_t *
403 kstat_setup(kstat_named_t *knt, const char **names, int nstat,
404     const char *modname, int unitnum)
405 {
406 	kstat_t *ksp;
407 	char unitname[KSTAT_STRLEN];
408 	int i;
409 
410 	for (i = 0; i < nstat; i++) {
411 		kstat_set_string(knt[i].name, names[i]);
412 		knt[i].data_type = KSTAT_DATA_UINT64;
413 	}
414 	(void) sprintf(unitname, "%s" "%d", modname, unitnum);
415 	ksp = kstat_create(modname, unitnum, unitname, "net",
416 	    KSTAT_TYPE_NAMED, nstat, KSTAT_FLAG_VIRTUAL);
417 	if (ksp != NULL) {
418 		ksp->ks_data = (void *)knt;
419 		kstat_install(ksp);
420 	}
421 	return (ksp);
422 }
423 
424 /*
425  * sppptun_open()
426  *
427  * MT-Perimeters:
428  *    exclusive inner, exclusive outer.
429  *
430  * Description:
431  *    Common open procedure for module and driver.
432  */
433 static int
434 sppptun_open(queue_t *q, dev_t *devp, int oflag, int sflag, cred_t *credp)
435 {
436 	_NOTE(ARGUNUSED(oflag))
437 
438 	/* Allow a re-open */
439 	if (q->q_ptr != NULL)
440 		return (0);
441 
442 	/* In the off chance that we're on our way out, just return error */
443 	if (tcl_slots == NULL)
444 		return (EINVAL);
445 
446 	if (sflag & MODOPEN) {
447 		tunll_t *tll;
448 		char *cp;
449 
450 		/* ordinary users have no need to push this module */
451 		if (secpolicy_ppp_config(credp) != 0)
452 			return (EPERM);
453 
454 		tll = kmem_zalloc(sizeof (tunll_t), KM_SLEEP);
455 
456 		tll->tll_index = tunll_index++;
457 
458 		tll->tll_wq = WR(q);
459 		tll->tll_zoneid = crgetzoneid(credp);
460 
461 		/* Insert at end of list */
462 		insque(&tll->tll_next, tunll_list.q_back);
463 		q->q_ptr = WR(q)->q_ptr = tll;
464 
465 		tll->tll_style = PTS_PPPOE;
466 		tll->tll_alen = sizeof (tll->tll_lcladdr.pta_pppoe);
467 
468 		tll->tll_ksp = kstat_setup((kstat_named_t *)&tll->tll_kstats,
469 		    tll_kstats_list, Dim(tll_kstats_list), "tll",
470 		    tll->tll_index);
471 
472 		/*
473 		 * Find the name of the driver somewhere beneath us.
474 		 * Note that we have no driver under us until after
475 		 * qprocson().
476 		 */
477 		qprocson(q);
478 		for (q = WR(q); q->q_next != NULL; q = q->q_next)
479 			;
480 		cp = NULL;
481 		if (q->q_qinfo != NULL && q->q_qinfo->qi_minfo != NULL)
482 			cp = q->q_qinfo->qi_minfo->mi_idname;
483 		if (cp != NULL && *cp == '\0')
484 			cp = NULL;
485 
486 		/* Set initial name; user should overwrite. */
487 		if (cp == NULL)
488 			(void) snprintf(tll->tll_name, sizeof (tll->tll_name),
489 			    PPP_TUN_NAME "%d", tll->tll_index);
490 		else
491 			(void) snprintf(tll->tll_name, sizeof (tll->tll_name),
492 			    "%s:tun%d", cp, tll->tll_index);
493 	} else {
494 		tuncl_t	*tcl;
495 
496 		ASSERT(devp != NULL);
497 		if (sflag & CLONEOPEN) {
498 			tcl = tuncl_alloc(-1);
499 		} else {
500 			minor_t mn;
501 
502 			/*
503 			 * Support of non-clone open (ie, mknod with
504 			 * defined minor number) is supported for
505 			 * testing purposes so that 'arbitrary' minor
506 			 * numbers can be used.
507 			 */
508 			mn = getminor(*devp);
509 			if (mn == 0 || (tcl = tcl_by_minor(mn)) != NULL) {
510 				return (EPERM);
511 			}
512 			tcl = tuncl_alloc(mn);
513 		}
514 		if (tcl == NULL)
515 			return (ENOSR);
516 		tcl->tcl_rq = q;		/* save read queue pointer */
517 		tcl->tcl_flags |= TCLF_ISCLIENT;	/* sanity check */
518 		tcl->tcl_zoneid = crgetzoneid(credp);
519 
520 		q->q_ptr = WR(q)->q_ptr = (caddr_t)tcl;
521 		*devp = makedevice(getmajor(*devp), tcl->tcl_lsessid);
522 
523 		tcl->tcl_ksp = kstat_setup((kstat_named_t *)&tcl->tcl_kstats,
524 		    tcl_kstats_list, Dim(tcl_kstats_list), "tcl",
525 		    tcl->tcl_lsessid);
526 
527 		qprocson(q);
528 	}
529 	return (0);
530 }
531 
532 /*
533  * Create an appropriate control message for this client event.
534  */
535 static mblk_t *
536 make_control(tuncl_t *tclabout, tunll_t *tllabout, int action, tuncl_t *tclto)
537 {
538 	struct ppptun_control *ptc;
539 	mblk_t *mp = allocb(sizeof (*ptc), BPRI_HI);
540 
541 	if (mp != NULL) {
542 		MTYPE(mp) = M_PROTO;
543 		ptc = (struct ppptun_control *)mp->b_wptr;
544 		bzero(ptc, sizeof (*ptc));
545 		mp->b_wptr += sizeof (*ptc);
546 		if (tclabout != NULL) {
547 			ptc->ptc_rsessid = tclabout->tcl_rsessid;
548 			ptc->ptc_address = tclabout->tcl_address;
549 		}
550 		ptc->ptc_discrim = tclto->tcl_ctlval;
551 		ptc->ptc_action = action;
552 		if (tllabout != NULL) {
553 			(void) strncpy(ptc->ptc_name, tllabout->tll_name,
554 			    sizeof (ptc->ptc_name));
555 		}
556 	}
557 	return (mp);
558 }
559 
560 /*
561  * Send an appropriate control message up this client session.
562  */
563 static void
564 send_control(tuncl_t *tclabout, tunll_t *tllabout, int action, tuncl_t *tcl)
565 {
566 	mblk_t *mp;
567 
568 	if (tcl->tcl_rq != NULL) {
569 		mp = make_control(tclabout, tllabout, action, tcl);
570 		if (mp != NULL) {
571 			KCINCR(cks_octrl_spec);
572 			putnext(tcl->tcl_rq, mp);
573 		}
574 	}
575 }
576 
577 /*
578  * If a lower stream is being unplumbed, then the upper streams
579  * connected to this lower stream must be disconnected.  This routine
580  * accomplishes this by sending M_HANGUP to data streams and M_PROTO
581  * messages to control streams.  This is called by vmem_walk, and
582  * handles a span of minor node numbers.
583  *
584  * No need to update lks_clients here; the lower stream is on its way
585  * out.
586  */
587 static void
588 tclvm_remove_tll(void *arg, void *firstv, size_t numv)
589 {
590 	tunll_t *tll = (tunll_t *)arg;
591 	int minorn = (int)(uintptr_t)firstv;
592 	int minormax = minorn + numv;
593 	tuncl_t *tcl;
594 	mblk_t *mp;
595 
596 	while (minorn < minormax) {
597 		tcl = tcl_slots[minorn - 1];
598 		ASSERT(tcl != NULL);
599 		if (tcl->tcl_data_tll == tll && tcl->tcl_rq != NULL) {
600 			tcl->tcl_data_tll = NULL;
601 			mp = allocb(0, BPRI_HI);
602 			if (mp != NULL) {
603 				MTYPE(mp) = M_HANGUP;
604 				putnext(tcl->tcl_rq, mp);
605 				if (tcl->tcl_ctrl_tll == tll)
606 					tcl->tcl_ctrl_tll = NULL;
607 			}
608 		}
609 		if (tcl->tcl_ctrl_tll == tll) {
610 			send_control(tcl, tll, PTCA_UNPLUMB, tcl);
611 			tcl->tcl_ctrl_tll = NULL;
612 		}
613 		minorn++;
614 	}
615 }
616 
617 /*
618  * sppptun_close()
619  *
620  * MT-Perimeters:
621  *    exclusive inner, exclusive outer.
622  *
623  * Description:
624  *    Common close procedure for module and driver.
625  */
626 /* ARGSUSED */
627 static int
628 sppptun_close(queue_t *q, int flags __unused, cred_t *credp __unused)
629 {
630 	int err;
631 	void *qptr;
632 	tunll_t *tll;
633 	tuncl_t *tcl;
634 
635 	qptr = q->q_ptr;
636 
637 	err = 0;
638 	tll = qptr;
639 	if (!(tll->tll_flags & TLLF_NOTLOWER)) {
640 		/* q_next is set on modules */
641 		ASSERT(WR(q)->q_next != NULL);
642 
643 		/* unlink any clients using this lower layer. */
644 		vmem_walk(tcl_minor_arena, VMEM_ALLOC, tclvm_remove_tll, tll);
645 
646 		/* tell daemon that this has been removed. */
647 		if ((tcl = tll->tll_defcl) != NULL)
648 			send_control(NULL, tll, PTCA_UNPLUMB, tcl);
649 
650 		tll->tll_flags |= TLLF_CLOSING;
651 		while (!(tll->tll_flags & TLLF_CLOSE_DONE)) {
652 			qenable(tll->tll_wq);
653 			qwait(tll->tll_wq);
654 		}
655 		tll->tll_error = 0;
656 		while (!(tll->tll_flags & TLLF_SHUTDOWN_DONE)) {
657 			if (!qwait_sig(tll->tll_wq))
658 				break;
659 		}
660 
661 		qprocsoff(q);
662 		q->q_ptr = WR(q)->q_ptr = NULL;
663 		tll->tll_wq = NULL;
664 		remque(&tll->tll_next);
665 		err = tll->tll_error;
666 		if (tll->tll_ksp != NULL)
667 			kstat_delete(tll->tll_ksp);
668 		kmem_free(tll, sizeof (*tll));
669 	} else {
670 		tcl = qptr;
671 
672 		/* devices are end of line; no q_next. */
673 		ASSERT(WR(q)->q_next == NULL);
674 
675 		qprocsoff(q);
676 		DTRACE_PROBE1(sppptun__client__close, tuncl_t *, tcl);
677 		tcl->tcl_rq = NULL;
678 		q->q_ptr = WR(q)->q_ptr = NULL;
679 
680 		tll = TO_TLL(tunll_list.q_forw);
681 		while (tll != TO_TLL(&tunll_list)) {
682 			if (tll->tll_defcl == tcl)
683 				tll->tll_defcl = NULL;
684 			if (tll->tll_lastcl == tcl)
685 				tll->tll_lastcl = NULL;
686 			tll = TO_TLL(tll->tll_next);
687 		}
688 		/*
689 		 * If this was a normal session, then tell the daemon.
690 		 */
691 		if (!(tcl->tcl_flags & TCLF_DAEMON) &&
692 		    (tll = tcl->tcl_ctrl_tll) != NULL &&
693 		    tll->tll_defcl != NULL) {
694 			send_control(tcl, tll, PTCA_DISCONNECT,
695 			    tll->tll_defcl);
696 		}
697 
698 		/* Update statistics for references being dropped. */
699 		if ((tll = tcl->tcl_data_tll) != NULL) {
700 			KLDECR(lks_clients);
701 		}
702 		if ((tll = tcl->tcl_ctrl_tll) != NULL) {
703 			KLDECR(lks_clients);
704 		}
705 
706 		tuncl_free(tcl);
707 	}
708 
709 	return (err);
710 }
711 
712 /*
713  * Allocate and initialize a DLPI or TPI template of the specified
714  * length.
715  */
716 static mblk_t *
717 pi_alloc(size_t len, int prim)
718 {
719 	mblk_t	*mp;
720 
721 	mp = allocb(len, BPRI_MED);
722 	if (mp != NULL) {
723 		MTYPE(mp) = M_PROTO;
724 		mp->b_wptr = mp->b_rptr + len;
725 		bzero(mp->b_rptr, len);
726 		*(int *)mp->b_rptr = prim;
727 	}
728 	return (mp);
729 }
730 
731 #define	dlpi_alloc(l, p)	pi_alloc((l), (p))
732 
733 /*
734  * Prepend some room to an mblk.  Try to reuse the existing buffer, if
735  * at all possible, rather than allocating a new one.  (Fast-path
736  * output should be able to use this.)
737  *
738  * (XXX why isn't this a library function ...?)
739  */
740 static mblk_t *
741 prependb(mblk_t *mp, size_t len, size_t align)
742 {
743 	mblk_t *newmp;
744 
745 
746 	if (align == 0)
747 		align = 8;
748 	if (DB_REF(mp) > 1 || mp->b_datap->db_base+len > mp->b_rptr ||
749 	    ((uint_t)((uintptr_t)mp->b_rptr - len) % align) != 0) {
750 		if ((newmp = allocb(len, BPRI_LO)) == NULL) {
751 			freemsg(mp);
752 			return (NULL);
753 		}
754 		newmp->b_wptr = newmp->b_rptr + len;
755 		newmp->b_cont = mp;
756 		return (newmp);
757 	}
758 	mp->b_rptr -= len;
759 	return (mp);
760 }
761 
762 /*
763  * sppptun_outpkt()
764  *
765  * MT-Perimeters:
766  *	shared inner, shared outer (if called from sppptun_uwput),
767  *	exclusive inner, shared outer (if called from sppptun_uwsrv).
768  *
769  * Description:
770  *    Called from sppptun_uwput or sppptun_uwsrv when processing a
771  *    M_DATA, M_PROTO, or M_PCPROTO message.  For all cases, it tries
772  *    to prepare the data to be sent to the module below this driver
773  *    if there is a lower stream linked underneath.  If no lower
774  *    stream exists, then the data will be discarded and an ENXIO
775  *    error returned.
776  *
777  * Returns:
778  *	pointer to queue if caller should do putnext, otherwise
779  *	*mpp != NULL if message should be enqueued, otherwise
780  *	*mpp == NULL if message is gone.
781  */
782 static queue_t *
783 sppptun_outpkt(queue_t *q, mblk_t **mpp)
784 {
785 	mblk_t *mp;
786 	tuncl_t *tcl;
787 	tunll_t *tll;
788 	mblk_t *encmb;
789 	mblk_t *datamb;
790 	dl_unitdata_req_t *dur;
791 	queue_t *lowerq;
792 	poep_t *poep;
793 	int len;
794 	ether_dest_t *edestp;
795 	enum { luNone, luCopy, luSend } loopup;
796 	boolean_t isdata;
797 	struct ppptun_control *ptc;
798 
799 	mp = *mpp;
800 	tcl = q->q_ptr;
801 
802 	*mpp = NULL;
803 	if (!(tcl->tcl_flags & TCLF_ISCLIENT)) {
804 		/* This should never happen on a lower layer stream */
805 		freemsg(mp);
806 		return (NULL);
807 	}
808 
809 	isdata = (MTYPE(mp) == M_DATA);
810 	if (isdata) {
811 		tll = tcl->tcl_data_tll;
812 		ptc = NULL;
813 	} else {
814 		/*
815 		 * If data are unaligned or otherwise unsuitable, then
816 		 * discard.
817 		 */
818 		if (MBLKL(mp) != sizeof (*ptc) || DB_REF(mp) > 1 ||
819 		    !IS_P2ALIGNED(mp->b_rptr, sizeof (ptc))) {
820 			KCINCR(cks_octrl_drop);
821 			DTRACE_PROBE2(sppptun__bad__control, tuncl_t *, tcl,
822 			    mblk_t *, mp);
823 			send_control(tcl, tcl->tcl_ctrl_tll, PTCA_BADCTRL, tcl);
824 			freemsg(mp);
825 			return (NULL);
826 		}
827 		ptc = (struct ppptun_control *)mp->b_rptr;
828 
829 		/* Set stream discriminator value if not yet set. */
830 		if (tcl->tcl_ctlval == 0)
831 			tcl->tcl_ctlval = ptc->ptc_discrim;
832 
833 		/* If this is a test message, then reply to caller. */
834 		if (ptc->ptc_action == PTCA_TEST) {
835 			DTRACE_PROBE2(sppptun__test, tuncl_t *, tcl,
836 			    struct ppptun_control *, ptc);
837 			if (mp->b_cont != NULL) {
838 				freemsg(mp->b_cont);
839 				mp->b_cont = NULL;
840 			}
841 			ptc->ptc_discrim = tcl->tcl_ctlval;
842 			putnext(RD(q), mp);
843 			return (NULL);
844 		}
845 
846 		/* If this one isn't for us, then discard it */
847 		if (tcl->tcl_ctlval != ptc->ptc_discrim) {
848 			DTRACE_PROBE2(sppptun__bad__discrim, tuncl_t *, tcl,
849 			    struct ppptun_control *, ptc);
850 			freemsg(mp);
851 			return (NULL);
852 		}
853 
854 		/* Don't allow empty control packets. */
855 		tll = tcl->tcl_ctrl_tll;
856 		if (mp->b_cont == NULL) {
857 			KCINCR(cks_octrl_drop);
858 			DTRACE_PROBE2(sppptun__bad__control, tuncl_t *, tcl,
859 			    mblk_t *, mp);
860 			send_control(tcl, tll, PTCA_BADCTRL, tcl);
861 			freemsg(mp);
862 			return (NULL);
863 		}
864 	}
865 
866 	if (tll == NULL || (lowerq = tll->tll_wq) == NULL) {
867 		DTRACE_PROBE3(sppptun__cannot__send, tuncl_t *, tcl,
868 		    tunll_t *, tll, mblk_t *, mp);
869 		send_control(tcl, tll, PTCA_UNPLUMB, tcl);
870 		freemsg(mp);
871 		if (isdata) {
872 			tcl->tcl_stats.ppp_oerrors++;
873 		} else {
874 			KCINCR(cks_octrl_drop);
875 		}
876 		return (NULL);
877 	}
878 
879 	/*
880 	 * If so, then try to send it down.  The lower queue is only
881 	 * ever detached while holding an exclusive lock on the whole
882 	 * driver, so we can be confident that the lower queue is
883 	 * still there.
884 	 */
885 	if (!bcanputnext(lowerq, mp->b_band)) {
886 		DTRACE_PROBE3(sppptun__flow__control, tuncl_t *, tcl,
887 		    tunll_t *, tll, mblk_t *, mp);
888 		*mpp = mp;
889 		return (NULL);
890 	}
891 
892 	/*
893 	 * Note: DLPI and TPI expect that the first buffer contains
894 	 * the control (unitdata-req) header, destination address, and
895 	 * nothing else.  Any protocol headers must go in the next
896 	 * buffer.
897 	 */
898 	loopup = luNone;
899 	encmb = NULL;
900 	if (isdata) {
901 		if (tll->tll_alen != 0 &&
902 		    bcmp(&tcl->tcl_address, &tll->tll_lcladdr,
903 		    tll->tll_alen) == 0)
904 			loopup = luSend;
905 		switch (tll->tll_style) {
906 		case PTS_PPPOE:
907 			/* Strip address and control fields if present. */
908 			if (mp->b_rptr[0] == 0xFF) {
909 				if (MBLKL(mp) < 3) {
910 					encmb = msgpullup(mp, 3);
911 					freemsg(mp);
912 					if ((mp = encmb) == NULL)
913 						break;
914 				}
915 				mp->b_rptr += 2;
916 			}
917 			/* Broadcasting data is probably not a good idea. */
918 			if (tcl->tcl_address.pta_pppoe.ptma_mac[0] & 1)
919 				break;
920 			encmb = dlpi_alloc(sizeof (*dur) + sizeof (*edestp),
921 			    DL_UNITDATA_REQ);
922 			if (encmb == NULL)
923 				break;
924 
925 			dur = (dl_unitdata_req_t *)encmb->b_rptr;
926 			dur->dl_dest_addr_length = sizeof (*edestp);
927 			dur->dl_dest_addr_offset = sizeof (*dur);
928 			edestp = (ether_dest_t *)(dur + 1);
929 			ether_copy(tcl->tcl_address.pta_pppoe.ptma_mac,
930 			    edestp->addr);
931 			/* DLPI SAPs are in host byte order! */
932 			edestp->type = tll->tll_sap;
933 
934 			/* Make sure the protocol field isn't compressed. */
935 			len = (*mp->b_rptr & 1);
936 			mp = prependb(mp, sizeof (*poep) + len, POE_HDR_ALIGN);
937 			if (mp == NULL)
938 				break;
939 			poep = (poep_t *)mp->b_rptr;
940 			poep->poep_version_type = POE_VERSION;
941 			poep->poep_code = POECODE_DATA;
942 			poep->poep_session_id = htons(tcl->tcl_rsessid);
943 			poep->poep_length = htons(msgsize(mp) -
944 			    sizeof (*poep));
945 			if (len > 0)
946 				*(char *)(poep + 1) = '\0';
947 			break;
948 
949 		default:
950 			ASSERT(0);
951 		}
952 	} else {
953 		/*
954 		 * Control side encapsulation.
955 		 */
956 		if (bcmp(&ptc->ptc_address, &tll->tll_lcladdr, tll->tll_alen)
957 		    == 0)
958 			loopup = luSend;
959 		datamb = mp->b_cont;
960 		switch (tll->tll_style) {
961 		case PTS_PPPOE:
962 			/*
963 			 * Don't allow a loopback session to establish
964 			 * itself.  PPPoE is broken; it uses only one
965 			 * session ID for both data directions, so the
966 			 * loopback data path can simply never work.
967 			 */
968 			if (loopup == luSend &&
969 			    ((poep_t *)datamb->b_rptr)->poep_code ==
970 			    POECODE_PADR)
971 				break;
972 			encmb = dlpi_alloc(sizeof (*dur) + sizeof (*edestp),
973 			    DL_UNITDATA_REQ);
974 			if (encmb == NULL)
975 				break;
976 			dur = (dl_unitdata_req_t *)encmb->b_rptr;
977 			dur->dl_dest_addr_length = sizeof (*edestp);
978 			dur->dl_dest_addr_offset = sizeof (*dur);
979 
980 			edestp = (ether_dest_t *)(dur + 1);
981 			/* DLPI SAPs are in host byte order! */
982 			edestp->type = tll->tll_sap;
983 
984 			/*
985 			 * If destination isn't set yet, then we have to
986 			 * allow anything at all.  Otherwise, force use
987 			 * of configured peer address.
988 			 */
989 			if (bcmp(tcl->tcl_address.pta_pppoe.ptma_mac,
990 			    zero_mac_addr, sizeof (zero_mac_addr)) == 0 ||
991 			    (tcl->tcl_flags & TCLF_DAEMON)) {
992 				ether_copy(ptc->ptc_address.pta_pppoe.ptma_mac,
993 				    edestp->addr);
994 			} else {
995 				ether_copy(tcl->tcl_address.pta_pppoe.ptma_mac,
996 				    edestp->addr);
997 			}
998 			/* Reflect multicast/broadcast back up. */
999 			if (edestp->addr[0] & 1)
1000 				loopup = luCopy;
1001 			break;
1002 
1003 		case PTS_PPTP:
1004 			/*
1005 			 * PPTP's control side is actually done over
1006 			 * separate TCP connections.
1007 			 */
1008 		default:
1009 			ASSERT(0);
1010 		}
1011 		freeb(mp);
1012 		mp = datamb;
1013 	}
1014 	if (mp == NULL || encmb == NULL) {
1015 		DTRACE_PROBE1(sppptun__output__failure, tuncl_t *, tcl);
1016 		freemsg(mp);
1017 		freemsg(encmb);
1018 		if (isdata) {
1019 			tcl->tcl_stats.ppp_oerrors++;
1020 		} else {
1021 			KCINCR(cks_octrl_drop);
1022 			KLINCR(lks_octrl_drop);
1023 		}
1024 		lowerq = NULL;
1025 	} else {
1026 		if (isdata) {
1027 			tcl->tcl_stats.ppp_obytes += msgsize(mp);
1028 			tcl->tcl_stats.ppp_opackets++;
1029 		} else {
1030 			KCINCR(cks_octrls);
1031 			KLINCR(lks_octrls);
1032 		}
1033 		if (encmb != mp)
1034 			encmb->b_cont = mp;
1035 		switch (loopup) {
1036 		case luNone:
1037 			*mpp = encmb;
1038 			break;
1039 		case luCopy:
1040 			mp = copymsg(encmb);
1041 			if (mp != NULL)
1042 				sppptun_urput(RD(lowerq), mp);
1043 			*mpp = encmb;
1044 			break;
1045 		case luSend:
1046 			sppptun_urput(RD(lowerq), encmb);
1047 			lowerq = NULL;
1048 			break;
1049 		}
1050 	}
1051 	return (lowerq);
1052 }
1053 
1054 /*
1055  * Enqueue a message to be sent when the lower stream is closed.  This
1056  * is done so that we're guaranteed that we always have the necessary
1057  * resources to properly detach ourselves from the system.  (If we
1058  * waited until the close was done to allocate these messages, then
1059  * the message allocation could fail, and we'd be unable to properly
1060  * detach.)
1061  */
1062 static void
1063 save_for_close(tunll_t *tll, mblk_t *mp)
1064 {
1065 	mblk_t *onc;
1066 
1067 	if ((onc = tll->tll_onclose) == NULL)
1068 		tll->tll_onclose = mp;
1069 	else {
1070 		while (onc->b_next != NULL)
1071 			onc = onc->b_next;
1072 		onc->b_next = mp;
1073 	}
1074 }
1075 
1076 /*
1077  * Given the lower stream name, locate the state structure.  Note that
1078  * lookup of tcl pointers (and use of those pointers) is safe because
1079  * modification is done only when exclusive on both inner and outer
1080  * perimeters.
1081  */
1082 static tunll_t *
1083 tll_lookup_on_name(const char *dname, zoneid_t zoneid)
1084 {
1085 	tunll_t *tll;
1086 
1087 	tll = TO_TLL(tunll_list.q_forw);
1088 	for (; tll != TO_TLL(&tunll_list); tll = TO_TLL(tll->tll_next))
1089 		if (tll->tll_zoneid == zoneid &&
1090 		    strcmp(dname, tll->tll_name) == 0)
1091 			return (tll);
1092 	return (NULL);
1093 }
1094 
1095 /*
1096  * sppptun_inner_ioctl()
1097  *
1098  * MT-Perimeters:
1099  *    exclusive inner, shared outer.
1100  *
1101  * Description:
1102  *    Called by qwriter from sppptun_ioctl as the result of receiving
1103  *    a handled ioctl.
1104  */
1105 static void
1106 sppptun_inner_ioctl(queue_t *q, mblk_t *mp)
1107 {
1108 	struct iocblk *iop;
1109 	int rc = 0;
1110 	int len = 0;
1111 	int i;
1112 	tuncl_t *tcl;
1113 	tunll_t *tll;
1114 	union ppptun_name *ptn;
1115 	struct ppptun_info *pti;
1116 	struct ppptun_peer *ptp;
1117 	mblk_t *mptmp;
1118 	ppptun_atype *pap;
1119 	struct ppp_stats64 *psp;
1120 	zoneid_t zoneid;
1121 
1122 	iop = (struct iocblk *)mp->b_rptr;
1123 	tcl = NULL;
1124 	tll = q->q_ptr;
1125 	if (tll->tll_flags & TLLF_NOTLOWER) {
1126 		tcl = (tuncl_t *)tll;
1127 		tll = NULL;
1128 	}
1129 
1130 	DTRACE_PROBE3(sppptun__ioctl, tuncl_t *, tcl, tunll_t *, tll,
1131 	    struct iocblk *, iop);
1132 
1133 	switch (iop->ioc_cmd) {
1134 	case PPPIO_DEBUG:
1135 		/*
1136 		 * Debug requests are now ignored; use dtrace or wireshark
1137 		 * instead.
1138 		 */
1139 		break;
1140 
1141 	case PPPIO_GETSTAT:
1142 		rc = EINVAL;
1143 		break;
1144 
1145 	case PPPIO_GETSTAT64:
1146 		/* Client (device) side only */
1147 		if (tcl == NULL) {
1148 			rc = EINVAL;
1149 			break;
1150 		}
1151 		mptmp = allocb(sizeof (*psp), BPRI_HI);
1152 		if (mptmp == NULL) {
1153 			rc = ENOSR;
1154 			break;
1155 		}
1156 		freemsg(mp->b_cont);
1157 		mp->b_cont = mptmp;
1158 
1159 		psp = (struct ppp_stats64 *)mptmp->b_wptr;
1160 		bzero((caddr_t)psp, sizeof (*psp));
1161 		psp->p = tcl->tcl_stats;
1162 
1163 		len = sizeof (*psp);
1164 		break;
1165 
1166 	case PPPTUN_SNAME:
1167 		/* This is done on the *module* (lower level) side. */
1168 		if (tll == NULL || mp->b_cont == NULL ||
1169 		    iop->ioc_count != sizeof (*ptn) ||
1170 		    *mp->b_cont->b_rptr == '\0') {
1171 			rc = EINVAL;
1172 			break;
1173 		}
1174 
1175 		ptn = (union ppptun_name *)mp->b_cont->b_rptr;
1176 		ptn->ptn_name[sizeof (ptn->ptn_name) - 1] = '\0';
1177 
1178 		tll = tll_lookup_on_name(ptn->ptn_name, tll->tll_zoneid);
1179 		if (tll != NULL) {
1180 			rc = EEXIST;
1181 			break;
1182 		}
1183 		tll = (tunll_t *)q->q_ptr;
1184 		(void) strcpy(tll->tll_name, ptn->ptn_name);
1185 		break;
1186 
1187 	case PPPTUN_SINFO:
1188 	case PPPTUN_GINFO:
1189 		/* Either side */
1190 		if (mp->b_cont == NULL || iop->ioc_count != sizeof (*pti)) {
1191 			rc = EINVAL;
1192 			break;
1193 		}
1194 		pti = (struct ppptun_info *)mp->b_cont->b_rptr;
1195 		if (pti->pti_name[0] != '\0')
1196 			tll = tll_lookup_on_name(pti->pti_name,
1197 			    tcl == NULL ? tll->tll_zoneid : tcl->tcl_zoneid);
1198 		if (tll == NULL) {
1199 			/* Driver (client) side must have name */
1200 			if (tcl != NULL && pti->pti_name[0] == '\0')
1201 				rc = EINVAL;
1202 			else
1203 				rc = ESRCH;
1204 			break;
1205 		}
1206 		if (iop->ioc_cmd == PPPTUN_GINFO) {
1207 			pti->pti_muxid = tll->tll_muxid;
1208 			pti->pti_style = tll->tll_style;
1209 			len = sizeof (*pti);
1210 			break;
1211 		}
1212 		tll->tll_muxid = pti->pti_muxid;
1213 		tll->tll_style = pti->pti_style;
1214 		switch (tll->tll_style) {
1215 		case PTS_PPPOE:		/* DLPI type */
1216 			tll->tll_alen = sizeof (tll->tll_lcladdr.pta_pppoe);
1217 			mptmp = dlpi_alloc(sizeof (dl_unbind_req_t),
1218 			    DL_UNBIND_REQ);
1219 			if (mptmp == NULL) {
1220 				rc = ENOSR;
1221 				break;
1222 			}
1223 			save_for_close(tll, mptmp);
1224 			mptmp = dlpi_alloc(sizeof (dl_detach_req_t),
1225 			    DL_DETACH_REQ);
1226 			if (mptmp == NULL) {
1227 				rc = ENOSR;
1228 				break;
1229 			}
1230 			save_for_close(tll, mptmp);
1231 			break;
1232 		default:
1233 			tll->tll_style = PTS_NONE;
1234 			tll->tll_alen = 0;
1235 			rc = EINVAL;
1236 			break;
1237 		}
1238 		break;
1239 
1240 	case PPPTUN_GNNAME:
1241 		/* This can be done on either side. */
1242 		if (mp->b_cont == NULL || iop->ioc_count < sizeof (uint32_t)) {
1243 			rc = EINVAL;
1244 			break;
1245 		}
1246 		zoneid = tcl == NULL ? tll->tll_zoneid : tcl->tcl_zoneid;
1247 		ptn = (union ppptun_name *)mp->b_cont->b_rptr;
1248 		i = ptn->ptn_index;
1249 		tll = TO_TLL(tunll_list.q_forw);
1250 		while (tll != TO_TLL(&tunll_list)) {
1251 			if (tll->tll_zoneid == zoneid && --i < 0)
1252 				break;
1253 			tll = TO_TLL(tll->tll_next);
1254 		}
1255 		if (tll != TO_TLL(&tunll_list)) {
1256 			bcopy(tll->tll_name, ptn->ptn_name,
1257 			    sizeof (ptn->ptn_name));
1258 		} else {
1259 			bzero(ptn, sizeof (*ptn));
1260 		}
1261 		len = sizeof (*ptn);
1262 		break;
1263 
1264 	case PPPTUN_LCLADDR:
1265 		/* This is done on the *module* (lower level) side. */
1266 		if (tll == NULL || mp->b_cont == NULL) {
1267 			rc = EINVAL;
1268 			break;
1269 		}
1270 
1271 		pap = &tll->tll_lcladdr;
1272 		len = tll->tll_alen;
1273 		if (len == 0 || len > iop->ioc_count) {
1274 			rc = EINVAL;
1275 			break;
1276 		}
1277 		bcopy(mp->b_cont->b_rptr, pap, len);
1278 		len = 0;
1279 		break;
1280 
1281 	case PPPTUN_SPEER:
1282 		/* Client (device) side only; before SDATA */
1283 		if (tcl == NULL || mp->b_cont == NULL ||
1284 		    iop->ioc_count != sizeof (*ptp)) {
1285 			rc = EINVAL;
1286 			break;
1287 		}
1288 		if (tcl->tcl_data_tll != NULL) {
1289 			rc = EINVAL;
1290 			break;
1291 		}
1292 		ptp = (struct ppptun_peer *)mp->b_cont->b_rptr;
1293 		DTRACE_PROBE2(sppptun__speer, tuncl_t *, tcl,
1294 		    struct ppptun_peer *, ptp);
1295 		/* Once set, the style cannot change. */
1296 		if (tcl->tcl_style != PTS_NONE &&
1297 		    tcl->tcl_style != ptp->ptp_style) {
1298 			rc = EINVAL;
1299 			break;
1300 		}
1301 		if (ptp->ptp_flags & PTPF_DAEMON) {
1302 			/* User requests registration for tunnel 0 */
1303 			if ((tcl->tcl_flags & TCLF_SPEER_DONE) ||
1304 			    ptp->ptp_ltunid != 0 || ptp->ptp_rtunid != 0 ||
1305 			    ptp->ptp_lsessid != 0 || ptp->ptp_rsessid != 0) {
1306 				rc = EINVAL;
1307 				break;
1308 			}
1309 			tcl->tcl_flags |= TCLF_DAEMON;
1310 		} else {
1311 			/* Normal client connection */
1312 			if (tcl->tcl_flags & TCLF_DAEMON) {
1313 				rc = EINVAL;
1314 				break;
1315 			}
1316 			if (ptp->ptp_lsessid != 0 &&
1317 			    ptp->ptp_lsessid != tcl->tcl_lsessid) {
1318 				rc = EINVAL;
1319 				break;
1320 			}
1321 			/*
1322 			 * If we're reassigning the peer data, then
1323 			 * the previous assignment must have been for
1324 			 * a client control connection.  Check that.
1325 			 */
1326 			if ((tcl->tcl_flags & TCLF_SPEER_DONE) &&
1327 			    ((tcl->tcl_ltunid != 0 &&
1328 			    tcl->tcl_ltunid != ptp->ptp_ltunid) ||
1329 			    (tcl->tcl_rtunid != 0 &&
1330 			    tcl->tcl_rtunid != ptp->ptp_rtunid) ||
1331 			    (tcl->tcl_rsessid != 0 &&
1332 			    tcl->tcl_rsessid != ptp->ptp_rsessid))) {
1333 				rc = EINVAL;
1334 				break;
1335 			}
1336 			if ((tcl->tcl_ltunid = ptp->ptp_ltunid) == 0 &&
1337 			    tcl->tcl_style == PTS_L2FTP)
1338 				tcl->tcl_ltunid = ptp->ptp_lsessid;
1339 			tcl->tcl_rtunid = ptp->ptp_rtunid;
1340 			tcl->tcl_rsessid = ptp->ptp_rsessid;
1341 		}
1342 		tcl->tcl_flags |= TCLF_SPEER_DONE;
1343 		tcl->tcl_style = ptp->ptp_style;
1344 		tcl->tcl_address = ptp->ptp_address;
1345 		goto fill_in_peer;
1346 
1347 	case PPPTUN_GPEER:
1348 		/* Client (device) side only */
1349 		if (tcl == NULL) {
1350 			rc = EINVAL;
1351 			break;
1352 		}
1353 		if (mp->b_cont != NULL)
1354 			freemsg(mp->b_cont);
1355 		mp->b_cont = allocb(sizeof (*ptp), BPRI_HI);
1356 		if (mp->b_cont == NULL) {
1357 			rc = ENOSR;
1358 			break;
1359 		}
1360 		ptp = (struct ppptun_peer *)mp->b_cont->b_rptr;
1361 	fill_in_peer:
1362 		ptp->ptp_style = tcl->tcl_style;
1363 		ptp->ptp_flags = (tcl->tcl_flags & TCLF_DAEMON) ? PTPF_DAEMON :
1364 		    0;
1365 		ptp->ptp_ltunid = tcl->tcl_ltunid;
1366 		ptp->ptp_rtunid = tcl->tcl_rtunid;
1367 		ptp->ptp_lsessid = tcl->tcl_lsessid;
1368 		ptp->ptp_rsessid = tcl->tcl_rsessid;
1369 		ptp->ptp_address = tcl->tcl_address;
1370 		len = sizeof (*ptp);
1371 		break;
1372 
1373 	case PPPTUN_SDATA:
1374 	case PPPTUN_SCTL:
1375 		/* Client (device) side only; must do SPEER first */
1376 		if (tcl == NULL || mp->b_cont == NULL ||
1377 		    iop->ioc_count != sizeof (*ptn) ||
1378 		    *mp->b_cont->b_rptr == '\0') {
1379 			rc = EINVAL;
1380 			break;
1381 		}
1382 		if (!(tcl->tcl_flags & TCLF_SPEER_DONE)) {
1383 			rc = EINVAL;
1384 			break;
1385 		}
1386 		ptn = (union ppptun_name *)mp->b_cont->b_rptr;
1387 		ptn->ptn_name[sizeof (ptn->ptn_name) - 1] = '\0';
1388 		tll = tll_lookup_on_name(ptn->ptn_name, tcl->tcl_zoneid);
1389 		if (tll == NULL) {
1390 			rc = ESRCH;
1391 			break;
1392 		}
1393 		if (tll->tll_style != tcl->tcl_style) {
1394 			rc = ENXIO;
1395 			break;
1396 		}
1397 		if (iop->ioc_cmd == PPPTUN_SDATA) {
1398 			if (tcl->tcl_data_tll != NULL) {
1399 				rc = EEXIST;
1400 				break;
1401 			}
1402 			/* server daemons cannot use regular data */
1403 			if (tcl->tcl_flags & TCLF_DAEMON) {
1404 				rc = EINVAL;
1405 				break;
1406 			}
1407 			tcl->tcl_data_tll = tll;
1408 		} else if (tcl->tcl_flags & TCLF_DAEMON) {
1409 			if (tll->tll_defcl != NULL && tll->tll_defcl != tcl) {
1410 				rc = EEXIST;
1411 				break;
1412 			}
1413 			tll->tll_defcl = tcl;
1414 			if (tcl->tcl_ctrl_tll != NULL) {
1415 				KDECR(tcl->tcl_ctrl_tll, tll_kstats,
1416 				    lks_clients);
1417 			}
1418 			tcl->tcl_ctrl_tll = tll;
1419 		} else {
1420 			if (tcl->tcl_ctrl_tll != NULL) {
1421 				rc = EEXIST;
1422 				break;
1423 			}
1424 			tcl->tcl_ctrl_tll = tll;
1425 		}
1426 		KLINCR(lks_clients);
1427 		break;
1428 
1429 	case PPPTUN_GDATA:
1430 	case PPPTUN_GCTL:
1431 		/* Client (device) side only */
1432 		if (tcl == NULL) {
1433 			rc = EINVAL;
1434 			break;
1435 		}
1436 		if (mp->b_cont != NULL)
1437 			freemsg(mp->b_cont);
1438 		mp->b_cont = allocb(sizeof (*ptn), BPRI_HI);
1439 		if (mp->b_cont == NULL) {
1440 			rc = ENOSR;
1441 			break;
1442 		}
1443 		ptn = (union ppptun_name *)mp->b_cont->b_rptr;
1444 		if (iop->ioc_cmd == PPPTUN_GDATA)
1445 			tll = tcl->tcl_data_tll;
1446 		else
1447 			tll = tcl->tcl_ctrl_tll;
1448 		if (tll == NULL)
1449 			bzero(ptn, sizeof (*ptn));
1450 		else
1451 			bcopy(tll->tll_name, ptn->ptn_name,
1452 			    sizeof (ptn->ptn_name));
1453 		len = sizeof (*ptn);
1454 		break;
1455 
1456 	case PPPTUN_DCTL:
1457 		/* Client (device) side daemon mode only */
1458 		if (tcl == NULL || mp->b_cont == NULL ||
1459 		    iop->ioc_count != sizeof (*ptn) ||
1460 		    !(tcl->tcl_flags & TCLF_DAEMON)) {
1461 			rc = EINVAL;
1462 			break;
1463 		}
1464 		ptn = (union ppptun_name *)mp->b_cont->b_rptr;
1465 		ptn->ptn_name[sizeof (ptn->ptn_name) - 1] = '\0';
1466 		tll = tll_lookup_on_name(ptn->ptn_name, tcl->tcl_zoneid);
1467 		if (tll == NULL || tll->tll_defcl != tcl) {
1468 			rc = ESRCH;
1469 			break;
1470 		}
1471 		tll->tll_defcl = NULL;
1472 		break;
1473 
1474 	case PPPTUN_SSAP:
1475 		/* This is done on the *module* (lower level) side. */
1476 		if (tll == NULL || mp->b_cont == NULL ||
1477 		    iop->ioc_count != sizeof (uint_t)) {
1478 			rc = EINVAL;
1479 			break;
1480 		}
1481 
1482 		tll->tll_sap = *(uint_t *)mp->b_cont->b_rptr;
1483 		break;
1484 
1485 	default:
1486 		/* Caller should already have checked command value */
1487 		ASSERT(0);
1488 	}
1489 	if (rc != 0) {
1490 		miocnak(q, mp, 0, rc);
1491 	} else {
1492 		if (len > 0)
1493 			mp->b_cont->b_wptr = mp->b_cont->b_rptr + len;
1494 		miocack(q, mp, len, 0);
1495 	}
1496 }
1497 
1498 /*
1499  * sppptun_ioctl()
1500  *
1501  * MT-Perimeters:
1502  *    shared inner, shared outer.
1503  *
1504  * Description:
1505  *    Called by sppptun_uwput as the result of receiving a M_IOCTL command.
1506  */
1507 static void
1508 sppptun_ioctl(queue_t *q, mblk_t *mp)
1509 {
1510 	struct iocblk *iop;
1511 	int rc = 0;
1512 	int len = 0;
1513 	uint32_t val = 0;
1514 	tunll_t *tll;
1515 
1516 	iop = (struct iocblk *)mp->b_rptr;
1517 
1518 	switch (iop->ioc_cmd) {
1519 	case PPPIO_DEBUG:
1520 	case PPPIO_GETSTAT:
1521 	case PPPIO_GETSTAT64:
1522 	case PPPTUN_SNAME:
1523 	case PPPTUN_SINFO:
1524 	case PPPTUN_GINFO:
1525 	case PPPTUN_GNNAME:
1526 	case PPPTUN_LCLADDR:
1527 	case PPPTUN_SPEER:
1528 	case PPPTUN_GPEER:
1529 	case PPPTUN_SDATA:
1530 	case PPPTUN_GDATA:
1531 	case PPPTUN_SCTL:
1532 	case PPPTUN_GCTL:
1533 	case PPPTUN_DCTL:
1534 	case PPPTUN_SSAP:
1535 		qwriter(q, mp, sppptun_inner_ioctl, PERIM_INNER);
1536 		return;
1537 
1538 	case PPPIO_GCLEAN:	/* always clean */
1539 		val = RCV_B7_1 | RCV_B7_0 | RCV_ODDP | RCV_EVNP;
1540 		len = sizeof (uint32_t);
1541 		break;
1542 
1543 	case PPPIO_GTYPE:	/* we look like an async driver. */
1544 		val = PPPTYP_AHDLC;
1545 		len = sizeof (uint32_t);
1546 		break;
1547 
1548 	case PPPIO_CFLAGS:	/* never compress headers */
1549 		val = 0;
1550 		len = sizeof (uint32_t);
1551 		break;
1552 
1553 		/* quietly ack PPP things we don't need to do. */
1554 	case PPPIO_XFCS:
1555 	case PPPIO_RFCS:
1556 	case PPPIO_XACCM:
1557 	case PPPIO_RACCM:
1558 	case PPPIO_LASTMOD:
1559 	case PPPIO_MUX:
1560 	case I_PLINK:
1561 	case I_PUNLINK:
1562 	case I_LINK:
1563 	case I_UNLINK:
1564 		break;
1565 
1566 	default:
1567 		tll = (tunll_t *)q->q_ptr;
1568 		if (!(tll->tll_flags & TLLF_NOTLOWER)) {
1569 			/* module side; pass this through. */
1570 			putnext(q, mp);
1571 			return;
1572 		}
1573 		rc = EINVAL;
1574 		break;
1575 	}
1576 	if (rc == 0 && len == sizeof (uint32_t)) {
1577 		if (mp->b_cont != NULL)
1578 			freemsg(mp->b_cont);
1579 		mp->b_cont = allocb(sizeof (uint32_t), BPRI_HI);
1580 		if (mp->b_cont == NULL) {
1581 			rc = ENOSR;
1582 		} else {
1583 			*(uint32_t *)mp->b_cont->b_wptr = val;
1584 			mp->b_cont->b_wptr += sizeof (uint32_t);
1585 		}
1586 	}
1587 	if (rc == 0) {
1588 		miocack(q, mp, len, 0);
1589 	} else {
1590 		miocnak(q, mp, 0, rc);
1591 	}
1592 }
1593 
1594 /*
1595  * sppptun_inner_mctl()
1596  *
1597  * MT-Perimeters:
1598  *    exclusive inner, shared outer.
1599  *
1600  * Description:
1601  *    Called by qwriter (via sppptun_uwput) as the result of receiving
1602  *    an M_CTL.  Called only on the client (driver) side.
1603  */
1604 static void
1605 sppptun_inner_mctl(queue_t *q, mblk_t *mp)
1606 {
1607 	int msglen;
1608 	tuncl_t *tcl;
1609 
1610 	tcl = q->q_ptr;
1611 
1612 	if (!(tcl->tcl_flags & TCLF_ISCLIENT)) {
1613 		freemsg(mp);
1614 		return;
1615 	}
1616 
1617 	msglen = MBLKL(mp);
1618 	switch (*mp->b_rptr) {
1619 	case PPPCTL_UNIT:
1620 		if (msglen == 2)
1621 			tcl->tcl_unit = mp->b_rptr[1];
1622 		else if (msglen == 8)
1623 			tcl->tcl_unit = ((uint32_t *)mp->b_rptr)[1];
1624 		break;
1625 	}
1626 	freemsg(mp);
1627 }
1628 
1629 /*
1630  * sppptun_uwput()
1631  *
1632  * MT-Perimeters:
1633  *    shared inner, shared outer.
1634  *
1635  * Description:
1636  *	Regular output data and controls pass through here.
1637  */
1638 static void
1639 sppptun_uwput(queue_t *q, mblk_t *mp)
1640 {
1641 	queue_t *nextq;
1642 	tuncl_t *tcl;
1643 
1644 	ASSERT(q->q_ptr != NULL);
1645 
1646 	switch (MTYPE(mp)) {
1647 	case M_DATA:
1648 	case M_PROTO:
1649 	case M_PCPROTO:
1650 		if (q->q_first == NULL &&
1651 		    (nextq = sppptun_outpkt(q, &mp)) != NULL) {
1652 			putnext(nextq, mp);
1653 		} else if (mp != NULL && !putq(q, mp)) {
1654 			freemsg(mp);
1655 		}
1656 		break;
1657 	case M_IOCTL:
1658 		sppptun_ioctl(q, mp);
1659 		break;
1660 	case M_CTL:
1661 		qwriter(q, mp, sppptun_inner_mctl, PERIM_INNER);
1662 		break;
1663 	default:
1664 		tcl = (tuncl_t *)q->q_ptr;
1665 		/*
1666 		 * If we're the driver, then discard unknown junk.
1667 		 * Otherwise, if we're the module, then forward along.
1668 		 */
1669 		if (tcl->tcl_flags & TCLF_ISCLIENT)
1670 			freemsg(mp);
1671 		else
1672 			putnext(q, mp);
1673 		break;
1674 	}
1675 }
1676 
1677 /*
1678  * Send a DLPI/TPI control message to the driver but make sure there
1679  * is only one outstanding message.  Uses tll_msg_pending to tell when
1680  * it must queue.  sppptun_urput calls message_done() when an ACK or a
1681  * NAK is received to process the next queued message.
1682  */
1683 static void
1684 message_send(tunll_t *tll, mblk_t *mp)
1685 {
1686 	mblk_t **mpp;
1687 
1688 	if (tll->tll_msg_pending) {
1689 		/* Must queue message. Tail insertion */
1690 		mpp = &tll->tll_msg_deferred;
1691 		while (*mpp != NULL)
1692 			mpp = &((*mpp)->b_next);
1693 		*mpp = mp;
1694 		return;
1695 	}
1696 	tll->tll_msg_pending = 1;
1697 	putnext(tll->tll_wq, mp);
1698 }
1699 
1700 /*
1701  * Called when an DLPI/TPI control message has been acked or nacked to
1702  * send down the next queued message (if any).
1703  */
1704 static void
1705 message_done(tunll_t *tll)
1706 {
1707 	mblk_t *mp;
1708 
1709 	ASSERT(tll->tll_msg_pending);
1710 	tll->tll_msg_pending = 0;
1711 	mp = tll->tll_msg_deferred;
1712 	if (mp != NULL) {
1713 		tll->tll_msg_deferred = mp->b_next;
1714 		mp->b_next = NULL;
1715 		tll->tll_msg_pending = 1;
1716 		putnext(tll->tll_wq, mp);
1717 	}
1718 }
1719 
1720 /*
1721  * Send down queued "close" messages to lower stream.  These were
1722  * enqueued right after the stream was originally allocated, when the
1723  * tll_style was set by PPPTUN_SINFO.
1724  */
1725 static int
1726 tll_close_req(tunll_t *tll)
1727 {
1728 	mblk_t *mb, *mbnext;
1729 
1730 	if ((mb = tll->tll_onclose) == NULL)
1731 		tll->tll_flags |= TLLF_SHUTDOWN_DONE;
1732 	else {
1733 		tll->tll_onclose = NULL;
1734 		while (mb != NULL) {
1735 			mbnext = mb->b_next;
1736 			mb->b_next = NULL;
1737 			message_send(tll, mb);
1738 			mb = mbnext;
1739 		}
1740 	}
1741 	return (0);
1742 }
1743 
1744 /*
1745  * This function is called when a backenable occurs on the write side of a
1746  * lower stream.  It walks over the client streams, looking for ones that use
1747  * the given tunll_t lower stream.  Each client is then backenabled.
1748  */
1749 static void
1750 tclvm_backenable(void *arg, void *firstv, size_t numv)
1751 {
1752 	tunll_t *tll = arg;
1753 	int minorn = (int)(uintptr_t)firstv;
1754 	int minormax = minorn + numv;
1755 	tuncl_t *tcl;
1756 	queue_t *q;
1757 
1758 	while (minorn < minormax) {
1759 		tcl = tcl_slots[minorn - 1];
1760 		if ((tcl->tcl_data_tll == tll ||
1761 		    tcl->tcl_ctrl_tll == tll) &&
1762 		    (q = tcl->tcl_rq) != NULL) {
1763 			qenable(OTHERQ(q));
1764 		}
1765 		minorn++;
1766 	}
1767 }
1768 
1769 /*
1770  * sppptun_uwsrv()
1771  *
1772  * MT-Perimeters:
1773  *    exclusive inner, shared outer.
1774  *
1775  * Description:
1776  *    Upper write-side service procedure.  In addition to the usual
1777  *    STREAMS queue service handling, this routine also handles the
1778  *    transmission of the unbind/detach messages to the lower stream
1779  *    driver when a lower stream is being closed.  (See the use of
1780  *    qenable/qwait in sppptun_close().)
1781  */
1782 static int
1783 sppptun_uwsrv(queue_t *q)
1784 {
1785 	tuncl_t	*tcl;
1786 	mblk_t *mp;
1787 	queue_t *nextq;
1788 
1789 	tcl = q->q_ptr;
1790 	if (!(tcl->tcl_flags & TCLF_ISCLIENT)) {
1791 		tunll_t *tll = (tunll_t *)tcl;
1792 
1793 		if ((tll->tll_flags & (TLLF_CLOSING|TLLF_CLOSE_DONE)) ==
1794 		    TLLF_CLOSING) {
1795 			tll->tll_error = tll_close_req(tll);
1796 			tll->tll_flags |= TLLF_CLOSE_DONE;
1797 		} else {
1798 			/*
1799 			 * We've been enabled here because of a backenable on
1800 			 * output flow control.  Backenable clients using this
1801 			 * lower layer.
1802 			 */
1803 			vmem_walk(tcl_minor_arena, VMEM_ALLOC, tclvm_backenable,
1804 			    tll);
1805 		}
1806 		return (0);
1807 	}
1808 
1809 	while ((mp = getq(q)) != NULL) {
1810 		if ((nextq = sppptun_outpkt(q, &mp)) != NULL) {
1811 			putnext(nextq, mp);
1812 		} else if (mp != NULL) {
1813 			(void) putbq(q, mp);
1814 			break;
1815 		}
1816 	}
1817 	return (0);
1818 }
1819 
1820 /*
1821  * sppptun_lwput()
1822  *
1823  * MT-Perimeters:
1824  *    shared inner, shared outer.
1825  *
1826  * Description:
1827  *    Lower write-side put procedure.  Nothing should be sending
1828  *    packets down this stream.
1829  */
1830 static void
1831 sppptun_lwput(queue_t *q, mblk_t *mp)
1832 {
1833 	switch (MTYPE(mp)) {
1834 	case M_PROTO:
1835 		putnext(q, mp);
1836 		break;
1837 	default:
1838 		freemsg(mp);
1839 		break;
1840 	}
1841 }
1842 
1843 /*
1844  * sppptun_lrput()
1845  *
1846  * MT-Perimeters:
1847  *    shared inner, shared outer.
1848  *
1849  * Description:
1850  *    Lower read-side put procedure.  Nothing should arrive here.
1851  */
1852 static void
1853 sppptun_lrput(queue_t *q, mblk_t *mp)
1854 {
1855 	tuncl_t *tcl;
1856 
1857 	switch (MTYPE(mp)) {
1858 	case M_IOCTL:
1859 		miocnak(q, mp, 0, EINVAL);
1860 		return;
1861 	case M_FLUSH:
1862 		if (*mp->b_rptr & FLUSHR) {
1863 			flushq(q, FLUSHDATA);
1864 		}
1865 		if (*mp->b_rptr & FLUSHW) {
1866 			*mp->b_rptr &= ~FLUSHR;
1867 			qreply(q, mp);
1868 		} else {
1869 			freemsg(mp);
1870 		}
1871 		return;
1872 	}
1873 	/*
1874 	 * Try to forward the message to the put procedure for the upper
1875 	 * control stream for this lower stream. If there are already messages
1876 	 * queued here, queue this one up to preserve message ordering.
1877 	 */
1878 	if ((tcl = (tuncl_t *)q->q_ptr) == NULL || tcl->tcl_rq == NULL) {
1879 		freemsg(mp);
1880 		return;
1881 	}
1882 	if (queclass(mp) == QPCTL ||
1883 	    (q->q_first == NULL && canput(tcl->tcl_rq))) {
1884 		put(tcl->tcl_rq, mp);
1885 	} else {
1886 		if (!putq(q, mp))
1887 			freemsg(mp);
1888 	}
1889 }
1890 
1891 /*
1892  * MT-Perimeters:
1893  *    shared inner, shared outer.
1894  *
1895  *    Handle non-data DLPI messages.  Used with PPPoE, which runs over
1896  *    Ethernet only.
1897  */
1898 static void
1899 urput_dlpi(queue_t *q, mblk_t *mp)
1900 {
1901 	int err;
1902 	union DL_primitives *dlp = (union DL_primitives *)mp->b_rptr;
1903 	tunll_t *tll = q->q_ptr;
1904 	size_t mlen = MBLKL(mp);
1905 
1906 	switch (dlp->dl_primitive) {
1907 	case DL_UDERROR_IND:
1908 		break;
1909 
1910 	case DL_ERROR_ACK:
1911 		if (mlen < DL_ERROR_ACK_SIZE)
1912 			break;
1913 		err = dlp->error_ack.dl_unix_errno ?
1914 		    dlp->error_ack.dl_unix_errno : ENXIO;
1915 		switch (dlp->error_ack.dl_error_primitive) {
1916 		case DL_UNBIND_REQ:
1917 			message_done(tll);
1918 			break;
1919 		case DL_DETACH_REQ:
1920 			message_done(tll);
1921 			tll->tll_error = err;
1922 			tll->tll_flags |= TLLF_SHUTDOWN_DONE;
1923 			break;
1924 		case DL_PHYS_ADDR_REQ:
1925 			message_done(tll);
1926 			break;
1927 		case DL_INFO_REQ:
1928 		case DL_ATTACH_REQ:
1929 		case DL_BIND_REQ:
1930 			message_done(tll);
1931 			tll->tll_error = err;
1932 			break;
1933 		}
1934 		break;
1935 
1936 	case DL_INFO_ACK:
1937 		message_done(tll);
1938 		break;
1939 
1940 	case DL_BIND_ACK:
1941 		message_done(tll);
1942 		break;
1943 
1944 	case DL_PHYS_ADDR_ACK:
1945 		break;
1946 
1947 	case DL_OK_ACK:
1948 		if (mlen < DL_OK_ACK_SIZE)
1949 			break;
1950 		switch (dlp->ok_ack.dl_correct_primitive) {
1951 		case DL_UNBIND_REQ:
1952 			message_done(tll);
1953 			break;
1954 		case DL_DETACH_REQ:
1955 			tll->tll_flags |= TLLF_SHUTDOWN_DONE;
1956 			break;
1957 		case DL_ATTACH_REQ:
1958 			message_done(tll);
1959 			break;
1960 		}
1961 		break;
1962 	}
1963 	freemsg(mp);
1964 }
1965 
1966 /* Search structure used with PPPoE only; see tclvm_pppoe_search(). */
1967 struct poedat {
1968 	uint_t sessid;
1969 	tunll_t *tll;
1970 	const void *srcaddr;
1971 	int isdata;
1972 	tuncl_t *tcl;
1973 };
1974 
1975 /*
1976  * This function is called by vmem_walk from within sppptun_recv.  It
1977  * iterates over a span of allocated minor node numbers to search for
1978  * the appropriate lower stream, session ID, and peer MAC address.
1979  *
1980  * (This is necessary due to a design flaw in the PPPoE protocol
1981  * itself.  The protocol assigns session IDs from the server side
1982  * only.  Both server and client use the same number.  Thus, if there
1983  * are multiple clients on a single host, there can be session ID
1984  * conflicts between servers and there's no way to detangle them
1985  * except by looking at the remote MAC address.)
1986  *
1987  * (This could have been handled by linking together sessions that
1988  * differ only in the remote MAC address.  This isn't done because it
1989  * would involve extra per-session storage and it's very unlikely that
1990  * PPPoE would be used this way.)
1991  */
1992 static void
1993 tclvm_pppoe_search(void *arg, void *firstv, size_t numv)
1994 {
1995 	struct poedat *poedat = (struct poedat *)arg;
1996 	int minorn = (int)(uintptr_t)firstv;
1997 	int minormax = minorn + numv;
1998 	tuncl_t *tcl;
1999 
2000 	if (poedat->tcl != NULL)
2001 		return;
2002 	while (minorn < minormax) {
2003 		tcl = tcl_slots[minorn - 1];
2004 		ASSERT(tcl != NULL);
2005 		if (tcl->tcl_rsessid == poedat->sessid &&
2006 		    ((!poedat->isdata && tcl->tcl_ctrl_tll == poedat->tll) ||
2007 		    (poedat->isdata && tcl->tcl_data_tll == poedat->tll)) &&
2008 		    bcmp(tcl->tcl_address.pta_pppoe.ptma_mac,
2009 		    poedat->srcaddr,
2010 		    sizeof (tcl->tcl_address.pta_pppoe.ptma_mac)) == 0) {
2011 			poedat->tcl = tcl;
2012 			break;
2013 		}
2014 		minorn++;
2015 	}
2016 }
2017 
2018 /*
2019  * sppptun_recv()
2020  *
2021  * MT-Perimeters:
2022  *    shared inner, shared outer.
2023  *
2024  * Description:
2025  *    Receive function called by sppptun_urput, which is called when
2026  *    the lower read-side put or service procedure sends a message
2027  *    upstream to a device user (PPP).  It attempts to find an
2028  *    appropriate queue on the module above us (depending on what the
2029  *    associated upper stream for the protocol would be), and if not
2030  *    possible, it will find an upper control stream for the protocol.
2031  *    Returns a pointer to the upper queue_t, or NULL if the message
2032  *    has been discarded.
2033  *
2034  * About demultiplexing:
2035  *
2036  *	All four protocols (L2F, PPTP, L2TP, and PPPoE) support a
2037  *	locally assigned ID for demultiplexing incoming traffic.  For
2038  *	L2F, this is called the Client ID, for PPTP the Call ID, for
2039  *	L2TP the Session ID, and for PPPoE the SESSION_ID.  This is a
2040  *	16 bit number for all four protocols, and is used to directly
2041  *	index into a list of upper streams.  With the upper stream in
2042  *	hand, we verify that this is the right stream and deliver the
2043  *	data.
2044  *
2045  *	L2TP has a Tunnel ID, which represents a bundle of PPP
2046  *	sessions between the peers.  Because we always assign unique
2047  *	session ID numbers, we merely check that the given ID matches
2048  *	the assigned ID for the upper stream.
2049  *
2050  *	L2F has a Multiplex ID, which is unique per connection.  It
2051  *	does not have L2TP's concept of multiple-connections-within-
2052  *	a-tunnel.  The same checking is done.
2053  *
2054  *	PPPoE is a horribly broken protocol.  Only one ID is assigned
2055  *	per connection.  The client must somehow demultiplex based on
2056  *	an ID number assigned by the server.  It's not necessarily
2057  *	unique.  The search is done based on {ID,peerEthernet} (using
2058  *	tcl_rsessid) for all packet types except PADI and PADS.
2059  *
2060  *	Neither PPPoE nor PPTP supports additional ID numbers.
2061  *
2062  *	Both L2F and L2TP come in over UDP.  They are distinguished by
2063  *	looking at the GRE version field -- 001 for L2F and 010 for
2064  *	L2TP.
2065  */
2066 static queue_t *
2067 sppptun_recv(queue_t *q, mblk_t **mpp, const void *srcaddr)
2068 {
2069 	mblk_t *mp;
2070 	tunll_t *tll;
2071 	tuncl_t *tcl;
2072 	int sessid;
2073 	int remlen;
2074 	int msglen;
2075 	int isdata;
2076 	int i;
2077 	const uchar_t *ucp;
2078 	const poep_t *poep;
2079 	mblk_t *mnew;
2080 	ppptun_atype *pap;
2081 
2082 	mp = *mpp;
2083 
2084 	tll = q->q_ptr;
2085 	ASSERT(!(tll->tll_flags & TLLF_NOTLOWER));
2086 
2087 	tcl = NULL;
2088 	switch (tll->tll_style) {
2089 	case PTS_PPPOE:
2090 		/* Note that poep_t alignment is uint16_t */
2091 		if ((!IS_P2ALIGNED(mp->b_rptr, sizeof (uint16_t)) ||
2092 		    MBLKL(mp) < sizeof (poep_t)) &&
2093 		    !pullupmsg(mp, sizeof (poep_t)))
2094 			break;
2095 		poep = (const poep_t *)mp->b_rptr;
2096 		if (poep->poep_version_type != POE_VERSION)
2097 			break;
2098 		/*
2099 		 * First, extract a session ID number.  All protocols have
2100 		 * this.
2101 		 */
2102 		isdata = (poep->poep_code == POECODE_DATA);
2103 		sessid = ntohs(poep->poep_session_id);
2104 		remlen = sizeof (*poep);
2105 		msglen = ntohs(poep->poep_length);
2106 		i = poep->poep_code;
2107 		if (i == POECODE_PADI || i == POECODE_PADR) {
2108 			/* These go to the server daemon only. */
2109 			tcl = tll->tll_defcl;
2110 		} else if (i == POECODE_PADO || i == POECODE_PADS) {
2111 			/*
2112 			 * These go to a client only, and are demuxed
2113 			 * by the Host-Uniq field (into which we stuff
2114 			 * our local ID number when generating
2115 			 * PADI/PADR).
2116 			 */
2117 			ucp = (const uchar_t *)(poep + 1);
2118 			i = msglen;
2119 			while (i > POET_HDRLEN) {
2120 				if (POET_GET_TYPE(ucp) == POETT_END) {
2121 					i = 0;
2122 					break;
2123 				}
2124 				if (POET_GET_TYPE(ucp) == POETT_UNIQ &&
2125 				    POET_GET_LENG(ucp) >= sizeof (uint32_t))
2126 					break;
2127 				i -= POET_GET_LENG(ucp) + POET_HDRLEN;
2128 				ucp = POET_NEXT(ucp);
2129 			}
2130 			if (i >= POET_HDRLEN + 4)
2131 				sessid = GETLONG(ucp + POET_HDRLEN);
2132 			tcl = tcl_by_minor((minor_t)sessid);
2133 		} else {
2134 			/*
2135 			 * Try minor number as session ID first, since
2136 			 * it's used that way on server side.  It's
2137 			 * not used that way on the client, though, so
2138 			 * this might not work.  If this isn't the
2139 			 * right one, then try the tll cache.  If
2140 			 * neither is right, then search all open
2141 			 * clients.  Did I mention that the PPPoE
2142 			 * protocol is badly designed?
2143 			 */
2144 			tcl = tcl_by_minor((minor_t)sessid);
2145 			if (tcl == NULL ||
2146 			    (!isdata && tcl->tcl_ctrl_tll != tll) ||
2147 			    (isdata && tcl->tcl_data_tll != tll) ||
2148 			    sessid != tcl->tcl_rsessid ||
2149 			    bcmp(srcaddr, tcl->tcl_address.pta_pppoe.ptma_mac,
2150 			    sizeof (tcl->tcl_address.pta_pppoe.ptma_mac)) != 0)
2151 				tcl = tll->tll_lastcl;
2152 			if (tcl == NULL ||
2153 			    (!isdata && tcl->tcl_ctrl_tll != tll) ||
2154 			    (isdata && tcl->tcl_data_tll != tll) ||
2155 			    sessid != tcl->tcl_rsessid ||
2156 			    bcmp(srcaddr, tcl->tcl_address.pta_pppoe.ptma_mac,
2157 			    sizeof (tcl->tcl_address.pta_pppoe.ptma_mac)) != 0)
2158 				tcl = NULL;
2159 			if (tcl == NULL && sessid != 0) {
2160 				struct poedat poedat;
2161 
2162 				/*
2163 				 * Slow mode.  Too bad.  If you don't like it,
2164 				 * you can always choose a better protocol.
2165 				 */
2166 				poedat.sessid = sessid;
2167 				poedat.tll = tll;
2168 				poedat.srcaddr = srcaddr;
2169 				poedat.tcl = NULL;
2170 				poedat.isdata = isdata;
2171 				vmem_walk(tcl_minor_arena, VMEM_ALLOC,
2172 				    tclvm_pppoe_search, &poedat);
2173 				KLINCR(lks_walks);
2174 				if ((tcl = poedat.tcl) != NULL) {
2175 					tll->tll_lastcl = tcl;
2176 					KCINCR(cks_walks);
2177 				}
2178 			}
2179 		}
2180 		break;
2181 	}
2182 
2183 	if (tcl == NULL || tcl->tcl_rq == NULL) {
2184 		DTRACE_PROBE3(sppptun__recv__discard, int, sessid,
2185 		    tuncl_t *, tcl, mblk_t *, mp);
2186 		if (tcl == NULL) {
2187 			KLINCR(lks_in_nomatch);
2188 		}
2189 		if (isdata) {
2190 			KLINCR(lks_indata_drops);
2191 			if (tcl != NULL)
2192 				tcl->tcl_stats.ppp_ierrors++;
2193 		} else {
2194 			KLINCR(lks_inctrl_drops);
2195 			if (tcl != NULL) {
2196 				KCINCR(cks_inctrl_drops);
2197 			}
2198 		}
2199 		freemsg(mp);
2200 		return (NULL);
2201 	}
2202 
2203 	if (tcl->tcl_data_tll == tll && isdata) {
2204 		if (!adjmsg(mp, remlen) ||
2205 		    (i = msgsize(mp)) < msglen ||
2206 		    (i > msglen && !adjmsg(mp, msglen - i))) {
2207 			KLINCR(lks_indata_drops);
2208 			tcl->tcl_stats.ppp_ierrors++;
2209 			freemsg(mp);
2210 			return (NULL);
2211 		}
2212 		/* XXX -- address/control handling in pppd needs help. */
2213 		if (*mp->b_rptr != 0xFF) {
2214 			if ((mp = prependb(mp, 2, 1)) == NULL) {
2215 				KLINCR(lks_indata_drops);
2216 				tcl->tcl_stats.ppp_ierrors++;
2217 				return (NULL);
2218 			}
2219 			mp->b_rptr[0] = 0xFF;
2220 			mp->b_rptr[1] = 0x03;
2221 		}
2222 		MTYPE(mp) = M_DATA;
2223 		tcl->tcl_stats.ppp_ibytes += msgsize(mp);
2224 		tcl->tcl_stats.ppp_ipackets++;
2225 		KLINCR(lks_indata);
2226 	} else {
2227 		if (isdata || tcl->tcl_ctrl_tll != tll ||
2228 		    (mnew = make_control(tcl, tll, PTCA_CONTROL, tcl)) ==
2229 		    NULL) {
2230 			KLINCR(lks_inctrl_drops);
2231 			KCINCR(cks_inctrl_drops);
2232 			freemsg(mp);
2233 			return (NULL);
2234 		}
2235 		/* Fix up source address; peer might not be set yet. */
2236 		pap = &((struct ppptun_control *)mnew->b_rptr)->ptc_address;
2237 		bcopy(srcaddr, pap->pta_pppoe.ptma_mac,
2238 		    sizeof (pap->pta_pppoe.ptma_mac));
2239 		mnew->b_cont = mp;
2240 		mp = mnew;
2241 		KLINCR(lks_inctrls);
2242 		KCINCR(cks_inctrls);
2243 	}
2244 	*mpp = mp;
2245 	return (tcl->tcl_rq);
2246 }
2247 
2248 /*
2249  * sppptun_urput()
2250  *
2251  * MT-Perimeters:
2252  *    shared inner, shared outer.
2253  *
2254  * Description:
2255  *    Upper read-side put procedure.  Messages from the underlying
2256  *    lower stream driver arrive here.  See sppptun_recv for the
2257  *    demultiplexing logic.
2258  */
2259 static void
2260 sppptun_urput(queue_t *q, mblk_t *mp)
2261 {
2262 	union DL_primitives *dlprim;
2263 	mblk_t *mpnext;
2264 	tunll_t *tll;
2265 	queue_t *nextq;
2266 
2267 	tll = q->q_ptr;
2268 	ASSERT(!(tll->tll_flags & TLLF_NOTLOWER));
2269 
2270 	switch (MTYPE(mp)) {
2271 	case M_DATA:
2272 		/*
2273 		 * When we're bound over IP, data arrives here.  The
2274 		 * packet starts with the IP header itself.
2275 		 */
2276 		if ((nextq = sppptun_recv(q, &mp, NULL)) != NULL)
2277 			putnext(nextq, mp);
2278 		break;
2279 
2280 	case M_PROTO:
2281 	case M_PCPROTO:
2282 		/* Data arrives here for UDP or raw Ethernet, not IP. */
2283 		switch (tll->tll_style) {
2284 			/* PPTP control messages are over TCP only. */
2285 		case PTS_PPTP:
2286 		default:
2287 			ASSERT(0);	/* how'd that happen? */
2288 			break;
2289 
2290 		case PTS_PPPOE:		/* DLPI message */
2291 			if (MBLKL(mp) < sizeof (t_uscalar_t))
2292 				break;
2293 			dlprim = (union DL_primitives *)mp->b_rptr;
2294 			switch (dlprim->dl_primitive) {
2295 			case DL_UNITDATA_IND: {
2296 				size_t mlen = MBLKL(mp);
2297 
2298 				if (mlen < DL_UNITDATA_IND_SIZE)
2299 					break;
2300 				if (dlprim->unitdata_ind.dl_src_addr_offset <
2301 				    DL_UNITDATA_IND_SIZE ||
2302 				    dlprim->unitdata_ind.dl_src_addr_offset +
2303 				    dlprim->unitdata_ind.dl_src_addr_length >
2304 				    mlen)
2305 					break;
2306 			}
2307 				/* FALLTHROUGH */
2308 			case DL_UNITDATA_REQ:	/* For loopback support. */
2309 				if (dlprim->dl_primitive == DL_UNITDATA_REQ &&
2310 				    MBLKL(mp) < DL_UNITDATA_REQ_SIZE)
2311 					break;
2312 				if ((mpnext = mp->b_cont) == NULL)
2313 					break;
2314 				MTYPE(mpnext) = M_DATA;
2315 				nextq = sppptun_recv(q, &mpnext,
2316 				    dlprim->dl_primitive == DL_UNITDATA_IND ?
2317 				    mp->b_rptr +
2318 				    dlprim->unitdata_ind.dl_src_addr_offset :
2319 				    tll->tll_lcladdr.pta_pppoe.ptma_mac);
2320 				if (nextq != NULL)
2321 					putnext(nextq, mpnext);
2322 				freeb(mp);
2323 				return;
2324 
2325 			default:
2326 				urput_dlpi(q, mp);
2327 				return;
2328 			}
2329 			break;
2330 		}
2331 		freemsg(mp);
2332 		break;
2333 
2334 	default:
2335 		freemsg(mp);
2336 		break;
2337 	}
2338 }
2339 
2340 /*
2341  * sppptun_ursrv()
2342  *
2343  * MT-Perimeters:
2344  *    exclusive inner, shared outer.
2345  *
2346  * Description:
2347  *    Upper read-side service procedure.  This procedure services the
2348  *    client streams.  We get here because the client (PPP) asserts
2349  *    flow control down to us.
2350  */
2351 static int
2352 sppptun_ursrv(queue_t *q)
2353 {
2354 	mblk_t		*mp;
2355 
2356 	ASSERT(q->q_ptr != NULL);
2357 
2358 	while ((mp = getq(q)) != NULL) {
2359 		if (canputnext(q)) {
2360 			putnext(q, mp);
2361 		} else {
2362 			(void) putbq(q, mp);
2363 			break;
2364 		}
2365 	}
2366 	return (0);
2367 }
2368 
2369 /*
2370  * Dummy constructor/destructor functions for kmem_cache_create.
2371  * We're just using kmem as an allocator of integers, not real
2372  * storage.
2373  */
2374 
/*
 * Cache constructor: there is nothing to initialize, so every
 * argument is ignored and success (0) is always returned.
 */
/*ARGSUSED*/
static int
tcl_constructor(void *maddr, void *arg, int kmflags)
{
	const int success = 0;

	return (success);
}
2381 
/*
 * Cache destructor: the matching constructor sets nothing up, so
 * there is nothing to tear down.  Both arguments are ignored.
 */
/*ARGSUSED*/
static void
tcl_destructor(void *maddr, void *arg)
{
	/* Intentionally empty. */
}
2387 
/*
 * Memory charged to a single tunnel client when sizing the driver:
 * the two messages preallocated for close (counted as two dblk_t),
 * the client's pointer in the tcl_slots array, and the tuncl_t
 * structure itself.
 */
#define	TUNCL_SIZE \
	(2 * sizeof (dblk_t) + sizeof (tuncl_t *) + sizeof (tuncl_t))
2395 
/*
 * Clear all bits of x except the highest bit.  Values of 2 or less
 * are returned unchanged.
 *
 * The shift is done on an unsigned constant so that a value with its
 * top (32nd) bit set does not shift a 1 into the sign bit of an int,
 * which is undefined behavior.  Note that x is evaluated more than
 * once; do not pass an expression with side effects.
 */
#define	truncate(x)	((x) <= 2 ? (x) : (1U << (highbit(x) - 1)))
2400 
2401 /*
2402  * This function initializes some well-known global variables inside
2403  * the module.
2404  *
2405  * Called by sppptun_mod.c:_init() before installing the module.
2406  */
2407 void
2408 sppptun_init(void)
2409 {
2410 	tunll_list.q_forw = tunll_list.q_back = &tunll_list;
2411 }
2412 
2413 /*
2414  * This function allocates the initial internal storage for the
2415  * sppptun driver.
2416  *
2417  * Called by sppptun_mod.c:_init() after installing module.
2418  */
2419 void
2420 sppptun_tcl_init(void)
2421 {
2422 	uint_t i, j;
2423 
2424 	rw_init(&tcl_rwlock, NULL, RW_DRIVER, NULL);
2425 	rw_enter(&tcl_rwlock, RW_WRITER);
2426 	tcl_nslots = sppptun_init_cnt;
2427 	tcl_slots = kmem_zalloc(tcl_nslots * sizeof (tuncl_t *), KM_SLEEP);
2428 
2429 	tcl_cache = kmem_cache_create("sppptun_map", sizeof (tuncl_t), 0,
2430 	    tcl_constructor, tcl_destructor, NULL, NULL, NULL, 0);
2431 
2432 	/* Allocate integer space for minor numbers */
2433 	tcl_minor_arena = vmem_create("sppptun_minor", (void *)1, tcl_nslots,
2434 	    1, NULL, NULL, NULL, 0, VM_SLEEP | VMC_IDENTIFIER);
2435 
2436 	/*
2437 	 * Calculate available number of tunnels - how many tunnels
2438 	 * can we allocate in sppptun_pctofmem % of available
2439 	 * memory.  The value is rounded up to the nearest power of 2.
2440 	 */
2441 	i = (sppptun_pctofmem * kmem_maxavail()) / (100 * TUNCL_SIZE);
2442 	j = truncate(i);	/* i with non-high bits stripped */
2443 	if (i != j)
2444 		j *= 2;
2445 	tcl_minormax = j;
2446 	rw_exit(&tcl_rwlock);
2447 }
2448 
2449 /*
2450  * This function checks that there are no plumbed streams or other users.
2451  *
2452  * Called by sppptun_mod.c:_fini().  Assumes that we're exclusive on
2453  * both perimeters.
2454  */
2455 int
2456 sppptun_tcl_fintest(void)
2457 {
2458 	if (tunll_list.q_forw != &tunll_list || tcl_inuse > 0)
2459 		return (EBUSY);
2460 	else
2461 		return (0);
2462 }
2463 
2464 /*
2465  * If no lower streams are plumbed, then this function deallocates all
2466  * internal storage in preparation for unload.
2467  *
2468  * Called by sppptun_mod.c:_fini().  Assumes that we're exclusive on
2469  * both perimeters.
2470  */
2471 void
2472 sppptun_tcl_fini(void)
2473 {
2474 	if (tcl_minor_arena != NULL) {
2475 		vmem_destroy(tcl_minor_arena);
2476 		tcl_minor_arena = NULL;
2477 	}
2478 	if (tcl_cache != NULL) {
2479 		kmem_cache_destroy(tcl_cache);
2480 		tcl_cache = NULL;
2481 	}
2482 	kmem_free(tcl_slots, tcl_nslots * sizeof (tuncl_t *));
2483 	tcl_slots = NULL;
2484 	rw_destroy(&tcl_rwlock);
2485 	ASSERT(tcl_slots == NULL);
2486 	ASSERT(tcl_cache == NULL);
2487 	ASSERT(tcl_minor_arena == NULL);
2488 }
2489