xref: /titanic_51/usr/src/uts/common/io/ppp/sppptun/sppptun.c (revision dc4bf86d3869090fa08d98d7030728be986fd289)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/types.h>
28 #include <sys/debug.h>
29 #include <sys/param.h>
30 #include <sys/stat.h>
31 #include <sys/systm.h>
32 #include <sys/socket.h>
33 #include <sys/stream.h>
34 #include <sys/stropts.h>
35 #include <sys/errno.h>
36 #include <sys/time.h>
37 #include <sys/cmn_err.h>
38 #include <sys/sdt.h>
39 #include <sys/conf.h>
40 #include <sys/dlpi.h>
41 #include <sys/ddi.h>
42 #include <sys/kstat.h>
43 #include <sys/strsun.h>
44 #include <sys/bitmap.h>
45 #include <sys/sysmacros.h>
46 #include <sys/note.h>
47 #include <sys/policy.h>
48 #include <net/ppp_defs.h>
49 #include <net/pppio.h>
50 #include <net/sppptun.h>
51 #include <net/pppoe.h>
52 #include <netinet/in.h>
53 
54 #include "s_common.h"
55 #include "sppptun_mod.h"
56 #include "sppptun_impl.h"
57 
/*
 * Compile-time defaults for the sppptun_init_cnt and sppptun_pctofmem
 * tunables declared below.
 */
#define	NTUN_INITIAL 16			/* Initial number of sppptun slots */
#define	NTUN_PERCENT 5			/* Percent of memory to use */
60 
/*
 * This is used to tag official Solaris sources.  Please do not define
 * "INTERNAL_BUILD" when building this software outside of Sun
 * Microsystems.
 *
 * Two description strings are exported: one for the driver and one
 * for the module personality.  Builds without INTERNAL_BUILD use the
 * "ANU PPP" names and additionally carry a build time stamp.
 */
#ifdef INTERNAL_BUILD
/* MODINFO is limited to 32 characters. */
const char sppptun_driver_description[] = "PPP 4.0 tunnel driver";
const char sppptun_module_description[] = "PPP 4.0 tunnel module";
#else
const char sppptun_driver_description[] = "ANU PPP tundrv";
const char sppptun_module_description[] = "ANU PPP tunmod";

/*
 * Build stamp ("Built <date> at <time>", plus " DEBUG" on DEBUG
 * builds).  Nothing in the visible part of this file references it,
 * which is presumably why the lint directive below is needed.
 */
/* LINTED */
static const char buildtime[] = "Built " __DATE__ " at " __TIME__
#ifdef DEBUG
" DEBUG"
#endif
"\n";
#endif
81 
/*
 * Tunable values; these are similar to the values used in ptms_conf.c.
 * Override these settings via /etc/system.
 *
 * tuncl_alloc() always tries to allocate while fewer than sppptun_cnt
 * tunnels are in use.  Beyond that it enforces the smaller of
 * tcl_minormax and sppptun_max_pty, where a sppptun_max_pty of zero
 * means "no user-imposed limit".  sppptun_init_cnt and
 * sppptun_pctofmem are not consumed in this part of the file;
 * presumably they size the slot table and tcl_minormax at load time
 * (TODO confirm against the module initialization code).
 */
uint_t	sppptun_cnt = 0;		/* Minimum number of tunnels */
size_t	sppptun_max_pty = 0;		/* Maximum number of tunnels */
uint_t	sppptun_init_cnt = NTUN_INITIAL; /* Initial number of tunnel slots */
uint_t	sppptun_pctofmem = NTUN_PERCENT; /* Percent of memory to use */
90 
/*
 * Destination address as placed after the dl_unitdata_req_t header by
 * sppptun_outpkt(): the Ethernet MAC address followed by the SAP
 * (Ethertype), which is stored in host byte order.
 */
typedef struct ether_dest_s {
	ether_addr_t addr;	/* destination MAC address */
	ushort_t type;		/* Ethertype / DLPI SAP, host byte order */
} ether_dest_t;
95 
/*
 * Assemble a 32-bit big-endian value from four consecutive bytes at x.
 * Reading one byte at a time allows unaligned access.  Each byte is
 * widened to uint32_t before shifting so that a leading byte of 0x80
 * or above is not shifted into the sign bit of a promoted int; the
 * result is always an unsigned 32-bit quantity.
 */
#define	GETLONG(x)	(((uint32_t)(x)[0] << 24) | \
			((uint32_t)(x)[1] << 16) | \
			((uint32_t)(x)[2] << 8) | \
			(uint32_t)(x)[3])
98 
/*
 * Counter names for the named kstats of lower streams (tll) and of
 * client streams (tcl).  sppptun_open() hands these to kstat_setup()
 * together with the matching statistics structure.
 */
static const char *tll_kstats_list[] = { TLL_KSTATS_NAMES };
static const char *tcl_kstats_list[] = { TCL_KSTATS_NAMES };
101 
/*
 * Accessors for 64-bit named kstat counters.
 *
 * KREF(p, m, vn) names counter "vn" inside statistics member "m" of
 * the structure pointed to by "p"; KINCR and KDECR pre-increment and
 * pre-decrement it.  The pointer argument and each expansion are
 * parenthesized so that the macros remain correct when handed an
 * arbitrary pointer expression or used inside a larger expression.
 *
 * The KL* forms operate on a local variable named "tll" and the KC*
 * forms on a local variable named "tcl"; those names must be in scope
 * wherever the short forms are used.
 */
#define	KREF(p, m, vn)	((p)->m.vn.value.ui64)
#define	KINCR(p, m, vn)	(++KREF(p, m, vn))
#define	KDECR(p, m, vn)	(--KREF(p, m, vn))

#define	KLINCR(vn)	KINCR(tll, tll_kstats, vn)
#define	KLDECR(vn)	KDECR(tll, tll_kstats, vn)

#define	KCINCR(vn)	KINCR(tcl, tcl_kstats, vn)
#define	KCDECR(vn)	KDECR(tcl, tcl_kstats, vn)
111 
/*
 * STREAMS entry points.  They are declared ahead of the qinit tables
 * below, which refer to them.  Naming: "u" is the upper (client)
 * side, "l" the lower (linked) side; "r" and "w" are the read and
 * write queues; "put" and "srv" are put and service procedures.
 */
static int	sppptun_open(queue_t *, dev_t *, int, int, cred_t *);
static int	sppptun_close(queue_t *);
static void	sppptun_urput(queue_t *, mblk_t *);
static void	sppptun_uwput(queue_t *, mblk_t *);
static int	sppptun_ursrv(queue_t *);
static int	sppptun_uwsrv(queue_t *);
static void	sppptun_lrput(queue_t *, mblk_t *);
static void	sppptun_lwput(queue_t *, mblk_t *);
120 
/*
 * This is the table of clients.  Clients are the programs that open
 * /dev/sppptun as a device.  There may be a large number of these;
 * one per tunneled PPP session.  The table is a plain array indexed
 * by minor number.
 *
 * Note: slots are offset from minor node value by 1 because
 * vmem_alloc returns 0 for failure.
 *
 * The tcl_slots array entries are modified only when exclusive on
 * both inner and outer perimeters.  This ensures that threads on
 * shared perimeters always view this as unchanging memory with no
 * need to lock around accesses.  (Specifically, the tcl_slots array
 * is modified by entry to sppptun_open, sppptun_close, and _fini.)
 *
 * tuncl_alloc() and tuncl_free() additionally take tcl_rwlock as
 * writer around their updates to the table and to tcl_inuse.
 */
static tuncl_t **tcl_slots = NULL;	/* Slots for tuncl_t */
static size_t tcl_nslots = 0;		/* Size of slot array */
static size_t tcl_minormax = 0;		/* Maximum number of tunnels */
static size_t tcl_inuse = 0;		/* # of tunnels currently allocated */
static krwlock_t tcl_rwlock;
static struct kmem_cache *tcl_cache = NULL;	/* tunnel cache */
static vmem_t *tcl_minor_arena = NULL; /* Arena for device minors */
142 
/*
 * This is the simple list of lower layers.  For PPPoE, there is one
 * of these per Ethernet interface.  Lower layers are established by
 * "plumbing" -- using I_PLINK to connect the tunnel multiplexor to
 * the physical interface.
 *
 * tunll_list is the head of a circular queue: sppptun_open() appends
 * new entries with insque() and list walkers stop when they arrive
 * back at the head.  tunll_index supplies the next tll_index value
 * and is incremented on every module open.
 */
static struct qelem tunll_list;
static int tunll_index;
151 
/*
 * Test value; if all zeroes, then address hasn't been set yet.
 * sppptun_outpkt() compares a client's configured peer MAC address
 * against this to decide whether a peer has been configured.
 */
static const ether_addr_t zero_mac_addr = { 0, 0, 0, 0, 0, 0 };
154 
/*
 * Size of a dl_unitdata_req_t plus four more bytes.  Not referenced
 * in this part of the file; going by the name it is the smallest
 * unitdata request accepted when setting up DLPI fast-path (TODO
 * confirm at the point of use).
 */
#define	MIN_SET_FASTPATH_UNITDATAREQ_SIZE	\
	(sizeof (dl_unitdata_req_t) + 4)

/* Values used to fill in the module_info structure below. */
#define	TUN_MI_ID	2104	/* officially allocated module ID */
#define	TUN_MI_MINPSZ	(0)
#define	TUN_MI_MAXPSZ	(PPP_MAXMTU)
#define	TUN_MI_HIWAT	(PPP_MTU * 8)
#define	TUN_MI_LOWAT	(128)
163 
/*
 * Module information shared by all four qinit tables below: module
 * ID and name, packet size limits, and flow-control water marks.
 */
static struct module_info sppptun_modinfo = {
	TUN_MI_ID,		/* mi_idnum */
	PPP_TUN_NAME,		/* mi_idname */
	TUN_MI_MINPSZ,		/* mi_minpsz */
	TUN_MI_MAXPSZ,		/* mi_maxpsz */
	TUN_MI_HIWAT,		/* mi_hiwat */
	TUN_MI_LOWAT		/* mi_lowat */
};
172 
/*
 * Queue initialization tables.  The put procedures in this file are
 * declared as returning void, so each is cast to int (*)() for the
 * qi_putp slot.
 */

/*
 * Upper read side.  This is the only table that supplies the open
 * and close entry points; it also has a service procedure.
 */
static struct qinit sppptun_urinit = {
	(int (*)())sppptun_urput, /* qi_putp */
	sppptun_ursrv,		/* qi_srvp */
	sppptun_open,		/* qi_qopen */
	sppptun_close,		/* qi_qclose */
	NULL,			/* qi_qadmin */
	&sppptun_modinfo,	/* qi_minfo */
	NULL			/* qi_mstat */
};

/* Upper write side: put and service procedures only. */
static struct qinit sppptun_uwinit = {
	(int (*)())sppptun_uwput, /* qi_putp */
	sppptun_uwsrv,		/* qi_srvp */
	NULL,			/* qi_qopen */
	NULL,			/* qi_qclose */
	NULL,			/* qi_qadmin */
	&sppptun_modinfo,	/* qi_minfo */
	NULL			/* qi_mstat */
};

/* Lower (multiplexor) read side: put procedure only, no service. */
static struct qinit sppptun_lrinit = {
	(int (*)())sppptun_lrput, /* qi_putp */
	NULL,			/* qi_srvp */
	NULL,			/* qi_qopen */
	NULL,			/* qi_qclose */
	NULL,			/* qi_qadmin */
	&sppptun_modinfo,	/* qi_minfo */
	NULL			/* qi_mstat */
};

/* Lower (multiplexor) write side: put procedure only, no service. */
static struct qinit sppptun_lwinit = {
	(int (*)())sppptun_lwput, /* qi_putp */
	NULL,			/* qi_srvp */
	NULL,			/* qi_qopen */
	NULL,			/* qi_qclose */
	NULL,			/* qi_qadmin */
	&sppptun_modinfo,	/* qi_minfo */
	NULL			/* qi_mstat */
};
212 
/*
 * STREAMS table tying together the upper read/write and the lower
 * (multiplexor) read/write queue initialization tables.  It is not
 * static because it is referenced in sppptun_mod.c.
 */
struct streamtab sppptun_tab = {
	&sppptun_urinit,	/* st_rdinit */
	&sppptun_uwinit,	/* st_wrinit */
	&sppptun_lrinit,	/* st_muxrinit */
	&sppptun_lwinit		/* st_muxwrinit */
};
222 
223 /*
224  * Allocate another slot table twice as large as the original one
225  * (limited to global maximum).  Migrate all tunnels to the new slot
226  * table and free the original one.  Assumes we're exclusive on both
227  * inner and outer perimeters, and thus there are no other users of
228  * the tcl_slots array.
229  */
230 static minor_t
231 tcl_grow(void)
232 {
233 	minor_t old_size = tcl_nslots;
234 	minor_t new_size = 2 * old_size;
235 	tuncl_t **tcl_old = tcl_slots;
236 	tuncl_t **tcl_new;
237 	void  *vaddr;			/* vmem_add return value */
238 
239 	ASSERT(RW_LOCK_HELD(&tcl_rwlock));
240 
241 	/* Allocate new ptms array */
242 	tcl_new = kmem_zalloc(new_size * sizeof (tuncl_t *), KM_NOSLEEP);
243 	if (tcl_new == NULL)
244 		return ((minor_t)0);
245 
246 	/* Increase clone index space */
247 	vaddr = vmem_add(tcl_minor_arena, (void*)((uintptr_t)old_size + 1),
248 	    new_size - old_size, VM_NOSLEEP);
249 
250 	if (vaddr == NULL) {
251 		kmem_free(tcl_new, new_size * sizeof (tuncl_t *));
252 		return ((minor_t)0);
253 	}
254 
255 	/* Migrate tuncl_t entries to a new location */
256 	tcl_nslots = new_size;
257 	bcopy(tcl_old, tcl_new, old_size * sizeof (tuncl_t *));
258 	tcl_slots = tcl_new;
259 	kmem_free(tcl_old, old_size * sizeof (tuncl_t *));
260 
261 	/* Allocate minor number and return it */
262 	return ((minor_t)(uintptr_t)vmem_alloc(tcl_minor_arena, 1, VM_NOSLEEP));
263 }
264 
265 /*
266  * Allocate new minor number and tunnel client entry.  Returns the new
267  * entry or NULL if no memory or maximum number of entries reached.
268  * Assumes we're exclusive on both inner and outer perimeters, and
269  * thus there are no other users of the tcl_slots array.
270  */
271 static tuncl_t *
272 tuncl_alloc(int wantminor)
273 {
274 	minor_t dminor;
275 	tuncl_t *tcl = NULL;
276 
277 	rw_enter(&tcl_rwlock, RW_WRITER);
278 
279 	ASSERT(tcl_slots != NULL);
280 
281 	/*
282 	 * Always try to allocate new pty when sppptun_cnt minimum
283 	 * limit is not achieved. If it is achieved, the maximum is
284 	 * determined by either user-specified value (if it is
285 	 * non-zero) or our memory estimations - whatever is less.
286 	 */
287 	if (tcl_inuse >= sppptun_cnt) {
288 		/*
289 		 * When system achieved required minimum of tunnels,
290 		 * check for the denial of service limits.
291 		 *
292 		 * Get user-imposed maximum, if configured, or
293 		 * calculated memory constraint.
294 		 */
295 		size_t user_max = (sppptun_max_pty == 0 ? tcl_minormax :
296 		    min(sppptun_max_pty, tcl_minormax));
297 
298 		/* Do not try to allocate more than allowed */
299 		if (tcl_inuse >= user_max) {
300 			rw_exit(&tcl_rwlock);
301 			return (NULL);
302 		}
303 	}
304 	tcl_inuse++;
305 
306 	/*
307 	 * Allocate new minor number. If this fails, all slots are
308 	 * busy and we need to grow the hash.
309 	 */
310 	if (wantminor <= 0) {
311 		dminor = (minor_t)(uintptr_t)vmem_alloc(tcl_minor_arena, 1,
312 		    VM_NOSLEEP);
313 		if (dminor == 0) {
314 			/* Grow the cache and retry allocation */
315 			dminor = tcl_grow();
316 		}
317 	} else {
318 		dminor = (minor_t)(uintptr_t)vmem_xalloc(tcl_minor_arena, 1,
319 		    0, 0, 0, (void *)(uintptr_t)wantminor,
320 		    (void *)((uintptr_t)wantminor+1), VM_NOSLEEP);
321 		if (dminor != 0 && dminor != wantminor) {
322 			vmem_free(tcl_minor_arena, (void *)(uintptr_t)dminor,
323 			    1);
324 			dminor = 0;
325 		}
326 	}
327 
328 	if (dminor == 0) {
329 		/* Not enough memory now */
330 		tcl_inuse--;
331 		rw_exit(&tcl_rwlock);
332 		return (NULL);
333 	}
334 
335 	tcl = kmem_cache_alloc(tcl_cache, KM_NOSLEEP);
336 	if (tcl == NULL) {
337 		/* Not enough memory - this entry can't be used now. */
338 		vmem_free(tcl_minor_arena, (void *)(uintptr_t)dminor, 1);
339 		tcl_inuse--;
340 	} else {
341 		bzero(tcl, sizeof (*tcl));
342 		tcl->tcl_lsessid = dminor;
343 		ASSERT(tcl_slots[dminor - 1] == NULL);
344 		tcl_slots[dminor - 1] = tcl;
345 	}
346 
347 	rw_exit(&tcl_rwlock);
348 	return (tcl);
349 }
350 
351 /*
352  * This routine frees an upper level (client) stream by removing it
353  * from the minor number pool and freeing the state structure storage.
354  * Assumes we're exclusive on both inner and outer perimeters, and
355  * thus there are no other concurrent users of the tcl_slots array or
356  * of any entry in that array.
357  */
358 static void
359 tuncl_free(tuncl_t *tcl)
360 {
361 	rw_enter(&tcl_rwlock, RW_WRITER);
362 	ASSERT(tcl->tcl_lsessid <= tcl_nslots);
363 	ASSERT(tcl_slots[tcl->tcl_lsessid - 1] == tcl);
364 	ASSERT(tcl_inuse > 0);
365 	tcl_inuse--;
366 	tcl_slots[tcl->tcl_lsessid - 1] = NULL;
367 
368 	if (tcl->tcl_ksp != NULL) {
369 		kstat_delete(tcl->tcl_ksp);
370 		tcl->tcl_ksp = NULL;
371 	}
372 
373 	/* Return minor number to the pool of minors */
374 	vmem_free(tcl_minor_arena, (void *)(uintptr_t)tcl->tcl_lsessid, 1);
375 
376 	/* Return tuncl_t to the cache */
377 	kmem_cache_free(tcl_cache, tcl);
378 	rw_exit(&tcl_rwlock);
379 }
380 
381 /*
382  * Get tuncl_t structure by minor number.  Returns NULL when minor is
383  * out of range.  Note that lookup of tcl pointers (and use of those
384  * pointers) is safe because modification is done only when exclusive
385  * on both inner and outer perimeters.
386  */
387 static tuncl_t *
388 tcl_by_minor(minor_t dminor)
389 {
390 	tuncl_t *tcl = NULL;
391 
392 	if ((dminor >= 1) && (dminor <= tcl_nslots) && tcl_slots != NULL) {
393 		tcl = tcl_slots[dminor - 1];
394 	}
395 
396 	return (tcl);
397 }
398 
399 /*
400  * Set up kstats for upper or lower stream.
401  */
402 static kstat_t *
403 kstat_setup(kstat_named_t *knt, const char **names, int nstat,
404     const char *modname, int unitnum)
405 {
406 	kstat_t *ksp;
407 	char unitname[KSTAT_STRLEN];
408 	int i;
409 
410 	for (i = 0; i < nstat; i++) {
411 		kstat_set_string(knt[i].name, names[i]);
412 		knt[i].data_type = KSTAT_DATA_UINT64;
413 	}
414 	(void) sprintf(unitname, "%s" "%d", modname, unitnum);
415 	ksp = kstat_create(modname, unitnum, unitname, "net",
416 	    KSTAT_TYPE_NAMED, nstat, KSTAT_FLAG_VIRTUAL);
417 	if (ksp != NULL) {
418 		ksp->ks_data = (void *)knt;
419 		kstat_install(ksp);
420 	}
421 	return (ksp);
422 }
423 
424 /*
425  * sppptun_open()
426  *
427  * MT-Perimeters:
428  *    exclusive inner, exclusive outer.
429  *
430  * Description:
431  *    Common open procedure for module and driver.
432  */
433 static int
434 sppptun_open(queue_t *q, dev_t *devp, int oflag, int sflag, cred_t *credp)
435 {
436 	_NOTE(ARGUNUSED(oflag))
437 
438 	/* Allow a re-open */
439 	if (q->q_ptr != NULL)
440 		return (0);
441 
442 	/* In the off chance that we're on our way out, just return error */
443 	if (tcl_slots == NULL)
444 		return (EINVAL);
445 
446 	if (sflag & MODOPEN) {
447 		tunll_t *tll;
448 		char *cp;
449 
450 		/* ordinary users have no need to push this module */
451 		if (secpolicy_net_config(credp, B_FALSE) != 0)
452 			return (EPERM);
453 
454 		tll = kmem_zalloc(sizeof (tunll_t), KM_SLEEP);
455 
456 		tll->tll_index = tunll_index++;
457 
458 		tll->tll_wq = WR(q);
459 
460 		/* Insert at end of list */
461 		insque(&tll->tll_next, tunll_list.q_back);
462 		q->q_ptr = WR(q)->q_ptr = tll;
463 
464 		tll->tll_style = PTS_PPPOE;
465 		tll->tll_alen = sizeof (tll->tll_lcladdr.pta_pppoe);
466 
467 		tll->tll_ksp = kstat_setup((kstat_named_t *)&tll->tll_kstats,
468 		    tll_kstats_list, Dim(tll_kstats_list), "tll",
469 		    tll->tll_index);
470 
471 		/*
472 		 * Find the name of the driver somewhere beneath us.
473 		 * Note that we have no driver under us until after
474 		 * qprocson().
475 		 */
476 		qprocson(q);
477 		for (q = WR(q); q->q_next != NULL; q = q->q_next)
478 			;
479 		cp = NULL;
480 		if (q->q_qinfo != NULL && q->q_qinfo->qi_minfo != NULL)
481 			cp = q->q_qinfo->qi_minfo->mi_idname;
482 		if (cp != NULL && *cp == '\0')
483 			cp = NULL;
484 
485 		/* Set initial name; user should overwrite. */
486 		if (cp == NULL)
487 			(void) snprintf(tll->tll_name, sizeof (tll->tll_name),
488 			    PPP_TUN_NAME "%d", tll->tll_index);
489 		else
490 			(void) snprintf(tll->tll_name, sizeof (tll->tll_name),
491 			    "%s:tun%d", cp, tll->tll_index);
492 	} else {
493 		tuncl_t	*tcl;
494 
495 		ASSERT(devp != NULL);
496 		if (sflag & CLONEOPEN) {
497 			tcl = tuncl_alloc(-1);
498 		} else {
499 			minor_t mn;
500 
501 			/*
502 			 * Support of non-clone open (ie, mknod with
503 			 * defined minor number) is supported for
504 			 * testing purposes so that 'arbitrary' minor
505 			 * numbers can be used.
506 			 */
507 			mn = getminor(*devp);
508 			if (mn == 0 || (tcl = tcl_by_minor(mn)) != NULL) {
509 				return (EPERM);
510 			}
511 			tcl = tuncl_alloc(mn);
512 		}
513 		if (tcl == NULL)
514 			return (ENOSR);
515 		tcl->tcl_rq = q;		/* save read queue pointer */
516 		tcl->tcl_flags |= TCLF_ISCLIENT;	/* sanity check */
517 
518 		q->q_ptr = WR(q)->q_ptr = (caddr_t)tcl;
519 		*devp = makedevice(getmajor(*devp), tcl->tcl_lsessid);
520 
521 		tcl->tcl_ksp = kstat_setup((kstat_named_t *)&tcl->tcl_kstats,
522 		    tcl_kstats_list, Dim(tcl_kstats_list), "tcl",
523 		    tcl->tcl_lsessid);
524 
525 		qprocson(q);
526 	}
527 	return (0);
528 }
529 
530 /*
531  * Create an appropriate control message for this client event.
532  */
533 static mblk_t *
534 make_control(tuncl_t *tclabout, tunll_t *tllabout, int action, tuncl_t *tclto)
535 {
536 	struct ppptun_control *ptc;
537 	mblk_t *mp = allocb(sizeof (*ptc), BPRI_HI);
538 
539 	if (mp != NULL) {
540 		MTYPE(mp) = M_PROTO;
541 		ptc = (struct ppptun_control *)mp->b_wptr;
542 		mp->b_wptr += sizeof (*ptc);
543 		if (tclabout != NULL) {
544 			ptc->ptc_rsessid = tclabout->tcl_rsessid;
545 			ptc->ptc_address = tclabout->tcl_address;
546 		} else {
547 			bzero(ptc, sizeof (*ptc));
548 		}
549 		ptc->ptc_discrim = tclto->tcl_ctlval;
550 		ptc->ptc_action = action;
551 		(void) strncpy(ptc->ptc_name, tllabout->tll_name,
552 		    sizeof (ptc->ptc_name));
553 	}
554 	return (mp);
555 }
556 
557 /*
558  * Send an appropriate control message up this client session.
559  */
560 static void
561 send_control(tuncl_t *tclabout, tunll_t *tllabout, int action, tuncl_t *tcl)
562 {
563 	mblk_t *mp;
564 
565 	if (tcl->tcl_rq != NULL) {
566 		mp = make_control(tclabout, tllabout, action, tcl);
567 		if (mp != NULL) {
568 			KCINCR(cks_octrl_spec);
569 			putnext(tcl->tcl_rq, mp);
570 		}
571 	}
572 }
573 
574 /*
575  * If a lower stream is being unplumbed, then the upper streams
576  * connected to this lower stream must be disconnected.  This routine
577  * accomplishes this by sending M_HANGUP to data streams and M_PROTO
578  * messages to control streams.  This is called by vmem_walk, and
579  * handles a span of minor node numbers.
580  *
581  * No need to update lks_clients here; the lower stream is on its way
582  * out.
583  */
584 static void
585 tclvm_remove_tll(void *arg, void *firstv, size_t numv)
586 {
587 	tunll_t *tll = (tunll_t *)arg;
588 	int minorn = (int)(uintptr_t)firstv;
589 	int minormax = minorn + numv;
590 	tuncl_t *tcl;
591 	mblk_t *mp;
592 
593 	while (minorn < minormax) {
594 		tcl = tcl_slots[minorn - 1];
595 		ASSERT(tcl != NULL);
596 		if (tcl->tcl_data_tll == tll && tcl->tcl_rq != NULL) {
597 			tcl->tcl_data_tll = NULL;
598 			mp = allocb(0, BPRI_HI);
599 			if (mp != NULL) {
600 				MTYPE(mp) = M_HANGUP;
601 				putnext(tcl->tcl_rq, mp);
602 				if (tcl->tcl_ctrl_tll == tll)
603 					tcl->tcl_ctrl_tll = NULL;
604 			}
605 		}
606 		if (tcl->tcl_ctrl_tll == tll) {
607 			send_control(tcl, tll, PTCA_UNPLUMB, tcl);
608 			tcl->tcl_ctrl_tll = NULL;
609 		}
610 		minorn++;
611 	}
612 }
613 
614 /*
615  * sppptun_close()
616  *
617  * MT-Perimeters:
618  *    exclusive inner, exclusive outer.
619  *
620  * Description:
621  *    Common close procedure for module and driver.
622  */
623 static int
624 sppptun_close(queue_t *q)
625 {
626 	int err;
627 	void *qptr;
628 	tunll_t *tll;
629 	tuncl_t *tcl;
630 
631 	qptr = q->q_ptr;
632 
633 	err = 0;
634 	tll = qptr;
635 	if (!(tll->tll_flags & TLLF_NOTLOWER)) {
636 		/* q_next is set on modules */
637 		ASSERT(WR(q)->q_next != NULL);
638 
639 		/* unlink any clients using this lower layer. */
640 		vmem_walk(tcl_minor_arena, VMEM_ALLOC, tclvm_remove_tll, tll);
641 
642 		/* tell daemon that this has been removed. */
643 		if ((tcl = tll->tll_defcl) != NULL)
644 			send_control(NULL, tll, PTCA_UNPLUMB, tcl);
645 
646 		tll->tll_flags |= TLLF_CLOSING;
647 		while (!(tll->tll_flags & TLLF_CLOSE_DONE)) {
648 			qenable(tll->tll_wq);
649 			qwait(tll->tll_wq);
650 		}
651 		tll->tll_error = 0;
652 		while (!(tll->tll_flags & TLLF_SHUTDOWN_DONE)) {
653 			if (!qwait_sig(tll->tll_wq))
654 				break;
655 		}
656 
657 		qprocsoff(q);
658 		q->q_ptr = WR(q)->q_ptr = NULL;
659 		tll->tll_wq = NULL;
660 		remque(&tll->tll_next);
661 		err = tll->tll_error;
662 		if (tll->tll_ksp != NULL)
663 			kstat_delete(tll->tll_ksp);
664 		kmem_free(tll, sizeof (*tll));
665 	} else {
666 		tcl = qptr;
667 
668 		/* devices are end of line; no q_next. */
669 		ASSERT(WR(q)->q_next == NULL);
670 
671 		qprocsoff(q);
672 		DTRACE_PROBE1(sppptun__client__close, tuncl_t *, tcl);
673 		tcl->tcl_rq = NULL;
674 		q->q_ptr = WR(q)->q_ptr = NULL;
675 
676 		tll = TO_TLL(tunll_list.q_forw);
677 		while (tll != TO_TLL(&tunll_list)) {
678 			if (tll->tll_defcl == tcl)
679 				tll->tll_defcl = NULL;
680 			if (tll->tll_lastcl == tcl)
681 				tll->tll_lastcl = NULL;
682 			tll = TO_TLL(tll->tll_next);
683 		}
684 		/*
685 		 * If this was a normal session, then tell the daemon.
686 		 */
687 		if (!(tcl->tcl_flags & TCLF_DAEMON) &&
688 		    (tll = tcl->tcl_ctrl_tll) != NULL &&
689 		    tll->tll_defcl != NULL) {
690 			send_control(tcl, tll, PTCA_DISCONNECT,
691 			    tll->tll_defcl);
692 		}
693 
694 		/* Update statistics for references being dropped. */
695 		if ((tll = tcl->tcl_data_tll) != NULL) {
696 			KLDECR(lks_clients);
697 		}
698 		if ((tll = tcl->tcl_ctrl_tll) != NULL) {
699 			KLDECR(lks_clients);
700 		}
701 
702 		tuncl_free(tcl);
703 	}
704 
705 	return (err);
706 }
707 
708 /*
709  * Allocate and initialize a DLPI or TPI template of the specified
710  * length.
711  */
712 static mblk_t *
713 pi_alloc(size_t len, int prim)
714 {
715 	mblk_t	*mp;
716 
717 	mp = allocb(len, BPRI_MED);
718 	if (mp != NULL) {
719 		MTYPE(mp) = M_PROTO;
720 		mp->b_wptr = mp->b_rptr + len;
721 		bzero(mp->b_rptr, len);
722 		*(int *)mp->b_rptr = prim;
723 	}
724 	return (mp);
725 }
726 
727 #define	dlpi_alloc(l, p)	pi_alloc((l), (p))
728 
729 /*
730  * Prepend some room to an mblk.  Try to reuse the existing buffer, if
731  * at all possible, rather than allocating a new one.  (Fast-path
732  * output should be able to use this.)
733  *
734  * (XXX why isn't this a library function ...?)
735  */
736 static mblk_t *
737 prependb(mblk_t *mp, size_t len, size_t align)
738 {
739 	mblk_t *newmp;
740 
741 
742 	if (align == 0)
743 		align = 8;
744 	if (DB_REF(mp) > 1 || mp->b_datap->db_base+len > mp->b_rptr ||
745 	    ((uint_t)((uintptr_t)mp->b_rptr - len) % align) != 0) {
746 		if ((newmp = allocb(len, BPRI_LO)) == NULL) {
747 			freemsg(mp);
748 			return (NULL);
749 		}
750 		newmp->b_wptr = newmp->b_rptr + len;
751 		newmp->b_cont = mp;
752 		return (newmp);
753 	}
754 	mp->b_rptr -= len;
755 	return (mp);
756 }
757 
758 /*
759  * sppptun_outpkt()
760  *
761  * MT-Perimeters:
762  *	shared inner, shared outer (if called from sppptun_uwput),
763  *	exclusive inner, shared outer (if called from sppptun_uwsrv).
764  *
765  * Description:
766  *    Called from sppptun_uwput or sppptun_uwsrv when processing a
767  *    M_DATA, M_PROTO, or M_PCPROTO message.  For all cases, it tries
768  *    to prepare the data to be sent to the module below this driver
769  *    if there is a lower stream linked underneath.  If no lower
770  *    stream exists, then the data will be discarded and an ENXIO
771  *    error returned.
772  *
773  * Returns:
774  *	pointer to queue if caller should do putnext, otherwise
775  *	*mpp != NULL if message should be enqueued, otherwise
776  *	*mpp == NULL if message is gone.
777  */
778 static queue_t *
779 sppptun_outpkt(queue_t *q, mblk_t **mpp)
780 {
781 	mblk_t *mp;
782 	tuncl_t *tcl;
783 	tunll_t *tll;
784 	mblk_t *encmb;
785 	mblk_t *datamb;
786 	dl_unitdata_req_t *dur;
787 	queue_t *lowerq;
788 	poep_t *poep;
789 	int len;
790 	ether_dest_t *edestp;
791 	enum { luNone, luCopy, luSend } loopup;
792 	boolean_t isdata;
793 	struct ppptun_control *ptc;
794 
795 	mp = *mpp;
796 	tcl = q->q_ptr;
797 
798 	*mpp = NULL;
799 	if (!(tcl->tcl_flags & TCLF_ISCLIENT)) {
800 		merror(q, mp, EINVAL);
801 		return (NULL);
802 	}
803 
804 	isdata = (MTYPE(mp) == M_DATA);
805 	if (isdata) {
806 		tll = tcl->tcl_data_tll;
807 		ptc = NULL;
808 	} else {
809 		/*
810 		 * If data are unaligned or otherwise unsuitable, then
811 		 * discard.
812 		 */
813 		if (MBLKL(mp) != sizeof (*ptc) || DB_REF(mp) > 1 ||
814 		    !IS_P2ALIGNED(mp->b_rptr, sizeof (ptc))) {
815 			KCINCR(cks_octrl_drop);
816 			DTRACE_PROBE2(sppptun__bad__control, tuncl_t *, tcl,
817 			    mblk_t *, mp);
818 			merror(q, mp, EINVAL);
819 			return (NULL);
820 		}
821 		ptc = (struct ppptun_control *)mp->b_rptr;
822 
823 		/* Set stream discriminator value if not yet set. */
824 		if (tcl->tcl_ctlval == 0)
825 			tcl->tcl_ctlval = ptc->ptc_discrim;
826 
827 		/* If this is a test message, then reply to caller. */
828 		if (ptc->ptc_action == PTCA_TEST) {
829 			DTRACE_PROBE2(sppptun__test, tuncl_t *, tcl,
830 			    struct ppptun_control *, ptc);
831 			if (mp->b_cont != NULL) {
832 				freemsg(mp->b_cont);
833 				mp->b_cont = NULL;
834 			}
835 			ptc->ptc_discrim = tcl->tcl_ctlval;
836 			putnext(RD(q), mp);
837 			return (NULL);
838 		}
839 
840 		/* If this one isn't for us, then discard it */
841 		if (tcl->tcl_ctlval != ptc->ptc_discrim) {
842 			DTRACE_PROBE2(sppptun__bad__discrim, tuncl_t *, tcl,
843 			    struct ppptun_control *, ptc);
844 			freemsg(mp);
845 			return (NULL);
846 		}
847 
848 		/* Don't allow empty control packets. */
849 		if (mp->b_cont == NULL) {
850 			KCINCR(cks_octrl_drop);
851 			merror(q, mp, EINVAL);
852 			return (NULL);
853 		}
854 		tll = tcl->tcl_ctrl_tll;
855 	}
856 
857 	if (tll == NULL || (lowerq = tll->tll_wq) == NULL) {
858 		DTRACE_PROBE3(sppptun__cannot__send, tuncl_t *, tcl,
859 		    tunll_t *, tll, mblk_t *, mp);
860 		merror(q, mp, ENXIO);
861 		if (isdata) {
862 			tcl->tcl_stats.ppp_oerrors++;
863 		} else {
864 			KCINCR(cks_octrl_drop);
865 		}
866 		return (NULL);
867 	}
868 
869 	/*
870 	 * If so, then try to send it down.  The lower queue is only
871 	 * ever detached while holding an exclusive lock on the whole
872 	 * driver, so we can be confident that the lower queue is
873 	 * still there.
874 	 */
875 	if (!bcanputnext(lowerq, mp->b_band)) {
876 		DTRACE_PROBE3(sppptun__flow__control, tuncl_t *, tcl,
877 		    tunll_t *, tll, mblk_t *, mp);
878 		*mpp = mp;
879 		return (NULL);
880 	}
881 
882 	/*
883 	 * Note: DLPI and TPI expect that the first buffer contains
884 	 * the control (unitdata-req) header, destination address, and
885 	 * nothing else.  Any protocol headers must go in the next
886 	 * buffer.
887 	 */
888 	loopup = luNone;
889 	encmb = NULL;
890 	if (isdata) {
891 		if (tll->tll_alen != 0 &&
892 		    bcmp(&tcl->tcl_address, &tll->tll_lcladdr,
893 		    tll->tll_alen) == 0)
894 			loopup = luSend;
895 		switch (tll->tll_style) {
896 		case PTS_PPPOE:
897 			/* Strip address and control fields if present. */
898 			if (mp->b_rptr[0] == 0xFF) {
899 				if (MBLKL(mp) < 3) {
900 					encmb = msgpullup(mp, 3);
901 					freemsg(mp);
902 					if ((mp = encmb) == NULL)
903 						break;
904 				}
905 				mp->b_rptr += 2;
906 			}
907 			/* Broadcasting data is probably not a good idea. */
908 			if (tcl->tcl_address.pta_pppoe.ptma_mac[0] & 1)
909 				break;
910 			encmb = dlpi_alloc(sizeof (*dur) + sizeof (*edestp),
911 			    DL_UNITDATA_REQ);
912 			if (encmb == NULL)
913 				break;
914 
915 			dur = (dl_unitdata_req_t *)encmb->b_rptr;
916 			dur->dl_dest_addr_length = sizeof (*edestp);
917 			dur->dl_dest_addr_offset = sizeof (*dur);
918 			edestp = (ether_dest_t *)(dur + 1);
919 			ether_copy(tcl->tcl_address.pta_pppoe.ptma_mac,
920 			    edestp->addr);
921 			/* DLPI SAPs are in host byte order! */
922 			edestp->type = ETHERTYPE_PPPOES;
923 
924 			/* Make sure the protocol field isn't compressed. */
925 			len = (*mp->b_rptr & 1);
926 			mp = prependb(mp, sizeof (*poep) + len, POE_HDR_ALIGN);
927 			if (mp == NULL)
928 				break;
929 			poep = (poep_t *)mp->b_rptr;
930 			poep->poep_version_type = POE_VERSION;
931 			poep->poep_code = POECODE_DATA;
932 			poep->poep_session_id = htons(tcl->tcl_rsessid);
933 			poep->poep_length = htons(msgsize(mp) -
934 			    sizeof (*poep));
935 			if (len > 0)
936 				*(char *)(poep + 1) = '\0';
937 			break;
938 
939 		default:
940 			ASSERT(0);
941 		}
942 	} else {
943 		/*
944 		 * Control side encapsulation.
945 		 */
946 		if (bcmp(&ptc->ptc_address, &tll->tll_lcladdr, tll->tll_alen)
947 		    == 0)
948 			loopup = luSend;
949 		datamb = mp->b_cont;
950 		switch (tll->tll_style) {
951 		case PTS_PPPOE:
952 			/*
953 			 * Don't allow a loopback session to establish
954 			 * itself.  PPPoE is broken; it uses only one
955 			 * session ID for both data directions, so the
956 			 * loopback data path can simply never work.
957 			 */
958 			if (loopup == luSend &&
959 			    ((poep_t *)datamb->b_rptr)->poep_code ==
960 			    POECODE_PADR)
961 				break;
962 			encmb = dlpi_alloc(sizeof (*dur) + sizeof (*edestp),
963 			    DL_UNITDATA_REQ);
964 			if (encmb == NULL)
965 				break;
966 			dur = (dl_unitdata_req_t *)encmb->b_rptr;
967 			dur->dl_dest_addr_length = sizeof (*edestp);
968 			dur->dl_dest_addr_offset = sizeof (*dur);
969 
970 			edestp = (ether_dest_t *)(dur + 1);
971 			/* DLPI SAPs are in host byte order! */
972 			edestp->type = ETHERTYPE_PPPOED;
973 
974 			/*
975 			 * If destination isn't set yet, then we have to
976 			 * allow anything at all.  Otherwise, force use
977 			 * of configured peer address.
978 			 */
979 			if (bcmp(tcl->tcl_address.pta_pppoe.ptma_mac,
980 			    zero_mac_addr, sizeof (zero_mac_addr)) == 0 ||
981 			    (tcl->tcl_flags & TCLF_DAEMON)) {
982 				ether_copy(ptc->ptc_address.pta_pppoe.ptma_mac,
983 				    edestp->addr);
984 			} else {
985 				ether_copy(tcl->tcl_address.pta_pppoe.ptma_mac,
986 				    edestp->addr);
987 			}
988 			/* Reflect multicast/broadcast back up. */
989 			if (edestp->addr[0] & 1)
990 				loopup = luCopy;
991 			break;
992 
993 		case PTS_PPTP:
994 			/*
995 			 * PPTP's control side is actually done over
996 			 * separate TCP connections.
997 			 */
998 		default:
999 			ASSERT(0);
1000 		}
1001 		freeb(mp);
1002 		mp = datamb;
1003 	}
1004 	if (mp == NULL || encmb == NULL) {
1005 		DTRACE_PROBE1(sppptun__output__failure, tuncl_t *, tcl);
1006 		freemsg(mp);
1007 		freemsg(encmb);
1008 		if (isdata) {
1009 			tcl->tcl_stats.ppp_oerrors++;
1010 		} else {
1011 			KCINCR(cks_octrl_drop);
1012 			KLINCR(lks_octrl_drop);
1013 		}
1014 		lowerq = NULL;
1015 	} else {
1016 		if (isdata) {
1017 			tcl->tcl_stats.ppp_obytes += msgsize(mp);
1018 			tcl->tcl_stats.ppp_opackets++;
1019 		} else {
1020 			KCINCR(cks_octrls);
1021 			KLINCR(lks_octrls);
1022 		}
1023 		if (encmb != mp)
1024 			encmb->b_cont = mp;
1025 		switch (loopup) {
1026 		case luNone:
1027 			*mpp = encmb;
1028 			break;
1029 		case luCopy:
1030 			mp = copymsg(encmb);
1031 			if (mp != NULL)
1032 				sppptun_urput(RD(lowerq), mp);
1033 			*mpp = encmb;
1034 			break;
1035 		case luSend:
1036 			sppptun_urput(RD(lowerq), encmb);
1037 			lowerq = NULL;
1038 			break;
1039 		}
1040 	}
1041 	return (lowerq);
1042 }
1043 
1044 /*
1045  * Enqueue a message to be sent when the lower stream is closed.  This
1046  * is done so that we're guaranteed that we always have the necessary
1047  * resources to properly detach ourselves from the system.  (If we
1048  * waited until the close was done to allocate these messages, then
1049  * the message allocation could fail, and we'd be unable to properly
1050  * detach.)
1051  */
1052 static void
1053 save_for_close(tunll_t *tll, mblk_t *mp)
1054 {
1055 	mblk_t *onc;
1056 
1057 	if ((onc = tll->tll_onclose) == NULL)
1058 		tll->tll_onclose = mp;
1059 	else {
1060 		while (onc->b_next != NULL)
1061 			onc = onc->b_next;
1062 		onc->b_next = mp;
1063 	}
1064 }
1065 
1066 /*
1067  * Given the lower stream name, locate the state structure.  Note that
1068  * lookup of tcl pointers (and use of those pointers) is safe because
1069  * modification is done only when exclusive on both inner and outer
1070  * perimeters.
1071  */
1072 static tunll_t *
1073 tll_lookup_on_name(char *dname)
1074 {
1075 	tunll_t *tll;
1076 
1077 	tll = TO_TLL(tunll_list.q_forw);
1078 	for (; tll != TO_TLL(&tunll_list); tll = TO_TLL(tll->tll_next))
1079 		if (strcmp(dname, tll->tll_name) == 0)
1080 			return (tll);
1081 	return (NULL);
1082 }
1083 
1084 /*
1085  * sppptun_inner_ioctl()
1086  *
1087  * MT-Perimeters:
1088  *    exclusive inner, shared outer.
1089  *
1090  * Description:
1091  *    Called by qwriter from sppptun_ioctl as the result of receiving
1092  *    a handled ioctl.
1093  */
1094 static void
1095 sppptun_inner_ioctl(queue_t *q, mblk_t *mp)
1096 {
1097 	struct iocblk *iop;
1098 	int rc = 0;
1099 	int len = 0;
1100 	int i;
1101 	tuncl_t *tcl;
1102 	tunll_t *tll;
1103 	union ppptun_name *ptn;
1104 	struct ppptun_info *pti;
1105 	struct ppptun_peer *ptp;
1106 	mblk_t *mptmp;
1107 	ppptun_atype *pap;
1108 	struct ppp_stats64 *psp;
1109 
1110 	iop = (struct iocblk *)mp->b_rptr;
1111 	tcl = NULL;
1112 	tll = q->q_ptr;
1113 	if (tll->tll_flags & TLLF_NOTLOWER) {
1114 		tcl = (tuncl_t *)tll;
1115 		tll = NULL;
1116 	}
1117 
1118 	DTRACE_PROBE3(sppptun__ioctl, tuncl_t *, tcl, tunll_t *, tll,
1119 	    struct iocblk *, iop);
1120 
1121 	switch (iop->ioc_cmd) {
1122 	case PPPIO_DEBUG:
1123 		/*
1124 		 * Debug requests are now ignored; use dtrace or wireshark
1125 		 * instead.
1126 		 */
1127 		break;
1128 
1129 	case PPPIO_GETSTAT:
1130 		rc = EINVAL;
1131 		break;
1132 
1133 	case PPPIO_GETSTAT64:
1134 		/* Client (device) side only */
1135 		if (tcl == NULL) {
1136 			rc = EINVAL;
1137 			break;
1138 		}
1139 		mptmp = allocb(sizeof (*psp), BPRI_HI);
1140 		if (mptmp == NULL) {
1141 			rc = ENOSR;
1142 			break;
1143 		}
1144 		freemsg(mp->b_cont);
1145 		mp->b_cont = mptmp;
1146 
1147 		psp = (struct ppp_stats64 *)mptmp->b_wptr;
1148 		bzero((caddr_t)psp, sizeof (*psp));
1149 		psp->p = tcl->tcl_stats;
1150 
1151 		len = sizeof (*psp);
1152 		break;
1153 
1154 	case PPPTUN_SNAME:
1155 		/* This is done on the *module* (lower level) side. */
1156 		if (tll == NULL || mp->b_cont == NULL ||
1157 		    iop->ioc_count != sizeof (*ptn) ||
1158 		    *mp->b_cont->b_rptr == '\0') {
1159 			rc = EINVAL;
1160 			break;
1161 		}
1162 
1163 		ptn = (union ppptun_name *)mp->b_cont->b_rptr;
1164 		ptn->ptn_name[sizeof (ptn->ptn_name) - 1] = '\0';
1165 
1166 		if ((tll = tll_lookup_on_name(ptn->ptn_name)) != NULL) {
1167 			rc = EEXIST;
1168 			break;
1169 		}
1170 		tll = (tunll_t *)q->q_ptr;
1171 		(void) strcpy(tll->tll_name, ptn->ptn_name);
1172 		break;
1173 
1174 	case PPPTUN_GNAME:
1175 		/* This is done on the *module* (lower level) side. */
1176 		if (tll == NULL) {
1177 			rc = EINVAL;
1178 			break;
1179 		}
1180 		if (mp->b_cont != NULL)
1181 			freemsg(mp->b_cont);
1182 		if ((mp->b_cont = allocb(sizeof (*ptn), BPRI_HI)) == NULL) {
1183 			rc = ENOSR;
1184 			break;
1185 		}
1186 		ptn = (union ppptun_name *)mp->b_cont->b_rptr;
1187 		bcopy(tll->tll_name, ptn->ptn_name, sizeof (ptn->ptn_name));
1188 		len = sizeof (*ptn);
1189 		break;
1190 
1191 	case PPPTUN_SINFO:
1192 	case PPPTUN_GINFO:
1193 		/* Either side */
1194 		if (mp->b_cont == NULL || iop->ioc_count != sizeof (*pti)) {
1195 			rc = EINVAL;
1196 			break;
1197 		}
1198 		pti = (struct ppptun_info *)mp->b_cont->b_rptr;
1199 		if (pti->pti_name[0] != '\0')
1200 			tll = tll_lookup_on_name(pti->pti_name);
1201 		if (tll == NULL) {
1202 			/* Driver (client) side must have name */
1203 			if (tcl != NULL && pti->pti_name[0] == '\0')
1204 				rc = EINVAL;
1205 			else
1206 				rc = ESRCH;
1207 			break;
1208 		}
1209 		if (iop->ioc_cmd == PPPTUN_GINFO) {
1210 			pti->pti_muxid = tll->tll_muxid;
1211 			pti->pti_style = tll->tll_style;
1212 			len = sizeof (*pti);
1213 			break;
1214 		}
1215 		tll->tll_muxid = pti->pti_muxid;
1216 		tll->tll_style = pti->pti_style;
1217 		switch (tll->tll_style) {
1218 		case PTS_PPPOE:		/* DLPI type */
1219 			tll->tll_alen = sizeof (tll->tll_lcladdr.pta_pppoe);
1220 			mptmp = dlpi_alloc(sizeof (dl_unbind_req_t),
1221 			    DL_UNBIND_REQ);
1222 			if (mptmp == NULL) {
1223 				rc = ENOSR;
1224 				break;
1225 			}
1226 			save_for_close(tll, mptmp);
1227 			mptmp = dlpi_alloc(sizeof (dl_detach_req_t),
1228 			    DL_DETACH_REQ);
1229 			if (mptmp == NULL) {
1230 				rc = ENOSR;
1231 				break;
1232 			}
1233 			save_for_close(tll, mptmp);
1234 			break;
1235 		default:
1236 			tll->tll_style = PTS_NONE;
1237 			tll->tll_alen = 0;
1238 			rc = EINVAL;
1239 			break;
1240 		}
1241 		break;
1242 
1243 	case PPPTUN_GNNAME:
1244 		/* This can be done on either side. */
1245 		if (mp->b_cont == NULL || iop->ioc_count < sizeof (uint32_t)) {
1246 			rc = EINVAL;
1247 			break;
1248 		}
1249 		ptn = (union ppptun_name *)mp->b_cont->b_rptr;
1250 		i = ptn->ptn_index;
1251 		tll = TO_TLL(tunll_list.q_forw);
1252 		while (--i >= 0 && tll != TO_TLL(&tunll_list))
1253 			tll = TO_TLL(tll->tll_next);
1254 		if (tll != TO_TLL(&tunll_list)) {
1255 			bcopy(tll->tll_name, ptn->ptn_name,
1256 			    sizeof (ptn->ptn_name));
1257 		} else {
1258 			bzero(ptn, sizeof (*ptn));
1259 		}
1260 		len = sizeof (*ptn);
1261 		break;
1262 
1263 	case PPPTUN_LCLADDR:
1264 		/* This is done on the *module* (lower level) side. */
1265 		if (tll == NULL || mp->b_cont == NULL) {
1266 			rc = EINVAL;
1267 			break;
1268 		}
1269 
1270 		pap = &tll->tll_lcladdr;
1271 		len = tll->tll_alen;
1272 		if (len == 0 || len > iop->ioc_count) {
1273 			rc = EINVAL;
1274 			break;
1275 		}
1276 		bcopy(mp->b_cont->b_rptr, pap, len);
1277 		len = 0;
1278 		break;
1279 
1280 	case PPPTUN_SPEER:
1281 		/* Client (device) side only; before SDATA */
1282 		if (tcl == NULL || mp->b_cont == NULL ||
1283 		    iop->ioc_count != sizeof (*ptp)) {
1284 			rc = EINVAL;
1285 			break;
1286 		}
1287 		if (tcl->tcl_data_tll != NULL) {
1288 			rc = EINVAL;
1289 			break;
1290 		}
1291 		ptp = (struct ppptun_peer *)mp->b_cont->b_rptr;
1292 		DTRACE_PROBE2(sppptun__speer, tuncl_t *, tcl,
1293 		    struct ppptun_peer *, ptp);
1294 		/* Once set, the style cannot change. */
1295 		if (tcl->tcl_style != PTS_NONE &&
1296 		    tcl->tcl_style != ptp->ptp_style) {
1297 			rc = EINVAL;
1298 			break;
1299 		}
1300 		if (ptp->ptp_flags & PTPF_DAEMON) {
1301 			/* User requests registration for tunnel 0 */
1302 			if ((tcl->tcl_flags & TCLF_SPEER_DONE) ||
1303 			    ptp->ptp_ltunid != 0 || ptp->ptp_rtunid != 0 ||
1304 			    ptp->ptp_lsessid != 0 || ptp->ptp_rsessid != 0) {
1305 				rc = EINVAL;
1306 				break;
1307 			}
1308 			tcl->tcl_flags |= TCLF_DAEMON;
1309 		} else {
1310 			/* Normal client connection */
1311 			if (tcl->tcl_flags & TCLF_DAEMON) {
1312 				rc = EINVAL;
1313 				break;
1314 			}
1315 			if (ptp->ptp_lsessid != 0 &&
1316 			    ptp->ptp_lsessid != tcl->tcl_lsessid) {
1317 				rc = EINVAL;
1318 				break;
1319 			}
1320 			/*
1321 			 * If we're reassigning the peer data, then
1322 			 * the previous assignment must have been for
1323 			 * a client control connection.  Check that.
1324 			 */
1325 			if ((tcl->tcl_flags & TCLF_SPEER_DONE) &&
1326 			    ((tcl->tcl_ltunid != 0 &&
1327 			    tcl->tcl_ltunid != ptp->ptp_ltunid) ||
1328 			    (tcl->tcl_rtunid != 0 &&
1329 			    tcl->tcl_rtunid != ptp->ptp_rtunid) ||
1330 			    (tcl->tcl_rsessid != 0 &&
1331 			    tcl->tcl_rsessid != ptp->ptp_rsessid))) {
1332 				rc = EINVAL;
1333 				break;
1334 			}
1335 			if ((tcl->tcl_ltunid = ptp->ptp_ltunid) == 0 &&
1336 			    tcl->tcl_style == PTS_L2FTP)
1337 				tcl->tcl_ltunid = ptp->ptp_lsessid;
1338 			tcl->tcl_rtunid = ptp->ptp_rtunid;
1339 			tcl->tcl_rsessid = ptp->ptp_rsessid;
1340 		}
1341 		tcl->tcl_flags |= TCLF_SPEER_DONE;
1342 		tcl->tcl_style = ptp->ptp_style;
1343 		tcl->tcl_address = ptp->ptp_address;
1344 		goto fill_in_peer;
1345 
1346 	case PPPTUN_GPEER:
1347 		/* Client (device) side only */
1348 		if (tcl == NULL) {
1349 			rc = EINVAL;
1350 			break;
1351 		}
1352 		if (mp->b_cont != NULL)
1353 			freemsg(mp->b_cont);
1354 		mp->b_cont = allocb(sizeof (*ptp), BPRI_HI);
1355 		if (mp->b_cont == NULL) {
1356 			rc = ENOSR;
1357 			break;
1358 		}
1359 		ptp = (struct ppptun_peer *)mp->b_cont->b_rptr;
1360 	fill_in_peer:
1361 		ptp->ptp_style = tcl->tcl_style;
1362 		ptp->ptp_flags = (tcl->tcl_flags & TCLF_DAEMON) ? PTPF_DAEMON :
1363 		    0;
1364 		ptp->ptp_ltunid = tcl->tcl_ltunid;
1365 		ptp->ptp_rtunid = tcl->tcl_rtunid;
1366 		ptp->ptp_lsessid = tcl->tcl_lsessid;
1367 		ptp->ptp_rsessid = tcl->tcl_rsessid;
1368 		ptp->ptp_address = tcl->tcl_address;
1369 		len = sizeof (*ptp);
1370 		break;
1371 
1372 	case PPPTUN_SDATA:
1373 	case PPPTUN_SCTL:
1374 		/* Client (device) side only; must do SPEER first */
1375 		if (tcl == NULL || mp->b_cont == NULL ||
1376 		    iop->ioc_count != sizeof (*ptn) ||
1377 		    *mp->b_cont->b_rptr == '\0') {
1378 			rc = EINVAL;
1379 			break;
1380 		}
1381 		if (!(tcl->tcl_flags & TCLF_SPEER_DONE)) {
1382 			rc = EINVAL;
1383 			break;
1384 		}
1385 		ptn = (union ppptun_name *)mp->b_cont->b_rptr;
1386 		ptn->ptn_name[sizeof (ptn->ptn_name) - 1] = '\0';
1387 		tll = tll_lookup_on_name(ptn->ptn_name);
1388 		if (tll == NULL) {
1389 			rc = ESRCH;
1390 			break;
1391 		}
1392 		if (tll->tll_style != tcl->tcl_style) {
1393 			rc = ENXIO;
1394 			break;
1395 		}
1396 		if (iop->ioc_cmd == PPPTUN_SDATA) {
1397 			if (tcl->tcl_data_tll != NULL) {
1398 				rc = EEXIST;
1399 				break;
1400 			}
1401 			/* server daemons cannot use regular data */
1402 			if (tcl->tcl_flags & TCLF_DAEMON) {
1403 				rc = EINVAL;
1404 				break;
1405 			}
1406 			tcl->tcl_data_tll = tll;
1407 		} else if (tcl->tcl_flags & TCLF_DAEMON) {
1408 			if (tll->tll_defcl != NULL && tll->tll_defcl != tcl) {
1409 				rc = EEXIST;
1410 				break;
1411 			}
1412 			tll->tll_defcl = tcl;
1413 			if (tcl->tcl_ctrl_tll != NULL) {
1414 				KDECR(tcl->tcl_ctrl_tll, tll_kstats,
1415 				    lks_clients);
1416 			}
1417 			tcl->tcl_ctrl_tll = tll;
1418 		} else {
1419 			if (tcl->tcl_ctrl_tll != NULL) {
1420 				rc = EEXIST;
1421 				break;
1422 			}
1423 			tcl->tcl_ctrl_tll = tll;
1424 		}
1425 		KLINCR(lks_clients);
1426 		break;
1427 
1428 	case PPPTUN_GDATA:
1429 	case PPPTUN_GCTL:
1430 		/* Client (device) side only */
1431 		if (tcl == NULL) {
1432 			rc = EINVAL;
1433 			break;
1434 		}
1435 		if (mp->b_cont != NULL)
1436 			freemsg(mp->b_cont);
1437 		mp->b_cont = allocb(sizeof (*ptn), BPRI_HI);
1438 		if (mp->b_cont == NULL) {
1439 			rc = ENOSR;
1440 			break;
1441 		}
1442 		ptn = (union ppptun_name *)mp->b_cont->b_rptr;
1443 		if (iop->ioc_cmd == PPPTUN_GDATA)
1444 			tll = tcl->tcl_data_tll;
1445 		else
1446 			tll = tcl->tcl_ctrl_tll;
1447 		if (tll == NULL)
1448 			bzero(ptn, sizeof (*ptn));
1449 		else
1450 			bcopy(tll->tll_name, ptn->ptn_name,
1451 			    sizeof (ptn->ptn_name));
1452 		len = sizeof (*ptn);
1453 		break;
1454 
1455 	case PPPTUN_DCTL:
1456 		/* Client (device) side daemon mode only */
1457 		if (tcl == NULL || mp->b_cont == NULL ||
1458 		    iop->ioc_count != sizeof (*ptn) ||
1459 		    !(tcl->tcl_flags & TCLF_DAEMON)) {
1460 			rc = EINVAL;
1461 			break;
1462 		}
1463 		ptn = (union ppptun_name *)mp->b_cont->b_rptr;
1464 		ptn->ptn_name[sizeof (ptn->ptn_name) - 1] = '\0';
1465 		tll = tll_lookup_on_name(ptn->ptn_name);
1466 		if (tll == NULL || tll->tll_defcl != tcl) {
1467 			rc = ESRCH;
1468 			break;
1469 		}
1470 		tll->tll_defcl = NULL;
1471 		break;
1472 
1473 	default:
1474 		/* Caller should already have checked command value */
1475 		ASSERT(0);
1476 	}
1477 	if (rc != 0) {
1478 		miocnak(q, mp, 0, rc);
1479 	} else {
1480 		if (len > 0)
1481 			mp->b_cont->b_wptr = mp->b_cont->b_rptr + len;
1482 		miocack(q, mp, len, 0);
1483 	}
1484 }
1485 
1486 /*
1487  * sppptun_ioctl()
1488  *
1489  * MT-Perimeters:
1490  *    shared inner, shared outer.
1491  *
1492  * Description:
1493  *    Called by sppptun_uwput as the result of receiving a M_IOCTL command.
1494  */
1495 static void
1496 sppptun_ioctl(queue_t *q, mblk_t *mp)
1497 {
1498 	struct iocblk *iop;
1499 	int rc = 0;
1500 	int len = 0;
1501 	uint32_t val = 0;
1502 	tunll_t *tll;
1503 
1504 	iop = (struct iocblk *)mp->b_rptr;
1505 
1506 	switch (iop->ioc_cmd) {
1507 	case PPPIO_DEBUG:
1508 	case PPPIO_GETSTAT:
1509 	case PPPIO_GETSTAT64:
1510 	case PPPTUN_SNAME:
1511 	case PPPTUN_GNAME:
1512 	case PPPTUN_SINFO:
1513 	case PPPTUN_GINFO:
1514 	case PPPTUN_GNNAME:
1515 	case PPPTUN_LCLADDR:
1516 	case PPPTUN_SPEER:
1517 	case PPPTUN_GPEER:
1518 	case PPPTUN_SDATA:
1519 	case PPPTUN_GDATA:
1520 	case PPPTUN_SCTL:
1521 	case PPPTUN_GCTL:
1522 	case PPPTUN_DCTL:
1523 		qwriter(q, mp, sppptun_inner_ioctl, PERIM_INNER);
1524 		return;
1525 
1526 	case PPPIO_GCLEAN:	/* always clean */
1527 		val = RCV_B7_1 | RCV_B7_0 | RCV_ODDP | RCV_EVNP;
1528 		len = sizeof (uint32_t);
1529 		break;
1530 
1531 	case PPPIO_GTYPE:	/* we look like an async driver. */
1532 		val = PPPTYP_AHDLC;
1533 		len = sizeof (uint32_t);
1534 		break;
1535 
1536 	case PPPIO_CFLAGS:	/* never compress headers */
1537 		val = 0;
1538 		len = sizeof (uint32_t);
1539 		break;
1540 
1541 		/* quietly ack PPP things we don't need to do. */
1542 	case PPPIO_XFCS:
1543 	case PPPIO_RFCS:
1544 	case PPPIO_XACCM:
1545 	case PPPIO_RACCM:
1546 	case PPPIO_LASTMOD:
1547 	case PPPIO_MUX:
1548 	case I_PLINK:
1549 	case I_PUNLINK:
1550 	case I_LINK:
1551 	case I_UNLINK:
1552 		break;
1553 
1554 	default:
1555 		tll = (tunll_t *)q->q_ptr;
1556 		if (!(tll->tll_flags & TLLF_NOTLOWER)) {
1557 			/* module side; pass this through. */
1558 			putnext(q, mp);
1559 			return;
1560 		}
1561 		rc = EINVAL;
1562 		break;
1563 	}
1564 	if (rc == 0 && len == sizeof (uint32_t)) {
1565 		if (mp->b_cont != NULL)
1566 			freemsg(mp->b_cont);
1567 		mp->b_cont = allocb(sizeof (uint32_t), BPRI_HI);
1568 		if (mp->b_cont == NULL) {
1569 			rc = ENOSR;
1570 		} else {
1571 			*(uint32_t *)mp->b_cont->b_wptr = val;
1572 			mp->b_cont->b_wptr += sizeof (uint32_t);
1573 		}
1574 	}
1575 	if (rc == 0) {
1576 		miocack(q, mp, len, 0);
1577 	} else {
1578 		miocnak(q, mp, 0, rc);
1579 	}
1580 }
1581 
1582 /*
1583  * sppptun_inner_mctl()
1584  *
1585  * MT-Perimeters:
1586  *    exclusive inner, shared outer.
1587  *
1588  * Description:
1589  *    Called by qwriter (via sppptun_uwput) as the result of receiving
1590  *    an M_CTL.  Called only on the client (driver) side.
1591  */
1592 static void
1593 sppptun_inner_mctl(queue_t *q, mblk_t *mp)
1594 {
1595 	int msglen;
1596 	tuncl_t *tcl;
1597 
1598 	tcl = q->q_ptr;
1599 
1600 	if (!(tcl->tcl_flags & TCLF_ISCLIENT)) {
1601 		freemsg(mp);
1602 		return;
1603 	}
1604 
1605 	msglen = MBLKL(mp);
1606 	switch (*mp->b_rptr) {
1607 	case PPPCTL_UNIT:
1608 		if (msglen == 2)
1609 			tcl->tcl_unit = mp->b_rptr[1];
1610 		else if (msglen == 8)
1611 			tcl->tcl_unit = ((uint32_t *)mp->b_rptr)[1];
1612 		break;
1613 	}
1614 	freemsg(mp);
1615 }
1616 
1617 /*
1618  * sppptun_uwput()
1619  *
1620  * MT-Perimeters:
1621  *    shared inner, shared outer.
1622  *
1623  * Description:
1624  *	Regular output data and controls pass through here.
1625  */
1626 static void
1627 sppptun_uwput(queue_t *q, mblk_t *mp)
1628 {
1629 	queue_t *nextq;
1630 	tuncl_t *tcl;
1631 
1632 	ASSERT(q->q_ptr != NULL);
1633 
1634 	switch (MTYPE(mp)) {
1635 	case M_DATA:
1636 	case M_PROTO:
1637 	case M_PCPROTO:
1638 		if (q->q_first == NULL &&
1639 		    (nextq = sppptun_outpkt(q, &mp)) != NULL) {
1640 			putnext(nextq, mp);
1641 		} else if (mp != NULL && !putq(q, mp)) {
1642 			freemsg(mp);
1643 		}
1644 		break;
1645 	case M_IOCTL:
1646 		sppptun_ioctl(q, mp);
1647 		break;
1648 	case M_CTL:
1649 		qwriter(q, mp, sppptun_inner_mctl, PERIM_INNER);
1650 		break;
1651 	default:
1652 		tcl = (tuncl_t *)q->q_ptr;
1653 		/*
1654 		 * If we're the driver, then discard unknown junk.
1655 		 * Otherwise, if we're the module, then forward along.
1656 		 */
1657 		if (tcl->tcl_flags & TCLF_ISCLIENT)
1658 			freemsg(mp);
1659 		else
1660 			putnext(q, mp);
1661 		break;
1662 	}
1663 }
1664 
1665 /*
1666  * Send a DLPI/TPI control message to the driver but make sure there
1667  * is only one outstanding message.  Uses tll_msg_pending to tell when
1668  * it must queue.  sppptun_urput calls message_done() when an ACK or a
1669  * NAK is received to process the next queued message.
1670  */
1671 static void
1672 message_send(tunll_t *tll, mblk_t *mp)
1673 {
1674 	mblk_t **mpp;
1675 
1676 	if (tll->tll_msg_pending) {
1677 		/* Must queue message. Tail insertion */
1678 		mpp = &tll->tll_msg_deferred;
1679 		while (*mpp != NULL)
1680 			mpp = &((*mpp)->b_next);
1681 		*mpp = mp;
1682 		return;
1683 	}
1684 	tll->tll_msg_pending = 1;
1685 	putnext(tll->tll_wq, mp);
1686 }
1687 
1688 /*
1689  * Called when an DLPI/TPI control message has been acked or nacked to
1690  * send down the next queued message (if any).
1691  */
1692 static void
1693 message_done(tunll_t *tll)
1694 {
1695 	mblk_t *mp;
1696 
1697 	ASSERT(tll->tll_msg_pending);
1698 	tll->tll_msg_pending = 0;
1699 	mp = tll->tll_msg_deferred;
1700 	if (mp != NULL) {
1701 		tll->tll_msg_deferred = mp->b_next;
1702 		mp->b_next = NULL;
1703 		tll->tll_msg_pending = 1;
1704 		putnext(tll->tll_wq, mp);
1705 	}
1706 }
1707 
1708 /*
1709  * Send down queued "close" messages to lower stream.  These were
1710  * enqueued right after the stream was originally allocated, when the
1711  * tll_style was set by PPPTUN_SINFO.
1712  */
1713 static int
1714 tll_close_req(tunll_t *tll)
1715 {
1716 	mblk_t *mb, *mbnext;
1717 
1718 	if ((mb = tll->tll_onclose) == NULL)
1719 		tll->tll_flags |= TLLF_SHUTDOWN_DONE;
1720 	else {
1721 		tll->tll_onclose = NULL;
1722 		while (mb != NULL) {
1723 			mbnext = mb->b_next;
1724 			mb->b_next = NULL;
1725 			message_send(tll, mb);
1726 			mb = mbnext;
1727 		}
1728 	}
1729 	return (0);
1730 }
1731 
1732 /*
1733  * This function is called when a backenable occurs on the write side of a
1734  * lower stream.  It walks over the client streams, looking for ones that use
1735  * the given tunll_t lower stream.  Each client is then backenabled.
1736  */
1737 static void
1738 tclvm_backenable(void *arg, void *firstv, size_t numv)
1739 {
1740 	tunll_t *tll = arg;
1741 	int minorn = (int)(uintptr_t)firstv;
1742 	int minormax = minorn + numv;
1743 	tuncl_t *tcl;
1744 	queue_t *q;
1745 
1746 	while (minorn < minormax) {
1747 		tcl = tcl_slots[minorn - 1];
1748 		if ((tcl->tcl_data_tll == tll ||
1749 		    tcl->tcl_ctrl_tll == tll) &&
1750 		    (q = tcl->tcl_rq) != NULL) {
1751 			qenable(OTHERQ(q));
1752 		}
1753 		minorn++;
1754 	}
1755 }
1756 
1757 /*
1758  * sppptun_uwsrv()
1759  *
1760  * MT-Perimeters:
1761  *    exclusive inner, shared outer.
1762  *
1763  * Description:
1764  *    Upper write-side service procedure.  In addition to the usual
1765  *    STREAMS queue service handling, this routine also handles the
1766  *    transmission of the unbind/detach messages to the lower stream
1767  *    driver when a lower stream is being closed.  (See the use of
1768  *    qenable/qwait in sppptun_close().)
1769  */
1770 static int
1771 sppptun_uwsrv(queue_t *q)
1772 {
1773 	tuncl_t	*tcl;
1774 	mblk_t *mp;
1775 	queue_t *nextq;
1776 
1777 	tcl = q->q_ptr;
1778 	if (!(tcl->tcl_flags & TCLF_ISCLIENT)) {
1779 		tunll_t *tll = (tunll_t *)tcl;
1780 
1781 		if ((tll->tll_flags & (TLLF_CLOSING|TLLF_CLOSE_DONE)) ==
1782 		    TLLF_CLOSING) {
1783 			tll->tll_error = tll_close_req(tll);
1784 			tll->tll_flags |= TLLF_CLOSE_DONE;
1785 		} else {
1786 			/*
1787 			 * We've been enabled here because of a backenable on
1788 			 * output flow control.  Backenable clients using this
1789 			 * lower layer.
1790 			 */
1791 			vmem_walk(tcl_minor_arena, VMEM_ALLOC, tclvm_backenable,
1792 			    tll);
1793 		}
1794 		return (0);
1795 	}
1796 
1797 	while ((mp = getq(q)) != NULL) {
1798 		if ((nextq = sppptun_outpkt(q, &mp)) != NULL) {
1799 			putnext(nextq, mp);
1800 		} else if (mp != NULL) {
1801 			(void) putbq(q, mp);
1802 			break;
1803 		}
1804 	}
1805 	return (0);
1806 }
1807 
1808 /*
1809  * sppptun_lwput()
1810  *
1811  * MT-Perimeters:
1812  *    shared inner, shared outer.
1813  *
1814  * Description:
1815  *    Lower write-side put procedure.  Nothing should be sending
1816  *    packets down this stream.
1817  */
1818 static void
1819 sppptun_lwput(queue_t *q, mblk_t *mp)
1820 {
1821 	switch (MTYPE(mp)) {
1822 	case M_PROTO:
1823 		putnext(q, mp);
1824 		break;
1825 	default:
1826 		freemsg(mp);
1827 		break;
1828 	}
1829 }
1830 
1831 /*
1832  * sppptun_lrput()
1833  *
1834  * MT-Perimeters:
1835  *    shared inner, shared outer.
1836  *
1837  * Description:
1838  *    Lower read-side put procedure.  Nothing should arrive here.
1839  */
1840 static void
1841 sppptun_lrput(queue_t *q, mblk_t *mp)
1842 {
1843 	tuncl_t *tcl;
1844 
1845 	switch (MTYPE(mp)) {
1846 	case M_IOCTL:
1847 		miocnak(q, mp, 0, EINVAL);
1848 		return;
1849 	case M_FLUSH:
1850 		if (*mp->b_rptr & FLUSHR) {
1851 			flushq(q, FLUSHDATA);
1852 		}
1853 		if (*mp->b_rptr & FLUSHW) {
1854 			*mp->b_rptr &= ~FLUSHR;
1855 			qreply(q, mp);
1856 		} else {
1857 			freemsg(mp);
1858 		}
1859 		return;
1860 	}
1861 	/*
1862 	 * Try to forward the message to the put procedure for the upper
1863 	 * control stream for this lower stream. If there are already messages
1864 	 * queued here, queue this one up to preserve message ordering.
1865 	 */
1866 	if ((tcl = (tuncl_t *)q->q_ptr) == NULL || tcl->tcl_rq == NULL) {
1867 		freemsg(mp);
1868 		return;
1869 	}
1870 	if (queclass(mp) == QPCTL ||
1871 	    (q->q_first == NULL && canput(tcl->tcl_rq))) {
1872 		put(tcl->tcl_rq, mp);
1873 	} else {
1874 		if (!putq(q, mp))
1875 			freemsg(mp);
1876 	}
1877 }
1878 
1879 /*
1880  * MT-Perimeters:
1881  *    shared inner, shared outer.
1882  *
1883  *    Handle non-data DLPI messages.  Used with PPPoE, which runs over
1884  *    Ethernet only.
1885  */
1886 static void
1887 urput_dlpi(queue_t *q, mblk_t *mp)
1888 {
1889 	int err;
1890 	union DL_primitives *dlp = (union DL_primitives *)mp->b_rptr;
1891 	tunll_t *tll = q->q_ptr;
1892 	size_t mlen = MBLKL(mp);
1893 
1894 	switch (dlp->dl_primitive) {
1895 	case DL_UDERROR_IND:
1896 		break;
1897 
1898 	case DL_ERROR_ACK:
1899 		if (mlen < DL_ERROR_ACK_SIZE)
1900 			break;
1901 		err = dlp->error_ack.dl_unix_errno ?
1902 		    dlp->error_ack.dl_unix_errno : ENXIO;
1903 		switch (dlp->error_ack.dl_error_primitive) {
1904 		case DL_UNBIND_REQ:
1905 			message_done(tll);
1906 			break;
1907 		case DL_DETACH_REQ:
1908 			message_done(tll);
1909 			tll->tll_error = err;
1910 			tll->tll_flags |= TLLF_SHUTDOWN_DONE;
1911 			break;
1912 		case DL_PHYS_ADDR_REQ:
1913 			message_done(tll);
1914 			break;
1915 		case DL_INFO_REQ:
1916 		case DL_ATTACH_REQ:
1917 		case DL_BIND_REQ:
1918 			message_done(tll);
1919 			tll->tll_error = err;
1920 			break;
1921 		}
1922 		break;
1923 
1924 	case DL_INFO_ACK:
1925 		message_done(tll);
1926 		break;
1927 
1928 	case DL_BIND_ACK:
1929 		message_done(tll);
1930 		break;
1931 
1932 	case DL_PHYS_ADDR_ACK:
1933 		break;
1934 
1935 	case DL_OK_ACK:
1936 		if (mlen < DL_OK_ACK_SIZE)
1937 			break;
1938 		switch (dlp->ok_ack.dl_correct_primitive) {
1939 		case DL_UNBIND_REQ:
1940 			message_done(tll);
1941 			break;
1942 		case DL_DETACH_REQ:
1943 			tll->tll_flags |= TLLF_SHUTDOWN_DONE;
1944 			break;
1945 		case DL_ATTACH_REQ:
1946 			message_done(tll);
1947 			break;
1948 		}
1949 		break;
1950 	}
1951 	freemsg(mp);
1952 }
1953 
1954 /* Search structure used with PPPoE only; see tclvm_pppoe_search(). */
1955 struct poedat {
1956 	uint_t sessid;
1957 	tunll_t *tll;
1958 	const void *srcaddr;
1959 	int isdata;
1960 	tuncl_t *tcl;
1961 };
1962 
1963 /*
1964  * This function is called by vmem_walk from within sppptun_recv.  It
1965  * iterates over a span of allocated minor node numbers to search for
1966  * the appropriate lower stream, session ID, and peer MAC address.
1967  *
1968  * (This is necessary due to a design flaw in the PPPoE protocol
1969  * itself.  The protocol assigns session IDs from the server side
1970  * only.  Both server and client use the same number.  Thus, if there
1971  * are multiple clients on a single host, there can be session ID
1972  * conflicts between servers and there's no way to detangle them
1973  * except by looking at the remote MAC address.)
1974  *
1975  * (This could have been handled by linking together sessions that
1976  * differ only in the remote MAC address.  This isn't done because it
1977  * would involve extra per-session storage and it's very unlikely that
1978  * PPPoE would be used this way.)
1979  */
1980 static void
1981 tclvm_pppoe_search(void *arg, void *firstv, size_t numv)
1982 {
1983 	struct poedat *poedat = (struct poedat *)arg;
1984 	int minorn = (int)(uintptr_t)firstv;
1985 	int minormax = minorn + numv;
1986 	tuncl_t *tcl;
1987 
1988 	if (poedat->tcl != NULL)
1989 		return;
1990 	while (minorn < minormax) {
1991 		tcl = tcl_slots[minorn - 1];
1992 		ASSERT(tcl != NULL);
1993 		if (tcl->tcl_rsessid == poedat->sessid &&
1994 		    ((!poedat->isdata && tcl->tcl_ctrl_tll == poedat->tll) ||
1995 		    (poedat->isdata && tcl->tcl_data_tll == poedat->tll)) &&
1996 		    bcmp(tcl->tcl_address.pta_pppoe.ptma_mac,
1997 		    poedat->srcaddr,
1998 		    sizeof (tcl->tcl_address.pta_pppoe.ptma_mac)) == 0) {
1999 			poedat->tcl = tcl;
2000 			break;
2001 		}
2002 		minorn++;
2003 	}
2004 }
2005 
2006 /*
2007  * sppptun_recv()
2008  *
2009  * MT-Perimeters:
2010  *    shared inner, shared outer.
2011  *
2012  * Description:
2013  *    Receive function called by sppptun_urput, which is called when
2014  *    the lower read-side put or service procedure sends a message
2015  *    upstream to a device user (PPP).  It attempts to find an
2016  *    appropriate queue on the module above us (depending on what the
2017  *    associated upper stream for the protocol would be), and if not
2018  *    possible, it will find an upper control stream for the protocol.
2019  *    Returns a pointer to the upper queue_t, or NULL if the message
2020  *    has been discarded.
2021  *
2022  * About demultiplexing:
2023  *
2024  *	All four protocols (L2F, PPTP, L2TP, and PPPoE) support a
2025  *	locally assigned ID for demultiplexing incoming traffic.  For
2026  *	L2F, this is called the Client ID, for PPTP the Call ID, for
2027  *	L2TP the Session ID, and for PPPoE the SESSION_ID.  This is a
2028  *	16 bit number for all four protocols, and is used to directly
2029  *	index into a list of upper streams.  With the upper stream in
2030  *	hand, we verify that this is the right stream and deliver the
2031  *	data.
2032  *
2033  *	L2TP has a Tunnel ID, which represents a bundle of PPP
2034  *	sessions between the peers.  Because we always assign unique
2035  *	session ID numbers, we merely check that the given ID matches
2036  *	the assigned ID for the upper stream.
2037  *
2038  *	L2F has a Multiplex ID, which is unique per connection.  It
2039  *	does not have L2TP's concept of multiple-connections-within-
2040  *	a-tunnel.  The same checking is done.
2041  *
2042  *	PPPoE is a horribly broken protocol.  Only one ID is assigned
2043  *	per connection.  The client must somehow demultiplex based on
2044  *	an ID number assigned by the server.  It's not necessarily
2045  *	unique.  The search is done based on {ID,peerEthernet} (using
2046  *	tcl_rsessid) for all packet types except PADI and PADS.
2047  *
2048  *	Neither PPPoE nor PPTP supports additional ID numbers.
2049  *
2050  *	Both L2F and L2TP come in over UDP.  They are distinguished by
2051  *	looking at the GRE version field -- 001 for L2F and 010 for
2052  *	L2TP.
2053  */
2054 static queue_t *
2055 sppptun_recv(queue_t *q, mblk_t **mpp, const void *srcaddr)
2056 {
2057 	mblk_t *mp;
2058 	tunll_t *tll;
2059 	tuncl_t *tcl;
2060 	int sessid;
2061 	int remlen;
2062 	int msglen;
2063 	int isdata;
2064 	int i;
2065 	const uchar_t *ucp;
2066 	const poep_t *poep;
2067 	mblk_t *mnew;
2068 	ppptun_atype *pap;
2069 
2070 	mp = *mpp;
2071 
2072 	tll = q->q_ptr;
2073 	ASSERT(!(tll->tll_flags & TLLF_NOTLOWER));
2074 
2075 	tcl = NULL;
2076 	switch (tll->tll_style) {
2077 	case PTS_PPPOE:
2078 		/* Note that poep_t alignment is uint16_t */
2079 		if ((!IS_P2ALIGNED(mp->b_rptr, sizeof (uint16_t)) ||
2080 		    MBLKL(mp) < sizeof (poep_t)) &&
2081 		    !pullupmsg(mp, sizeof (poep_t)))
2082 			break;
2083 		poep = (const poep_t *)mp->b_rptr;
2084 		if (poep->poep_version_type != POE_VERSION)
2085 			break;
2086 		/*
2087 		 * First, extract a session ID number.  All protocols have
2088 		 * this.
2089 		 */
2090 		isdata = (poep->poep_code == POECODE_DATA);
2091 		sessid = ntohs(poep->poep_session_id);
2092 		remlen = sizeof (*poep);
2093 		msglen = ntohs(poep->poep_length);
2094 		i = poep->poep_code;
2095 		if (i == POECODE_PADI || i == POECODE_PADR) {
2096 			/* These go to the server daemon only. */
2097 			tcl = tll->tll_defcl;
2098 		} else if (i == POECODE_PADO || i == POECODE_PADS) {
2099 			/*
2100 			 * These go to a client only, and are demuxed
2101 			 * by the Host-Uniq field (into which we stuff
2102 			 * our local ID number when generating
2103 			 * PADI/PADR).
2104 			 */
2105 			ucp = (const uchar_t *)(poep + 1);
2106 			i = msglen;
2107 			while (i > POET_HDRLEN) {
2108 				if (POET_GET_TYPE(ucp) == POETT_END) {
2109 					i = 0;
2110 					break;
2111 				}
2112 				if (POET_GET_TYPE(ucp) == POETT_UNIQ &&
2113 				    POET_GET_LENG(ucp) >= sizeof (uint32_t))
2114 					break;
2115 				i -= POET_GET_LENG(ucp) + POET_HDRLEN;
2116 				ucp = POET_NEXT(ucp);
2117 			}
2118 			if (i >= POET_HDRLEN + 4)
2119 				sessid = GETLONG(ucp + POET_HDRLEN);
2120 			tcl = tcl_by_minor((minor_t)sessid);
2121 		} else {
2122 			/*
2123 			 * Try minor number as session ID first, since
2124 			 * it's used that way on server side.  It's
2125 			 * not used that way on the client, though, so
2126 			 * this might not work.  If this isn't the
2127 			 * right one, then try the tll cache.  If
2128 			 * neither is right, then search all open
2129 			 * clients.  Did I mention that the PPPoE
2130 			 * protocol is badly designed?
2131 			 */
2132 			tcl = tcl_by_minor((minor_t)sessid);
2133 			if (tcl == NULL ||
2134 			    (!isdata && tcl->tcl_ctrl_tll != tll) ||
2135 			    (isdata && tcl->tcl_data_tll != tll) ||
2136 			    sessid != tcl->tcl_rsessid ||
2137 			    bcmp(srcaddr, tcl->tcl_address.pta_pppoe.ptma_mac,
2138 			    sizeof (tcl->tcl_address.pta_pppoe.ptma_mac)) != 0)
2139 				tcl = tll->tll_lastcl;
2140 			if (tcl == NULL ||
2141 			    (!isdata && tcl->tcl_ctrl_tll != tll) ||
2142 			    (isdata && tcl->tcl_data_tll != tll) ||
2143 			    sessid != tcl->tcl_rsessid ||
2144 			    bcmp(srcaddr, tcl->tcl_address.pta_pppoe.ptma_mac,
2145 			    sizeof (tcl->tcl_address.pta_pppoe.ptma_mac)) != 0)
2146 				tcl = NULL;
2147 			if (tcl == NULL && sessid != 0) {
2148 				struct poedat poedat;
2149 
2150 				/*
2151 				 * Slow mode.  Too bad.  If you don't like it,
2152 				 * you can always choose a better protocol.
2153 				 */
2154 				poedat.sessid = sessid;
2155 				poedat.tll = tll;
2156 				poedat.srcaddr = srcaddr;
2157 				poedat.tcl = NULL;
2158 				poedat.isdata = isdata;
2159 				vmem_walk(tcl_minor_arena, VMEM_ALLOC,
2160 				    tclvm_pppoe_search, &poedat);
2161 				KLINCR(lks_walks);
2162 				if ((tcl = poedat.tcl) != NULL) {
2163 					tll->tll_lastcl = tcl;
2164 					KCINCR(cks_walks);
2165 				}
2166 			}
2167 		}
2168 		break;
2169 	}
2170 
2171 	if (tcl == NULL || tcl->tcl_rq == NULL) {
2172 		DTRACE_PROBE3(sppptun__recv__discard, int, sessid,
2173 		    tuncl_t *, tcl, mblk_t *, mp);
2174 		if (tcl == NULL) {
2175 			KLINCR(lks_in_nomatch);
2176 		}
2177 		if (isdata) {
2178 			KLINCR(lks_indata_drops);
2179 			if (tcl != NULL)
2180 				tcl->tcl_stats.ppp_ierrors++;
2181 		} else {
2182 			KLINCR(lks_inctrl_drops);
2183 			if (tcl != NULL) {
2184 				KCINCR(cks_inctrl_drops);
2185 			}
2186 		}
2187 		freemsg(mp);
2188 		return (NULL);
2189 	}
2190 
2191 	if (tcl->tcl_data_tll == tll && isdata) {
2192 		if (!adjmsg(mp, remlen) ||
2193 		    (i = msgsize(mp)) < msglen ||
2194 		    (i > msglen && !adjmsg(mp, msglen - i))) {
2195 			KLINCR(lks_indata_drops);
2196 			tcl->tcl_stats.ppp_ierrors++;
2197 			freemsg(mp);
2198 			return (NULL);
2199 		}
2200 		/* XXX -- address/control handling in pppd needs help. */
2201 		if (*mp->b_rptr != 0xFF) {
2202 			if ((mp = prependb(mp, 2, 1)) == NULL) {
2203 				KLINCR(lks_indata_drops);
2204 				tcl->tcl_stats.ppp_ierrors++;
2205 				return (NULL);
2206 			}
2207 			mp->b_rptr[0] = 0xFF;
2208 			mp->b_rptr[1] = 0x03;
2209 		}
2210 		MTYPE(mp) = M_DATA;
2211 		tcl->tcl_stats.ppp_ibytes += msgsize(mp);
2212 		tcl->tcl_stats.ppp_ipackets++;
2213 		KLINCR(lks_indata);
2214 	} else {
2215 		if (isdata || tcl->tcl_ctrl_tll != tll ||
2216 		    (mnew = make_control(tcl, tll, PTCA_CONTROL, tcl)) ==
2217 		    NULL) {
2218 			KLINCR(lks_inctrl_drops);
2219 			KCINCR(cks_inctrl_drops);
2220 			freemsg(mp);
2221 			return (NULL);
2222 		}
2223 		/* Fix up source address; peer might not be set yet. */
2224 		pap = &((struct ppptun_control *)mnew->b_rptr)->ptc_address;
2225 		bcopy(srcaddr, pap->pta_pppoe.ptma_mac,
2226 		    sizeof (pap->pta_pppoe.ptma_mac));
2227 		mnew->b_cont = mp;
2228 		mp = mnew;
2229 		KLINCR(lks_inctrls);
2230 		KCINCR(cks_inctrls);
2231 	}
2232 	*mpp = mp;
2233 	return (tcl->tcl_rq);
2234 }
2235 
2236 /*
2237  * sppptun_urput()
2238  *
2239  * MT-Perimeters:
2240  *    shared inner, shared outer.
2241  *
2242  * Description:
2243  *    Upper read-side put procedure.  Messages from the underlying
2244  *    lower stream driver arrive here.  See sppptun_recv for the
2245  *    demultiplexing logic.
2246  */
2247 static void
2248 sppptun_urput(queue_t *q, mblk_t *mp)
2249 {
2250 	union DL_primitives *dlprim;
2251 	mblk_t *mpnext;
2252 	tunll_t *tll;
2253 	queue_t *nextq;
2254 
2255 	tll = q->q_ptr;
2256 	ASSERT(!(tll->tll_flags & TLLF_NOTLOWER));
2257 
2258 	switch (MTYPE(mp)) {
2259 	case M_DATA:
2260 		/*
2261 		 * When we're bound over IP, data arrives here.  The
2262 		 * packet starts with the IP header itself.
2263 		 */
2264 		if ((nextq = sppptun_recv(q, &mp, NULL)) != NULL)
2265 			putnext(nextq, mp);
2266 		break;
2267 
2268 	case M_PROTO:
2269 	case M_PCPROTO:
2270 		/* Data arrives here for UDP or raw Ethernet, not IP. */
2271 		switch (tll->tll_style) {
2272 			/* PPTP control messages are over TCP only. */
2273 		case PTS_PPTP:
2274 		default:
2275 			ASSERT(0);	/* how'd that happen? */
2276 			break;
2277 
2278 		case PTS_PPPOE:		/* DLPI message */
2279 			if (MBLKL(mp) < sizeof (t_uscalar_t))
2280 				break;
2281 			dlprim = (union DL_primitives *)mp->b_rptr;
2282 			switch (dlprim->dl_primitive) {
2283 			case DL_UNITDATA_IND: {
2284 				size_t mlen = MBLKL(mp);
2285 
2286 				if (mlen < DL_UNITDATA_IND_SIZE)
2287 					break;
2288 				if (dlprim->unitdata_ind.dl_src_addr_offset <
2289 				    DL_UNITDATA_IND_SIZE ||
2290 				    dlprim->unitdata_ind.dl_src_addr_offset +
2291 				    dlprim->unitdata_ind.dl_src_addr_length >
2292 				    mlen)
2293 					break;
2294 			}
2295 				/* FALLTHROUGH */
2296 			case DL_UNITDATA_REQ:	/* For loopback support. */
2297 				if (dlprim->dl_primitive == DL_UNITDATA_REQ &&
2298 				    MBLKL(mp) < DL_UNITDATA_REQ_SIZE)
2299 					break;
2300 				if ((mpnext = mp->b_cont) == NULL)
2301 					break;
2302 				MTYPE(mpnext) = M_DATA;
2303 				nextq = sppptun_recv(q, &mpnext,
2304 				    dlprim->dl_primitive == DL_UNITDATA_IND ?
2305 				    mp->b_rptr +
2306 				    dlprim->unitdata_ind.dl_src_addr_offset :
2307 				    tll->tll_lcladdr.pta_pppoe.ptma_mac);
2308 				if (nextq != NULL)
2309 					putnext(nextq, mpnext);
2310 				freeb(mp);
2311 				return;
2312 
2313 			default:
2314 				urput_dlpi(q, mp);
2315 				return;
2316 			}
2317 			break;
2318 		}
2319 		freemsg(mp);
2320 		break;
2321 
2322 	default:
2323 		freemsg(mp);
2324 		break;
2325 	}
2326 }
2327 
2328 /*
2329  * sppptun_ursrv()
2330  *
2331  * MT-Perimeters:
2332  *    exclusive inner, shared outer.
2333  *
2334  * Description:
2335  *    Upper read-side service procedure.  This procedure services the
2336  *    client streams.  We get here because the client (PPP) asserts
2337  *    flow control down to us.
2338  */
2339 static int
2340 sppptun_ursrv(queue_t *q)
2341 {
2342 	mblk_t		*mp;
2343 
2344 	ASSERT(q->q_ptr != NULL);
2345 
2346 	while ((mp = getq(q)) != NULL) {
2347 		if (canputnext(q)) {
2348 			putnext(q, mp);
2349 		} else {
2350 			(void) putbq(q, mp);
2351 			break;
2352 		}
2353 	}
2354 	return (0);
2355 }
2356 
2357 /*
2358  * Dummy constructor/destructor functions for kmem_cache_create.
2359  * We're just using kmem as an allocator of integers, not real
2360  * storage.
2361  */
2362 
/*
 * Constructor callback given to kmem_cache_create().  It performs no
 * setup and ignores all of its arguments; it always returns 0.
 */
/*ARGSUSED*/
static int
tcl_constructor(void *maddr, void *arg, int kmflags)
{
	const int rval = 0;

	return (rval);
}
2369 
/*
 * Destructor callback given to kmem_cache_create().  It ignores its
 * arguments and does nothing.
 */
/*ARGSUSED*/
static void
tcl_destructor(void *maddr, void *arg)
{
	/* Nothing to tear down. */
}
2375 
/*
 * Memory charged to a single tunnel client when estimating how many
 * clients fit in a memory budget: one pointer in the tcl_slots array,
 * the tuncl_t structure itself, and two dblk_t's for the two messages
 * preallocated for close.
 */
#define	TUNCL_SIZE \
	(sizeof (tuncl_t *) + sizeof (tuncl_t) + 2 * sizeof (dblk_t))
2383 
/*
 * Clear all bits of x except the highest bit.  Values of 2 or less
 * are returned unchanged.
 *
 * The shifted constant is unsigned so that a value whose highest set
 * bit is bit 31 does not left-shift into the sign bit of an int,
 * which would be undefined behavior.  Note that x is evaluated more
 * than once, so it must not have side effects.
 */
#define	truncate(x)	((x) <= 2 ? (x) : (1U << (highbit(x) - 1)))
2388 
2389 /*
2390  * This function initializes some well-known global variables inside
2391  * the module.
2392  *
2393  * Called by sppptun_mod.c:_init() before installing the module.
2394  */
2395 void
2396 sppptun_init(void)
2397 {
2398 	tunll_list.q_forw = tunll_list.q_back = &tunll_list;
2399 }
2400 
2401 /*
2402  * This function allocates the initial internal storage for the
2403  * sppptun driver.
2404  *
2405  * Called by sppptun_mod.c:_init() after installing module.
2406  */
2407 void
2408 sppptun_tcl_init(void)
2409 {
2410 	uint_t i, j;
2411 
2412 	rw_init(&tcl_rwlock, NULL, RW_DRIVER, NULL);
2413 	rw_enter(&tcl_rwlock, RW_WRITER);
2414 	tcl_nslots = sppptun_init_cnt;
2415 	tcl_slots = kmem_zalloc(tcl_nslots * sizeof (tuncl_t *), KM_SLEEP);
2416 
2417 	tcl_cache = kmem_cache_create("sppptun_map", sizeof (tuncl_t), 0,
2418 	    tcl_constructor, tcl_destructor, NULL, NULL, NULL, 0);
2419 
2420 	/* Allocate integer space for minor numbers */
2421 	tcl_minor_arena = vmem_create("sppptun_minor", (void *)1, tcl_nslots,
2422 	    1, NULL, NULL, NULL, 0, VM_SLEEP | VMC_IDENTIFIER);
2423 
2424 	/*
2425 	 * Calculate available number of tunnels - how many tunnels
2426 	 * can we allocate in sppptun_pctofmem % of available
2427 	 * memory.  The value is rounded up to the nearest power of 2.
2428 	 */
2429 	i = (sppptun_pctofmem * kmem_maxavail()) / (100 * TUNCL_SIZE);
2430 	j = truncate(i);	/* i with non-high bits stripped */
2431 	if (i != j)
2432 		j *= 2;
2433 	tcl_minormax = j;
2434 	rw_exit(&tcl_rwlock);
2435 }
2436 
2437 /*
2438  * This function checks that there are no plumbed streams or other users.
2439  *
2440  * Called by sppptun_mod.c:_fini().  Assumes that we're exclusive on
2441  * both perimeters.
2442  */
2443 int
2444 sppptun_tcl_fintest(void)
2445 {
2446 	if (tunll_list.q_forw != &tunll_list || tcl_inuse > 0)
2447 		return (EBUSY);
2448 	else
2449 		return (0);
2450 }
2451 
2452 /*
2453  * If no lower streams are plumbed, then this function deallocates all
2454  * internal storage in preparation for unload.
2455  *
2456  * Called by sppptun_mod.c:_fini().  Assumes that we're exclusive on
2457  * both perimeters.
2458  */
2459 void
2460 sppptun_tcl_fini(void)
2461 {
2462 	if (tcl_minor_arena != NULL) {
2463 		vmem_destroy(tcl_minor_arena);
2464 		tcl_minor_arena = NULL;
2465 	}
2466 	if (tcl_cache != NULL) {
2467 		kmem_cache_destroy(tcl_cache);
2468 		tcl_cache = NULL;
2469 	}
2470 	kmem_free(tcl_slots, tcl_nslots * sizeof (tuncl_t *));
2471 	tcl_slots = NULL;
2472 	rw_destroy(&tcl_rwlock);
2473 	ASSERT(tcl_slots == NULL);
2474 	ASSERT(tcl_cache == NULL);
2475 	ASSERT(tcl_minor_arena == NULL);
2476 }
2477