xref: /titanic_51/usr/src/uts/common/io/ppp/sppptun/sppptun.c (revision a799b1e741b6f59012a469e6b57c40cb8061127b)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/types.h>
28 #include <sys/debug.h>
29 #include <sys/param.h>
30 #include <sys/stat.h>
31 #include <sys/systm.h>
32 #include <sys/socket.h>
33 #include <sys/stream.h>
34 #include <sys/stropts.h>
35 #include <sys/errno.h>
36 #include <sys/time.h>
37 #include <sys/cmn_err.h>
38 #include <sys/sdt.h>
39 #include <sys/conf.h>
40 #include <sys/dlpi.h>
41 #include <sys/ddi.h>
42 #include <sys/kstat.h>
43 #include <sys/strsun.h>
44 #include <sys/bitmap.h>
45 #include <sys/sysmacros.h>
46 #include <sys/note.h>
47 #include <sys/policy.h>
48 #include <net/ppp_defs.h>
49 #include <net/pppio.h>
50 #include <net/sppptun.h>
51 #include <net/pppoe.h>
52 #include <netinet/in.h>
53 
54 #include "s_common.h"
55 #include "sppptun_mod.h"
56 #include "sppptun_impl.h"
57 
/* Compile-time defaults for the sppptun_init_cnt and sppptun_pctofmem tunables. */
58 #define	NTUN_INITIAL 16			/* Initial number of sppptun slots */
59 #define	NTUN_PERCENT 5			/* Percent of memory to use */
60 
/*
 * Description strings for the driver and module personalities.  Both
 * are non-static so that other translation units can reference them.
 */
61 /*
62  * This is used to tag official Solaris sources.  Please do not define
63  * "INTERNAL_BUILD" when building this software outside of Sun
64  * Microsystems.
65  */
66 #ifdef INTERNAL_BUILD
67 /* MODINFO is limited to 32 characters. */
68 const char sppptun_driver_description[] = "PPP 4.0 tunnel driver";
69 const char sppptun_module_description[] = "PPP 4.0 tunnel module";
70 #else
71 const char sppptun_driver_description[] = "ANU PPP tundrv";
72 const char sppptun_module_description[] = "ANU PPP tunmod";
73 
/*
 * Build stamp, present only when INTERNAL_BUILD is not defined.  The
 * string gains a DEBUG suffix when DEBUG is defined.
 */
74 /* LINTED */
75 static const char buildtime[] = "Built " __DATE__ " at " __TIME__
76 #ifdef DEBUG
77 " DEBUG"
78 #endif
79 "\n";
80 #endif
81 
82 /*
83  * Tunable values; these are similar to the values used in ptms_conf.c.
84  * Override these settings via /etc/system.
 *
 * tuncl_alloc() always attempts an allocation while fewer than
 * sppptun_cnt tunnels are in use.  Once that minimum is reached it
 * refuses new tunnels at tcl_minormax, or at sppptun_max_pty when that
 * is non-zero and smaller.
85  */
86 uint_t	sppptun_cnt = 0;		/* Minimum number of tunnels */
87 size_t	sppptun_max_pty = 0;		/* Maximum number of tunnels */
88 uint_t	sppptun_init_cnt = NTUN_INITIAL; /* Initial number of tunnel slots */
89 uint_t	sppptun_pctofmem = NTUN_PERCENT; /* Percent of memory to use */
90 
/*
 * Destination address placed directly after the dl_unitdata_req_t
 * header by sppptun_outpkt(): the peer's Ethernet address followed by
 * the SAP, which is stored in host byte order.
 */
91 typedef struct ether_dest_s {
92 	ether_addr_t addr;
93 	ushort_t type;
94 } ether_dest_t;
95 
/*
 * Assemble a 32-bit value from the four bytes at x, most significant
 * byte first.  Bytes are read one at a time, so x need not be aligned.
 * Each byte is widened to an unsigned 32-bit value before it is
 * shifted; shifting a promoted int left by 24 would otherwise move a
 * top byte of 0x80 or more into the sign bit.  The result has type
 * uint32_t.
 */
#define	GETLONG(x)	(((uint32_t)(uint8_t)(x)[0] << 24) | \
			((uint32_t)(uint8_t)(x)[1] << 16) | \
			((uint32_t)(uint8_t)(x)[2] << 8) | \
			(uint32_t)(uint8_t)(x)[3])
98 
/*
 * Names of the per-lower-stream (tll) and per-client (tcl) named
 * kstats.  sppptun_open() hands these to kstat_setup() together with
 * their element counts.
 */
99 static const char *tll_kstats_list[] = { TLL_KSTATS_NAMES };
100 static const char *tcl_kstats_list[] = { TCL_KSTATS_NAMES };
101 
/*
 * Named-kstat counter helpers.
 *
 * KREF(p, m, vn) names the 64-bit counter vn inside the kstat block m
 * of the structure that p points to; KINCR and KDECR pre-increment and
 * pre-decrement it.  The pointer argument is parenthesized so that an
 * expression such as &obj can be passed safely.
 */
#define	KREF(p, m, vn)	((p)->m.vn.value.ui64)
#define	KINCR(p, m, vn)	(++KREF(p, m, vn))
#define	KDECR(p, m, vn)	(--KREF(p, m, vn))

/* Lower-stream counters; these require a local variable named "tll". */
#define	KLINCR(vn)	KINCR(tll, tll_kstats, vn)
#define	KLDECR(vn)	KDECR(tll, tll_kstats, vn)

/* Client counters; these require a local variable named "tcl". */
#define	KCINCR(vn)	KINCR(tcl, tcl_kstats, vn)
#define	KCDECR(vn)	KDECR(tcl, tcl_kstats, vn)
111 
/*
 * STREAMS entry points, referenced from the qinit tables.  The "u"
 * routines belong to the upper read/write queues and the "l" routines
 * to the multiplexor's lower read/write queues.
 */
112 static int	sppptun_open(queue_t *, dev_t *, int, int, cred_t *);
113 static int	sppptun_close(queue_t *);
114 static void	sppptun_urput(queue_t *, mblk_t *);
115 static void	sppptun_uwput(queue_t *, mblk_t *);
116 static int	sppptun_ursrv(queue_t *);
117 static int	sppptun_uwsrv(queue_t *);
118 static void	sppptun_lrput(queue_t *, mblk_t *);
119 static void	sppptun_lwput(queue_t *, mblk_t *);
120 
121 /*
122  * This is the hash table of clients.  Clients are the programs that
123  * open /dev/sppptun as a device.  There may be a large number of
124  * these; one per tunneled PPP session.
125  *
126  * Note: slots are offset from minor node value by 1 because
127  * vmem_alloc returns 0 for failure.
128  *
129  * The tcl_slots array entries are modified only when exclusive on
130  * both inner and outer perimeters.  This ensures that threads on
131  * shared perimeters always view this as unchanging memory with no
132  * need to lock around accesses.  (Specifically, the tcl_slots array
133  * is modified by entry to sppptun_open, sppptun_close, and _fini.)
 *
 * Despite the name, the table is indexed directly: tcl_by_minor()
 * returns tcl_slots[minor - 1] without hashing.
134  */
135 static tuncl_t **tcl_slots = NULL;	/* Slots for tuncl_t */
136 static size_t tcl_nslots = 0;		/* Size of slot array */
137 static size_t tcl_minormax = 0;		/* Maximum number of tunnels */
138 static size_t tcl_inuse = 0;		/* # of tunnels currently allocated */
139 static krwlock_t tcl_rwlock;		/* Write-held by tuncl_alloc/tuncl_free */
140 static struct kmem_cache *tcl_cache = NULL;	/* tunnel cache */
141 static vmem_t *tcl_minor_arena = NULL; /* Arena for device minors */
142 
143 /*
144  * This is the simple list of lower layers.  For PPPoE, there is one
145  * of these per Ethernet interface.  Lower layers are established by
146  * "plumbing" -- using I_PLINK to connect the tunnel multiplexor to
147  * the physical interface.
 *
 * sppptun_open() appends each new tunll_t at the tail of tunll_list
 * and numbers it from tunll_index, which it increments on every module
 * open.
148  */
149 static struct qelem tunll_list;
150 static int tunll_index;
151 
152 /* Test value; if all zeroes, then address hasn't been set yet. */
/* sppptun_outpkt() compares a client's configured peer MAC against this. */
153 static const ether_addr_t zero_mac_addr = { 0, 0, 0, 0, 0, 0 };
154 
/* Size of a dl_unitdata_req_t plus four further bytes. */
155 #define	MIN_SET_FASTPATH_UNITDATAREQ_SIZE	\
156 	(sizeof (dl_unitdata_req_t) + 4)
157 
/* Values used to fill in sppptun_modinfo. */
158 #define	TUN_MI_ID	2104	/* officially allocated module ID */
159 #define	TUN_MI_MINPSZ	(0)
160 #define	TUN_MI_MAXPSZ	(PPP_MAXMTU)
161 #define	TUN_MI_HIWAT	(PPP_MTU * 8)
162 #define	TUN_MI_LOWAT	(128)
163 
/* Module information shared by all four qinit tables that follow. */
164 static struct module_info sppptun_modinfo = {
165 	TUN_MI_ID,		/* mi_idnum */
166 	PPP_TUN_NAME,		/* mi_idname */
167 	TUN_MI_MINPSZ,		/* mi_minpsz */
168 	TUN_MI_MAXPSZ,		/* mi_maxpsz */
169 	TUN_MI_HIWAT,		/* mi_hiwat */
170 	TUN_MI_LOWAT		/* mi_lowat */
171 };
172 
/*
 * Upper read side.  This is the only table that supplies the open and
 * close routines.  The put routine is declared void, hence the cast.
 */
173 static struct qinit sppptun_urinit = {
174 	(int (*)())sppptun_urput, /* qi_putp */
175 	sppptun_ursrv,		/* qi_srvp */
176 	sppptun_open,		/* qi_qopen */
177 	sppptun_close,		/* qi_qclose */
178 	NULL,			/* qi_qadmin */
179 	&sppptun_modinfo,	/* qi_minfo */
180 	NULL			/* qi_mstat */
181 };
182 
/* Upper write side: put and service routines only. */
183 static struct qinit sppptun_uwinit = {
184 	(int (*)())sppptun_uwput, /* qi_putp */
185 	sppptun_uwsrv,		/* qi_srvp */
186 	NULL,			/* qi_qopen */
187 	NULL,			/* qi_qclose */
188 	NULL,			/* qi_qadmin */
189 	&sppptun_modinfo,	/* qi_minfo */
190 	NULL			/* qi_mstat */
191 };
192 
/* Lower (multiplexor) read side: put routine only, no service routine. */
193 static struct qinit sppptun_lrinit = {
194 	(int (*)())sppptun_lrput, /* qi_putp */
195 	NULL,			/* qi_srvp */
196 	NULL,			/* qi_qopen */
197 	NULL,			/* qi_qclose */
198 	NULL,			/* qi_qadmin */
199 	&sppptun_modinfo,	/* qi_minfo */
200 	NULL			/* qi_mstat */
201 };
202 
/* Lower (multiplexor) write side: put routine only, no service routine. */
203 static struct qinit sppptun_lwinit = {
204 	(int (*)())sppptun_lwput, /* qi_putp */
205 	NULL,			/* qi_srvp */
206 	NULL,			/* qi_qopen */
207 	NULL,			/* qi_qclose */
208 	NULL,			/* qi_qadmin */
209 	&sppptun_modinfo,	/* qi_minfo */
210 	NULL			/* qi_mstat */
211 };
212 
213 /*
214  * This is referenced in sppptun_mod.c.
 *
 * Ties the four qinit tables above together: upper read, upper write,
 * lower (mux) read and lower (mux) write, in that order.
215  */
216 struct streamtab sppptun_tab = {
217 	&sppptun_urinit,	/* st_rdinit */
218 	&sppptun_uwinit,	/* st_wrinit */
219 	&sppptun_lrinit,	/* st_muxrinit */
220 	&sppptun_lwinit		/* st_muxwrinit */
221 };
222 
223 /*
224  * Allocate another slot table twice as large as the original one
225  * (limited to global maximum).  Migrate all tunnels to the new slot
226  * table and free the original one.  Assumes we're exclusive on both
227  * inner and outer perimeters, and thus there are no other users of
228  * the tcl_slots array.
229  */
230 static minor_t
231 tcl_grow(void)
232 {
233 	minor_t old_size = tcl_nslots;
234 	minor_t new_size = 2 * old_size;
235 	tuncl_t **tcl_old = tcl_slots;
236 	tuncl_t **tcl_new;
237 	void  *vaddr;			/* vmem_add return value */
238 
239 	ASSERT(RW_LOCK_HELD(&tcl_rwlock));
240 
241 	/* Allocate new ptms array */
242 	tcl_new = kmem_zalloc(new_size * sizeof (tuncl_t *), KM_NOSLEEP);
243 	if (tcl_new == NULL)
244 		return ((minor_t)0);
245 
246 	/* Increase clone index space */
247 	vaddr = vmem_add(tcl_minor_arena, (void*)((uintptr_t)old_size + 1),
248 	    new_size - old_size, VM_NOSLEEP);
249 
250 	if (vaddr == NULL) {
251 		kmem_free(tcl_new, new_size * sizeof (tuncl_t *));
252 		return ((minor_t)0);
253 	}
254 
255 	/* Migrate tuncl_t entries to a new location */
256 	tcl_nslots = new_size;
257 	bcopy(tcl_old, tcl_new, old_size * sizeof (tuncl_t *));
258 	tcl_slots = tcl_new;
259 	kmem_free(tcl_old, old_size * sizeof (tuncl_t *));
260 
261 	/* Allocate minor number and return it */
262 	return ((minor_t)(uintptr_t)vmem_alloc(tcl_minor_arena, 1, VM_NOSLEEP));
263 }
264 
265 /*
266  * Allocate new minor number and tunnel client entry.  Returns the new
267  * entry or NULL if no memory or maximum number of entries reached.
268  * Assumes we're exclusive on both inner and outer perimeters, and
269  * thus there are no other users of the tcl_slots array.
270  */
271 static tuncl_t *
272 tuncl_alloc(int wantminor)
273 {
274 	minor_t dminor;
275 	tuncl_t *tcl = NULL;
276 
277 	rw_enter(&tcl_rwlock, RW_WRITER);
278 
279 	ASSERT(tcl_slots != NULL);
280 
281 	/*
282 	 * Always try to allocate new pty when sppptun_cnt minimum
283 	 * limit is not achieved. If it is achieved, the maximum is
284 	 * determined by either user-specified value (if it is
285 	 * non-zero) or our memory estimations - whatever is less.
286 	 */
287 	if (tcl_inuse >= sppptun_cnt) {
288 		/*
289 		 * When system achieved required minimum of tunnels,
290 		 * check for the denial of service limits.
291 		 *
292 		 * Get user-imposed maximum, if configured, or
293 		 * calculated memory constraint.
294 		 */
295 		size_t user_max = (sppptun_max_pty == 0 ? tcl_minormax :
296 		    min(sppptun_max_pty, tcl_minormax));
297 
298 		/* Do not try to allocate more than allowed */
299 		if (tcl_inuse >= user_max) {
300 			rw_exit(&tcl_rwlock);
301 			return (NULL);
302 		}
303 	}
304 	tcl_inuse++;
305 
306 	/*
307 	 * Allocate new minor number. If this fails, all slots are
308 	 * busy and we need to grow the hash.
309 	 */
310 	if (wantminor <= 0) {
311 		dminor = (minor_t)(uintptr_t)vmem_alloc(tcl_minor_arena, 1,
312 		    VM_NOSLEEP);
313 		if (dminor == 0) {
314 			/* Grow the cache and retry allocation */
315 			dminor = tcl_grow();
316 		}
317 	} else {
318 		dminor = (minor_t)(uintptr_t)vmem_xalloc(tcl_minor_arena, 1,
319 		    0, 0, 0, (void *)(uintptr_t)wantminor,
320 		    (void *)((uintptr_t)wantminor+1), VM_NOSLEEP);
321 		if (dminor != 0 && dminor != wantminor) {
322 			vmem_free(tcl_minor_arena, (void *)(uintptr_t)dminor,
323 			    1);
324 			dminor = 0;
325 		}
326 	}
327 
328 	if (dminor == 0) {
329 		/* Not enough memory now */
330 		tcl_inuse--;
331 		rw_exit(&tcl_rwlock);
332 		return (NULL);
333 	}
334 
335 	tcl = kmem_cache_alloc(tcl_cache, KM_NOSLEEP);
336 	if (tcl == NULL) {
337 		/* Not enough memory - this entry can't be used now. */
338 		vmem_free(tcl_minor_arena, (void *)(uintptr_t)dminor, 1);
339 		tcl_inuse--;
340 	} else {
341 		bzero(tcl, sizeof (*tcl));
342 		tcl->tcl_lsessid = dminor;
343 		ASSERT(tcl_slots[dminor - 1] == NULL);
344 		tcl_slots[dminor - 1] = tcl;
345 	}
346 
347 	rw_exit(&tcl_rwlock);
348 	return (tcl);
349 }
350 
351 /*
352  * This routine frees an upper level (client) stream by removing it
353  * from the minor number pool and freeing the state structure storage.
354  * Assumes we're exclusive on both inner and outer perimeters, and
355  * thus there are no other concurrent users of the tcl_slots array or
356  * of any entry in that array.
357  */
358 static void
359 tuncl_free(tuncl_t *tcl)
360 {
361 	rw_enter(&tcl_rwlock, RW_WRITER);
362 	ASSERT(tcl->tcl_lsessid <= tcl_nslots);
363 	ASSERT(tcl_slots[tcl->tcl_lsessid - 1] == tcl);
364 	ASSERT(tcl_inuse > 0);
365 	tcl_inuse--;
366 	tcl_slots[tcl->tcl_lsessid - 1] = NULL;
367 
368 	if (tcl->tcl_ksp != NULL) {
369 		kstat_delete(tcl->tcl_ksp);
370 		tcl->tcl_ksp = NULL;
371 	}
372 
373 	/* Return minor number to the pool of minors */
374 	vmem_free(tcl_minor_arena, (void *)(uintptr_t)tcl->tcl_lsessid, 1);
375 
376 	/* Return tuncl_t to the cache */
377 	kmem_cache_free(tcl_cache, tcl);
378 	rw_exit(&tcl_rwlock);
379 }
380 
381 /*
382  * Get tuncl_t structure by minor number.  Returns NULL when minor is
383  * out of range.  Note that lookup of tcl pointers (and use of those
384  * pointers) is safe because modification is done only when exclusive
385  * on both inner and outer perimeters.
386  */
387 static tuncl_t *
388 tcl_by_minor(minor_t dminor)
389 {
390 	tuncl_t *tcl = NULL;
391 
392 	if ((dminor >= 1) && (dminor <= tcl_nslots) && tcl_slots != NULL) {
393 		tcl = tcl_slots[dminor - 1];
394 	}
395 
396 	return (tcl);
397 }
398 
399 /*
400  * Set up kstats for upper or lower stream.
401  */
402 static kstat_t *
403 kstat_setup(kstat_named_t *knt, const char **names, int nstat,
404     const char *modname, int unitnum)
405 {
406 	kstat_t *ksp;
407 	char unitname[KSTAT_STRLEN];
408 	int i;
409 
410 	for (i = 0; i < nstat; i++) {
411 		kstat_set_string(knt[i].name, names[i]);
412 		knt[i].data_type = KSTAT_DATA_UINT64;
413 	}
414 	(void) sprintf(unitname, "%s" "%d", modname, unitnum);
415 	ksp = kstat_create(modname, unitnum, unitname, "net",
416 	    KSTAT_TYPE_NAMED, nstat, KSTAT_FLAG_VIRTUAL);
417 	if (ksp != NULL) {
418 		ksp->ks_data = (void *)knt;
419 		kstat_install(ksp);
420 	}
421 	return (ksp);
422 }
423 
424 /*
425  * sppptun_open()
426  *
427  * MT-Perimeters:
428  *    exclusive inner, exclusive outer.
429  *
430  * Description:
431  *    Common open procedure for module and driver.
432  */
433 static int
434 sppptun_open(queue_t *q, dev_t *devp, int oflag, int sflag, cred_t *credp)
435 {
436 	_NOTE(ARGUNUSED(oflag))
437 
438 	/* Allow a re-open */
439 	if (q->q_ptr != NULL)
440 		return (0);
441 
442 	/* In the off chance that we're on our way out, just return error */
443 	if (tcl_slots == NULL)
444 		return (EINVAL);
445 
446 	if (sflag & MODOPEN) {
447 		tunll_t *tll;
448 		char *cp;
449 
450 		/* ordinary users have no need to push this module */
451 		if (secpolicy_ppp_config(credp) != 0)
452 			return (EPERM);
453 
454 		tll = kmem_zalloc(sizeof (tunll_t), KM_SLEEP);
455 
456 		tll->tll_index = tunll_index++;
457 
458 		tll->tll_wq = WR(q);
459 		tll->tll_zoneid = crgetzoneid(credp);
460 
461 		/* Insert at end of list */
462 		insque(&tll->tll_next, tunll_list.q_back);
463 		q->q_ptr = WR(q)->q_ptr = tll;
464 
465 		tll->tll_style = PTS_PPPOE;
466 		tll->tll_alen = sizeof (tll->tll_lcladdr.pta_pppoe);
467 
468 		tll->tll_ksp = kstat_setup((kstat_named_t *)&tll->tll_kstats,
469 		    tll_kstats_list, Dim(tll_kstats_list), "tll",
470 		    tll->tll_index);
471 
472 		/*
473 		 * Find the name of the driver somewhere beneath us.
474 		 * Note that we have no driver under us until after
475 		 * qprocson().
476 		 */
477 		qprocson(q);
478 		for (q = WR(q); q->q_next != NULL; q = q->q_next)
479 			;
480 		cp = NULL;
481 		if (q->q_qinfo != NULL && q->q_qinfo->qi_minfo != NULL)
482 			cp = q->q_qinfo->qi_minfo->mi_idname;
483 		if (cp != NULL && *cp == '\0')
484 			cp = NULL;
485 
486 		/* Set initial name; user should overwrite. */
487 		if (cp == NULL)
488 			(void) snprintf(tll->tll_name, sizeof (tll->tll_name),
489 			    PPP_TUN_NAME "%d", tll->tll_index);
490 		else
491 			(void) snprintf(tll->tll_name, sizeof (tll->tll_name),
492 			    "%s:tun%d", cp, tll->tll_index);
493 	} else {
494 		tuncl_t	*tcl;
495 
496 		ASSERT(devp != NULL);
497 		if (sflag & CLONEOPEN) {
498 			tcl = tuncl_alloc(-1);
499 		} else {
500 			minor_t mn;
501 
502 			/*
503 			 * Support of non-clone open (ie, mknod with
504 			 * defined minor number) is supported for
505 			 * testing purposes so that 'arbitrary' minor
506 			 * numbers can be used.
507 			 */
508 			mn = getminor(*devp);
509 			if (mn == 0 || (tcl = tcl_by_minor(mn)) != NULL) {
510 				return (EPERM);
511 			}
512 			tcl = tuncl_alloc(mn);
513 		}
514 		if (tcl == NULL)
515 			return (ENOSR);
516 		tcl->tcl_rq = q;		/* save read queue pointer */
517 		tcl->tcl_flags |= TCLF_ISCLIENT;	/* sanity check */
518 		tcl->tcl_zoneid = crgetzoneid(credp);
519 
520 		q->q_ptr = WR(q)->q_ptr = (caddr_t)tcl;
521 		*devp = makedevice(getmajor(*devp), tcl->tcl_lsessid);
522 
523 		tcl->tcl_ksp = kstat_setup((kstat_named_t *)&tcl->tcl_kstats,
524 		    tcl_kstats_list, Dim(tcl_kstats_list), "tcl",
525 		    tcl->tcl_lsessid);
526 
527 		qprocson(q);
528 	}
529 	return (0);
530 }
531 
532 /*
533  * Create an appropriate control message for this client event.
534  */
535 static mblk_t *
536 make_control(tuncl_t *tclabout, tunll_t *tllabout, int action, tuncl_t *tclto)
537 {
538 	struct ppptun_control *ptc;
539 	mblk_t *mp = allocb(sizeof (*ptc), BPRI_HI);
540 
541 	if (mp != NULL) {
542 		MTYPE(mp) = M_PROTO;
543 		ptc = (struct ppptun_control *)mp->b_wptr;
544 		bzero(ptc, sizeof (*ptc));
545 		mp->b_wptr += sizeof (*ptc);
546 		if (tclabout != NULL) {
547 			ptc->ptc_rsessid = tclabout->tcl_rsessid;
548 			ptc->ptc_address = tclabout->tcl_address;
549 		}
550 		ptc->ptc_discrim = tclto->tcl_ctlval;
551 		ptc->ptc_action = action;
552 		if (tllabout != NULL) {
553 			(void) strncpy(ptc->ptc_name, tllabout->tll_name,
554 			    sizeof (ptc->ptc_name));
555 		}
556 	}
557 	return (mp);
558 }
559 
560 /*
561  * Send an appropriate control message up this client session.
562  */
563 static void
564 send_control(tuncl_t *tclabout, tunll_t *tllabout, int action, tuncl_t *tcl)
565 {
566 	mblk_t *mp;
567 
568 	if (tcl->tcl_rq != NULL) {
569 		mp = make_control(tclabout, tllabout, action, tcl);
570 		if (mp != NULL) {
571 			KCINCR(cks_octrl_spec);
572 			putnext(tcl->tcl_rq, mp);
573 		}
574 	}
575 }
576 
577 /*
578  * If a lower stream is being unplumbed, then the upper streams
579  * connected to this lower stream must be disconnected.  This routine
580  * accomplishes this by sending M_HANGUP to data streams and M_PROTO
581  * messages to control streams.  This is called by vmem_walk, and
582  * handles a span of minor node numbers.
583  *
584  * No need to update lks_clients here; the lower stream is on its way
585  * out.
586  */
587 static void
588 tclvm_remove_tll(void *arg, void *firstv, size_t numv)
589 {
590 	tunll_t *tll = (tunll_t *)arg;
591 	int minorn = (int)(uintptr_t)firstv;
592 	int minormax = minorn + numv;
593 	tuncl_t *tcl;
594 	mblk_t *mp;
595 
596 	while (minorn < minormax) {
597 		tcl = tcl_slots[minorn - 1];
598 		ASSERT(tcl != NULL);
599 		if (tcl->tcl_data_tll == tll && tcl->tcl_rq != NULL) {
600 			tcl->tcl_data_tll = NULL;
601 			mp = allocb(0, BPRI_HI);
602 			if (mp != NULL) {
603 				MTYPE(mp) = M_HANGUP;
604 				putnext(tcl->tcl_rq, mp);
605 				if (tcl->tcl_ctrl_tll == tll)
606 					tcl->tcl_ctrl_tll = NULL;
607 			}
608 		}
609 		if (tcl->tcl_ctrl_tll == tll) {
610 			send_control(tcl, tll, PTCA_UNPLUMB, tcl);
611 			tcl->tcl_ctrl_tll = NULL;
612 		}
613 		minorn++;
614 	}
615 }
616 
617 /*
618  * sppptun_close()
619  *
620  * MT-Perimeters:
621  *    exclusive inner, exclusive outer.
622  *
623  * Description:
624  *    Common close procedure for module and driver.
625  */
626 static int
627 sppptun_close(queue_t *q)
628 {
629 	int err;
630 	void *qptr;
631 	tunll_t *tll;
632 	tuncl_t *tcl;
633 
634 	qptr = q->q_ptr;
635 
636 	err = 0;
637 	tll = qptr;
638 	if (!(tll->tll_flags & TLLF_NOTLOWER)) {
639 		/* q_next is set on modules */
640 		ASSERT(WR(q)->q_next != NULL);
641 
642 		/* unlink any clients using this lower layer. */
643 		vmem_walk(tcl_minor_arena, VMEM_ALLOC, tclvm_remove_tll, tll);
644 
645 		/* tell daemon that this has been removed. */
646 		if ((tcl = tll->tll_defcl) != NULL)
647 			send_control(NULL, tll, PTCA_UNPLUMB, tcl);
648 
649 		tll->tll_flags |= TLLF_CLOSING;
650 		while (!(tll->tll_flags & TLLF_CLOSE_DONE)) {
651 			qenable(tll->tll_wq);
652 			qwait(tll->tll_wq);
653 		}
654 		tll->tll_error = 0;
655 		while (!(tll->tll_flags & TLLF_SHUTDOWN_DONE)) {
656 			if (!qwait_sig(tll->tll_wq))
657 				break;
658 		}
659 
660 		qprocsoff(q);
661 		q->q_ptr = WR(q)->q_ptr = NULL;
662 		tll->tll_wq = NULL;
663 		remque(&tll->tll_next);
664 		err = tll->tll_error;
665 		if (tll->tll_ksp != NULL)
666 			kstat_delete(tll->tll_ksp);
667 		kmem_free(tll, sizeof (*tll));
668 	} else {
669 		tcl = qptr;
670 
671 		/* devices are end of line; no q_next. */
672 		ASSERT(WR(q)->q_next == NULL);
673 
674 		qprocsoff(q);
675 		DTRACE_PROBE1(sppptun__client__close, tuncl_t *, tcl);
676 		tcl->tcl_rq = NULL;
677 		q->q_ptr = WR(q)->q_ptr = NULL;
678 
679 		tll = TO_TLL(tunll_list.q_forw);
680 		while (tll != TO_TLL(&tunll_list)) {
681 			if (tll->tll_defcl == tcl)
682 				tll->tll_defcl = NULL;
683 			if (tll->tll_lastcl == tcl)
684 				tll->tll_lastcl = NULL;
685 			tll = TO_TLL(tll->tll_next);
686 		}
687 		/*
688 		 * If this was a normal session, then tell the daemon.
689 		 */
690 		if (!(tcl->tcl_flags & TCLF_DAEMON) &&
691 		    (tll = tcl->tcl_ctrl_tll) != NULL &&
692 		    tll->tll_defcl != NULL) {
693 			send_control(tcl, tll, PTCA_DISCONNECT,
694 			    tll->tll_defcl);
695 		}
696 
697 		/* Update statistics for references being dropped. */
698 		if ((tll = tcl->tcl_data_tll) != NULL) {
699 			KLDECR(lks_clients);
700 		}
701 		if ((tll = tcl->tcl_ctrl_tll) != NULL) {
702 			KLDECR(lks_clients);
703 		}
704 
705 		tuncl_free(tcl);
706 	}
707 
708 	return (err);
709 }
710 
711 /*
712  * Allocate and initialize a DLPI or TPI template of the specified
713  * length.
714  */
715 static mblk_t *
716 pi_alloc(size_t len, int prim)
717 {
718 	mblk_t	*mp;
719 
720 	mp = allocb(len, BPRI_MED);
721 	if (mp != NULL) {
722 		MTYPE(mp) = M_PROTO;
723 		mp->b_wptr = mp->b_rptr + len;
724 		bzero(mp->b_rptr, len);
725 		*(int *)mp->b_rptr = prim;
726 	}
727 	return (mp);
728 }
729 
730 #define	dlpi_alloc(l, p)	pi_alloc((l), (p))
731 
732 /*
733  * Prepend some room to an mblk.  Try to reuse the existing buffer, if
734  * at all possible, rather than allocating a new one.  (Fast-path
735  * output should be able to use this.)
736  *
737  * (XXX why isn't this a library function ...?)
738  */
739 static mblk_t *
740 prependb(mblk_t *mp, size_t len, size_t align)
741 {
742 	mblk_t *newmp;
743 
744 
745 	if (align == 0)
746 		align = 8;
747 	if (DB_REF(mp) > 1 || mp->b_datap->db_base+len > mp->b_rptr ||
748 	    ((uint_t)((uintptr_t)mp->b_rptr - len) % align) != 0) {
749 		if ((newmp = allocb(len, BPRI_LO)) == NULL) {
750 			freemsg(mp);
751 			return (NULL);
752 		}
753 		newmp->b_wptr = newmp->b_rptr + len;
754 		newmp->b_cont = mp;
755 		return (newmp);
756 	}
757 	mp->b_rptr -= len;
758 	return (mp);
759 }
760 
761 /*
762  * sppptun_outpkt()
763  *
764  * MT-Perimeters:
765  *	shared inner, shared outer (if called from sppptun_uwput),
766  *	exclusive inner, shared outer (if called from sppptun_uwsrv).
767  *
768  * Description:
769  *    Called from sppptun_uwput or sppptun_uwsrv when processing a
770  *    M_DATA, M_PROTO, or M_PCPROTO message.  For all cases, it tries
771  *    to prepare the data to be sent to the module below this driver
772  *    if there is a lower stream linked underneath.  If no lower
773  *    stream exists, then the data will be discarded and an ENXIO
774  *    error returned.
775  *
776  * Returns:
777  *	pointer to queue if caller should do putnext, otherwise
778  *	*mpp != NULL if message should be enqueued, otherwise
779  *	*mpp == NULL if message is gone.
780  */
781 static queue_t *
782 sppptun_outpkt(queue_t *q, mblk_t **mpp)
783 {
784 	mblk_t *mp;
785 	tuncl_t *tcl;
786 	tunll_t *tll;
787 	mblk_t *encmb;
788 	mblk_t *datamb;
789 	dl_unitdata_req_t *dur;
790 	queue_t *lowerq;
791 	poep_t *poep;
792 	int len;
793 	ether_dest_t *edestp;
794 	enum { luNone, luCopy, luSend } loopup;
795 	boolean_t isdata;
796 	struct ppptun_control *ptc;
797 
798 	mp = *mpp;
799 	tcl = q->q_ptr;
800 
801 	*mpp = NULL;
802 	if (!(tcl->tcl_flags & TCLF_ISCLIENT)) {
803 		/* This should never happen on a lower layer stream */
804 		freemsg(mp);
805 		return (NULL);
806 	}
807 
808 	isdata = (MTYPE(mp) == M_DATA);
809 	if (isdata) {
810 		tll = tcl->tcl_data_tll;
811 		ptc = NULL;
812 	} else {
813 		/*
814 		 * If data are unaligned or otherwise unsuitable, then
815 		 * discard.
816 		 */
817 		if (MBLKL(mp) != sizeof (*ptc) || DB_REF(mp) > 1 ||
818 		    !IS_P2ALIGNED(mp->b_rptr, sizeof (ptc))) {
819 			KCINCR(cks_octrl_drop);
820 			DTRACE_PROBE2(sppptun__bad__control, tuncl_t *, tcl,
821 			    mblk_t *, mp);
822 			send_control(tcl, tcl->tcl_ctrl_tll, PTCA_BADCTRL, tcl);
823 			freemsg(mp);
824 			return (NULL);
825 		}
826 		ptc = (struct ppptun_control *)mp->b_rptr;
827 
828 		/* Set stream discriminator value if not yet set. */
829 		if (tcl->tcl_ctlval == 0)
830 			tcl->tcl_ctlval = ptc->ptc_discrim;
831 
832 		/* If this is a test message, then reply to caller. */
833 		if (ptc->ptc_action == PTCA_TEST) {
834 			DTRACE_PROBE2(sppptun__test, tuncl_t *, tcl,
835 			    struct ppptun_control *, ptc);
836 			if (mp->b_cont != NULL) {
837 				freemsg(mp->b_cont);
838 				mp->b_cont = NULL;
839 			}
840 			ptc->ptc_discrim = tcl->tcl_ctlval;
841 			putnext(RD(q), mp);
842 			return (NULL);
843 		}
844 
845 		/* If this one isn't for us, then discard it */
846 		if (tcl->tcl_ctlval != ptc->ptc_discrim) {
847 			DTRACE_PROBE2(sppptun__bad__discrim, tuncl_t *, tcl,
848 			    struct ppptun_control *, ptc);
849 			freemsg(mp);
850 			return (NULL);
851 		}
852 
853 		/* Don't allow empty control packets. */
854 		tll = tcl->tcl_ctrl_tll;
855 		if (mp->b_cont == NULL) {
856 			KCINCR(cks_octrl_drop);
857 			DTRACE_PROBE2(sppptun__bad__control, tuncl_t *, tcl,
858 			    mblk_t *, mp);
859 			send_control(tcl, tll, PTCA_BADCTRL, tcl);
860 			freemsg(mp);
861 			return (NULL);
862 		}
863 	}
864 
865 	if (tll == NULL || (lowerq = tll->tll_wq) == NULL) {
866 		DTRACE_PROBE3(sppptun__cannot__send, tuncl_t *, tcl,
867 		    tunll_t *, tll, mblk_t *, mp);
868 		send_control(tcl, tll, PTCA_UNPLUMB, tcl);
869 		freemsg(mp);
870 		if (isdata) {
871 			tcl->tcl_stats.ppp_oerrors++;
872 		} else {
873 			KCINCR(cks_octrl_drop);
874 		}
875 		return (NULL);
876 	}
877 
878 	/*
879 	 * If so, then try to send it down.  The lower queue is only
880 	 * ever detached while holding an exclusive lock on the whole
881 	 * driver, so we can be confident that the lower queue is
882 	 * still there.
883 	 */
884 	if (!bcanputnext(lowerq, mp->b_band)) {
885 		DTRACE_PROBE3(sppptun__flow__control, tuncl_t *, tcl,
886 		    tunll_t *, tll, mblk_t *, mp);
887 		*mpp = mp;
888 		return (NULL);
889 	}
890 
891 	/*
892 	 * Note: DLPI and TPI expect that the first buffer contains
893 	 * the control (unitdata-req) header, destination address, and
894 	 * nothing else.  Any protocol headers must go in the next
895 	 * buffer.
896 	 */
897 	loopup = luNone;
898 	encmb = NULL;
899 	if (isdata) {
900 		if (tll->tll_alen != 0 &&
901 		    bcmp(&tcl->tcl_address, &tll->tll_lcladdr,
902 		    tll->tll_alen) == 0)
903 			loopup = luSend;
904 		switch (tll->tll_style) {
905 		case PTS_PPPOE:
906 			/* Strip address and control fields if present. */
907 			if (mp->b_rptr[0] == 0xFF) {
908 				if (MBLKL(mp) < 3) {
909 					encmb = msgpullup(mp, 3);
910 					freemsg(mp);
911 					if ((mp = encmb) == NULL)
912 						break;
913 				}
914 				mp->b_rptr += 2;
915 			}
916 			/* Broadcasting data is probably not a good idea. */
917 			if (tcl->tcl_address.pta_pppoe.ptma_mac[0] & 1)
918 				break;
919 			encmb = dlpi_alloc(sizeof (*dur) + sizeof (*edestp),
920 			    DL_UNITDATA_REQ);
921 			if (encmb == NULL)
922 				break;
923 
924 			dur = (dl_unitdata_req_t *)encmb->b_rptr;
925 			dur->dl_dest_addr_length = sizeof (*edestp);
926 			dur->dl_dest_addr_offset = sizeof (*dur);
927 			edestp = (ether_dest_t *)(dur + 1);
928 			ether_copy(tcl->tcl_address.pta_pppoe.ptma_mac,
929 			    edestp->addr);
930 			/* DLPI SAPs are in host byte order! */
931 			edestp->type = tll->tll_sap;
932 
933 			/* Make sure the protocol field isn't compressed. */
934 			len = (*mp->b_rptr & 1);
935 			mp = prependb(mp, sizeof (*poep) + len, POE_HDR_ALIGN);
936 			if (mp == NULL)
937 				break;
938 			poep = (poep_t *)mp->b_rptr;
939 			poep->poep_version_type = POE_VERSION;
940 			poep->poep_code = POECODE_DATA;
941 			poep->poep_session_id = htons(tcl->tcl_rsessid);
942 			poep->poep_length = htons(msgsize(mp) -
943 			    sizeof (*poep));
944 			if (len > 0)
945 				*(char *)(poep + 1) = '\0';
946 			break;
947 
948 		default:
949 			ASSERT(0);
950 		}
951 	} else {
952 		/*
953 		 * Control side encapsulation.
954 		 */
955 		if (bcmp(&ptc->ptc_address, &tll->tll_lcladdr, tll->tll_alen)
956 		    == 0)
957 			loopup = luSend;
958 		datamb = mp->b_cont;
959 		switch (tll->tll_style) {
960 		case PTS_PPPOE:
961 			/*
962 			 * Don't allow a loopback session to establish
963 			 * itself.  PPPoE is broken; it uses only one
964 			 * session ID for both data directions, so the
965 			 * loopback data path can simply never work.
966 			 */
967 			if (loopup == luSend &&
968 			    ((poep_t *)datamb->b_rptr)->poep_code ==
969 			    POECODE_PADR)
970 				break;
971 			encmb = dlpi_alloc(sizeof (*dur) + sizeof (*edestp),
972 			    DL_UNITDATA_REQ);
973 			if (encmb == NULL)
974 				break;
975 			dur = (dl_unitdata_req_t *)encmb->b_rptr;
976 			dur->dl_dest_addr_length = sizeof (*edestp);
977 			dur->dl_dest_addr_offset = sizeof (*dur);
978 
979 			edestp = (ether_dest_t *)(dur + 1);
980 			/* DLPI SAPs are in host byte order! */
981 			edestp->type = tll->tll_sap;
982 
983 			/*
984 			 * If destination isn't set yet, then we have to
985 			 * allow anything at all.  Otherwise, force use
986 			 * of configured peer address.
987 			 */
988 			if (bcmp(tcl->tcl_address.pta_pppoe.ptma_mac,
989 			    zero_mac_addr, sizeof (zero_mac_addr)) == 0 ||
990 			    (tcl->tcl_flags & TCLF_DAEMON)) {
991 				ether_copy(ptc->ptc_address.pta_pppoe.ptma_mac,
992 				    edestp->addr);
993 			} else {
994 				ether_copy(tcl->tcl_address.pta_pppoe.ptma_mac,
995 				    edestp->addr);
996 			}
997 			/* Reflect multicast/broadcast back up. */
998 			if (edestp->addr[0] & 1)
999 				loopup = luCopy;
1000 			break;
1001 
1002 		case PTS_PPTP:
1003 			/*
1004 			 * PPTP's control side is actually done over
1005 			 * separate TCP connections.
1006 			 */
1007 		default:
1008 			ASSERT(0);
1009 		}
1010 		freeb(mp);
1011 		mp = datamb;
1012 	}
1013 	if (mp == NULL || encmb == NULL) {
1014 		DTRACE_PROBE1(sppptun__output__failure, tuncl_t *, tcl);
1015 		freemsg(mp);
1016 		freemsg(encmb);
1017 		if (isdata) {
1018 			tcl->tcl_stats.ppp_oerrors++;
1019 		} else {
1020 			KCINCR(cks_octrl_drop);
1021 			KLINCR(lks_octrl_drop);
1022 		}
1023 		lowerq = NULL;
1024 	} else {
1025 		if (isdata) {
1026 			tcl->tcl_stats.ppp_obytes += msgsize(mp);
1027 			tcl->tcl_stats.ppp_opackets++;
1028 		} else {
1029 			KCINCR(cks_octrls);
1030 			KLINCR(lks_octrls);
1031 		}
1032 		if (encmb != mp)
1033 			encmb->b_cont = mp;
1034 		switch (loopup) {
1035 		case luNone:
1036 			*mpp = encmb;
1037 			break;
1038 		case luCopy:
1039 			mp = copymsg(encmb);
1040 			if (mp != NULL)
1041 				sppptun_urput(RD(lowerq), mp);
1042 			*mpp = encmb;
1043 			break;
1044 		case luSend:
1045 			sppptun_urput(RD(lowerq), encmb);
1046 			lowerq = NULL;
1047 			break;
1048 		}
1049 	}
1050 	return (lowerq);
1051 }
1052 
1053 /*
1054  * Enqueue a message to be sent when the lower stream is closed.  This
1055  * is done so that we're guaranteed that we always have the necessary
1056  * resources to properly detach ourselves from the system.  (If we
1057  * waited until the close was done to allocate these messages, then
1058  * the message allocation could fail, and we'd be unable to properly
1059  * detach.)
1060  */
1061 static void
1062 save_for_close(tunll_t *tll, mblk_t *mp)
1063 {
1064 	mblk_t *onc;
1065 
1066 	if ((onc = tll->tll_onclose) == NULL)
1067 		tll->tll_onclose = mp;
1068 	else {
1069 		while (onc->b_next != NULL)
1070 			onc = onc->b_next;
1071 		onc->b_next = mp;
1072 	}
1073 }
1074 
1075 /*
1076  * Given the lower stream name, locate the state structure.  Note that
1077  * lookup of tcl pointers (and use of those pointers) is safe because
1078  * modification is done only when exclusive on both inner and outer
1079  * perimeters.
1080  */
1081 static tunll_t *
1082 tll_lookup_on_name(const char *dname, zoneid_t zoneid)
1083 {
1084 	tunll_t *tll;
1085 
1086 	tll = TO_TLL(tunll_list.q_forw);
1087 	for (; tll != TO_TLL(&tunll_list); tll = TO_TLL(tll->tll_next))
1088 		if (tll->tll_zoneid == zoneid &&
1089 		    strcmp(dname, tll->tll_name) == 0)
1090 			return (tll);
1091 	return (NULL);
1092 }
1093 
1094 /*
1095  * sppptun_inner_ioctl()
1096  *
1097  * MT-Perimeters:
1098  *    exclusive inner, shared outer.
1099  *
1100  * Description:
1101  *    Called by qwriter from sppptun_ioctl as the result of receiving
1102  *    a handled ioctl.
1103  */
1104 static void
1105 sppptun_inner_ioctl(queue_t *q, mblk_t *mp)
1106 {
1107 	struct iocblk *iop;
1108 	int rc = 0;
1109 	int len = 0;
1110 	int i;
1111 	tuncl_t *tcl;
1112 	tunll_t *tll;
1113 	union ppptun_name *ptn;
1114 	struct ppptun_info *pti;
1115 	struct ppptun_peer *ptp;
1116 	mblk_t *mptmp;
1117 	ppptun_atype *pap;
1118 	struct ppp_stats64 *psp;
1119 	zoneid_t zoneid;
1120 
1121 	iop = (struct iocblk *)mp->b_rptr;
1122 	tcl = NULL;
1123 	tll = q->q_ptr;
1124 	if (tll->tll_flags & TLLF_NOTLOWER) {
1125 		tcl = (tuncl_t *)tll;
1126 		tll = NULL;
1127 	}
1128 
1129 	DTRACE_PROBE3(sppptun__ioctl, tuncl_t *, tcl, tunll_t *, tll,
1130 	    struct iocblk *, iop);
1131 
1132 	switch (iop->ioc_cmd) {
1133 	case PPPIO_DEBUG:
1134 		/*
1135 		 * Debug requests are now ignored; use dtrace or wireshark
1136 		 * instead.
1137 		 */
1138 		break;
1139 
1140 	case PPPIO_GETSTAT:
1141 		rc = EINVAL;
1142 		break;
1143 
1144 	case PPPIO_GETSTAT64:
1145 		/* Client (device) side only */
1146 		if (tcl == NULL) {
1147 			rc = EINVAL;
1148 			break;
1149 		}
1150 		mptmp = allocb(sizeof (*psp), BPRI_HI);
1151 		if (mptmp == NULL) {
1152 			rc = ENOSR;
1153 			break;
1154 		}
1155 		freemsg(mp->b_cont);
1156 		mp->b_cont = mptmp;
1157 
1158 		psp = (struct ppp_stats64 *)mptmp->b_wptr;
1159 		bzero((caddr_t)psp, sizeof (*psp));
1160 		psp->p = tcl->tcl_stats;
1161 
1162 		len = sizeof (*psp);
1163 		break;
1164 
1165 	case PPPTUN_SNAME:
1166 		/* This is done on the *module* (lower level) side. */
1167 		if (tll == NULL || mp->b_cont == NULL ||
1168 		    iop->ioc_count != sizeof (*ptn) ||
1169 		    *mp->b_cont->b_rptr == '\0') {
1170 			rc = EINVAL;
1171 			break;
1172 		}
1173 
1174 		ptn = (union ppptun_name *)mp->b_cont->b_rptr;
1175 		ptn->ptn_name[sizeof (ptn->ptn_name) - 1] = '\0';
1176 
1177 		tll = tll_lookup_on_name(ptn->ptn_name, tll->tll_zoneid);
1178 		if (tll != NULL) {
1179 			rc = EEXIST;
1180 			break;
1181 		}
1182 		tll = (tunll_t *)q->q_ptr;
1183 		(void) strcpy(tll->tll_name, ptn->ptn_name);
1184 		break;
1185 
1186 	case PPPTUN_SINFO:
1187 	case PPPTUN_GINFO:
1188 		/* Either side */
1189 		if (mp->b_cont == NULL || iop->ioc_count != sizeof (*pti)) {
1190 			rc = EINVAL;
1191 			break;
1192 		}
1193 		pti = (struct ppptun_info *)mp->b_cont->b_rptr;
1194 		if (pti->pti_name[0] != '\0')
1195 			tll = tll_lookup_on_name(pti->pti_name,
1196 			    tcl == NULL ? tll->tll_zoneid : tcl->tcl_zoneid);
1197 		if (tll == NULL) {
1198 			/* Driver (client) side must have name */
1199 			if (tcl != NULL && pti->pti_name[0] == '\0')
1200 				rc = EINVAL;
1201 			else
1202 				rc = ESRCH;
1203 			break;
1204 		}
1205 		if (iop->ioc_cmd == PPPTUN_GINFO) {
1206 			pti->pti_muxid = tll->tll_muxid;
1207 			pti->pti_style = tll->tll_style;
1208 			len = sizeof (*pti);
1209 			break;
1210 		}
1211 		tll->tll_muxid = pti->pti_muxid;
1212 		tll->tll_style = pti->pti_style;
1213 		switch (tll->tll_style) {
1214 		case PTS_PPPOE:		/* DLPI type */
1215 			tll->tll_alen = sizeof (tll->tll_lcladdr.pta_pppoe);
1216 			mptmp = dlpi_alloc(sizeof (dl_unbind_req_t),
1217 			    DL_UNBIND_REQ);
1218 			if (mptmp == NULL) {
1219 				rc = ENOSR;
1220 				break;
1221 			}
1222 			save_for_close(tll, mptmp);
1223 			mptmp = dlpi_alloc(sizeof (dl_detach_req_t),
1224 			    DL_DETACH_REQ);
1225 			if (mptmp == NULL) {
1226 				rc = ENOSR;
1227 				break;
1228 			}
1229 			save_for_close(tll, mptmp);
1230 			break;
1231 		default:
1232 			tll->tll_style = PTS_NONE;
1233 			tll->tll_alen = 0;
1234 			rc = EINVAL;
1235 			break;
1236 		}
1237 		break;
1238 
1239 	case PPPTUN_GNNAME:
1240 		/* This can be done on either side. */
1241 		if (mp->b_cont == NULL || iop->ioc_count < sizeof (uint32_t)) {
1242 			rc = EINVAL;
1243 			break;
1244 		}
1245 		zoneid = tcl == NULL ? tll->tll_zoneid : tcl->tcl_zoneid;
1246 		ptn = (union ppptun_name *)mp->b_cont->b_rptr;
1247 		i = ptn->ptn_index;
1248 		tll = TO_TLL(tunll_list.q_forw);
1249 		while (tll != TO_TLL(&tunll_list)) {
1250 			if (tll->tll_zoneid == zoneid && --i < 0)
1251 				break;
1252 			tll = TO_TLL(tll->tll_next);
1253 		}
1254 		if (tll != TO_TLL(&tunll_list)) {
1255 			bcopy(tll->tll_name, ptn->ptn_name,
1256 			    sizeof (ptn->ptn_name));
1257 		} else {
1258 			bzero(ptn, sizeof (*ptn));
1259 		}
1260 		len = sizeof (*ptn);
1261 		break;
1262 
1263 	case PPPTUN_LCLADDR:
1264 		/* This is done on the *module* (lower level) side. */
1265 		if (tll == NULL || mp->b_cont == NULL) {
1266 			rc = EINVAL;
1267 			break;
1268 		}
1269 
1270 		pap = &tll->tll_lcladdr;
1271 		len = tll->tll_alen;
1272 		if (len == 0 || len > iop->ioc_count) {
1273 			rc = EINVAL;
1274 			break;
1275 		}
1276 		bcopy(mp->b_cont->b_rptr, pap, len);
1277 		len = 0;
1278 		break;
1279 
1280 	case PPPTUN_SPEER:
1281 		/* Client (device) side only; before SDATA */
1282 		if (tcl == NULL || mp->b_cont == NULL ||
1283 		    iop->ioc_count != sizeof (*ptp)) {
1284 			rc = EINVAL;
1285 			break;
1286 		}
1287 		if (tcl->tcl_data_tll != NULL) {
1288 			rc = EINVAL;
1289 			break;
1290 		}
1291 		ptp = (struct ppptun_peer *)mp->b_cont->b_rptr;
1292 		DTRACE_PROBE2(sppptun__speer, tuncl_t *, tcl,
1293 		    struct ppptun_peer *, ptp);
1294 		/* Once set, the style cannot change. */
1295 		if (tcl->tcl_style != PTS_NONE &&
1296 		    tcl->tcl_style != ptp->ptp_style) {
1297 			rc = EINVAL;
1298 			break;
1299 		}
1300 		if (ptp->ptp_flags & PTPF_DAEMON) {
1301 			/* User requests registration for tunnel 0 */
1302 			if ((tcl->tcl_flags & TCLF_SPEER_DONE) ||
1303 			    ptp->ptp_ltunid != 0 || ptp->ptp_rtunid != 0 ||
1304 			    ptp->ptp_lsessid != 0 || ptp->ptp_rsessid != 0) {
1305 				rc = EINVAL;
1306 				break;
1307 			}
1308 			tcl->tcl_flags |= TCLF_DAEMON;
1309 		} else {
1310 			/* Normal client connection */
1311 			if (tcl->tcl_flags & TCLF_DAEMON) {
1312 				rc = EINVAL;
1313 				break;
1314 			}
1315 			if (ptp->ptp_lsessid != 0 &&
1316 			    ptp->ptp_lsessid != tcl->tcl_lsessid) {
1317 				rc = EINVAL;
1318 				break;
1319 			}
1320 			/*
1321 			 * If we're reassigning the peer data, then
1322 			 * the previous assignment must have been for
1323 			 * a client control connection.  Check that.
1324 			 */
1325 			if ((tcl->tcl_flags & TCLF_SPEER_DONE) &&
1326 			    ((tcl->tcl_ltunid != 0 &&
1327 			    tcl->tcl_ltunid != ptp->ptp_ltunid) ||
1328 			    (tcl->tcl_rtunid != 0 &&
1329 			    tcl->tcl_rtunid != ptp->ptp_rtunid) ||
1330 			    (tcl->tcl_rsessid != 0 &&
1331 			    tcl->tcl_rsessid != ptp->ptp_rsessid))) {
1332 				rc = EINVAL;
1333 				break;
1334 			}
1335 			if ((tcl->tcl_ltunid = ptp->ptp_ltunid) == 0 &&
1336 			    tcl->tcl_style == PTS_L2FTP)
1337 				tcl->tcl_ltunid = ptp->ptp_lsessid;
1338 			tcl->tcl_rtunid = ptp->ptp_rtunid;
1339 			tcl->tcl_rsessid = ptp->ptp_rsessid;
1340 		}
1341 		tcl->tcl_flags |= TCLF_SPEER_DONE;
1342 		tcl->tcl_style = ptp->ptp_style;
1343 		tcl->tcl_address = ptp->ptp_address;
1344 		goto fill_in_peer;
1345 
1346 	case PPPTUN_GPEER:
1347 		/* Client (device) side only */
1348 		if (tcl == NULL) {
1349 			rc = EINVAL;
1350 			break;
1351 		}
1352 		if (mp->b_cont != NULL)
1353 			freemsg(mp->b_cont);
1354 		mp->b_cont = allocb(sizeof (*ptp), BPRI_HI);
1355 		if (mp->b_cont == NULL) {
1356 			rc = ENOSR;
1357 			break;
1358 		}
1359 		ptp = (struct ppptun_peer *)mp->b_cont->b_rptr;
1360 	fill_in_peer:
1361 		ptp->ptp_style = tcl->tcl_style;
1362 		ptp->ptp_flags = (tcl->tcl_flags & TCLF_DAEMON) ? PTPF_DAEMON :
1363 		    0;
1364 		ptp->ptp_ltunid = tcl->tcl_ltunid;
1365 		ptp->ptp_rtunid = tcl->tcl_rtunid;
1366 		ptp->ptp_lsessid = tcl->tcl_lsessid;
1367 		ptp->ptp_rsessid = tcl->tcl_rsessid;
1368 		ptp->ptp_address = tcl->tcl_address;
1369 		len = sizeof (*ptp);
1370 		break;
1371 
1372 	case PPPTUN_SDATA:
1373 	case PPPTUN_SCTL:
1374 		/* Client (device) side only; must do SPEER first */
1375 		if (tcl == NULL || mp->b_cont == NULL ||
1376 		    iop->ioc_count != sizeof (*ptn) ||
1377 		    *mp->b_cont->b_rptr == '\0') {
1378 			rc = EINVAL;
1379 			break;
1380 		}
1381 		if (!(tcl->tcl_flags & TCLF_SPEER_DONE)) {
1382 			rc = EINVAL;
1383 			break;
1384 		}
1385 		ptn = (union ppptun_name *)mp->b_cont->b_rptr;
1386 		ptn->ptn_name[sizeof (ptn->ptn_name) - 1] = '\0';
1387 		tll = tll_lookup_on_name(ptn->ptn_name, tcl->tcl_zoneid);
1388 		if (tll == NULL) {
1389 			rc = ESRCH;
1390 			break;
1391 		}
1392 		if (tll->tll_style != tcl->tcl_style) {
1393 			rc = ENXIO;
1394 			break;
1395 		}
1396 		if (iop->ioc_cmd == PPPTUN_SDATA) {
1397 			if (tcl->tcl_data_tll != NULL) {
1398 				rc = EEXIST;
1399 				break;
1400 			}
1401 			/* server daemons cannot use regular data */
1402 			if (tcl->tcl_flags & TCLF_DAEMON) {
1403 				rc = EINVAL;
1404 				break;
1405 			}
1406 			tcl->tcl_data_tll = tll;
1407 		} else if (tcl->tcl_flags & TCLF_DAEMON) {
1408 			if (tll->tll_defcl != NULL && tll->tll_defcl != tcl) {
1409 				rc = EEXIST;
1410 				break;
1411 			}
1412 			tll->tll_defcl = tcl;
1413 			if (tcl->tcl_ctrl_tll != NULL) {
1414 				KDECR(tcl->tcl_ctrl_tll, tll_kstats,
1415 				    lks_clients);
1416 			}
1417 			tcl->tcl_ctrl_tll = tll;
1418 		} else {
1419 			if (tcl->tcl_ctrl_tll != NULL) {
1420 				rc = EEXIST;
1421 				break;
1422 			}
1423 			tcl->tcl_ctrl_tll = tll;
1424 		}
1425 		KLINCR(lks_clients);
1426 		break;
1427 
1428 	case PPPTUN_GDATA:
1429 	case PPPTUN_GCTL:
1430 		/* Client (device) side only */
1431 		if (tcl == NULL) {
1432 			rc = EINVAL;
1433 			break;
1434 		}
1435 		if (mp->b_cont != NULL)
1436 			freemsg(mp->b_cont);
1437 		mp->b_cont = allocb(sizeof (*ptn), BPRI_HI);
1438 		if (mp->b_cont == NULL) {
1439 			rc = ENOSR;
1440 			break;
1441 		}
1442 		ptn = (union ppptun_name *)mp->b_cont->b_rptr;
1443 		if (iop->ioc_cmd == PPPTUN_GDATA)
1444 			tll = tcl->tcl_data_tll;
1445 		else
1446 			tll = tcl->tcl_ctrl_tll;
1447 		if (tll == NULL)
1448 			bzero(ptn, sizeof (*ptn));
1449 		else
1450 			bcopy(tll->tll_name, ptn->ptn_name,
1451 			    sizeof (ptn->ptn_name));
1452 		len = sizeof (*ptn);
1453 		break;
1454 
1455 	case PPPTUN_DCTL:
1456 		/* Client (device) side daemon mode only */
1457 		if (tcl == NULL || mp->b_cont == NULL ||
1458 		    iop->ioc_count != sizeof (*ptn) ||
1459 		    !(tcl->tcl_flags & TCLF_DAEMON)) {
1460 			rc = EINVAL;
1461 			break;
1462 		}
1463 		ptn = (union ppptun_name *)mp->b_cont->b_rptr;
1464 		ptn->ptn_name[sizeof (ptn->ptn_name) - 1] = '\0';
1465 		tll = tll_lookup_on_name(ptn->ptn_name, tcl->tcl_zoneid);
1466 		if (tll == NULL || tll->tll_defcl != tcl) {
1467 			rc = ESRCH;
1468 			break;
1469 		}
1470 		tll->tll_defcl = NULL;
1471 		break;
1472 
1473 	case PPPTUN_SSAP:
1474 		/* This is done on the *module* (lower level) side. */
1475 		if (tll == NULL || mp->b_cont == NULL ||
1476 		    iop->ioc_count != sizeof (uint_t)) {
1477 			rc = EINVAL;
1478 			break;
1479 		}
1480 
1481 		tll->tll_sap = *(uint_t *)mp->b_cont->b_rptr;
1482 		break;
1483 
1484 	default:
1485 		/* Caller should already have checked command value */
1486 		ASSERT(0);
1487 	}
1488 	if (rc != 0) {
1489 		miocnak(q, mp, 0, rc);
1490 	} else {
1491 		if (len > 0)
1492 			mp->b_cont->b_wptr = mp->b_cont->b_rptr + len;
1493 		miocack(q, mp, len, 0);
1494 	}
1495 }
1496 
1497 /*
1498  * sppptun_ioctl()
1499  *
1500  * MT-Perimeters:
1501  *    shared inner, shared outer.
1502  *
1503  * Description:
1504  *    Called by sppptun_uwput as the result of receiving a M_IOCTL command.
1505  */
1506 static void
1507 sppptun_ioctl(queue_t *q, mblk_t *mp)
1508 {
1509 	struct iocblk *iop;
1510 	int rc = 0;
1511 	int len = 0;
1512 	uint32_t val = 0;
1513 	tunll_t *tll;
1514 
1515 	iop = (struct iocblk *)mp->b_rptr;
1516 
1517 	switch (iop->ioc_cmd) {
1518 	case PPPIO_DEBUG:
1519 	case PPPIO_GETSTAT:
1520 	case PPPIO_GETSTAT64:
1521 	case PPPTUN_SNAME:
1522 	case PPPTUN_SINFO:
1523 	case PPPTUN_GINFO:
1524 	case PPPTUN_GNNAME:
1525 	case PPPTUN_LCLADDR:
1526 	case PPPTUN_SPEER:
1527 	case PPPTUN_GPEER:
1528 	case PPPTUN_SDATA:
1529 	case PPPTUN_GDATA:
1530 	case PPPTUN_SCTL:
1531 	case PPPTUN_GCTL:
1532 	case PPPTUN_DCTL:
1533 	case PPPTUN_SSAP:
1534 		qwriter(q, mp, sppptun_inner_ioctl, PERIM_INNER);
1535 		return;
1536 
1537 	case PPPIO_GCLEAN:	/* always clean */
1538 		val = RCV_B7_1 | RCV_B7_0 | RCV_ODDP | RCV_EVNP;
1539 		len = sizeof (uint32_t);
1540 		break;
1541 
1542 	case PPPIO_GTYPE:	/* we look like an async driver. */
1543 		val = PPPTYP_AHDLC;
1544 		len = sizeof (uint32_t);
1545 		break;
1546 
1547 	case PPPIO_CFLAGS:	/* never compress headers */
1548 		val = 0;
1549 		len = sizeof (uint32_t);
1550 		break;
1551 
1552 		/* quietly ack PPP things we don't need to do. */
1553 	case PPPIO_XFCS:
1554 	case PPPIO_RFCS:
1555 	case PPPIO_XACCM:
1556 	case PPPIO_RACCM:
1557 	case PPPIO_LASTMOD:
1558 	case PPPIO_MUX:
1559 	case I_PLINK:
1560 	case I_PUNLINK:
1561 	case I_LINK:
1562 	case I_UNLINK:
1563 		break;
1564 
1565 	default:
1566 		tll = (tunll_t *)q->q_ptr;
1567 		if (!(tll->tll_flags & TLLF_NOTLOWER)) {
1568 			/* module side; pass this through. */
1569 			putnext(q, mp);
1570 			return;
1571 		}
1572 		rc = EINVAL;
1573 		break;
1574 	}
1575 	if (rc == 0 && len == sizeof (uint32_t)) {
1576 		if (mp->b_cont != NULL)
1577 			freemsg(mp->b_cont);
1578 		mp->b_cont = allocb(sizeof (uint32_t), BPRI_HI);
1579 		if (mp->b_cont == NULL) {
1580 			rc = ENOSR;
1581 		} else {
1582 			*(uint32_t *)mp->b_cont->b_wptr = val;
1583 			mp->b_cont->b_wptr += sizeof (uint32_t);
1584 		}
1585 	}
1586 	if (rc == 0) {
1587 		miocack(q, mp, len, 0);
1588 	} else {
1589 		miocnak(q, mp, 0, rc);
1590 	}
1591 }
1592 
1593 /*
1594  * sppptun_inner_mctl()
1595  *
1596  * MT-Perimeters:
1597  *    exclusive inner, shared outer.
1598  *
1599  * Description:
1600  *    Called by qwriter (via sppptun_uwput) as the result of receiving
1601  *    an M_CTL.  Called only on the client (driver) side.
1602  */
1603 static void
1604 sppptun_inner_mctl(queue_t *q, mblk_t *mp)
1605 {
1606 	int msglen;
1607 	tuncl_t *tcl;
1608 
1609 	tcl = q->q_ptr;
1610 
1611 	if (!(tcl->tcl_flags & TCLF_ISCLIENT)) {
1612 		freemsg(mp);
1613 		return;
1614 	}
1615 
1616 	msglen = MBLKL(mp);
1617 	switch (*mp->b_rptr) {
1618 	case PPPCTL_UNIT:
1619 		if (msglen == 2)
1620 			tcl->tcl_unit = mp->b_rptr[1];
1621 		else if (msglen == 8)
1622 			tcl->tcl_unit = ((uint32_t *)mp->b_rptr)[1];
1623 		break;
1624 	}
1625 	freemsg(mp);
1626 }
1627 
1628 /*
1629  * sppptun_uwput()
1630  *
1631  * MT-Perimeters:
1632  *    shared inner, shared outer.
1633  *
1634  * Description:
1635  *	Regular output data and controls pass through here.
1636  */
1637 static void
1638 sppptun_uwput(queue_t *q, mblk_t *mp)
1639 {
1640 	queue_t *nextq;
1641 	tuncl_t *tcl;
1642 
1643 	ASSERT(q->q_ptr != NULL);
1644 
1645 	switch (MTYPE(mp)) {
1646 	case M_DATA:
1647 	case M_PROTO:
1648 	case M_PCPROTO:
1649 		if (q->q_first == NULL &&
1650 		    (nextq = sppptun_outpkt(q, &mp)) != NULL) {
1651 			putnext(nextq, mp);
1652 		} else if (mp != NULL && !putq(q, mp)) {
1653 			freemsg(mp);
1654 		}
1655 		break;
1656 	case M_IOCTL:
1657 		sppptun_ioctl(q, mp);
1658 		break;
1659 	case M_CTL:
1660 		qwriter(q, mp, sppptun_inner_mctl, PERIM_INNER);
1661 		break;
1662 	default:
1663 		tcl = (tuncl_t *)q->q_ptr;
1664 		/*
1665 		 * If we're the driver, then discard unknown junk.
1666 		 * Otherwise, if we're the module, then forward along.
1667 		 */
1668 		if (tcl->tcl_flags & TCLF_ISCLIENT)
1669 			freemsg(mp);
1670 		else
1671 			putnext(q, mp);
1672 		break;
1673 	}
1674 }
1675 
1676 /*
1677  * Send a DLPI/TPI control message to the driver but make sure there
1678  * is only one outstanding message.  Uses tll_msg_pending to tell when
1679  * it must queue.  sppptun_urput calls message_done() when an ACK or a
1680  * NAK is received to process the next queued message.
1681  */
1682 static void
1683 message_send(tunll_t *tll, mblk_t *mp)
1684 {
1685 	mblk_t **mpp;
1686 
1687 	if (tll->tll_msg_pending) {
1688 		/* Must queue message. Tail insertion */
1689 		mpp = &tll->tll_msg_deferred;
1690 		while (*mpp != NULL)
1691 			mpp = &((*mpp)->b_next);
1692 		*mpp = mp;
1693 		return;
1694 	}
1695 	tll->tll_msg_pending = 1;
1696 	putnext(tll->tll_wq, mp);
1697 }
1698 
1699 /*
1700  * Called when an DLPI/TPI control message has been acked or nacked to
1701  * send down the next queued message (if any).
1702  */
1703 static void
1704 message_done(tunll_t *tll)
1705 {
1706 	mblk_t *mp;
1707 
1708 	ASSERT(tll->tll_msg_pending);
1709 	tll->tll_msg_pending = 0;
1710 	mp = tll->tll_msg_deferred;
1711 	if (mp != NULL) {
1712 		tll->tll_msg_deferred = mp->b_next;
1713 		mp->b_next = NULL;
1714 		tll->tll_msg_pending = 1;
1715 		putnext(tll->tll_wq, mp);
1716 	}
1717 }
1718 
1719 /*
1720  * Send down queued "close" messages to lower stream.  These were
1721  * enqueued right after the stream was originally allocated, when the
1722  * tll_style was set by PPPTUN_SINFO.
1723  */
1724 static int
1725 tll_close_req(tunll_t *tll)
1726 {
1727 	mblk_t *mb, *mbnext;
1728 
1729 	if ((mb = tll->tll_onclose) == NULL)
1730 		tll->tll_flags |= TLLF_SHUTDOWN_DONE;
1731 	else {
1732 		tll->tll_onclose = NULL;
1733 		while (mb != NULL) {
1734 			mbnext = mb->b_next;
1735 			mb->b_next = NULL;
1736 			message_send(tll, mb);
1737 			mb = mbnext;
1738 		}
1739 	}
1740 	return (0);
1741 }
1742 
1743 /*
1744  * This function is called when a backenable occurs on the write side of a
1745  * lower stream.  It walks over the client streams, looking for ones that use
1746  * the given tunll_t lower stream.  Each client is then backenabled.
1747  */
1748 static void
1749 tclvm_backenable(void *arg, void *firstv, size_t numv)
1750 {
1751 	tunll_t *tll = arg;
1752 	int minorn = (int)(uintptr_t)firstv;
1753 	int minormax = minorn + numv;
1754 	tuncl_t *tcl;
1755 	queue_t *q;
1756 
1757 	while (minorn < minormax) {
1758 		tcl = tcl_slots[minorn - 1];
1759 		if ((tcl->tcl_data_tll == tll ||
1760 		    tcl->tcl_ctrl_tll == tll) &&
1761 		    (q = tcl->tcl_rq) != NULL) {
1762 			qenable(OTHERQ(q));
1763 		}
1764 		minorn++;
1765 	}
1766 }
1767 
1768 /*
1769  * sppptun_uwsrv()
1770  *
1771  * MT-Perimeters:
1772  *    exclusive inner, shared outer.
1773  *
1774  * Description:
1775  *    Upper write-side service procedure.  In addition to the usual
1776  *    STREAMS queue service handling, this routine also handles the
1777  *    transmission of the unbind/detach messages to the lower stream
1778  *    driver when a lower stream is being closed.  (See the use of
1779  *    qenable/qwait in sppptun_close().)
1780  */
1781 static int
1782 sppptun_uwsrv(queue_t *q)
1783 {
1784 	tuncl_t	*tcl;
1785 	mblk_t *mp;
1786 	queue_t *nextq;
1787 
1788 	tcl = q->q_ptr;
1789 	if (!(tcl->tcl_flags & TCLF_ISCLIENT)) {
1790 		tunll_t *tll = (tunll_t *)tcl;
1791 
1792 		if ((tll->tll_flags & (TLLF_CLOSING|TLLF_CLOSE_DONE)) ==
1793 		    TLLF_CLOSING) {
1794 			tll->tll_error = tll_close_req(tll);
1795 			tll->tll_flags |= TLLF_CLOSE_DONE;
1796 		} else {
1797 			/*
1798 			 * We've been enabled here because of a backenable on
1799 			 * output flow control.  Backenable clients using this
1800 			 * lower layer.
1801 			 */
1802 			vmem_walk(tcl_minor_arena, VMEM_ALLOC, tclvm_backenable,
1803 			    tll);
1804 		}
1805 		return (0);
1806 	}
1807 
1808 	while ((mp = getq(q)) != NULL) {
1809 		if ((nextq = sppptun_outpkt(q, &mp)) != NULL) {
1810 			putnext(nextq, mp);
1811 		} else if (mp != NULL) {
1812 			(void) putbq(q, mp);
1813 			break;
1814 		}
1815 	}
1816 	return (0);
1817 }
1818 
1819 /*
1820  * sppptun_lwput()
1821  *
1822  * MT-Perimeters:
1823  *    shared inner, shared outer.
1824  *
1825  * Description:
1826  *    Lower write-side put procedure.  Nothing should be sending
1827  *    packets down this stream.
1828  */
1829 static void
1830 sppptun_lwput(queue_t *q, mblk_t *mp)
1831 {
1832 	switch (MTYPE(mp)) {
1833 	case M_PROTO:
1834 		putnext(q, mp);
1835 		break;
1836 	default:
1837 		freemsg(mp);
1838 		break;
1839 	}
1840 }
1841 
1842 /*
1843  * sppptun_lrput()
1844  *
1845  * MT-Perimeters:
1846  *    shared inner, shared outer.
1847  *
1848  * Description:
1849  *    Lower read-side put procedure.  Nothing should arrive here.
1850  */
1851 static void
1852 sppptun_lrput(queue_t *q, mblk_t *mp)
1853 {
1854 	tuncl_t *tcl;
1855 
1856 	switch (MTYPE(mp)) {
1857 	case M_IOCTL:
1858 		miocnak(q, mp, 0, EINVAL);
1859 		return;
1860 	case M_FLUSH:
1861 		if (*mp->b_rptr & FLUSHR) {
1862 			flushq(q, FLUSHDATA);
1863 		}
1864 		if (*mp->b_rptr & FLUSHW) {
1865 			*mp->b_rptr &= ~FLUSHR;
1866 			qreply(q, mp);
1867 		} else {
1868 			freemsg(mp);
1869 		}
1870 		return;
1871 	}
1872 	/*
1873 	 * Try to forward the message to the put procedure for the upper
1874 	 * control stream for this lower stream. If there are already messages
1875 	 * queued here, queue this one up to preserve message ordering.
1876 	 */
1877 	if ((tcl = (tuncl_t *)q->q_ptr) == NULL || tcl->tcl_rq == NULL) {
1878 		freemsg(mp);
1879 		return;
1880 	}
1881 	if (queclass(mp) == QPCTL ||
1882 	    (q->q_first == NULL && canput(tcl->tcl_rq))) {
1883 		put(tcl->tcl_rq, mp);
1884 	} else {
1885 		if (!putq(q, mp))
1886 			freemsg(mp);
1887 	}
1888 }
1889 
1890 /*
1891  * MT-Perimeters:
1892  *    shared inner, shared outer.
1893  *
1894  *    Handle non-data DLPI messages.  Used with PPPoE, which runs over
1895  *    Ethernet only.
1896  */
1897 static void
1898 urput_dlpi(queue_t *q, mblk_t *mp)
1899 {
1900 	int err;
1901 	union DL_primitives *dlp = (union DL_primitives *)mp->b_rptr;
1902 	tunll_t *tll = q->q_ptr;
1903 	size_t mlen = MBLKL(mp);
1904 
1905 	switch (dlp->dl_primitive) {
1906 	case DL_UDERROR_IND:
1907 		break;
1908 
1909 	case DL_ERROR_ACK:
1910 		if (mlen < DL_ERROR_ACK_SIZE)
1911 			break;
1912 		err = dlp->error_ack.dl_unix_errno ?
1913 		    dlp->error_ack.dl_unix_errno : ENXIO;
1914 		switch (dlp->error_ack.dl_error_primitive) {
1915 		case DL_UNBIND_REQ:
1916 			message_done(tll);
1917 			break;
1918 		case DL_DETACH_REQ:
1919 			message_done(tll);
1920 			tll->tll_error = err;
1921 			tll->tll_flags |= TLLF_SHUTDOWN_DONE;
1922 			break;
1923 		case DL_PHYS_ADDR_REQ:
1924 			message_done(tll);
1925 			break;
1926 		case DL_INFO_REQ:
1927 		case DL_ATTACH_REQ:
1928 		case DL_BIND_REQ:
1929 			message_done(tll);
1930 			tll->tll_error = err;
1931 			break;
1932 		}
1933 		break;
1934 
1935 	case DL_INFO_ACK:
1936 		message_done(tll);
1937 		break;
1938 
1939 	case DL_BIND_ACK:
1940 		message_done(tll);
1941 		break;
1942 
1943 	case DL_PHYS_ADDR_ACK:
1944 		break;
1945 
1946 	case DL_OK_ACK:
1947 		if (mlen < DL_OK_ACK_SIZE)
1948 			break;
1949 		switch (dlp->ok_ack.dl_correct_primitive) {
1950 		case DL_UNBIND_REQ:
1951 			message_done(tll);
1952 			break;
1953 		case DL_DETACH_REQ:
1954 			tll->tll_flags |= TLLF_SHUTDOWN_DONE;
1955 			break;
1956 		case DL_ATTACH_REQ:
1957 			message_done(tll);
1958 			break;
1959 		}
1960 		break;
1961 	}
1962 	freemsg(mp);
1963 }
1964 
1965 /* Search structure used with PPPoE only; see tclvm_pppoe_search(). */
1966 struct poedat {
1967 	uint_t sessid;
1968 	tunll_t *tll;
1969 	const void *srcaddr;
1970 	int isdata;
1971 	tuncl_t *tcl;
1972 };
1973 
1974 /*
1975  * This function is called by vmem_walk from within sppptun_recv.  It
1976  * iterates over a span of allocated minor node numbers to search for
1977  * the appropriate lower stream, session ID, and peer MAC address.
1978  *
1979  * (This is necessary due to a design flaw in the PPPoE protocol
1980  * itself.  The protocol assigns session IDs from the server side
1981  * only.  Both server and client use the same number.  Thus, if there
1982  * are multiple clients on a single host, there can be session ID
1983  * conflicts between servers and there's no way to detangle them
1984  * except by looking at the remote MAC address.)
1985  *
1986  * (This could have been handled by linking together sessions that
1987  * differ only in the remote MAC address.  This isn't done because it
1988  * would involve extra per-session storage and it's very unlikely that
1989  * PPPoE would be used this way.)
1990  */
1991 static void
1992 tclvm_pppoe_search(void *arg, void *firstv, size_t numv)
1993 {
1994 	struct poedat *poedat = (struct poedat *)arg;
1995 	int minorn = (int)(uintptr_t)firstv;
1996 	int minormax = minorn + numv;
1997 	tuncl_t *tcl;
1998 
1999 	if (poedat->tcl != NULL)
2000 		return;
2001 	while (minorn < minormax) {
2002 		tcl = tcl_slots[minorn - 1];
2003 		ASSERT(tcl != NULL);
2004 		if (tcl->tcl_rsessid == poedat->sessid &&
2005 		    ((!poedat->isdata && tcl->tcl_ctrl_tll == poedat->tll) ||
2006 		    (poedat->isdata && tcl->tcl_data_tll == poedat->tll)) &&
2007 		    bcmp(tcl->tcl_address.pta_pppoe.ptma_mac,
2008 		    poedat->srcaddr,
2009 		    sizeof (tcl->tcl_address.pta_pppoe.ptma_mac)) == 0) {
2010 			poedat->tcl = tcl;
2011 			break;
2012 		}
2013 		minorn++;
2014 	}
2015 }
2016 
2017 /*
2018  * sppptun_recv()
2019  *
2020  * MT-Perimeters:
2021  *    shared inner, shared outer.
2022  *
2023  * Description:
2024  *    Receive function called by sppptun_urput, which is called when
2025  *    the lower read-side put or service procedure sends a message
2026  *    upstream to the a device user (PPP).  It attempts to find an
2027  *    appropriate queue on the module above us (depending on what the
2028  *    associated upper stream for the protocol would be), and if not
2029  *    possible, it will find an upper control stream for the protocol.
2030  *    Returns a pointer to the upper queue_t, or NULL if the message
2031  *    has been discarded.
2032  *
2033  * About demultiplexing:
2034  *
2035  *	All four protocols (L2F, PPTP, L2TP, and PPPoE) support a
2036  *	locally assigned ID for demultiplexing incoming traffic.  For
2037  *	L2F, this is called the Client ID, for PPTP the Call ID, for
2038  *	L2TP the Session ID, and for PPPoE the SESSION_ID.  This is a
2039  *	16 bit number for all four protocols, and is used to directly
2040  *	index into a list of upper streams.  With the upper stream in
2041  *	hand, we verify that this is the right stream and deliver the
2042  *	data.
2043  *
2044  *	L2TP has a Tunnel ID, which represents a bundle of PPP
2045  *	sessions between the peers.  Because we always assign unique
2046  *	session ID numbers, we merely check that the given ID matches
2047  *	the assigned ID for the upper stream.
2048  *
2049  *	L2F has a Multiplex ID, which is unique per connection.  It
2050  *	does not have L2TP's concept of multiple-connections-within-
2051  *	a-tunnel.  The same checking is done.
2052  *
2053  *	PPPoE is a horribly broken protocol.  Only one ID is assigned
2054  *	per connection.  The client must somehow demultiplex based on
2055  *	an ID number assigned by the server.  It's not necessarily
2056  *	unique.  The search is done based on {ID,peerEthernet} (using
2057  *	tcl_rsessid) for all packet types except PADI and PADS.
2058  *
2059  *	Neither PPPoE nor PPTP supports additional ID numbers.
2060  *
2061  *	Both L2F and L2TP come in over UDP.  They are distinguished by
2062  *	looking at the GRE version field -- 001 for L2F and 010 for
2063  *	L2TP.
2064  */
2065 static queue_t *
2066 sppptun_recv(queue_t *q, mblk_t **mpp, const void *srcaddr)
2067 {
2068 	mblk_t *mp;
2069 	tunll_t *tll;
2070 	tuncl_t *tcl;
2071 	int sessid;
2072 	int remlen;
2073 	int msglen;
2074 	int isdata;
2075 	int i;
2076 	const uchar_t *ucp;
2077 	const poep_t *poep;
2078 	mblk_t *mnew;
2079 	ppptun_atype *pap;
2080 
2081 	mp = *mpp;
2082 
2083 	tll = q->q_ptr;
2084 	ASSERT(!(tll->tll_flags & TLLF_NOTLOWER));
2085 
2086 	tcl = NULL;
2087 	switch (tll->tll_style) {
2088 	case PTS_PPPOE:
2089 		/* Note that poep_t alignment is uint16_t */
2090 		if ((!IS_P2ALIGNED(mp->b_rptr, sizeof (uint16_t)) ||
2091 		    MBLKL(mp) < sizeof (poep_t)) &&
2092 		    !pullupmsg(mp, sizeof (poep_t)))
2093 			break;
2094 		poep = (const poep_t *)mp->b_rptr;
2095 		if (poep->poep_version_type != POE_VERSION)
2096 			break;
2097 		/*
2098 		 * First, extract a session ID number.  All protocols have
2099 		 * this.
2100 		 */
2101 		isdata = (poep->poep_code == POECODE_DATA);
2102 		sessid = ntohs(poep->poep_session_id);
2103 		remlen = sizeof (*poep);
2104 		msglen = ntohs(poep->poep_length);
2105 		i = poep->poep_code;
2106 		if (i == POECODE_PADI || i == POECODE_PADR) {
2107 			/* These go to the server daemon only. */
2108 			tcl = tll->tll_defcl;
2109 		} else if (i == POECODE_PADO || i == POECODE_PADS) {
2110 			/*
2111 			 * These go to a client only, and are demuxed
2112 			 * by the Host-Uniq field (into which we stuff
2113 			 * our local ID number when generating
2114 			 * PADI/PADR).
2115 			 */
2116 			ucp = (const uchar_t *)(poep + 1);
2117 			i = msglen;
2118 			while (i > POET_HDRLEN) {
2119 				if (POET_GET_TYPE(ucp) == POETT_END) {
2120 					i = 0;
2121 					break;
2122 				}
2123 				if (POET_GET_TYPE(ucp) == POETT_UNIQ &&
2124 				    POET_GET_LENG(ucp) >= sizeof (uint32_t))
2125 					break;
2126 				i -= POET_GET_LENG(ucp) + POET_HDRLEN;
2127 				ucp = POET_NEXT(ucp);
2128 			}
2129 			if (i >= POET_HDRLEN + 4)
2130 				sessid = GETLONG(ucp + POET_HDRLEN);
2131 			tcl = tcl_by_minor((minor_t)sessid);
2132 		} else {
2133 			/*
2134 			 * Try minor number as session ID first, since
2135 			 * it's used that way on server side.  It's
2136 			 * not used that way on the client, though, so
2137 			 * this might not work.  If this isn't the
2138 			 * right one, then try the tll cache.  If
2139 			 * neither is right, then search all open
2140 			 * clients.  Did I mention that the PPPoE
2141 			 * protocol is badly designed?
2142 			 */
2143 			tcl = tcl_by_minor((minor_t)sessid);
2144 			if (tcl == NULL ||
2145 			    (!isdata && tcl->tcl_ctrl_tll != tll) ||
2146 			    (isdata && tcl->tcl_data_tll != tll) ||
2147 			    sessid != tcl->tcl_rsessid ||
2148 			    bcmp(srcaddr, tcl->tcl_address.pta_pppoe.ptma_mac,
2149 			    sizeof (tcl->tcl_address.pta_pppoe.ptma_mac)) != 0)
2150 				tcl = tll->tll_lastcl;
2151 			if (tcl == NULL ||
2152 			    (!isdata && tcl->tcl_ctrl_tll != tll) ||
2153 			    (isdata && tcl->tcl_data_tll != tll) ||
2154 			    sessid != tcl->tcl_rsessid ||
2155 			    bcmp(srcaddr, tcl->tcl_address.pta_pppoe.ptma_mac,
2156 			    sizeof (tcl->tcl_address.pta_pppoe.ptma_mac)) != 0)
2157 				tcl = NULL;
2158 			if (tcl == NULL && sessid != 0) {
2159 				struct poedat poedat;
2160 
2161 				/*
2162 				 * Slow mode.  Too bad.  If you don't like it,
2163 				 * you can always choose a better protocol.
2164 				 */
2165 				poedat.sessid = sessid;
2166 				poedat.tll = tll;
2167 				poedat.srcaddr = srcaddr;
2168 				poedat.tcl = NULL;
2169 				poedat.isdata = isdata;
2170 				vmem_walk(tcl_minor_arena, VMEM_ALLOC,
2171 				    tclvm_pppoe_search, &poedat);
2172 				KLINCR(lks_walks);
2173 				if ((tcl = poedat.tcl) != NULL) {
2174 					tll->tll_lastcl = tcl;
2175 					KCINCR(cks_walks);
2176 				}
2177 			}
2178 		}
2179 		break;
2180 	}
2181 
2182 	if (tcl == NULL || tcl->tcl_rq == NULL) {
2183 		DTRACE_PROBE3(sppptun__recv__discard, int, sessid,
2184 		    tuncl_t *, tcl, mblk_t *, mp);
2185 		if (tcl == NULL) {
2186 			KLINCR(lks_in_nomatch);
2187 		}
2188 		if (isdata) {
2189 			KLINCR(lks_indata_drops);
2190 			if (tcl != NULL)
2191 				tcl->tcl_stats.ppp_ierrors++;
2192 		} else {
2193 			KLINCR(lks_inctrl_drops);
2194 			if (tcl != NULL) {
2195 				KCINCR(cks_inctrl_drops);
2196 			}
2197 		}
2198 		freemsg(mp);
2199 		return (NULL);
2200 	}
2201 
2202 	if (tcl->tcl_data_tll == tll && isdata) {
2203 		if (!adjmsg(mp, remlen) ||
2204 		    (i = msgsize(mp)) < msglen ||
2205 		    (i > msglen && !adjmsg(mp, msglen - i))) {
2206 			KLINCR(lks_indata_drops);
2207 			tcl->tcl_stats.ppp_ierrors++;
2208 			freemsg(mp);
2209 			return (NULL);
2210 		}
2211 		/* XXX -- address/control handling in pppd needs help. */
2212 		if (*mp->b_rptr != 0xFF) {
2213 			if ((mp = prependb(mp, 2, 1)) == NULL) {
2214 				KLINCR(lks_indata_drops);
2215 				tcl->tcl_stats.ppp_ierrors++;
2216 				return (NULL);
2217 			}
2218 			mp->b_rptr[0] = 0xFF;
2219 			mp->b_rptr[1] = 0x03;
2220 		}
2221 		MTYPE(mp) = M_DATA;
2222 		tcl->tcl_stats.ppp_ibytes += msgsize(mp);
2223 		tcl->tcl_stats.ppp_ipackets++;
2224 		KLINCR(lks_indata);
2225 	} else {
2226 		if (isdata || tcl->tcl_ctrl_tll != tll ||
2227 		    (mnew = make_control(tcl, tll, PTCA_CONTROL, tcl)) ==
2228 		    NULL) {
2229 			KLINCR(lks_inctrl_drops);
2230 			KCINCR(cks_inctrl_drops);
2231 			freemsg(mp);
2232 			return (NULL);
2233 		}
2234 		/* Fix up source address; peer might not be set yet. */
2235 		pap = &((struct ppptun_control *)mnew->b_rptr)->ptc_address;
2236 		bcopy(srcaddr, pap->pta_pppoe.ptma_mac,
2237 		    sizeof (pap->pta_pppoe.ptma_mac));
2238 		mnew->b_cont = mp;
2239 		mp = mnew;
2240 		KLINCR(lks_inctrls);
2241 		KCINCR(cks_inctrls);
2242 	}
2243 	*mpp = mp;
2244 	return (tcl->tcl_rq);
2245 }
2246 
2247 /*
2248  * sppptun_urput()
2249  *
2250  * MT-Perimeters:
2251  *    shared inner, shared outer.
2252  *
2253  * Description:
2254  *    Upper read-side put procedure.  Messages from the underlying
2255  *    lower stream driver arrive here.  See sppptun_recv for the
2256  *    demultiplexing logic.
2257  */
2258 static void
2259 sppptun_urput(queue_t *q, mblk_t *mp)
2260 {
2261 	union DL_primitives *dlprim;
2262 	mblk_t *mpnext;
2263 	tunll_t *tll;
2264 	queue_t *nextq;
2265 
2266 	tll = q->q_ptr;
2267 	ASSERT(!(tll->tll_flags & TLLF_NOTLOWER));
2268 
2269 	switch (MTYPE(mp)) {
2270 	case M_DATA:
2271 		/*
2272 		 * When we're bound over IP, data arrives here.  The
2273 		 * packet starts with the IP header itself.
2274 		 */
2275 		if ((nextq = sppptun_recv(q, &mp, NULL)) != NULL)
2276 			putnext(nextq, mp);
2277 		break;
2278 
2279 	case M_PROTO:
2280 	case M_PCPROTO:
2281 		/* Data arrives here for UDP or raw Ethernet, not IP. */
2282 		switch (tll->tll_style) {
2283 			/* PPTP control messages are over TCP only. */
2284 		case PTS_PPTP:
2285 		default:
2286 			ASSERT(0);	/* how'd that happen? */
2287 			break;
2288 
2289 		case PTS_PPPOE:		/* DLPI message */
2290 			if (MBLKL(mp) < sizeof (t_uscalar_t))
2291 				break;
2292 			dlprim = (union DL_primitives *)mp->b_rptr;
2293 			switch (dlprim->dl_primitive) {
2294 			case DL_UNITDATA_IND: {
2295 				size_t mlen = MBLKL(mp);
2296 
2297 				if (mlen < DL_UNITDATA_IND_SIZE)
2298 					break;
2299 				if (dlprim->unitdata_ind.dl_src_addr_offset <
2300 				    DL_UNITDATA_IND_SIZE ||
2301 				    dlprim->unitdata_ind.dl_src_addr_offset +
2302 				    dlprim->unitdata_ind.dl_src_addr_length >
2303 				    mlen)
2304 					break;
2305 			}
2306 				/* FALLTHROUGH */
2307 			case DL_UNITDATA_REQ:	/* For loopback support. */
2308 				if (dlprim->dl_primitive == DL_UNITDATA_REQ &&
2309 				    MBLKL(mp) < DL_UNITDATA_REQ_SIZE)
2310 					break;
2311 				if ((mpnext = mp->b_cont) == NULL)
2312 					break;
2313 				MTYPE(mpnext) = M_DATA;
2314 				nextq = sppptun_recv(q, &mpnext,
2315 				    dlprim->dl_primitive == DL_UNITDATA_IND ?
2316 				    mp->b_rptr +
2317 				    dlprim->unitdata_ind.dl_src_addr_offset :
2318 				    tll->tll_lcladdr.pta_pppoe.ptma_mac);
2319 				if (nextq != NULL)
2320 					putnext(nextq, mpnext);
2321 				freeb(mp);
2322 				return;
2323 
2324 			default:
2325 				urput_dlpi(q, mp);
2326 				return;
2327 			}
2328 			break;
2329 		}
2330 		freemsg(mp);
2331 		break;
2332 
2333 	default:
2334 		freemsg(mp);
2335 		break;
2336 	}
2337 }
2338 
2339 /*
2340  * sppptun_ursrv()
2341  *
2342  * MT-Perimeters:
2343  *    exclusive inner, shared outer.
2344  *
2345  * Description:
2346  *    Upper read-side service procedure.  This procedure services the
2347  *    client streams.  We get here because the client (PPP) asserts
2348  *    flow control down to us.
2349  */
2350 static int
2351 sppptun_ursrv(queue_t *q)
2352 {
2353 	mblk_t		*mp;
2354 
2355 	ASSERT(q->q_ptr != NULL);
2356 
2357 	while ((mp = getq(q)) != NULL) {
2358 		if (canputnext(q)) {
2359 			putnext(q, mp);
2360 		} else {
2361 			(void) putbq(q, mp);
2362 			break;
2363 		}
2364 	}
2365 	return (0);
2366 }
2367 
/*
 * Dummy constructor/destructor callbacks for kmem_cache_create.
 * The cached objects need no construction-time setup, so
 * tcl_constructor does nothing and always reports success (0).
 */

/*ARGSUSED*/
static int
tcl_constructor(void *maddr, void *arg, int kmflags)
{
	return (0);
}
2380 
/*
 * Dummy destructor callback for kmem_cache_create; there is nothing
 * to tear down, so the object is left untouched.
 */
/*ARGSUSED*/
static void
tcl_destructor(void *maddr, void *arg)
{
}
2386 
/*
 * Memory charged to a single tunnel client when sizing the driver:
 * one pointer in the tcl_slots array, one tuncl_t structure, and
 * two dblk_t's for the two messages preallocated for close.
 */
#define	TUNCL_SIZE (sizeof (tuncl_t *) + sizeof (tuncl_t) + \
			2 * sizeof (dblk_t))
2394 
/*
 * Clear all bits of x except the highest bit.  Values of 2 or less
 * are returned unchanged.  This assumes highbit() yields the 1-based
 * position of the most significant set bit.
 *
 * The shift is done on an unsigned constant so that a value with
 * bit 31 set does not shift into the sign bit of an int, which is
 * undefined behavior.  Note that x is evaluated more than once, so
 * it must not have side effects.
 */
#define	truncate(x) 	((x) <= 2 ? (x) : (1U << (highbit(x) - 1)))
2399 
2400 /*
2401  * This function initializes some well-known global variables inside
2402  * the module.
2403  *
2404  * Called by sppptun_mod.c:_init() before installing the module.
2405  */
2406 void
2407 sppptun_init(void)
2408 {
2409 	tunll_list.q_forw = tunll_list.q_back = &tunll_list;
2410 }
2411 
2412 /*
2413  * This function allocates the initial internal storage for the
2414  * sppptun driver.
2415  *
2416  * Called by sppptun_mod.c:_init() after installing module.
2417  */
2418 void
2419 sppptun_tcl_init(void)
2420 {
2421 	uint_t i, j;
2422 
2423 	rw_init(&tcl_rwlock, NULL, RW_DRIVER, NULL);
2424 	rw_enter(&tcl_rwlock, RW_WRITER);
2425 	tcl_nslots = sppptun_init_cnt;
2426 	tcl_slots = kmem_zalloc(tcl_nslots * sizeof (tuncl_t *), KM_SLEEP);
2427 
2428 	tcl_cache = kmem_cache_create("sppptun_map", sizeof (tuncl_t), 0,
2429 	    tcl_constructor, tcl_destructor, NULL, NULL, NULL, 0);
2430 
2431 	/* Allocate integer space for minor numbers */
2432 	tcl_minor_arena = vmem_create("sppptun_minor", (void *)1, tcl_nslots,
2433 	    1, NULL, NULL, NULL, 0, VM_SLEEP | VMC_IDENTIFIER);
2434 
2435 	/*
2436 	 * Calculate available number of tunnels - how many tunnels
2437 	 * can we allocate in sppptun_pctofmem % of available
2438 	 * memory.  The value is rounded up to the nearest power of 2.
2439 	 */
2440 	i = (sppptun_pctofmem * kmem_maxavail()) / (100 * TUNCL_SIZE);
2441 	j = truncate(i);	/* i with non-high bits stripped */
2442 	if (i != j)
2443 		j *= 2;
2444 	tcl_minormax = j;
2445 	rw_exit(&tcl_rwlock);
2446 }
2447 
2448 /*
2449  * This function checks that there are no plumbed streams or other users.
2450  *
2451  * Called by sppptun_mod.c:_fini().  Assumes that we're exclusive on
2452  * both perimeters.
2453  */
2454 int
2455 sppptun_tcl_fintest(void)
2456 {
2457 	if (tunll_list.q_forw != &tunll_list || tcl_inuse > 0)
2458 		return (EBUSY);
2459 	else
2460 		return (0);
2461 }
2462 
2463 /*
2464  * If no lower streams are plumbed, then this function deallocates all
2465  * internal storage in preparation for unload.
2466  *
2467  * Called by sppptun_mod.c:_fini().  Assumes that we're exclusive on
2468  * both perimeters.
2469  */
2470 void
2471 sppptun_tcl_fini(void)
2472 {
2473 	if (tcl_minor_arena != NULL) {
2474 		vmem_destroy(tcl_minor_arena);
2475 		tcl_minor_arena = NULL;
2476 	}
2477 	if (tcl_cache != NULL) {
2478 		kmem_cache_destroy(tcl_cache);
2479 		tcl_cache = NULL;
2480 	}
2481 	kmem_free(tcl_slots, tcl_nslots * sizeof (tuncl_t *));
2482 	tcl_slots = NULL;
2483 	rw_destroy(&tcl_rwlock);
2484 	ASSERT(tcl_slots == NULL);
2485 	ASSERT(tcl_cache == NULL);
2486 	ASSERT(tcl_minor_arena == NULL);
2487 }
2488