xref: /illumos-gate/usr/src/uts/common/io/ppp/sppptun/sppptun.c (revision 5b6e8d437b064342671e0a40b3146d7f98802a64)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/types.h>
28 #include <sys/debug.h>
29 #include <sys/param.h>
30 #include <sys/stat.h>
31 #include <sys/systm.h>
32 #include <sys/socket.h>
33 #include <sys/stream.h>
34 #include <sys/stropts.h>
35 #include <sys/errno.h>
36 #include <sys/time.h>
37 #include <sys/cmn_err.h>
38 #include <sys/sdt.h>
39 #include <sys/conf.h>
40 #include <sys/dlpi.h>
41 #include <sys/ddi.h>
42 #include <sys/kstat.h>
43 #include <sys/strsun.h>
44 #include <sys/bitmap.h>
45 #include <sys/sysmacros.h>
46 #include <sys/note.h>
47 #include <sys/policy.h>
48 #include <net/ppp_defs.h>
49 #include <net/pppio.h>
50 #include <net/sppptun.h>
51 #include <net/pppoe.h>
52 #include <netinet/in.h>
53 
54 #include "s_common.h"
55 #include "sppptun_mod.h"
56 #include "sppptun_impl.h"
57 
/*
 * Compile-time defaults for the sppptun_init_cnt and sppptun_pctofmem
 * tunables defined in this file.
 */
#define	NTUN_INITIAL 16			/* Initial number of sppptun slots */
#define	NTUN_PERCENT 5			/* Percent of memory to use */
60 
61 /*
62  * This is used to tag official Solaris sources.  Please do not define
63  * "INTERNAL_BUILD" when building this software outside of Sun
64  * Microsystems.
65  */
#ifdef INTERNAL_BUILD
/* MODINFO is limited to 32 characters. */
const char sppptun_driver_description[] = "PPP 4.0 tunnel driver";
const char sppptun_module_description[] = "PPP 4.0 tunnel module";
#else
/* Descriptions used when INTERNAL_BUILD is not defined. */
const char sppptun_driver_description[] = "ANU PPP tundrv";
const char sppptun_module_description[] = "ANU PPP tunmod";

/*
 * Build timestamp string; DEBUG builds get a " DEBUG" suffix.  Nothing
 * in the visible part of this file references it, which is presumably
 * why it carries a LINTED annotation.
 */
/* LINTED */
static const char buildtime[] = "Built " __DATE__ " at " __TIME__
#ifdef DEBUG
" DEBUG"
#endif
"\n";
#endif /* INTERNAL_BUILD */
81 
/*
 * Tunable values; these are similar to the values used in ptms_conf.c.
 * Override these settings via /etc/system.
 *
 * tuncl_alloc() always attempts an allocation while fewer than
 * sppptun_cnt tunnels are in use.  Once that minimum is reached, the
 * number of tunnels is capped by tcl_minormax, or by the smaller of
 * tcl_minormax and sppptun_max_pty when sppptun_max_pty is non-zero.
 */
uint_t	sppptun_cnt = 0;		/* Minimum number of tunnels */
size_t	sppptun_max_pty = 0;		/* Maximum number of tunnels */
uint_t	sppptun_init_cnt = NTUN_INITIAL; /* Initial number of tunnel slots */
uint_t	sppptun_pctofmem = NTUN_PERCENT; /* Percent of memory to use */
90 
/*
 * Destination address as placed directly after a dl_unitdata_req_t by
 * sppptun_outpkt(): the Ethernet address followed by the SAP (type),
 * which is stored in host byte order.
 */
typedef struct ether_dest_s {
	ether_addr_t addr;	/* destination Ethernet (MAC) address */
	ushort_t type;		/* SAP; copied from tll_sap */
} ether_dest_t;
95 
/*
 * Assemble a 32-bit value from the four consecutive bytes at x, most
 * significant byte first.  The bytes are read individually, so x need
 * not be aligned.  Each byte is widened to uint32_t before shifting so
 * that a first byte with its top bit set is not shifted into the sign
 * bit of an int, and so that the result is unsigned and zero-extends
 * when stored in a wider type.
 */
#define	GETLONG(x)	(((uint32_t)(x)[0] << 24) | \
			((uint32_t)(x)[1] << 16) | \
			((uint32_t)(x)[2] << 8) | \
			(uint32_t)(x)[3])
98 
/*
 * Names of the named kstats for lower streams (tll) and client streams
 * (tcl).  sppptun_open() hands these to kstat_setup() together with the
 * corresponding kstat storage in the tunll_t or tuncl_t.
 */
static const char *tll_kstats_list[] = { TLL_KSTATS_NAMES };
static const char *tcl_kstats_list[] = { TCL_KSTATS_NAMES };
101 
/*
 * Named kstat counter helpers.
 *
 * KREF(p, m, vn) names the 64-bit counter "vn" inside kstat block "m"
 * of the structure that "p" points to; KINCR and KDECR pre-increment
 * and pre-decrement that counter.  The pointer argument and the
 * expansions are parenthesized so that any pointer expression can be
 * passed and so that the macros compose safely inside larger
 * expressions.
 */
#define	KREF(p, m, vn)	((p)->m.vn.value.ui64)
#define	KINCR(p, m, vn)	(++KREF(p, m, vn))
#define	KDECR(p, m, vn)	(--KREF(p, m, vn))

/* Lower stream counters; these expect a local variable named "tll". */
#define	KLINCR(vn)	KINCR(tll, tll_kstats, vn)
#define	KLDECR(vn)	KDECR(tll, tll_kstats, vn)

/* Client stream counters; these expect a local variable named "tcl". */
#define	KCINCR(vn)	KINCR(tcl, tcl_kstats, vn)
#define	KCDECR(vn)	KDECR(tcl, tcl_kstats, vn)
111 
/*
 * Forward declarations of the STREAMS entry points that are referenced
 * by the qinit tables below.  The "u" routines are installed on the
 * upper (st_rdinit/st_wrinit) side and the "l" routines on the lower
 * multiplexor (st_muxrinit/st_muxwrinit) side.
 */
static int	sppptun_open(queue_t *, dev_t *, int, int, cred_t *);
static int	sppptun_close(queue_t *, int, cred_t *);
static int	sppptun_urput(queue_t *, mblk_t *);
static int	sppptun_uwput(queue_t *, mblk_t *);
static int	sppptun_ursrv(queue_t *);
static int	sppptun_uwsrv(queue_t *);
static int	sppptun_lrput(queue_t *, mblk_t *);
static int	sppptun_lwput(queue_t *, mblk_t *);
120 
121 /*
122  * This is the hash table of clients.  Clients are the programs that
123  * open /dev/sppptun as a device.  There may be a large number of
124  * these; one per tunneled PPP session.
125  *
126  * Note: slots are offset from minor node value by 1 because
127  * vmem_alloc returns 0 for failure.
128  *
129  * The tcl_slots array entries are modified only when exclusive on
130  * both inner and outer perimeters.  This ensures that threads on
131  * shared perimeters always view this as unchanging memory with no
132  * need to lock around accesses.  (Specifically, the tcl_slots array
133  * is modified by entry to sppptun_open, sppptun_close, and _fini.)
134  */
static tuncl_t **tcl_slots = NULL;	/* Slots for tuncl_t */
static size_t tcl_nslots = 0;		/* Size of slot array */
static size_t tcl_minormax = 0;		/* Maximum number of tunnels */
static size_t tcl_inuse = 0;		/* # of tunnels currently allocated */
/* Taken as writer by tuncl_alloc() and tuncl_free() around table updates. */
static krwlock_t tcl_rwlock;
static struct kmem_cache *tcl_cache = NULL;	/* tunnel cache */
static vmem_t *tcl_minor_arena = NULL; /* Arena for device minors */
142 
143 /*
144  * This is the simple list of lower layers.  For PPPoE, there is one
145  * of these per Ethernet interface.  Lower layers are established by
146  * "plumbing" -- using I_PLINK to connect the tunnel multiplexor to
147  * the physical interface.
148  */
/*
 * Head of the list of lower streams.  Entries are tunll_t structures
 * linked through tll_next with insque()/remque(); a walk is complete
 * when it arrives back at &tunll_list.
 */
static struct qelem tunll_list;
/* Index given to the next lower stream; incremented by sppptun_open(). */
static int tunll_index;

/* Test value; if all zeroes, then address hasn't been set yet. */
static const ether_addr_t zero_mac_addr = { 0, 0, 0, 0, 0, 0 };
154 
/*
 * Size of a dl_unitdata_req_t plus four more bytes.  Not used in the
 * part of the file reviewed here; NOTE(review): presumably the minimum
 * acceptable fast-path template size -- confirm at the point of use.
 */
#define	MIN_SET_FASTPATH_UNITDATAREQ_SIZE	\
	(sizeof (dl_unitdata_req_t) + 4)

/* Values used to fill in sppptun_modinfo. */
#define	TUN_MI_ID	2104	/* officially allocated module ID */
#define	TUN_MI_MINPSZ	(0)
#define	TUN_MI_MAXPSZ	(PPP_MAXMTU)
#define	TUN_MI_HIWAT	(PPP_MTU * 8)
#define	TUN_MI_LOWAT	(128)
163 
/*
 * Module information shared by all four qinit structures below (upper
 * read/write and lower read/write).
 */
static struct module_info sppptun_modinfo = {
	TUN_MI_ID,		/* mi_idnum */
	PPP_TUN_NAME,		/* mi_idname */
	TUN_MI_MINPSZ,		/* mi_minpsz */
	TUN_MI_MAXPSZ,		/* mi_maxpsz */
	TUN_MI_HIWAT,		/* mi_hiwat */
	TUN_MI_LOWAT		/* mi_lowat */
};
172 
/*
 * Upper read side.  This is the only qinit that carries the open and
 * close routines; sppptun_open() and sppptun_close() serve both the
 * module and the driver personality.
 */
static struct qinit sppptun_urinit = {
	sppptun_urput,		/* qi_putp */
	sppptun_ursrv,		/* qi_srvp */
	sppptun_open,		/* qi_qopen */
	sppptun_close,		/* qi_qclose */
	NULL,			/* qi_qadmin */
	&sppptun_modinfo,	/* qi_minfo */
	NULL			/* qi_mstat */
};
182 
183 static struct qinit sppptun_uwinit = {
184 	(int (*)())sppptun_uwput, /* qi_putp */
185 	sppptun_uwsrv,		/* qi_srvp */
186 	NULL,			/* qi_qopen */
187 	NULL,			/* qi_qclose */
188 	NULL,			/* qi_qadmin */
189 	&sppptun_modinfo,	/* qi_minfo */
190 	NULL			/* qi_mstat */
191 };
192 
193 static struct qinit sppptun_lrinit = {
194 	(int (*)())sppptun_lrput, /* qi_putp */
195 	NULL,			/* qi_srvp */
196 	NULL,			/* qi_qopen */
197 	NULL,			/* qi_qclose */
198 	NULL,			/* qi_qadmin */
199 	&sppptun_modinfo,	/* qi_minfo */
200 	NULL			/* qi_mstat */
201 };
202 
203 static struct qinit sppptun_lwinit = {
204 	(int (*)())sppptun_lwput, /* qi_putp */
205 	NULL,			/* qi_srvp */
206 	NULL,			/* qi_qopen */
207 	NULL,			/* qi_qclose */
208 	NULL,			/* qi_qadmin */
209 	&sppptun_modinfo,	/* qi_minfo */
210 	NULL			/* qi_mstat */
211 };
212 
/*
 * This is referenced in sppptun_mod.c.
 *
 * Ties the upper read/write and lower (multiplexor) read/write qinit
 * structures together into the single streamtab for this driver.
 */
struct streamtab sppptun_tab = {
	&sppptun_urinit,	/* st_rdinit */
	&sppptun_uwinit,	/* st_wrinit */
	&sppptun_lrinit,	/* st_muxrinit */
	&sppptun_lwinit		/* st_muxwrinit */
};
222 
223 /*
224  * Allocate another slot table twice as large as the original one
225  * (limited to global maximum).  Migrate all tunnels to the new slot
226  * table and free the original one.  Assumes we're exclusive on both
227  * inner and outer perimeters, and thus there are no other users of
228  * the tcl_slots array.
229  */
230 static minor_t
231 tcl_grow(void)
232 {
233 	minor_t old_size = tcl_nslots;
234 	minor_t new_size = 2 * old_size;
235 	tuncl_t **tcl_old = tcl_slots;
236 	tuncl_t **tcl_new;
237 	void  *vaddr;			/* vmem_add return value */
238 
239 	ASSERT(RW_LOCK_HELD(&tcl_rwlock));
240 
241 	/* Allocate new ptms array */
242 	tcl_new = kmem_zalloc(new_size * sizeof (tuncl_t *), KM_NOSLEEP);
243 	if (tcl_new == NULL)
244 		return ((minor_t)0);
245 
246 	/* Increase clone index space */
247 	vaddr = vmem_add(tcl_minor_arena, (void*)((uintptr_t)old_size + 1),
248 	    new_size - old_size, VM_NOSLEEP);
249 
250 	if (vaddr == NULL) {
251 		kmem_free(tcl_new, new_size * sizeof (tuncl_t *));
252 		return ((minor_t)0);
253 	}
254 
255 	/* Migrate tuncl_t entries to a new location */
256 	tcl_nslots = new_size;
257 	bcopy(tcl_old, tcl_new, old_size * sizeof (tuncl_t *));
258 	tcl_slots = tcl_new;
259 	kmem_free(tcl_old, old_size * sizeof (tuncl_t *));
260 
261 	/* Allocate minor number and return it */
262 	return ((minor_t)(uintptr_t)vmem_alloc(tcl_minor_arena, 1, VM_NOSLEEP));
263 }
264 
265 /*
266  * Allocate new minor number and tunnel client entry.  Returns the new
267  * entry or NULL if no memory or maximum number of entries reached.
268  * Assumes we're exclusive on both inner and outer perimeters, and
269  * thus there are no other users of the tcl_slots array.
270  */
271 static tuncl_t *
272 tuncl_alloc(int wantminor)
273 {
274 	minor_t dminor;
275 	tuncl_t *tcl = NULL;
276 
277 	rw_enter(&tcl_rwlock, RW_WRITER);
278 
279 	ASSERT(tcl_slots != NULL);
280 
281 	/*
282 	 * Always try to allocate new pty when sppptun_cnt minimum
283 	 * limit is not achieved. If it is achieved, the maximum is
284 	 * determined by either user-specified value (if it is
285 	 * non-zero) or our memory estimations - whatever is less.
286 	 */
287 	if (tcl_inuse >= sppptun_cnt) {
288 		/*
289 		 * When system achieved required minimum of tunnels,
290 		 * check for the denial of service limits.
291 		 *
292 		 * Get user-imposed maximum, if configured, or
293 		 * calculated memory constraint.
294 		 */
295 		size_t user_max = (sppptun_max_pty == 0 ? tcl_minormax :
296 		    min(sppptun_max_pty, tcl_minormax));
297 
298 		/* Do not try to allocate more than allowed */
299 		if (tcl_inuse >= user_max) {
300 			rw_exit(&tcl_rwlock);
301 			return (NULL);
302 		}
303 	}
304 	tcl_inuse++;
305 
306 	/*
307 	 * Allocate new minor number. If this fails, all slots are
308 	 * busy and we need to grow the hash.
309 	 */
310 	if (wantminor <= 0) {
311 		dminor = (minor_t)(uintptr_t)vmem_alloc(tcl_minor_arena, 1,
312 		    VM_NOSLEEP);
313 		if (dminor == 0) {
314 			/* Grow the cache and retry allocation */
315 			dminor = tcl_grow();
316 		}
317 	} else {
318 		dminor = (minor_t)(uintptr_t)vmem_xalloc(tcl_minor_arena, 1,
319 		    0, 0, 0, (void *)(uintptr_t)wantminor,
320 		    (void *)((uintptr_t)wantminor+1), VM_NOSLEEP);
321 		if (dminor != 0 && dminor != wantminor) {
322 			vmem_free(tcl_minor_arena, (void *)(uintptr_t)dminor,
323 			    1);
324 			dminor = 0;
325 		}
326 	}
327 
328 	if (dminor == 0) {
329 		/* Not enough memory now */
330 		tcl_inuse--;
331 		rw_exit(&tcl_rwlock);
332 		return (NULL);
333 	}
334 
335 	tcl = kmem_cache_alloc(tcl_cache, KM_NOSLEEP);
336 	if (tcl == NULL) {
337 		/* Not enough memory - this entry can't be used now. */
338 		vmem_free(tcl_minor_arena, (void *)(uintptr_t)dminor, 1);
339 		tcl_inuse--;
340 	} else {
341 		bzero(tcl, sizeof (*tcl));
342 		tcl->tcl_lsessid = dminor;
343 		ASSERT(tcl_slots[dminor - 1] == NULL);
344 		tcl_slots[dminor - 1] = tcl;
345 	}
346 
347 	rw_exit(&tcl_rwlock);
348 	return (tcl);
349 }
350 
351 /*
352  * This routine frees an upper level (client) stream by removing it
353  * from the minor number pool and freeing the state structure storage.
354  * Assumes we're exclusive on both inner and outer perimeters, and
355  * thus there are no other concurrent users of the tcl_slots array or
356  * of any entry in that array.
357  */
358 static void
359 tuncl_free(tuncl_t *tcl)
360 {
361 	rw_enter(&tcl_rwlock, RW_WRITER);
362 	ASSERT(tcl->tcl_lsessid <= tcl_nslots);
363 	ASSERT(tcl_slots[tcl->tcl_lsessid - 1] == tcl);
364 	ASSERT(tcl_inuse > 0);
365 	tcl_inuse--;
366 	tcl_slots[tcl->tcl_lsessid - 1] = NULL;
367 
368 	if (tcl->tcl_ksp != NULL) {
369 		kstat_delete(tcl->tcl_ksp);
370 		tcl->tcl_ksp = NULL;
371 	}
372 
373 	/* Return minor number to the pool of minors */
374 	vmem_free(tcl_minor_arena, (void *)(uintptr_t)tcl->tcl_lsessid, 1);
375 
376 	/* Return tuncl_t to the cache */
377 	kmem_cache_free(tcl_cache, tcl);
378 	rw_exit(&tcl_rwlock);
379 }
380 
381 /*
382  * Get tuncl_t structure by minor number.  Returns NULL when minor is
383  * out of range.  Note that lookup of tcl pointers (and use of those
384  * pointers) is safe because modification is done only when exclusive
385  * on both inner and outer perimeters.
386  */
387 static tuncl_t *
388 tcl_by_minor(minor_t dminor)
389 {
390 	tuncl_t *tcl = NULL;
391 
392 	if ((dminor >= 1) && (dminor <= tcl_nslots) && tcl_slots != NULL) {
393 		tcl = tcl_slots[dminor - 1];
394 	}
395 
396 	return (tcl);
397 }
398 
399 /*
400  * Set up kstats for upper or lower stream.
401  */
402 static kstat_t *
403 kstat_setup(kstat_named_t *knt, const char **names, int nstat,
404     const char *modname, int unitnum)
405 {
406 	kstat_t *ksp;
407 	char unitname[KSTAT_STRLEN];
408 	int i;
409 
410 	for (i = 0; i < nstat; i++) {
411 		kstat_set_string(knt[i].name, names[i]);
412 		knt[i].data_type = KSTAT_DATA_UINT64;
413 	}
414 	(void) sprintf(unitname, "%s" "%d", modname, unitnum);
415 	ksp = kstat_create(modname, unitnum, unitname, "net",
416 	    KSTAT_TYPE_NAMED, nstat, KSTAT_FLAG_VIRTUAL);
417 	if (ksp != NULL) {
418 		ksp->ks_data = (void *)knt;
419 		kstat_install(ksp);
420 	}
421 	return (ksp);
422 }
423 
/*
 * sppptun_open()
 *
 * MT-Perimeters:
 *    exclusive inner, exclusive outer.
 *
 * Description:
 *    Common open procedure for module and driver.
 *
 *    When pushed as a module (MODOPEN), a tunll_t describing the new
 *    lower stream is allocated, appended to tunll_list, and given the
 *    PPPoE style and an initial name.  Otherwise a tuncl_t client is
 *    allocated (any free minor for CLONEOPEN, or the minor named by
 *    *devp) and *devp is rewritten to carry the assigned minor.
 *
 * Returns:
 *    0 on success or on re-open of an already initialized queue;
 *    EINVAL if the slot table does not exist; EPERM if the caller may
 *    not push the module, or if a non-clone open names minor 0 or a
 *    minor that is already in use; ENOSR if no client entry could be
 *    allocated.
 */
static int
sppptun_open(queue_t *q, dev_t *devp, int oflag, int sflag, cred_t *credp)
{
	_NOTE(ARGUNUSED(oflag))

	/* Allow a re-open */
	if (q->q_ptr != NULL)
		return (0);

	/* In the off chance that we're on our way out, just return error */
	if (tcl_slots == NULL)
		return (EINVAL);

	if (sflag & MODOPEN) {
		tunll_t *tll;
		char *cp;

		/* ordinary users have no need to push this module */
		if (secpolicy_ppp_config(credp) != 0)
			return (EPERM);

		/* KM_SLEEP: the result is used without a NULL check. */
		tll = kmem_zalloc(sizeof (tunll_t), KM_SLEEP);

		tll->tll_index = tunll_index++;

		tll->tll_wq = WR(q);
		tll->tll_zoneid = crgetzoneid(credp);

		/* Insert at end of list */
		insque(&tll->tll_next, tunll_list.q_back);
		q->q_ptr = WR(q)->q_ptr = tll;

		/* New lower streams start out as PPPoE. */
		tll->tll_style = PTS_PPPOE;
		tll->tll_alen = sizeof (tll->tll_lcladdr.pta_pppoe);

		/* May be NULL on failure; sppptun_close() checks for that. */
		tll->tll_ksp = kstat_setup((kstat_named_t *)&tll->tll_kstats,
		    tll_kstats_list, Dim(tll_kstats_list), "tll",
		    tll->tll_index);

		/*
		 * Find the name of the driver somewhere beneath us.
		 * Note that we have no driver under us until after
		 * qprocson().
		 */
		qprocson(q);
		/* Follow the write side down to the last queue. */
		for (q = WR(q); q->q_next != NULL; q = q->q_next)
			;
		cp = NULL;
		if (q->q_qinfo != NULL && q->q_qinfo->qi_minfo != NULL)
			cp = q->q_qinfo->qi_minfo->mi_idname;
		/* Treat an empty name the same as a missing one. */
		if (cp != NULL && *cp == '\0')
			cp = NULL;

		/* Set initial name; user should overwrite. */
		if (cp == NULL)
			(void) snprintf(tll->tll_name, sizeof (tll->tll_name),
			    PPP_TUN_NAME "%d", tll->tll_index);
		else
			(void) snprintf(tll->tll_name, sizeof (tll->tll_name),
			    "%s:tun%d", cp, tll->tll_index);
	} else {
		tuncl_t	*tcl;

		ASSERT(devp != NULL);
		if (sflag & CLONEOPEN) {
			/* Any free minor will do. */
			tcl = tuncl_alloc(-1);
		} else {
			minor_t mn;

			/*
			 * Non-clone open (ie, mknod with defined minor
			 * number) is supported for testing purposes so
			 * that 'arbitrary' minor numbers can be used.
			 */
			mn = getminor(*devp);
			if (mn == 0 || (tcl = tcl_by_minor(mn)) != NULL) {
				return (EPERM);
			}
			tcl = tuncl_alloc(mn);
		}
		if (tcl == NULL)
			return (ENOSR);
		tcl->tcl_rq = q;		/* save read queue pointer */
		tcl->tcl_flags |= TCLF_ISCLIENT;	/* sanity check */
		tcl->tcl_zoneid = crgetzoneid(credp);

		q->q_ptr = WR(q)->q_ptr = (caddr_t)tcl;
		/* Report the minor number actually assigned. */
		*devp = makedevice(getmajor(*devp), tcl->tcl_lsessid);

		/* May be NULL on failure; tuncl_free() checks for that. */
		tcl->tcl_ksp = kstat_setup((kstat_named_t *)&tcl->tcl_kstats,
		    tcl_kstats_list, Dim(tcl_kstats_list), "tcl",
		    tcl->tcl_lsessid);

		qprocson(q);
	}
	return (0);
}
531 
532 /*
533  * Create an appropriate control message for this client event.
534  */
535 static mblk_t *
536 make_control(tuncl_t *tclabout, tunll_t *tllabout, int action, tuncl_t *tclto)
537 {
538 	struct ppptun_control *ptc;
539 	mblk_t *mp = allocb(sizeof (*ptc), BPRI_HI);
540 
541 	if (mp != NULL) {
542 		MTYPE(mp) = M_PROTO;
543 		ptc = (struct ppptun_control *)mp->b_wptr;
544 		bzero(ptc, sizeof (*ptc));
545 		mp->b_wptr += sizeof (*ptc);
546 		if (tclabout != NULL) {
547 			ptc->ptc_rsessid = tclabout->tcl_rsessid;
548 			ptc->ptc_address = tclabout->tcl_address;
549 		}
550 		ptc->ptc_discrim = tclto->tcl_ctlval;
551 		ptc->ptc_action = action;
552 		if (tllabout != NULL) {
553 			(void) strncpy(ptc->ptc_name, tllabout->tll_name,
554 			    sizeof (ptc->ptc_name));
555 		}
556 	}
557 	return (mp);
558 }
559 
560 /*
561  * Send an appropriate control message up this client session.
562  */
563 static void
564 send_control(tuncl_t *tclabout, tunll_t *tllabout, int action, tuncl_t *tcl)
565 {
566 	mblk_t *mp;
567 
568 	if (tcl->tcl_rq != NULL) {
569 		mp = make_control(tclabout, tllabout, action, tcl);
570 		if (mp != NULL) {
571 			KCINCR(cks_octrl_spec);
572 			putnext(tcl->tcl_rq, mp);
573 		}
574 	}
575 }
576 
577 /*
578  * If a lower stream is being unplumbed, then the upper streams
579  * connected to this lower stream must be disconnected.  This routine
580  * accomplishes this by sending M_HANGUP to data streams and M_PROTO
581  * messages to control streams.  This is called by vmem_walk, and
582  * handles a span of minor node numbers.
583  *
584  * No need to update lks_clients here; the lower stream is on its way
585  * out.
586  */
587 static void
588 tclvm_remove_tll(void *arg, void *firstv, size_t numv)
589 {
590 	tunll_t *tll = (tunll_t *)arg;
591 	int minorn = (int)(uintptr_t)firstv;
592 	int minormax = minorn + numv;
593 	tuncl_t *tcl;
594 	mblk_t *mp;
595 
596 	while (minorn < minormax) {
597 		tcl = tcl_slots[minorn - 1];
598 		ASSERT(tcl != NULL);
599 		if (tcl->tcl_data_tll == tll && tcl->tcl_rq != NULL) {
600 			tcl->tcl_data_tll = NULL;
601 			mp = allocb(0, BPRI_HI);
602 			if (mp != NULL) {
603 				MTYPE(mp) = M_HANGUP;
604 				putnext(tcl->tcl_rq, mp);
605 				if (tcl->tcl_ctrl_tll == tll)
606 					tcl->tcl_ctrl_tll = NULL;
607 			}
608 		}
609 		if (tcl->tcl_ctrl_tll == tll) {
610 			send_control(tcl, tll, PTCA_UNPLUMB, tcl);
611 			tcl->tcl_ctrl_tll = NULL;
612 		}
613 		minorn++;
614 	}
615 }
616 
/*
 * sppptun_close()
 *
 * MT-Perimeters:
 *    exclusive inner, exclusive outer.
 *
 * Description:
 *    Common close procedure for module and driver.
 *
 *    q_ptr refers either to a tunll_t (module instance, i.e. a lower
 *    stream) or to a tuncl_t (driver instance, i.e. a client); the two
 *    are told apart by testing TLLF_NOTLOWER through a tunll_t pointer.
 *    NOTE(review): this relies on the flags members of tunll_t and
 *    tuncl_t overlaying each other, with client flags including
 *    TLLF_NOTLOWER -- confirm against sppptun_impl.h.
 *
 * Returns:
 *    For a lower stream, the value of tll_error after the shutdown
 *    wait; for a client, 0.
 */
/* ARGSUSED */
static int
sppptun_close(queue_t *q, int flags __unused, cred_t *credp __unused)
{
	int err;
	void *qptr;
	tunll_t *tll;
	tuncl_t *tcl;

	qptr = q->q_ptr;

	err = 0;
	tll = qptr;
	if (!(tll->tll_flags & TLLF_NOTLOWER)) {
		/* q_next is set on modules */
		ASSERT(WR(q)->q_next != NULL);

		/* unlink any clients using this lower layer. */
		vmem_walk(tcl_minor_arena, VMEM_ALLOC, tclvm_remove_tll, tll);

		/* tell daemon that this has been removed. */
		if ((tcl = tll->tll_defcl) != NULL)
			send_control(NULL, tll, PTCA_UNPLUMB, tcl);

		/*
		 * Flag the close and keep scheduling the write queue
		 * until TLLF_CLOSE_DONE has been set.
		 */
		tll->tll_flags |= TLLF_CLOSING;
		while (!(tll->tll_flags & TLLF_CLOSE_DONE)) {
			qenable(tll->tll_wq);
			qwait(tll->tll_wq);
		}
		/*
		 * Then wait for TLLF_SHUTDOWN_DONE.  This wait is
		 * abandoned as soon as qwait_sig() returns zero.
		 */
		tll->tll_error = 0;
		while (!(tll->tll_flags & TLLF_SHUTDOWN_DONE)) {
			if (!qwait_sig(tll->tll_wq))
				break;
		}

		qprocsoff(q);
		q->q_ptr = WR(q)->q_ptr = NULL;
		tll->tll_wq = NULL;
		remque(&tll->tll_next);
		/* Save the result before the structure is freed. */
		err = tll->tll_error;
		if (tll->tll_ksp != NULL)
			kstat_delete(tll->tll_ksp);
		kmem_free(tll, sizeof (*tll));
	} else {
		tcl = qptr;

		/* devices are end of line; no q_next. */
		ASSERT(WR(q)->q_next == NULL);

		qprocsoff(q);
		DTRACE_PROBE1(sppptun__client__close, tuncl_t *, tcl);
		tcl->tcl_rq = NULL;
		q->q_ptr = WR(q)->q_ptr = NULL;

		/* Drop every lower stream's cached pointers to this client. */
		tll = TO_TLL(tunll_list.q_forw);
		while (tll != TO_TLL(&tunll_list)) {
			if (tll->tll_defcl == tcl)
				tll->tll_defcl = NULL;
			if (tll->tll_lastcl == tcl)
				tll->tll_lastcl = NULL;
			tll = TO_TLL(tll->tll_next);
		}
		/*
		 * If this was a normal session, then tell the daemon.
		 */
		if (!(tcl->tcl_flags & TCLF_DAEMON) &&
		    (tll = tcl->tcl_ctrl_tll) != NULL &&
		    tll->tll_defcl != NULL) {
			send_control(tcl, tll, PTCA_DISCONNECT,
			    tll->tll_defcl);
		}

		/*
		 * Update statistics for references being dropped.  The
		 * assignments to tll matter: KLDECR() operates on the
		 * local variable named "tll".
		 */
		if ((tll = tcl->tcl_data_tll) != NULL) {
			KLDECR(lks_clients);
		}
		if ((tll = tcl->tcl_ctrl_tll) != NULL) {
			KLDECR(lks_clients);
		}

		tuncl_free(tcl);
	}

	return (err);
}
711 
712 /*
713  * Allocate and initialize a DLPI or TPI template of the specified
714  * length.
715  */
716 static mblk_t *
717 pi_alloc(size_t len, int prim)
718 {
719 	mblk_t	*mp;
720 
721 	mp = allocb(len, BPRI_MED);
722 	if (mp != NULL) {
723 		MTYPE(mp) = M_PROTO;
724 		mp->b_wptr = mp->b_rptr + len;
725 		bzero(mp->b_rptr, len);
726 		*(int *)mp->b_rptr = prim;
727 	}
728 	return (mp);
729 }
730 
/* DLPI flavor of pi_alloc(): l is the message length, p the primitive. */
#define	dlpi_alloc(l, p)	pi_alloc((l), (p))
732 
733 /*
734  * Prepend some room to an mblk.  Try to reuse the existing buffer, if
735  * at all possible, rather than allocating a new one.  (Fast-path
736  * output should be able to use this.)
737  *
738  * (XXX why isn't this a library function ...?)
739  */
740 static mblk_t *
741 prependb(mblk_t *mp, size_t len, size_t align)
742 {
743 	mblk_t *newmp;
744 
745 
746 	if (align == 0)
747 		align = 8;
748 	if (DB_REF(mp) > 1 || mp->b_datap->db_base+len > mp->b_rptr ||
749 	    ((uint_t)((uintptr_t)mp->b_rptr - len) % align) != 0) {
750 		if ((newmp = allocb(len, BPRI_LO)) == NULL) {
751 			freemsg(mp);
752 			return (NULL);
753 		}
754 		newmp->b_wptr = newmp->b_rptr + len;
755 		newmp->b_cont = mp;
756 		return (newmp);
757 	}
758 	mp->b_rptr -= len;
759 	return (mp);
760 }
761 
/*
 * sppptun_outpkt()
 *
 * MT-Perimeters:
 *	shared inner, shared outer (if called from sppptun_uwput),
 *	exclusive inner, shared outer (if called from sppptun_uwsrv).
 *
 * Description:
 *    Called from sppptun_uwput or sppptun_uwsrv when processing a
 *    M_DATA, M_PROTO, or M_PCPROTO message.  For all cases, it tries
 *    to prepare the data to be sent to the module below this driver
 *    if there is a lower stream linked underneath.  If no lower
 *    stream exists, then the data will be discarded and a
 *    PTCA_UNPLUMB control message is sent back up to the client.
 *
 *    M_DATA goes out over the client's data lower stream.  Any other
 *    message type is treated as a control message: its first block
 *    must be a struct ppptun_control and the payload follows in
 *    b_cont; it goes out over the client's control lower stream.
 *
 *    On entry *mpp is the message to send; this routine takes
 *    ownership of it and always rewrites *mpp (see below).
 *
 * Returns:
 *	pointer to queue if caller should do putnext (with *mpp set to
 *	the encapsulated message), otherwise
 *	*mpp != NULL if message should be enqueued, otherwise
 *	*mpp == NULL if message is gone.
 */
static queue_t *
sppptun_outpkt(queue_t *q, mblk_t **mpp)
{
	mblk_t *mp;
	tuncl_t *tcl;
	tunll_t *tll;
	mblk_t *encmb;
	mblk_t *datamb;
	dl_unitdata_req_t *dur;
	queue_t *lowerq;
	poep_t *poep;
	int len;
	ether_dest_t *edestp;
	/*
	 * What to do with the finished message: luNone sends it down
	 * only, luCopy sends it down and loops a copy back up, and
	 * luSend loops it back up only.
	 */
	enum { luNone, luCopy, luSend } loopup;
	boolean_t isdata;
	struct ppptun_control *ptc;

	mp = *mpp;
	tcl = q->q_ptr;

	/* From here on *mpp is non-NULL only if the caller must requeue. */
	*mpp = NULL;
	if (!(tcl->tcl_flags & TCLF_ISCLIENT)) {
		/* This should never happen on a lower layer stream */
		freemsg(mp);
		return (NULL);
	}

	isdata = (MTYPE(mp) == M_DATA);
	if (isdata) {
		tll = tcl->tcl_data_tll;
		ptc = NULL;
	} else {
		/*
		 * If data are unaligned or otherwise unsuitable, then
		 * discard.
		 *
		 * NOTE(review): alignment is tested against
		 * sizeof (ptc), i.e. the size of a pointer, rather than
		 * against the structure -- presumably intentional;
		 * confirm.
		 */
		if (MBLKL(mp) != sizeof (*ptc) || DB_REF(mp) > 1 ||
		    !IS_P2ALIGNED(mp->b_rptr, sizeof (ptc))) {
			KCINCR(cks_octrl_drop);
			DTRACE_PROBE2(sppptun__bad__control, tuncl_t *, tcl,
			    mblk_t *, mp);
			send_control(tcl, tcl->tcl_ctrl_tll, PTCA_BADCTRL, tcl);
			freemsg(mp);
			return (NULL);
		}
		ptc = (struct ppptun_control *)mp->b_rptr;

		/* Set stream discriminator value if not yet set. */
		if (tcl->tcl_ctlval == 0)
			tcl->tcl_ctlval = ptc->ptc_discrim;

		/* If this is a test message, then reply to caller. */
		if (ptc->ptc_action == PTCA_TEST) {
			DTRACE_PROBE2(sppptun__test, tuncl_t *, tcl,
			    struct ppptun_control *, ptc);
			/* Only the control block itself is echoed back. */
			if (mp->b_cont != NULL) {
				freemsg(mp->b_cont);
				mp->b_cont = NULL;
			}
			ptc->ptc_discrim = tcl->tcl_ctlval;
			putnext(RD(q), mp);
			return (NULL);
		}

		/* If this one isn't for us, then discard it */
		if (tcl->tcl_ctlval != ptc->ptc_discrim) {
			DTRACE_PROBE2(sppptun__bad__discrim, tuncl_t *, tcl,
			    struct ppptun_control *, ptc);
			freemsg(mp);
			return (NULL);
		}

		/* Don't allow empty control packets. */
		tll = tcl->tcl_ctrl_tll;
		if (mp->b_cont == NULL) {
			KCINCR(cks_octrl_drop);
			DTRACE_PROBE2(sppptun__bad__control, tuncl_t *, tcl,
			    mblk_t *, mp);
			send_control(tcl, tll, PTCA_BADCTRL, tcl);
			freemsg(mp);
			return (NULL);
		}
	}

	/* No lower stream to send on: tell the client and drop. */
	if (tll == NULL || (lowerq = tll->tll_wq) == NULL) {
		DTRACE_PROBE3(sppptun__cannot__send, tuncl_t *, tcl,
		    tunll_t *, tll, mblk_t *, mp);
		send_control(tcl, tll, PTCA_UNPLUMB, tcl);
		freemsg(mp);
		if (isdata) {
			tcl->tcl_stats.ppp_oerrors++;
		} else {
			KCINCR(cks_octrl_drop);
		}
		return (NULL);
	}

	/*
	 * If so, then try to send it down.  The lower queue is only
	 * ever detached while holding an exclusive lock on the whole
	 * driver, so we can be confident that the lower queue is
	 * still there.
	 */
	if (!bcanputnext(lowerq, mp->b_band)) {
		DTRACE_PROBE3(sppptun__flow__control, tuncl_t *, tcl,
		    tunll_t *, tll, mblk_t *, mp);
		/* Flow controlled: hand the untouched message back. */
		*mpp = mp;
		return (NULL);
	}

	/*
	 * Note: DLPI and TPI expect that the first buffer contains
	 * the control (unitdata-req) header, destination address, and
	 * nothing else.  Any protocol headers must go in the next
	 * buffer.
	 */
	loopup = luNone;
	encmb = NULL;
	if (isdata) {
		/* Data addressed to our own local address is looped back. */
		if (tll->tll_alen != 0 &&
		    bcmp(&tcl->tcl_address, &tll->tll_lcladdr,
		    tll->tll_alen) == 0)
			loopup = luSend;
		switch (tll->tll_style) {
		case PTS_PPPOE:
			/*
			 * Strip address and control fields if present.
			 *
			 * NOTE(review): b_rptr[0] is read without first
			 * checking that the leading block is non-empty
			 * -- confirm that callers guarantee at least
			 * one byte.
			 */
			if (mp->b_rptr[0] == 0xFF) {
				if (MBLKL(mp) < 3) {
					/*
					 * encmb is only a temporary
					 * here; note that it is left
					 * equal to mp afterwards.
					 */
					encmb = msgpullup(mp, 3);
					freemsg(mp);
					if ((mp = encmb) == NULL)
						break;
				}
				mp->b_rptr += 2;
			}
			/*
			 * Broadcasting data is probably not a good idea.
			 *
			 * NOTE(review): if the pull-up above ran, encmb
			 * still equals mp at this break, so the failure
			 * test after the switch does not trigger and
			 * the message continues without a DLPI header
			 * -- confirm that this is intended.
			 */
			if (tcl->tcl_address.pta_pppoe.ptma_mac[0] & 1)
				break;
			encmb = dlpi_alloc(sizeof (*dur) + sizeof (*edestp),
			    DL_UNITDATA_REQ);
			if (encmb == NULL)
				break;

			/* The destination address directly follows the header. */
			dur = (dl_unitdata_req_t *)encmb->b_rptr;
			dur->dl_dest_addr_length = sizeof (*edestp);
			dur->dl_dest_addr_offset = sizeof (*dur);
			edestp = (ether_dest_t *)(dur + 1);
			ether_copy(tcl->tcl_address.pta_pppoe.ptma_mac,
			    edestp->addr);
			/* DLPI SAPs are in host byte order! */
			edestp->type = tll->tll_sap;

			/*
			 * Make sure the protocol field isn't compressed.
			 * An odd first byte means a one-byte protocol
			 * field; reserve one extra byte for a leading
			 * zero in that case.
			 */
			len = (*mp->b_rptr & 1);
			mp = prependb(mp, sizeof (*poep) + len, POE_HDR_ALIGN);
			if (mp == NULL)
				break;
			poep = (poep_t *)mp->b_rptr;
			poep->poep_version_type = POE_VERSION;
			poep->poep_code = POECODE_DATA;
			poep->poep_session_id = htons(tcl->tcl_rsessid);
			/* Length covers everything after the PPPoE header. */
			poep->poep_length = htons(msgsize(mp) -
			    sizeof (*poep));
			if (len > 0)
				*(char *)(poep + 1) = '\0';
			break;

		default:
			ASSERT(0);
		}
	} else {
		/*
		 * Control side encapsulation.
		 */
		if (bcmp(&ptc->ptc_address, &tll->tll_lcladdr, tll->tll_alen)
		    == 0)
			loopup = luSend;
		datamb = mp->b_cont;
		switch (tll->tll_style) {
		case PTS_PPPOE:
			/*
			 * Don't allow a loopback session to establish
			 * itself.  PPPoE is broken; it uses only one
			 * session ID for both data directions, so the
			 * loopback data path can simply never work.
			 *
			 * NOTE(review): the length of datamb is not
			 * checked before poep_code is read -- confirm
			 * that the payload is known to hold a full
			 * PPPoE header.
			 */
			if (loopup == luSend &&
			    ((poep_t *)datamb->b_rptr)->poep_code ==
			    POECODE_PADR)
				break;
			encmb = dlpi_alloc(sizeof (*dur) + sizeof (*edestp),
			    DL_UNITDATA_REQ);
			if (encmb == NULL)
				break;
			dur = (dl_unitdata_req_t *)encmb->b_rptr;
			dur->dl_dest_addr_length = sizeof (*edestp);
			dur->dl_dest_addr_offset = sizeof (*dur);

			edestp = (ether_dest_t *)(dur + 1);
			/* DLPI SAPs are in host byte order! */
			edestp->type = tll->tll_sap;

			/*
			 * If destination isn't set yet, then we have to
			 * allow anything at all.  Otherwise, force use
			 * of configured peer address.  (Daemon streams
			 * may always choose the destination.)
			 */
			if (bcmp(tcl->tcl_address.pta_pppoe.ptma_mac,
			    zero_mac_addr, sizeof (zero_mac_addr)) == 0 ||
			    (tcl->tcl_flags & TCLF_DAEMON)) {
				ether_copy(ptc->ptc_address.pta_pppoe.ptma_mac,
				    edestp->addr);
			} else {
				ether_copy(tcl->tcl_address.pta_pppoe.ptma_mac,
				    edestp->addr);
			}
			/* Reflect multicast/broadcast back up. */
			if (edestp->addr[0] & 1)
				loopup = luCopy;
			break;

		case PTS_PPTP:
			/*
			 * PPTP's control side is actually done over
			 * separate TCP connections.
			 */
		default:
			ASSERT(0);
		}
		/* Done with the control block; only the payload goes on. */
		freeb(mp);
		mp = datamb;
	}
	if (mp == NULL || encmb == NULL) {
		/* Encapsulation failed: free what is left and count it. */
		DTRACE_PROBE1(sppptun__output__failure, tuncl_t *, tcl);
		freemsg(mp);
		freemsg(encmb);
		if (isdata) {
			tcl->tcl_stats.ppp_oerrors++;
		} else {
			KCINCR(cks_octrl_drop);
			KLINCR(lks_octrl_drop);
		}
		lowerq = NULL;
	} else {
		if (isdata) {
			tcl->tcl_stats.ppp_obytes += msgsize(mp);
			tcl->tcl_stats.ppp_opackets++;
		} else {
			KCINCR(cks_octrls);
			KLINCR(lks_octrls);
		}
		/* encmb can equal mp after the pull-up case above. */
		if (encmb != mp)
			encmb->b_cont = mp;
		switch (loopup) {
		case luNone:
			*mpp = encmb;
			break;
		case luCopy:
			/* Best effort: no copy is looped up if copymsg fails. */
			mp = copymsg(encmb);
			if (mp != NULL)
				(void) sppptun_urput(RD(lowerq), mp);
			*mpp = encmb;
			break;
		case luSend:
			/* Looped back only; nothing for the caller to send. */
			(void) sppptun_urput(RD(lowerq), encmb);
			lowerq = NULL;
			break;
		}
	}
	return (lowerq);
}
1053 
1054 /*
1055  * Enqueue a message to be sent when the lower stream is closed.  This
1056  * is done so that we're guaranteed that we always have the necessary
1057  * resources to properly detach ourselves from the system.  (If we
1058  * waited until the close was done to allocate these messages, then
1059  * the message allocation could fail, and we'd be unable to properly
1060  * detach.)
1061  */
1062 static void
1063 save_for_close(tunll_t *tll, mblk_t *mp)
1064 {
1065 	mblk_t *onc;
1066 
1067 	if ((onc = tll->tll_onclose) == NULL)
1068 		tll->tll_onclose = mp;
1069 	else {
1070 		while (onc->b_next != NULL)
1071 			onc = onc->b_next;
1072 		onc->b_next = mp;
1073 	}
1074 }
1075 
1076 /*
1077  * Given the lower stream name, locate the state structure.  Note that
1078  * lookup of tcl pointers (and use of those pointers) is safe because
1079  * modification is done only when exclusive on both inner and outer
1080  * perimeters.
1081  */
1082 static tunll_t *
1083 tll_lookup_on_name(const char *dname, zoneid_t zoneid)
1084 {
1085 	tunll_t *tll;
1086 
1087 	tll = TO_TLL(tunll_list.q_forw);
1088 	for (; tll != TO_TLL(&tunll_list); tll = TO_TLL(tll->tll_next))
1089 		if (tll->tll_zoneid == zoneid &&
1090 		    strcmp(dname, tll->tll_name) == 0)
1091 			return (tll);
1092 	return (NULL);
1093 }
1094 
1095 /*
1096  * sppptun_inner_ioctl()
1097  *
1098  * MT-Perimeters:
1099  *    exclusive inner, shared outer.
1100  *
1101  * Description:
1102  *    Called by qwriter from sppptun_ioctl as the result of receiving
1103  *    a handled ioctl.
1104  */
1105 static void
1106 sppptun_inner_ioctl(queue_t *q, mblk_t *mp)
1107 {
1108 	struct iocblk *iop;
1109 	int rc = 0;
1110 	int len = 0;
1111 	int i;
1112 	tuncl_t *tcl;
1113 	tunll_t *tll;
1114 	union ppptun_name *ptn;
1115 	struct ppptun_info *pti;
1116 	struct ppptun_peer *ptp;
1117 	mblk_t *mptmp;
1118 	ppptun_atype *pap;
1119 	struct ppp_stats64 *psp;
1120 	zoneid_t zoneid;
1121 
1122 	iop = (struct iocblk *)mp->b_rptr;
1123 	tcl = NULL;
1124 	tll = q->q_ptr;
1125 	if (tll->tll_flags & TLLF_NOTLOWER) {
1126 		tcl = (tuncl_t *)tll;
1127 		tll = NULL;
1128 	}
1129 
1130 	DTRACE_PROBE3(sppptun__ioctl, tuncl_t *, tcl, tunll_t *, tll,
1131 	    struct iocblk *, iop);
1132 
1133 	switch (iop->ioc_cmd) {
1134 	case PPPIO_DEBUG:
1135 		/*
1136 		 * Debug requests are now ignored; use dtrace or wireshark
1137 		 * instead.
1138 		 */
1139 		break;
1140 
1141 	case PPPIO_GETSTAT:
1142 		rc = EINVAL;
1143 		break;
1144 
1145 	case PPPIO_GETSTAT64:
1146 		/* Client (device) side only */
1147 		if (tcl == NULL) {
1148 			rc = EINVAL;
1149 			break;
1150 		}
1151 		mptmp = allocb(sizeof (*psp), BPRI_HI);
1152 		if (mptmp == NULL) {
1153 			rc = ENOSR;
1154 			break;
1155 		}
1156 		freemsg(mp->b_cont);
1157 		mp->b_cont = mptmp;
1158 
1159 		psp = (struct ppp_stats64 *)mptmp->b_wptr;
1160 		bzero((caddr_t)psp, sizeof (*psp));
1161 		psp->p = tcl->tcl_stats;
1162 
1163 		len = sizeof (*psp);
1164 		break;
1165 
1166 	case PPPTUN_SNAME:
1167 		/* This is done on the *module* (lower level) side. */
1168 		if (tll == NULL || mp->b_cont == NULL ||
1169 		    iop->ioc_count != sizeof (*ptn) ||
1170 		    *mp->b_cont->b_rptr == '\0') {
1171 			rc = EINVAL;
1172 			break;
1173 		}
1174 
1175 		ptn = (union ppptun_name *)mp->b_cont->b_rptr;
1176 		ptn->ptn_name[sizeof (ptn->ptn_name) - 1] = '\0';
1177 
1178 		tll = tll_lookup_on_name(ptn->ptn_name, tll->tll_zoneid);
1179 		if (tll != NULL) {
1180 			rc = EEXIST;
1181 			break;
1182 		}
1183 		tll = (tunll_t *)q->q_ptr;
1184 		(void) strcpy(tll->tll_name, ptn->ptn_name);
1185 		break;
1186 
1187 	case PPPTUN_SINFO:
1188 	case PPPTUN_GINFO:
1189 		/* Either side */
1190 		if (mp->b_cont == NULL || iop->ioc_count != sizeof (*pti)) {
1191 			rc = EINVAL;
1192 			break;
1193 		}
1194 		pti = (struct ppptun_info *)mp->b_cont->b_rptr;
1195 		if (pti->pti_name[0] != '\0')
1196 			tll = tll_lookup_on_name(pti->pti_name,
1197 			    tcl == NULL ? tll->tll_zoneid : tcl->tcl_zoneid);
1198 		if (tll == NULL) {
1199 			/* Driver (client) side must have name */
1200 			if (tcl != NULL && pti->pti_name[0] == '\0')
1201 				rc = EINVAL;
1202 			else
1203 				rc = ESRCH;
1204 			break;
1205 		}
1206 		if (iop->ioc_cmd == PPPTUN_GINFO) {
1207 			pti->pti_muxid = tll->tll_muxid;
1208 			pti->pti_style = tll->tll_style;
1209 			len = sizeof (*pti);
1210 			break;
1211 		}
1212 		tll->tll_muxid = pti->pti_muxid;
1213 		tll->tll_style = pti->pti_style;
1214 		switch (tll->tll_style) {
1215 		case PTS_PPPOE:		/* DLPI type */
1216 			tll->tll_alen = sizeof (tll->tll_lcladdr.pta_pppoe);
1217 			mptmp = dlpi_alloc(sizeof (dl_unbind_req_t),
1218 			    DL_UNBIND_REQ);
1219 			if (mptmp == NULL) {
1220 				rc = ENOSR;
1221 				break;
1222 			}
1223 			save_for_close(tll, mptmp);
1224 			mptmp = dlpi_alloc(sizeof (dl_detach_req_t),
1225 			    DL_DETACH_REQ);
1226 			if (mptmp == NULL) {
1227 				rc = ENOSR;
1228 				break;
1229 			}
1230 			save_for_close(tll, mptmp);
1231 			break;
1232 		default:
1233 			tll->tll_style = PTS_NONE;
1234 			tll->tll_alen = 0;
1235 			rc = EINVAL;
1236 			break;
1237 		}
1238 		break;
1239 
1240 	case PPPTUN_GNNAME:
1241 		/* This can be done on either side. */
1242 		if (mp->b_cont == NULL || iop->ioc_count < sizeof (uint32_t)) {
1243 			rc = EINVAL;
1244 			break;
1245 		}
1246 		zoneid = tcl == NULL ? tll->tll_zoneid : tcl->tcl_zoneid;
1247 		ptn = (union ppptun_name *)mp->b_cont->b_rptr;
1248 		i = ptn->ptn_index;
1249 		tll = TO_TLL(tunll_list.q_forw);
1250 		while (tll != TO_TLL(&tunll_list)) {
1251 			if (tll->tll_zoneid == zoneid && --i < 0)
1252 				break;
1253 			tll = TO_TLL(tll->tll_next);
1254 		}
1255 		if (tll != TO_TLL(&tunll_list)) {
1256 			bcopy(tll->tll_name, ptn->ptn_name,
1257 			    sizeof (ptn->ptn_name));
1258 		} else {
1259 			bzero(ptn, sizeof (*ptn));
1260 		}
1261 		len = sizeof (*ptn);
1262 		break;
1263 
1264 	case PPPTUN_LCLADDR:
1265 		/* This is done on the *module* (lower level) side. */
1266 		if (tll == NULL || mp->b_cont == NULL) {
1267 			rc = EINVAL;
1268 			break;
1269 		}
1270 
1271 		pap = &tll->tll_lcladdr;
1272 		len = tll->tll_alen;
1273 		if (len == 0 || len > iop->ioc_count) {
1274 			rc = EINVAL;
1275 			break;
1276 		}
1277 		bcopy(mp->b_cont->b_rptr, pap, len);
1278 		len = 0;
1279 		break;
1280 
1281 	case PPPTUN_SPEER:
1282 		/* Client (device) side only; before SDATA */
1283 		if (tcl == NULL || mp->b_cont == NULL ||
1284 		    iop->ioc_count != sizeof (*ptp)) {
1285 			rc = EINVAL;
1286 			break;
1287 		}
1288 		if (tcl->tcl_data_tll != NULL) {
1289 			rc = EINVAL;
1290 			break;
1291 		}
1292 		ptp = (struct ppptun_peer *)mp->b_cont->b_rptr;
1293 		DTRACE_PROBE2(sppptun__speer, tuncl_t *, tcl,
1294 		    struct ppptun_peer *, ptp);
1295 		/* Once set, the style cannot change. */
1296 		if (tcl->tcl_style != PTS_NONE &&
1297 		    tcl->tcl_style != ptp->ptp_style) {
1298 			rc = EINVAL;
1299 			break;
1300 		}
1301 		if (ptp->ptp_flags & PTPF_DAEMON) {
1302 			/* User requests registration for tunnel 0 */
1303 			if ((tcl->tcl_flags & TCLF_SPEER_DONE) ||
1304 			    ptp->ptp_ltunid != 0 || ptp->ptp_rtunid != 0 ||
1305 			    ptp->ptp_lsessid != 0 || ptp->ptp_rsessid != 0) {
1306 				rc = EINVAL;
1307 				break;
1308 			}
1309 			tcl->tcl_flags |= TCLF_DAEMON;
1310 		} else {
1311 			/* Normal client connection */
1312 			if (tcl->tcl_flags & TCLF_DAEMON) {
1313 				rc = EINVAL;
1314 				break;
1315 			}
1316 			if (ptp->ptp_lsessid != 0 &&
1317 			    ptp->ptp_lsessid != tcl->tcl_lsessid) {
1318 				rc = EINVAL;
1319 				break;
1320 			}
1321 			/*
1322 			 * If we're reassigning the peer data, then
1323 			 * the previous assignment must have been for
1324 			 * a client control connection.  Check that.
1325 			 */
1326 			if ((tcl->tcl_flags & TCLF_SPEER_DONE) &&
1327 			    ((tcl->tcl_ltunid != 0 &&
1328 			    tcl->tcl_ltunid != ptp->ptp_ltunid) ||
1329 			    (tcl->tcl_rtunid != 0 &&
1330 			    tcl->tcl_rtunid != ptp->ptp_rtunid) ||
1331 			    (tcl->tcl_rsessid != 0 &&
1332 			    tcl->tcl_rsessid != ptp->ptp_rsessid))) {
1333 				rc = EINVAL;
1334 				break;
1335 			}
1336 			if ((tcl->tcl_ltunid = ptp->ptp_ltunid) == 0 &&
1337 			    tcl->tcl_style == PTS_L2FTP)
1338 				tcl->tcl_ltunid = ptp->ptp_lsessid;
1339 			tcl->tcl_rtunid = ptp->ptp_rtunid;
1340 			tcl->tcl_rsessid = ptp->ptp_rsessid;
1341 		}
1342 		tcl->tcl_flags |= TCLF_SPEER_DONE;
1343 		tcl->tcl_style = ptp->ptp_style;
1344 		tcl->tcl_address = ptp->ptp_address;
1345 		goto fill_in_peer;
1346 
1347 	case PPPTUN_GPEER:
1348 		/* Client (device) side only */
1349 		if (tcl == NULL) {
1350 			rc = EINVAL;
1351 			break;
1352 		}
1353 		if (mp->b_cont != NULL)
1354 			freemsg(mp->b_cont);
1355 		mp->b_cont = allocb(sizeof (*ptp), BPRI_HI);
1356 		if (mp->b_cont == NULL) {
1357 			rc = ENOSR;
1358 			break;
1359 		}
1360 		ptp = (struct ppptun_peer *)mp->b_cont->b_rptr;
1361 	fill_in_peer:
1362 		ptp->ptp_style = tcl->tcl_style;
1363 		ptp->ptp_flags = (tcl->tcl_flags & TCLF_DAEMON) ? PTPF_DAEMON :
1364 		    0;
1365 		ptp->ptp_ltunid = tcl->tcl_ltunid;
1366 		ptp->ptp_rtunid = tcl->tcl_rtunid;
1367 		ptp->ptp_lsessid = tcl->tcl_lsessid;
1368 		ptp->ptp_rsessid = tcl->tcl_rsessid;
1369 		ptp->ptp_address = tcl->tcl_address;
1370 		len = sizeof (*ptp);
1371 		break;
1372 
1373 	case PPPTUN_SDATA:
1374 	case PPPTUN_SCTL:
1375 		/* Client (device) side only; must do SPEER first */
1376 		if (tcl == NULL || mp->b_cont == NULL ||
1377 		    iop->ioc_count != sizeof (*ptn) ||
1378 		    *mp->b_cont->b_rptr == '\0') {
1379 			rc = EINVAL;
1380 			break;
1381 		}
1382 		if (!(tcl->tcl_flags & TCLF_SPEER_DONE)) {
1383 			rc = EINVAL;
1384 			break;
1385 		}
1386 		ptn = (union ppptun_name *)mp->b_cont->b_rptr;
1387 		ptn->ptn_name[sizeof (ptn->ptn_name) - 1] = '\0';
1388 		tll = tll_lookup_on_name(ptn->ptn_name, tcl->tcl_zoneid);
1389 		if (tll == NULL) {
1390 			rc = ESRCH;
1391 			break;
1392 		}
1393 		if (tll->tll_style != tcl->tcl_style) {
1394 			rc = ENXIO;
1395 			break;
1396 		}
1397 		if (iop->ioc_cmd == PPPTUN_SDATA) {
1398 			if (tcl->tcl_data_tll != NULL) {
1399 				rc = EEXIST;
1400 				break;
1401 			}
1402 			/* server daemons cannot use regular data */
1403 			if (tcl->tcl_flags & TCLF_DAEMON) {
1404 				rc = EINVAL;
1405 				break;
1406 			}
1407 			tcl->tcl_data_tll = tll;
1408 		} else if (tcl->tcl_flags & TCLF_DAEMON) {
1409 			if (tll->tll_defcl != NULL && tll->tll_defcl != tcl) {
1410 				rc = EEXIST;
1411 				break;
1412 			}
1413 			tll->tll_defcl = tcl;
1414 			if (tcl->tcl_ctrl_tll != NULL) {
1415 				KDECR(tcl->tcl_ctrl_tll, tll_kstats,
1416 				    lks_clients);
1417 			}
1418 			tcl->tcl_ctrl_tll = tll;
1419 		} else {
1420 			if (tcl->tcl_ctrl_tll != NULL) {
1421 				rc = EEXIST;
1422 				break;
1423 			}
1424 			tcl->tcl_ctrl_tll = tll;
1425 		}
1426 		KLINCR(lks_clients);
1427 		break;
1428 
1429 	case PPPTUN_GDATA:
1430 	case PPPTUN_GCTL:
1431 		/* Client (device) side only */
1432 		if (tcl == NULL) {
1433 			rc = EINVAL;
1434 			break;
1435 		}
1436 		if (mp->b_cont != NULL)
1437 			freemsg(mp->b_cont);
1438 		mp->b_cont = allocb(sizeof (*ptn), BPRI_HI);
1439 		if (mp->b_cont == NULL) {
1440 			rc = ENOSR;
1441 			break;
1442 		}
1443 		ptn = (union ppptun_name *)mp->b_cont->b_rptr;
1444 		if (iop->ioc_cmd == PPPTUN_GDATA)
1445 			tll = tcl->tcl_data_tll;
1446 		else
1447 			tll = tcl->tcl_ctrl_tll;
1448 		if (tll == NULL)
1449 			bzero(ptn, sizeof (*ptn));
1450 		else
1451 			bcopy(tll->tll_name, ptn->ptn_name,
1452 			    sizeof (ptn->ptn_name));
1453 		len = sizeof (*ptn);
1454 		break;
1455 
1456 	case PPPTUN_DCTL:
1457 		/* Client (device) side daemon mode only */
1458 		if (tcl == NULL || mp->b_cont == NULL ||
1459 		    iop->ioc_count != sizeof (*ptn) ||
1460 		    !(tcl->tcl_flags & TCLF_DAEMON)) {
1461 			rc = EINVAL;
1462 			break;
1463 		}
1464 		ptn = (union ppptun_name *)mp->b_cont->b_rptr;
1465 		ptn->ptn_name[sizeof (ptn->ptn_name) - 1] = '\0';
1466 		tll = tll_lookup_on_name(ptn->ptn_name, tcl->tcl_zoneid);
1467 		if (tll == NULL || tll->tll_defcl != tcl) {
1468 			rc = ESRCH;
1469 			break;
1470 		}
1471 		tll->tll_defcl = NULL;
1472 		break;
1473 
1474 	case PPPTUN_SSAP:
1475 		/* This is done on the *module* (lower level) side. */
1476 		if (tll == NULL || mp->b_cont == NULL ||
1477 		    iop->ioc_count != sizeof (uint_t)) {
1478 			rc = EINVAL;
1479 			break;
1480 		}
1481 
1482 		tll->tll_sap = *(uint_t *)mp->b_cont->b_rptr;
1483 		break;
1484 
1485 	default:
1486 		/* Caller should already have checked command value */
1487 		ASSERT(0);
1488 	}
1489 	if (rc != 0) {
1490 		miocnak(q, mp, 0, rc);
1491 	} else {
1492 		if (len > 0)
1493 			mp->b_cont->b_wptr = mp->b_cont->b_rptr + len;
1494 		miocack(q, mp, len, 0);
1495 	}
1496 }
1497 
1498 /*
1499  * sppptun_ioctl()
1500  *
1501  * MT-Perimeters:
1502  *    shared inner, shared outer.
1503  *
1504  * Description:
1505  *    Called by sppptun_uwput as the result of receiving a M_IOCTL command.
1506  */
1507 static void
1508 sppptun_ioctl(queue_t *q, mblk_t *mp)
1509 {
1510 	struct iocblk *iop;
1511 	int rc = 0;
1512 	int len = 0;
1513 	uint32_t val = 0;
1514 	tunll_t *tll;
1515 
1516 	iop = (struct iocblk *)mp->b_rptr;
1517 
1518 	switch (iop->ioc_cmd) {
1519 	case PPPIO_DEBUG:
1520 	case PPPIO_GETSTAT:
1521 	case PPPIO_GETSTAT64:
1522 	case PPPTUN_SNAME:
1523 	case PPPTUN_SINFO:
1524 	case PPPTUN_GINFO:
1525 	case PPPTUN_GNNAME:
1526 	case PPPTUN_LCLADDR:
1527 	case PPPTUN_SPEER:
1528 	case PPPTUN_GPEER:
1529 	case PPPTUN_SDATA:
1530 	case PPPTUN_GDATA:
1531 	case PPPTUN_SCTL:
1532 	case PPPTUN_GCTL:
1533 	case PPPTUN_DCTL:
1534 	case PPPTUN_SSAP:
1535 		qwriter(q, mp, sppptun_inner_ioctl, PERIM_INNER);
1536 		return;
1537 
1538 	case PPPIO_GCLEAN:	/* always clean */
1539 		val = RCV_B7_1 | RCV_B7_0 | RCV_ODDP | RCV_EVNP;
1540 		len = sizeof (uint32_t);
1541 		break;
1542 
1543 	case PPPIO_GTYPE:	/* we look like an async driver. */
1544 		val = PPPTYP_AHDLC;
1545 		len = sizeof (uint32_t);
1546 		break;
1547 
1548 	case PPPIO_CFLAGS:	/* never compress headers */
1549 		val = 0;
1550 		len = sizeof (uint32_t);
1551 		break;
1552 
1553 		/* quietly ack PPP things we don't need to do. */
1554 	case PPPIO_XFCS:
1555 	case PPPIO_RFCS:
1556 	case PPPIO_XACCM:
1557 	case PPPIO_RACCM:
1558 	case PPPIO_LASTMOD:
1559 	case PPPIO_MUX:
1560 	case I_PLINK:
1561 	case I_PUNLINK:
1562 	case I_LINK:
1563 	case I_UNLINK:
1564 		break;
1565 
1566 	default:
1567 		tll = (tunll_t *)q->q_ptr;
1568 		if (!(tll->tll_flags & TLLF_NOTLOWER)) {
1569 			/* module side; pass this through. */
1570 			putnext(q, mp);
1571 			return;
1572 		}
1573 		rc = EINVAL;
1574 		break;
1575 	}
1576 	if (rc == 0 && len == sizeof (uint32_t)) {
1577 		if (mp->b_cont != NULL)
1578 			freemsg(mp->b_cont);
1579 		mp->b_cont = allocb(sizeof (uint32_t), BPRI_HI);
1580 		if (mp->b_cont == NULL) {
1581 			rc = ENOSR;
1582 		} else {
1583 			*(uint32_t *)mp->b_cont->b_wptr = val;
1584 			mp->b_cont->b_wptr += sizeof (uint32_t);
1585 		}
1586 	}
1587 	if (rc == 0) {
1588 		miocack(q, mp, len, 0);
1589 	} else {
1590 		miocnak(q, mp, 0, rc);
1591 	}
1592 }
1593 
1594 /*
1595  * sppptun_inner_mctl()
1596  *
1597  * MT-Perimeters:
1598  *    exclusive inner, shared outer.
1599  *
1600  * Description:
1601  *    Called by qwriter (via sppptun_uwput) as the result of receiving
1602  *    an M_CTL.  Called only on the client (driver) side.
1603  */
1604 static void
1605 sppptun_inner_mctl(queue_t *q, mblk_t *mp)
1606 {
1607 	int msglen;
1608 	tuncl_t *tcl;
1609 
1610 	tcl = q->q_ptr;
1611 
1612 	if (!(tcl->tcl_flags & TCLF_ISCLIENT)) {
1613 		freemsg(mp);
1614 		return;
1615 	}
1616 
1617 	msglen = MBLKL(mp);
1618 	switch (*mp->b_rptr) {
1619 	case PPPCTL_UNIT:
1620 		if (msglen == 2)
1621 			tcl->tcl_unit = mp->b_rptr[1];
1622 		else if (msglen == 8)
1623 			tcl->tcl_unit = ((uint32_t *)mp->b_rptr)[1];
1624 		break;
1625 	}
1626 	freemsg(mp);
1627 }
1628 
1629 /*
1630  * sppptun_uwput()
1631  *
1632  * MT-Perimeters:
1633  *    shared inner, shared outer.
1634  *
1635  * Description:
1636  *	Regular output data and controls pass through here.
1637  */
1638 static int
1639 sppptun_uwput(queue_t *q, mblk_t *mp)
1640 {
1641 	queue_t *nextq;
1642 	tuncl_t *tcl;
1643 
1644 	ASSERT(q->q_ptr != NULL);
1645 
1646 	switch (MTYPE(mp)) {
1647 	case M_DATA:
1648 	case M_PROTO:
1649 	case M_PCPROTO:
1650 		if (q->q_first == NULL &&
1651 		    (nextq = sppptun_outpkt(q, &mp)) != NULL) {
1652 			putnext(nextq, mp);
1653 		} else if (mp != NULL && !putq(q, mp)) {
1654 			freemsg(mp);
1655 		}
1656 		break;
1657 	case M_IOCTL:
1658 		sppptun_ioctl(q, mp);
1659 		break;
1660 	case M_CTL:
1661 		qwriter(q, mp, sppptun_inner_mctl, PERIM_INNER);
1662 		break;
1663 	default:
1664 		tcl = (tuncl_t *)q->q_ptr;
1665 		/*
1666 		 * If we're the driver, then discard unknown junk.
1667 		 * Otherwise, if we're the module, then forward along.
1668 		 */
1669 		if (tcl->tcl_flags & TCLF_ISCLIENT)
1670 			freemsg(mp);
1671 		else
1672 			putnext(q, mp);
1673 		break;
1674 	}
1675 	return (0);
1676 }
1677 
1678 /*
1679  * Send a DLPI/TPI control message to the driver but make sure there
1680  * is only one outstanding message.  Uses tll_msg_pending to tell when
1681  * it must queue.  sppptun_urput calls message_done() when an ACK or a
1682  * NAK is received to process the next queued message.
1683  */
1684 static void
1685 message_send(tunll_t *tll, mblk_t *mp)
1686 {
1687 	mblk_t **mpp;
1688 
1689 	if (tll->tll_msg_pending) {
1690 		/* Must queue message. Tail insertion */
1691 		mpp = &tll->tll_msg_deferred;
1692 		while (*mpp != NULL)
1693 			mpp = &((*mpp)->b_next);
1694 		*mpp = mp;
1695 		return;
1696 	}
1697 	tll->tll_msg_pending = 1;
1698 	putnext(tll->tll_wq, mp);
1699 }
1700 
1701 /*
1702  * Called when an DLPI/TPI control message has been acked or nacked to
1703  * send down the next queued message (if any).
1704  */
1705 static void
1706 message_done(tunll_t *tll)
1707 {
1708 	mblk_t *mp;
1709 
1710 	ASSERT(tll->tll_msg_pending);
1711 	tll->tll_msg_pending = 0;
1712 	mp = tll->tll_msg_deferred;
1713 	if (mp != NULL) {
1714 		tll->tll_msg_deferred = mp->b_next;
1715 		mp->b_next = NULL;
1716 		tll->tll_msg_pending = 1;
1717 		putnext(tll->tll_wq, mp);
1718 	}
1719 }
1720 
1721 /*
1722  * Send down queued "close" messages to lower stream.  These were
1723  * enqueued right after the stream was originally allocated, when the
1724  * tll_style was set by PPPTUN_SINFO.
1725  */
1726 static int
1727 tll_close_req(tunll_t *tll)
1728 {
1729 	mblk_t *mb, *mbnext;
1730 
1731 	if ((mb = tll->tll_onclose) == NULL)
1732 		tll->tll_flags |= TLLF_SHUTDOWN_DONE;
1733 	else {
1734 		tll->tll_onclose = NULL;
1735 		while (mb != NULL) {
1736 			mbnext = mb->b_next;
1737 			mb->b_next = NULL;
1738 			message_send(tll, mb);
1739 			mb = mbnext;
1740 		}
1741 	}
1742 	return (0);
1743 }
1744 
1745 /*
1746  * This function is called when a backenable occurs on the write side of a
1747  * lower stream.  It walks over the client streams, looking for ones that use
1748  * the given tunll_t lower stream.  Each client is then backenabled.
1749  */
1750 static void
1751 tclvm_backenable(void *arg, void *firstv, size_t numv)
1752 {
1753 	tunll_t *tll = arg;
1754 	int minorn = (int)(uintptr_t)firstv;
1755 	int minormax = minorn + numv;
1756 	tuncl_t *tcl;
1757 	queue_t *q;
1758 
1759 	while (minorn < minormax) {
1760 		tcl = tcl_slots[minorn - 1];
1761 		if ((tcl->tcl_data_tll == tll ||
1762 		    tcl->tcl_ctrl_tll == tll) &&
1763 		    (q = tcl->tcl_rq) != NULL) {
1764 			qenable(OTHERQ(q));
1765 		}
1766 		minorn++;
1767 	}
1768 }
1769 
1770 /*
1771  * sppptun_uwsrv()
1772  *
1773  * MT-Perimeters:
1774  *    exclusive inner, shared outer.
1775  *
1776  * Description:
1777  *    Upper write-side service procedure.  In addition to the usual
1778  *    STREAMS queue service handling, this routine also handles the
1779  *    transmission of the unbind/detach messages to the lower stream
1780  *    driver when a lower stream is being closed.  (See the use of
1781  *    qenable/qwait in sppptun_close().)
1782  */
1783 static int
1784 sppptun_uwsrv(queue_t *q)
1785 {
1786 	tuncl_t	*tcl;
1787 	mblk_t *mp;
1788 	queue_t *nextq;
1789 
1790 	tcl = q->q_ptr;
1791 	if (!(tcl->tcl_flags & TCLF_ISCLIENT)) {
1792 		tunll_t *tll = (tunll_t *)tcl;
1793 
1794 		if ((tll->tll_flags & (TLLF_CLOSING|TLLF_CLOSE_DONE)) ==
1795 		    TLLF_CLOSING) {
1796 			tll->tll_error = tll_close_req(tll);
1797 			tll->tll_flags |= TLLF_CLOSE_DONE;
1798 		} else {
1799 			/*
1800 			 * We've been enabled here because of a backenable on
1801 			 * output flow control.  Backenable clients using this
1802 			 * lower layer.
1803 			 */
1804 			vmem_walk(tcl_minor_arena, VMEM_ALLOC, tclvm_backenable,
1805 			    tll);
1806 		}
1807 		return (0);
1808 	}
1809 
1810 	while ((mp = getq(q)) != NULL) {
1811 		if ((nextq = sppptun_outpkt(q, &mp)) != NULL) {
1812 			putnext(nextq, mp);
1813 		} else if (mp != NULL) {
1814 			(void) putbq(q, mp);
1815 			break;
1816 		}
1817 	}
1818 	return (0);
1819 }
1820 
1821 /*
1822  * sppptun_lwput()
1823  *
1824  * MT-Perimeters:
1825  *    shared inner, shared outer.
1826  *
1827  * Description:
1828  *    Lower write-side put procedure.  Nothing should be sending
1829  *    packets down this stream.
1830  */
1831 static int
1832 sppptun_lwput(queue_t *q, mblk_t *mp)
1833 {
1834 	switch (MTYPE(mp)) {
1835 	case M_PROTO:
1836 		putnext(q, mp);
1837 		break;
1838 	default:
1839 		freemsg(mp);
1840 		break;
1841 	}
1842 	return (0);
1843 }
1844 
1845 /*
1846  * sppptun_lrput()
1847  *
1848  * MT-Perimeters:
1849  *    shared inner, shared outer.
1850  *
1851  * Description:
1852  *    Lower read-side put procedure.  Nothing should arrive here.
1853  */
1854 static int
1855 sppptun_lrput(queue_t *q, mblk_t *mp)
1856 {
1857 	tuncl_t *tcl;
1858 
1859 	switch (MTYPE(mp)) {
1860 	case M_IOCTL:
1861 		miocnak(q, mp, 0, EINVAL);
1862 		return (0);
1863 	case M_FLUSH:
1864 		if (*mp->b_rptr & FLUSHR) {
1865 			flushq(q, FLUSHDATA);
1866 		}
1867 		if (*mp->b_rptr & FLUSHW) {
1868 			*mp->b_rptr &= ~FLUSHR;
1869 			qreply(q, mp);
1870 		} else {
1871 			freemsg(mp);
1872 		}
1873 		return (0);
1874 	}
1875 	/*
1876 	 * Try to forward the message to the put procedure for the upper
1877 	 * control stream for this lower stream. If there are already messages
1878 	 * queued here, queue this one up to preserve message ordering.
1879 	 */
1880 	if ((tcl = (tuncl_t *)q->q_ptr) == NULL || tcl->tcl_rq == NULL) {
1881 		freemsg(mp);
1882 		return (0);
1883 	}
1884 	if (queclass(mp) == QPCTL ||
1885 	    (q->q_first == NULL && canput(tcl->tcl_rq))) {
1886 		put(tcl->tcl_rq, mp);
1887 	} else {
1888 		if (!putq(q, mp))
1889 			freemsg(mp);
1890 	}
1891 	return (0);
1892 }
1893 
1894 /*
1895  * MT-Perimeters:
1896  *    shared inner, shared outer.
1897  *
1898  *    Handle non-data DLPI messages.  Used with PPPoE, which runs over
1899  *    Ethernet only.
1900  */
1901 static void
1902 urput_dlpi(queue_t *q, mblk_t *mp)
1903 {
1904 	int err;
1905 	union DL_primitives *dlp = (union DL_primitives *)mp->b_rptr;
1906 	tunll_t *tll = q->q_ptr;
1907 	size_t mlen = MBLKL(mp);
1908 
1909 	switch (dlp->dl_primitive) {
1910 	case DL_UDERROR_IND:
1911 		break;
1912 
1913 	case DL_ERROR_ACK:
1914 		if (mlen < DL_ERROR_ACK_SIZE)
1915 			break;
1916 		err = dlp->error_ack.dl_unix_errno ?
1917 		    dlp->error_ack.dl_unix_errno : ENXIO;
1918 		switch (dlp->error_ack.dl_error_primitive) {
1919 		case DL_UNBIND_REQ:
1920 			message_done(tll);
1921 			break;
1922 		case DL_DETACH_REQ:
1923 			message_done(tll);
1924 			tll->tll_error = err;
1925 			tll->tll_flags |= TLLF_SHUTDOWN_DONE;
1926 			break;
1927 		case DL_PHYS_ADDR_REQ:
1928 			message_done(tll);
1929 			break;
1930 		case DL_INFO_REQ:
1931 		case DL_ATTACH_REQ:
1932 		case DL_BIND_REQ:
1933 			message_done(tll);
1934 			tll->tll_error = err;
1935 			break;
1936 		}
1937 		break;
1938 
1939 	case DL_INFO_ACK:
1940 		message_done(tll);
1941 		break;
1942 
1943 	case DL_BIND_ACK:
1944 		message_done(tll);
1945 		break;
1946 
1947 	case DL_PHYS_ADDR_ACK:
1948 		break;
1949 
1950 	case DL_OK_ACK:
1951 		if (mlen < DL_OK_ACK_SIZE)
1952 			break;
1953 		switch (dlp->ok_ack.dl_correct_primitive) {
1954 		case DL_UNBIND_REQ:
1955 			message_done(tll);
1956 			break;
1957 		case DL_DETACH_REQ:
1958 			tll->tll_flags |= TLLF_SHUTDOWN_DONE;
1959 			break;
1960 		case DL_ATTACH_REQ:
1961 			message_done(tll);
1962 			break;
1963 		}
1964 		break;
1965 	}
1966 	freemsg(mp);
1967 }
1968 
/*
 * Search structure used with PPPoE only; see tclvm_pppoe_search().
 * The caller fills in the search key and sets tcl to NULL; the walker
 * stores the first matching client in tcl.
 */
struct poedat {
	uint_t sessid;		/* session ID to match against tcl_rsessid */
	tunll_t *tll;		/* lower stream the client must be using */
	const void *srcaddr;	/* peer MAC address to match */
	int isdata;		/* non-zero: match data tll; else control */
	tuncl_t *tcl;		/* result: matching client, or NULL */
};
1977 
1978 /*
1979  * This function is called by vmem_walk from within sppptun_recv.  It
1980  * iterates over a span of allocated minor node numbers to search for
1981  * the appropriate lower stream, session ID, and peer MAC address.
1982  *
1983  * (This is necessary due to a design flaw in the PPPoE protocol
1984  * itself.  The protocol assigns session IDs from the server side
1985  * only.  Both server and client use the same number.  Thus, if there
1986  * are multiple clients on a single host, there can be session ID
1987  * conflicts between servers and there's no way to detangle them
1988  * except by looking at the remote MAC address.)
1989  *
1990  * (This could have been handled by linking together sessions that
1991  * differ only in the remote MAC address.  This isn't done because it
1992  * would involve extra per-session storage and it's very unlikely that
1993  * PPPoE would be used this way.)
1994  */
1995 static void
1996 tclvm_pppoe_search(void *arg, void *firstv, size_t numv)
1997 {
1998 	struct poedat *poedat = (struct poedat *)arg;
1999 	int minorn = (int)(uintptr_t)firstv;
2000 	int minormax = minorn + numv;
2001 	tuncl_t *tcl;
2002 
2003 	if (poedat->tcl != NULL)
2004 		return;
2005 	while (minorn < minormax) {
2006 		tcl = tcl_slots[minorn - 1];
2007 		ASSERT(tcl != NULL);
2008 		if (tcl->tcl_rsessid == poedat->sessid &&
2009 		    ((!poedat->isdata && tcl->tcl_ctrl_tll == poedat->tll) ||
2010 		    (poedat->isdata && tcl->tcl_data_tll == poedat->tll)) &&
2011 		    bcmp(tcl->tcl_address.pta_pppoe.ptma_mac,
2012 		    poedat->srcaddr,
2013 		    sizeof (tcl->tcl_address.pta_pppoe.ptma_mac)) == 0) {
2014 			poedat->tcl = tcl;
2015 			break;
2016 		}
2017 		minorn++;
2018 	}
2019 }
2020 
2021 /*
2022  * sppptun_recv()
2023  *
2024  * MT-Perimeters:
2025  *    shared inner, shared outer.
2026  *
2027  * Description:
2028  *    Receive function called by sppptun_urput, which is called when
2029  *    the lower read-side put or service procedure sends a message
2030  *    upstream to a device user (PPP).  It attempts to find an
2031  *    appropriate queue on the module above us (depending on what the
2032  *    associated upper stream for the protocol would be), and if not
2033  *    possible, it will find an upper control stream for the protocol.
2034  *    Returns a pointer to the upper queue_t, or NULL if the message
2035  *    has been discarded.
2036  *
2037  * About demultiplexing:
2038  *
2039  *	All four protocols (L2F, PPTP, L2TP, and PPPoE) support a
2040  *	locally assigned ID for demultiplexing incoming traffic.  For
2041  *	L2F, this is called the Client ID, for PPTP the Call ID, for
2042  *	L2TP the Session ID, and for PPPoE the SESSION_ID.  This is a
2043  *	16 bit number for all four protocols, and is used to directly
2044  *	index into a list of upper streams.  With the upper stream in
2045  *	hand, we verify that this is the right stream and deliver the
2046  *	data.
2047  *
2048  *	L2TP has a Tunnel ID, which represents a bundle of PPP
2049  *	sessions between the peers.  Because we always assign unique
2050  *	session ID numbers, we merely check that the given ID matches
2051  *	the assigned ID for the upper stream.
2052  *
2053  *	L2F has a Multiplex ID, which is unique per connection.  It
2054  *	does not have L2TP's concept of multiple-connections-within-
2055  *	a-tunnel.  The same checking is done.
2056  *
2057  *	PPPoE is a horribly broken protocol.  Only one ID is assigned
2058  *	per connection.  The client must somehow demultiplex based on
2059  *	an ID number assigned by the server.  It's not necessarily
2060  *	unique.  The search is done based on {ID,peerEthernet} (using
2061  *	tcl_rsessid) for all packet types except PADI and PADS.
2062  *
2063  *	Neither PPPoE nor PPTP supports additional ID numbers.
2064  *
2065  *	Both L2F and L2TP come in over UDP.  They are distinguished by
2066  *	looking at the GRE version field -- 001 for L2F and 010 for
2067  *	L2TP.
2068  */
2069 static queue_t *
2070 sppptun_recv(queue_t *q, mblk_t **mpp, const void *srcaddr)
2071 {
2072 	mblk_t *mp;
2073 	tunll_t *tll;
2074 	tuncl_t *tcl;
2075 	int sessid;
2076 	int remlen;
2077 	int msglen;
2078 	int isdata;
2079 	int i;
2080 	const uchar_t *ucp;
2081 	const poep_t *poep;
2082 	mblk_t *mnew;
2083 	ppptun_atype *pap;
2084 
2085 	mp = *mpp;
2086 
2087 	tll = q->q_ptr;
2088 	ASSERT(!(tll->tll_flags & TLLF_NOTLOWER));
2089 
2090 	tcl = NULL;
2091 	switch (tll->tll_style) {
2092 	case PTS_PPPOE:
2093 		/* Note that poep_t alignment is uint16_t */
2094 		if ((!IS_P2ALIGNED(mp->b_rptr, sizeof (uint16_t)) ||
2095 		    MBLKL(mp) < sizeof (poep_t)) &&
2096 		    !pullupmsg(mp, sizeof (poep_t)))
2097 			break;
2098 		poep = (const poep_t *)mp->b_rptr;
2099 		if (poep->poep_version_type != POE_VERSION)
2100 			break;
2101 		/*
2102 		 * First, extract a session ID number.  All protocols have
2103 		 * this.
2104 		 */
2105 		isdata = (poep->poep_code == POECODE_DATA);
2106 		sessid = ntohs(poep->poep_session_id);
2107 		remlen = sizeof (*poep);
2108 		msglen = ntohs(poep->poep_length);
2109 		i = poep->poep_code;
2110 		if (i == POECODE_PADI || i == POECODE_PADR) {
2111 			/* These go to the server daemon only. */
2112 			tcl = tll->tll_defcl;
2113 		} else if (i == POECODE_PADO || i == POECODE_PADS) {
2114 			/*
2115 			 * These go to a client only, and are demuxed
2116 			 * by the Host-Uniq field (into which we stuff
2117 			 * our local ID number when generating
2118 			 * PADI/PADR).
2119 			 */
2120 			ucp = (const uchar_t *)(poep + 1);
2121 			i = msglen;
2122 			while (i > POET_HDRLEN) {
2123 				if (POET_GET_TYPE(ucp) == POETT_END) {
2124 					i = 0;
2125 					break;
2126 				}
2127 				if (POET_GET_TYPE(ucp) == POETT_UNIQ &&
2128 				    POET_GET_LENG(ucp) >= sizeof (uint32_t))
2129 					break;
2130 				i -= POET_GET_LENG(ucp) + POET_HDRLEN;
2131 				ucp = POET_NEXT(ucp);
2132 			}
2133 			if (i >= POET_HDRLEN + 4)
2134 				sessid = GETLONG(ucp + POET_HDRLEN);
2135 			tcl = tcl_by_minor((minor_t)sessid);
2136 		} else {
2137 			/*
2138 			 * Try minor number as session ID first, since
2139 			 * it's used that way on server side.  It's
2140 			 * not used that way on the client, though, so
2141 			 * this might not work.  If this isn't the
2142 			 * right one, then try the tll cache.  If
2143 			 * neither is right, then search all open
2144 			 * clients.  Did I mention that the PPPoE
2145 			 * protocol is badly designed?
2146 			 */
2147 			tcl = tcl_by_minor((minor_t)sessid);
2148 			if (tcl == NULL ||
2149 			    (!isdata && tcl->tcl_ctrl_tll != tll) ||
2150 			    (isdata && tcl->tcl_data_tll != tll) ||
2151 			    sessid != tcl->tcl_rsessid ||
2152 			    bcmp(srcaddr, tcl->tcl_address.pta_pppoe.ptma_mac,
2153 			    sizeof (tcl->tcl_address.pta_pppoe.ptma_mac)) != 0)
2154 				tcl = tll->tll_lastcl;
2155 			if (tcl == NULL ||
2156 			    (!isdata && tcl->tcl_ctrl_tll != tll) ||
2157 			    (isdata && tcl->tcl_data_tll != tll) ||
2158 			    sessid != tcl->tcl_rsessid ||
2159 			    bcmp(srcaddr, tcl->tcl_address.pta_pppoe.ptma_mac,
2160 			    sizeof (tcl->tcl_address.pta_pppoe.ptma_mac)) != 0)
2161 				tcl = NULL;
2162 			if (tcl == NULL && sessid != 0) {
2163 				struct poedat poedat;
2164 
2165 				/*
2166 				 * Slow mode.  Too bad.  If you don't like it,
2167 				 * you can always choose a better protocol.
2168 				 */
2169 				poedat.sessid = sessid;
2170 				poedat.tll = tll;
2171 				poedat.srcaddr = srcaddr;
2172 				poedat.tcl = NULL;
2173 				poedat.isdata = isdata;
2174 				vmem_walk(tcl_minor_arena, VMEM_ALLOC,
2175 				    tclvm_pppoe_search, &poedat);
2176 				KLINCR(lks_walks);
2177 				if ((tcl = poedat.tcl) != NULL) {
2178 					tll->tll_lastcl = tcl;
2179 					KCINCR(cks_walks);
2180 				}
2181 			}
2182 		}
2183 		break;
2184 	}
2185 
2186 	if (tcl == NULL || tcl->tcl_rq == NULL) {
2187 		DTRACE_PROBE3(sppptun__recv__discard, int, sessid,
2188 		    tuncl_t *, tcl, mblk_t *, mp);
2189 		if (tcl == NULL) {
2190 			KLINCR(lks_in_nomatch);
2191 		}
2192 		if (isdata) {
2193 			KLINCR(lks_indata_drops);
2194 			if (tcl != NULL)
2195 				tcl->tcl_stats.ppp_ierrors++;
2196 		} else {
2197 			KLINCR(lks_inctrl_drops);
2198 			if (tcl != NULL) {
2199 				KCINCR(cks_inctrl_drops);
2200 			}
2201 		}
2202 		freemsg(mp);
2203 		return (NULL);
2204 	}
2205 
2206 	if (tcl->tcl_data_tll == tll && isdata) {
2207 		if (!adjmsg(mp, remlen) ||
2208 		    (i = msgsize(mp)) < msglen ||
2209 		    (i > msglen && !adjmsg(mp, msglen - i))) {
2210 			KLINCR(lks_indata_drops);
2211 			tcl->tcl_stats.ppp_ierrors++;
2212 			freemsg(mp);
2213 			return (NULL);
2214 		}
2215 		/* XXX -- address/control handling in pppd needs help. */
2216 		if (*mp->b_rptr != 0xFF) {
2217 			if ((mp = prependb(mp, 2, 1)) == NULL) {
2218 				KLINCR(lks_indata_drops);
2219 				tcl->tcl_stats.ppp_ierrors++;
2220 				return (NULL);
2221 			}
2222 			mp->b_rptr[0] = 0xFF;
2223 			mp->b_rptr[1] = 0x03;
2224 		}
2225 		MTYPE(mp) = M_DATA;
2226 		tcl->tcl_stats.ppp_ibytes += msgsize(mp);
2227 		tcl->tcl_stats.ppp_ipackets++;
2228 		KLINCR(lks_indata);
2229 	} else {
2230 		if (isdata || tcl->tcl_ctrl_tll != tll ||
2231 		    (mnew = make_control(tcl, tll, PTCA_CONTROL, tcl)) ==
2232 		    NULL) {
2233 			KLINCR(lks_inctrl_drops);
2234 			KCINCR(cks_inctrl_drops);
2235 			freemsg(mp);
2236 			return (NULL);
2237 		}
2238 		/* Fix up source address; peer might not be set yet. */
2239 		pap = &((struct ppptun_control *)mnew->b_rptr)->ptc_address;
2240 		bcopy(srcaddr, pap->pta_pppoe.ptma_mac,
2241 		    sizeof (pap->pta_pppoe.ptma_mac));
2242 		mnew->b_cont = mp;
2243 		mp = mnew;
2244 		KLINCR(lks_inctrls);
2245 		KCINCR(cks_inctrls);
2246 	}
2247 	*mpp = mp;
2248 	return (tcl->tcl_rq);
2249 }
2250 
2251 /*
2252  * sppptun_urput()
2253  *
2254  * MT-Perimeters:
2255  *    shared inner, shared outer.
2256  *
2257  * Description:
2258  *    Upper read-side put procedure.  Messages from the underlying
2259  *    lower stream driver arrive here.  See sppptun_recv for the
2260  *    demultiplexing logic.
2261  */
2262 static int
2263 sppptun_urput(queue_t *q, mblk_t *mp)
2264 {
2265 	union DL_primitives *dlprim;
2266 	mblk_t *mpnext;
2267 	tunll_t *tll;
2268 	queue_t *nextq;
2269 
2270 	tll = q->q_ptr;
2271 	ASSERT(!(tll->tll_flags & TLLF_NOTLOWER));
2272 
2273 	switch (MTYPE(mp)) {
2274 	case M_DATA:
2275 		/*
2276 		 * When we're bound over IP, data arrives here.  The
2277 		 * packet starts with the IP header itself.
2278 		 */
2279 		if ((nextq = sppptun_recv(q, &mp, NULL)) != NULL)
2280 			putnext(nextq, mp);
2281 		break;
2282 
2283 	case M_PROTO:
2284 	case M_PCPROTO:
2285 		/* Data arrives here for UDP or raw Ethernet, not IP. */
2286 		switch (tll->tll_style) {
2287 			/* PPTP control messages are over TCP only. */
2288 		case PTS_PPTP:
2289 		default:
2290 			ASSERT(0);	/* how'd that happen? */
2291 			break;
2292 
2293 		case PTS_PPPOE:		/* DLPI message */
2294 			if (MBLKL(mp) < sizeof (t_uscalar_t))
2295 				break;
2296 			dlprim = (union DL_primitives *)mp->b_rptr;
2297 			switch (dlprim->dl_primitive) {
2298 			case DL_UNITDATA_IND: {
2299 				size_t mlen = MBLKL(mp);
2300 
2301 				if (mlen < DL_UNITDATA_IND_SIZE)
2302 					break;
2303 				if (dlprim->unitdata_ind.dl_src_addr_offset <
2304 				    DL_UNITDATA_IND_SIZE ||
2305 				    dlprim->unitdata_ind.dl_src_addr_offset +
2306 				    dlprim->unitdata_ind.dl_src_addr_length >
2307 				    mlen)
2308 					break;
2309 			}
2310 				/* FALLTHROUGH */
2311 			case DL_UNITDATA_REQ:	/* For loopback support. */
2312 				if (dlprim->dl_primitive == DL_UNITDATA_REQ &&
2313 				    MBLKL(mp) < DL_UNITDATA_REQ_SIZE)
2314 					break;
2315 				if ((mpnext = mp->b_cont) == NULL)
2316 					break;
2317 				MTYPE(mpnext) = M_DATA;
2318 				nextq = sppptun_recv(q, &mpnext,
2319 				    dlprim->dl_primitive == DL_UNITDATA_IND ?
2320 				    mp->b_rptr +
2321 				    dlprim->unitdata_ind.dl_src_addr_offset :
2322 				    tll->tll_lcladdr.pta_pppoe.ptma_mac);
2323 				if (nextq != NULL)
2324 					putnext(nextq, mpnext);
2325 				freeb(mp);
2326 				return (0);
2327 
2328 			default:
2329 				urput_dlpi(q, mp);
2330 				return (0);
2331 			}
2332 			break;
2333 		}
2334 		freemsg(mp);
2335 		break;
2336 
2337 	default:
2338 		freemsg(mp);
2339 		break;
2340 	}
2341 	return (0);
2342 }
2343 
2344 /*
2345  * sppptun_ursrv()
2346  *
2347  * MT-Perimeters:
2348  *    exclusive inner, shared outer.
2349  *
2350  * Description:
2351  *    Upper read-side service procedure.  This procedure services the
2352  *    client streams.  We get here because the client (PPP) asserts
2353  *    flow control down to us.
2354  */
2355 static int
2356 sppptun_ursrv(queue_t *q)
2357 {
2358 	mblk_t		*mp;
2359 
2360 	ASSERT(q->q_ptr != NULL);
2361 
2362 	while ((mp = getq(q)) != NULL) {
2363 		if (canputnext(q)) {
2364 			putnext(q, mp);
2365 		} else {
2366 			(void) putbq(q, mp);
2367 			break;
2368 		}
2369 	}
2370 	return (0);
2371 }
2372 
2373 /*
2374  * Dummy constructor/destructor functions for kmem_cache_create.
2375  * We're just using kmem as an allocator of integers, not real
2376  * storage.
2377  */
2378 
/*
 * Cache constructor passed to kmem_cache_create.  It ignores all of
 * its arguments, performs no setup, and always returns 0.
 */
/*ARGSUSED*/
static int
tcl_constructor(void *maddr, void *arg, int kmflags)
{
	return (0);
}
2385 
/*
 * Cache destructor passed to kmem_cache_create.  It ignores both of
 * its arguments and does nothing.
 */
/*ARGSUSED*/
static void
tcl_destructor(void *maddr, void *arg)
{
}
2391 
/*
 * Total size occupied by one tunnel client.  Each tunnel client
 * consumes one pointer for tcl_slots array, one tuncl_t structure and
 * two messages preallocated for close.  Only the dblk_t portion of
 * each preallocated message is counted.  sppptun_tcl_init() divides
 * its memory budget by this value to estimate how many clients fit.
 */
#define	TUNCL_SIZE (sizeof (tuncl_t) + sizeof (tuncl_t *) + \
			2 * sizeof (dblk_t))
2399 
/*
 * Clear all bits of x except the highest bit.  Values of 2 or less
 * are returned unchanged.  This assumes highbit() yields the 1-based
 * position of the most significant set bit; confirm against its
 * definition.  Note that x is evaluated more than once, so it must
 * not have side effects.
 */
#define	truncate(x)	((x) <= 2 ? (x) : (1 << (highbit(x) - 1)))
2404 
2405 /*
2406  * This function initializes some well-known global variables inside
2407  * the module.
2408  *
2409  * Called by sppptun_mod.c:_init() before installing the module.
2410  */
2411 void
2412 sppptun_init(void)
2413 {
2414 	tunll_list.q_forw = tunll_list.q_back = &tunll_list;
2415 }
2416 
2417 /*
2418  * This function allocates the initial internal storage for the
2419  * sppptun driver.
2420  *
2421  * Called by sppptun_mod.c:_init() after installing module.
2422  */
2423 void
2424 sppptun_tcl_init(void)
2425 {
2426 	uint_t i, j;
2427 
2428 	rw_init(&tcl_rwlock, NULL, RW_DRIVER, NULL);
2429 	rw_enter(&tcl_rwlock, RW_WRITER);
2430 	tcl_nslots = sppptun_init_cnt;
2431 	tcl_slots = kmem_zalloc(tcl_nslots * sizeof (tuncl_t *), KM_SLEEP);
2432 
2433 	tcl_cache = kmem_cache_create("sppptun_map", sizeof (tuncl_t), 0,
2434 	    tcl_constructor, tcl_destructor, NULL, NULL, NULL, 0);
2435 
2436 	/* Allocate integer space for minor numbers */
2437 	tcl_minor_arena = vmem_create("sppptun_minor", (void *)1, tcl_nslots,
2438 	    1, NULL, NULL, NULL, 0, VM_SLEEP | VMC_IDENTIFIER);
2439 
2440 	/*
2441 	 * Calculate available number of tunnels - how many tunnels
2442 	 * can we allocate in sppptun_pctofmem % of available
2443 	 * memory.  The value is rounded up to the nearest power of 2.
2444 	 */
2445 	i = (sppptun_pctofmem * kmem_maxavail()) / (100 * TUNCL_SIZE);
2446 	j = truncate(i);	/* i with non-high bits stripped */
2447 	if (i != j)
2448 		j *= 2;
2449 	tcl_minormax = j;
2450 	rw_exit(&tcl_rwlock);
2451 }
2452 
2453 /*
2454  * This function checks that there are no plumbed streams or other users.
2455  *
2456  * Called by sppptun_mod.c:_fini().  Assumes that we're exclusive on
2457  * both perimeters.
2458  */
2459 int
2460 sppptun_tcl_fintest(void)
2461 {
2462 	if (tunll_list.q_forw != &tunll_list || tcl_inuse > 0)
2463 		return (EBUSY);
2464 	else
2465 		return (0);
2466 }
2467 
2468 /*
2469  * If no lower streams are plumbed, then this function deallocates all
2470  * internal storage in preparation for unload.
2471  *
2472  * Called by sppptun_mod.c:_fini().  Assumes that we're exclusive on
2473  * both perimeters.
2474  */
2475 void
2476 sppptun_tcl_fini(void)
2477 {
2478 	if (tcl_minor_arena != NULL) {
2479 		vmem_destroy(tcl_minor_arena);
2480 		tcl_minor_arena = NULL;
2481 	}
2482 	if (tcl_cache != NULL) {
2483 		kmem_cache_destroy(tcl_cache);
2484 		tcl_cache = NULL;
2485 	}
2486 	kmem_free(tcl_slots, tcl_nslots * sizeof (tuncl_t *));
2487 	tcl_slots = NULL;
2488 	rw_destroy(&tcl_rwlock);
2489 	ASSERT(tcl_slots == NULL);
2490 	ASSERT(tcl_cache == NULL);
2491 	ASSERT(tcl_minor_arena == NULL);
2492 }
2493