xref: /titanic_51/usr/src/uts/common/io/softmac/softmac_fp.c (revision 2d39cb4c2c63a5cd31332527611ae6366103b733)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * Softmac data-path switching:
28  *
29  * - Fast-path model
30  *
31  * When the softmac fast-path is used, a dedicated lower-stream
32  * will be opened over the legacy device for each IP/ARP (upper-)stream
33  * over the softMAC, and all DLPI messages (including control messages
34  * and data messages) will be exchanged between the upper-stream and
35  * the corresponding lower-stream directly. Therefore, the data
36  * demultiplexing, filtering and classification processing will be done
37  * by the lower-stream, and the GLDv3 DLS/MAC layer processing will be
38  * no longer needed.
39  *
40  * - Slow-path model
41  *
 * Some GLDv3 features require the GLDv3 DLS/MAC layer processing to
43  * not be bypassed to assure its function correctness. For example,
44  * softmac fast-path must be disabled to support GLDv3 VNIC functionality.
45  * In this case, a shared lower-stream will be opened over the legacy
46  * device, which is responsible for implementing the GLDv3 callbacks
47  * and passing RAW data messages between the legacy devices and the GLDv3
48  * framework.
49  *
50  * By default, the softmac fast-path mode will be used to assure the
51  * performance; MAC clients will be able to request to disable the softmac
52  * fast-path mode to support certain features, and if that succeeds,
53  * the system will fallback to the slow-path softmac data-path model.
54  *
55  *
 * The details of the softmac data fast-path model are described below:
57  *
 * 1. When a stream is opened on a softMAC, the softmac module will take
59  *    over the DLPI processing on this stream;
60  *
61  * 2. For IP/ARP streams over a softMAC, softmac data fast-path will be
62  *    used by default, unless fast-path is disabled by any MAC client
63  *    explicitly. The softmac module first identifies an IP/ARP stream
64  *    by seeing whether there is a SIOCSLIFNAME ioctl sent from upstream,
65  *    if there is one, this stream is either an IP or an ARP stream
66  *    and will use fast-path potentially;
67  *
 * 3. When the softmac fast-path is used, a dedicated lower-stream will
69  *    be setup for each IP/ARP stream (1-1 mapping). From that point on,
70  *    all control and data messages will be exchanged between the IP/ARP
71  *    upper-stream and the legacy device through this dedicated
72  *    lower-stream. As a result, the DLS/MAC layer processing in GLDv3
73  *    will be skipped, and this greatly improves the performance;
74  *
75  * 4. When the softmac data fast-path is disabled by a MAC client (e.g.,
76  *    by a VNIC), all the IP/ARP upper streams will try to switch from
77  *    the fast-path to the slow-path. The dedicated lower-stream will be
78  *    destroyed, and all the control and data-messages will go through the
79  *    existing GLDv3 code path and (in the end) the shared lower-stream;
80  *
81  * 5. On the other hand, when the last MAC client cancels its fast-path
82  *    disable request, all the IP/ARP streams will try to switch back to
83  *    the fast-path mode;
84  *
 * Steps 4 and 5 both rely on the data-path mode switching process
86  * described below:
87  *
88  * 1) To switch the softmac data-path mode (between fast-path and slow-path),
89  *    softmac will first send a DL_NOTE_REPLUMB DL_NOTIFY_IND message
 *    upstream over each IP/ARP stream that needs data-path mode switching;
91  *
92  * 2) When IP receives this DL_NOTE_REPLUMB message, it will bring down
93  *    all the IP interfaces on the corresponding ill (IP Lower level
94  *    structure), and bring up those interfaces over again; this will in
95  *    turn cause the ARP to "replumb" the interface.
96  *
97  *    During the replumb process, both IP and ARP will send downstream the
98  *    necessary DL_DISABMULTI_REQ and DL_UNBIND_REQ messages and cleanup
99  *    the old state of the underlying softMAC, following with the necessary
100  *    DL_BIND_REQ and DL_ENABMULTI_REQ messages to setup the new state.
101  *    Between the cleanup and re-setup process, IP/ARP will also send down
 *    a DL_NOTE_REPLUMB_DONE DL_NOTIFY_CONF message to the softMAC to
103  *    indicate the *switching point*;
104  *
105  * 3) When softmac receives the DL_NOTE_REPLUMB_DONE message, it either
106  *    creates or destroys the dedicated lower-stream (depending on which
 *    data-path mode the softMAC switches to), and changes the softmac
108  *    data-path mode. From then on, softmac will process all the succeeding
109  *    control messages (including the DL_BIND_REQ and DL_ENABMULTI_REQ
110  *    messages) and data messages based on new data-path mode.
111  */
112 
113 #include <sys/types.h>
114 #include <sys/disp.h>
115 #include <sys/callb.h>
116 #include <sys/sysmacros.h>
117 #include <sys/file.h>
118 #include <sys/vlan.h>
119 #include <sys/dld.h>
120 #include <sys/sockio.h>
121 #include <sys/softmac_impl.h>
122 
/*
 * State shared between softmac_wput_nondata() and the fallback dispatcher
 * thread (softmac_taskq_dispatch()). softmac_taskq_lock is held around
 * every access to the list and to the quit/done flags in this file, and
 * softmac_taskq_cv is used both to wake the dispatcher and to tell
 * softmac_fp_fini() that the dispatcher has finished.
 */
static kmutex_t		softmac_taskq_lock;
static kcondvar_t	softmac_taskq_cv;
static list_t		softmac_taskq_list;	/* softmac_upper_t awaiting dispatch */
boolean_t		softmac_taskq_quit;	/* set by softmac_fp_fini() */
boolean_t		softmac_taskq_done;	/* set by the dispatcher on exit */

/* Forward declarations of functions referenced before their definition. */
static void		softmac_taskq_dispatch();
static int		softmac_fastpath_setup(softmac_upper_t *);
static mac_tx_cookie_t	softmac_fastpath_wput_data(softmac_upper_t *, mblk_t *,
			    uintptr_t, uint16_t);
static void		softmac_datapath_switch_done(softmac_upper_t *);
134 
135 void
136 softmac_fp_init()
137 {
138 	mutex_init(&softmac_taskq_lock, NULL, MUTEX_DRIVER, NULL);
139 	cv_init(&softmac_taskq_cv, NULL, CV_DRIVER, NULL);
140 
141 	softmac_taskq_quit = B_FALSE;
142 	softmac_taskq_done = B_FALSE;
143 	list_create(&softmac_taskq_list, sizeof (softmac_upper_t),
144 	    offsetof(softmac_upper_t, su_taskq_list_node));
145 	(void) thread_create(NULL, 0, softmac_taskq_dispatch, NULL, 0,
146 	    &p0, TS_RUN, minclsyspri);
147 }
148 
149 void
150 softmac_fp_fini()
151 {
152 	/*
153 	 * Request the softmac_taskq thread to quit and wait for it to be done.
154 	 */
155 	mutex_enter(&softmac_taskq_lock);
156 	softmac_taskq_quit = B_TRUE;
157 	cv_signal(&softmac_taskq_cv);
158 	while (!softmac_taskq_done)
159 		cv_wait(&softmac_taskq_cv, &softmac_taskq_lock);
160 	mutex_exit(&softmac_taskq_lock);
161 	list_destroy(&softmac_taskq_list);
162 
163 	mutex_destroy(&softmac_taskq_lock);
164 	cv_destroy(&softmac_taskq_cv);
165 }
166 
167 static boolean_t
168 check_ip_above(queue_t *q)
169 {
170 	queue_t		*next_q;
171 	boolean_t	ret = B_TRUE;
172 
173 	claimstr(q);
174 	next_q = q->q_next;
175 	if (strcmp(next_q->q_qinfo->qi_minfo->mi_idname, "ip") != 0)
176 		ret = B_FALSE;
177 	releasestr(q);
178 	return (ret);
179 }
180 
181 /* ARGSUSED */
182 static int
183 softmac_capab_perim(softmac_upper_t *sup, void *data, uint_t flags)
184 {
185 	switch (flags) {
186 	case DLD_ENABLE:
187 		mutex_enter(&sup->su_mutex);
188 		break;
189 	case DLD_DISABLE:
190 		mutex_exit(&sup->su_mutex);
191 		break;
192 	case DLD_QUERY:
193 		return (MUTEX_HELD(&sup->su_mutex));
194 	}
195 	return (0);
196 }
197 
/*
 * Stub Tx-notify registration routine advertised to the client through
 * the direct capability (see softmac_capab_direct()). Nothing is
 * registered; it ignores its arguments and always returns NULL.
 */
/* ARGSUSED */
static mac_tx_notify_handle_t
softmac_client_tx_notify(void *txcb, mac_tx_notify_t func, void *arg)
{
	return (NULL);
}
204 
205 static int
206 softmac_capab_direct(softmac_upper_t *sup, void *data, uint_t flags)
207 {
208 	dld_capab_direct_t	*direct = data;
209 	softmac_lower_t		*slp = sup->su_slp;
210 
211 	ASSERT(MUTEX_HELD(&sup->su_mutex));
212 
213 	ASSERT(sup->su_mode == SOFTMAC_FASTPATH);
214 
215 	switch (flags) {
216 	case DLD_ENABLE:
217 		if (sup->su_direct)
218 			return (0);
219 
220 		sup->su_direct_rxinfo.slr_rx = (softmac_rx_t)direct->di_rx_cf;
221 		sup->su_direct_rxinfo.slr_arg = direct->di_rx_ch;
222 		slp->sl_rxinfo = &sup->su_direct_rxinfo;
223 		direct->di_tx_df = (uintptr_t)softmac_fastpath_wput_data;
224 		direct->di_tx_dh = sup;
225 
226 		/*
227 		 * We relying on the STREAM flow-control to backenable
228 		 * the IP stream. Therefore, no notify callback needs to
229 		 * be registered. But IP requires this to be a valid function
230 		 * pointer.
231 		 */
232 		direct->di_tx_cb_df = (uintptr_t)softmac_client_tx_notify;
233 		direct->di_tx_cb_dh = NULL;
234 		sup->su_direct = B_TRUE;
235 		return (0);
236 
237 	case DLD_DISABLE:
238 		if (!sup->su_direct)
239 			return (0);
240 
241 		slp->sl_rxinfo = &sup->su_rxinfo;
242 		sup->su_direct = B_FALSE;
243 		return (0);
244 	}
245 	return (ENOTSUP);
246 }
247 
248 static int
249 softmac_dld_capab(softmac_upper_t *sup, uint_t type, void *data, uint_t flags)
250 {
251 	int	err;
252 
253 	/*
254 	 * Don't enable direct callback capabilities unless the caller is
255 	 * the IP client. When a module is inserted in a stream (_I_INSERT)
256 	 * the stack initiates capability disable, but due to races, the
257 	 * module insertion may complete before the capability disable
258 	 * completes. So we limit the check to DLD_ENABLE case.
259 	 */
260 	if ((flags == DLD_ENABLE && type != DLD_CAPAB_PERIM) &&
261 	    !check_ip_above(sup->su_rq)) {
262 		return (ENOTSUP);
263 	}
264 
265 	switch (type) {
266 	case DLD_CAPAB_DIRECT:
267 		err = softmac_capab_direct(sup, data, flags);
268 		break;
269 
270 	case DLD_CAPAB_PERIM:
271 		err = softmac_capab_perim(sup, data, flags);
272 		break;
273 
274 	default:
275 		err = ENOTSUP;
276 		break;
277 	}
278 	return (err);
279 }
280 
281 static void
282 softmac_capability_advertise(softmac_upper_t *sup, mblk_t *mp)
283 {
284 	dl_capability_ack_t	*dlap;
285 	dl_capability_sub_t	*dlsp;
286 	t_uscalar_t		subsize;
287 	uint8_t			*ptr;
288 	queue_t			*q = sup->su_wq;
289 	mblk_t			*mp1;
290 	softmac_t		*softmac = sup->su_softmac;
291 	boolean_t		dld_capable = B_FALSE;
292 	boolean_t		hcksum_capable = B_FALSE;
293 	boolean_t		zcopy_capable = B_FALSE;
294 	boolean_t		mdt_capable = B_FALSE;
295 
296 	ASSERT(sup->su_mode == SOFTMAC_FASTPATH);
297 
298 	/*
299 	 * Initially assume no capabilities.
300 	 */
301 	subsize = 0;
302 
303 	/*
304 	 * Direct capability negotiation interface between IP and softmac
305 	 */
306 	if (check_ip_above(sup->su_rq)) {
307 		dld_capable = B_TRUE;
308 		subsize += sizeof (dl_capability_sub_t) +
309 		    sizeof (dl_capab_dld_t);
310 	}
311 
312 	/*
313 	 * Check if checksum offload is supported on this MAC.
314 	 */
315 	if (softmac->smac_capab_flags & MAC_CAPAB_HCKSUM) {
316 		hcksum_capable = B_TRUE;
317 		subsize += sizeof (dl_capability_sub_t) +
318 		    sizeof (dl_capab_hcksum_t);
319 	}
320 
321 	/*
322 	 * Check if zerocopy is supported on this interface.
323 	 */
324 	if (!(softmac->smac_capab_flags & MAC_CAPAB_NO_ZCOPY)) {
325 		zcopy_capable = B_TRUE;
326 		subsize += sizeof (dl_capability_sub_t) +
327 		    sizeof (dl_capab_zerocopy_t);
328 	}
329 
330 	if (softmac->smac_mdt) {
331 		mdt_capable = B_TRUE;
332 		subsize += sizeof (dl_capability_sub_t) +
333 		    sizeof (dl_capab_mdt_t);
334 	}
335 
336 	/*
337 	 * If there are no capabilities to advertise or if we
338 	 * can't allocate a response, send a DL_ERROR_ACK.
339 	 */
340 	if ((subsize == 0) || (mp1 = reallocb(mp,
341 	    sizeof (dl_capability_ack_t) + subsize, 0)) == NULL) {
342 		dlerrorack(q, mp, DL_CAPABILITY_REQ, DL_NOTSUPPORTED, 0);
343 		return;
344 	}
345 
346 	mp = mp1;
347 	DB_TYPE(mp) = M_PROTO;
348 	mp->b_wptr = mp->b_rptr + sizeof (dl_capability_ack_t) + subsize;
349 	bzero(mp->b_rptr, MBLKL(mp));
350 	dlap = (dl_capability_ack_t *)mp->b_rptr;
351 	dlap->dl_primitive = DL_CAPABILITY_ACK;
352 	dlap->dl_sub_offset = sizeof (dl_capability_ack_t);
353 	dlap->dl_sub_length = subsize;
354 	ptr = (uint8_t *)&dlap[1];
355 
356 	/*
357 	 * IP polling interface.
358 	 */
359 	if (dld_capable) {
360 		dl_capab_dld_t		dld;
361 
362 		dlsp = (dl_capability_sub_t *)ptr;
363 		dlsp->dl_cap = DL_CAPAB_DLD;
364 		dlsp->dl_length = sizeof (dl_capab_dld_t);
365 		ptr += sizeof (dl_capability_sub_t);
366 
367 		bzero(&dld, sizeof (dl_capab_dld_t));
368 		dld.dld_version = DLD_CURRENT_VERSION;
369 		dld.dld_capab = (uintptr_t)softmac_dld_capab;
370 		dld.dld_capab_handle = (uintptr_t)sup;
371 
372 		dlcapabsetqid(&(dld.dld_mid), sup->su_rq);
373 		bcopy(&dld, ptr, sizeof (dl_capab_dld_t));
374 		ptr += sizeof (dl_capab_dld_t);
375 	}
376 
377 	/*
378 	 * TCP/IP checksum offload.
379 	 */
380 	if (hcksum_capable) {
381 		dl_capab_hcksum_t	hcksum;
382 
383 		dlsp = (dl_capability_sub_t *)ptr;
384 
385 		dlsp->dl_cap = DL_CAPAB_HCKSUM;
386 		dlsp->dl_length = sizeof (dl_capab_hcksum_t);
387 		ptr += sizeof (dl_capability_sub_t);
388 
389 		bzero(&hcksum, sizeof (dl_capab_hcksum_t));
390 		hcksum.hcksum_version = HCKSUM_VERSION_1;
391 		hcksum.hcksum_txflags = softmac->smac_hcksum_txflags;
392 		dlcapabsetqid(&(hcksum.hcksum_mid), sup->su_rq);
393 		bcopy(&hcksum, ptr, sizeof (dl_capab_hcksum_t));
394 		ptr += sizeof (dl_capab_hcksum_t);
395 	}
396 
397 	/*
398 	 * Zero copy
399 	 */
400 	if (zcopy_capable) {
401 		dl_capab_zerocopy_t	zcopy;
402 
403 		dlsp = (dl_capability_sub_t *)ptr;
404 
405 		dlsp->dl_cap = DL_CAPAB_ZEROCOPY;
406 		dlsp->dl_length = sizeof (dl_capab_zerocopy_t);
407 		ptr += sizeof (dl_capability_sub_t);
408 
409 		bzero(&zcopy, sizeof (dl_capab_zerocopy_t));
410 		zcopy.zerocopy_version = ZEROCOPY_VERSION_1;
411 		zcopy.zerocopy_flags = DL_CAPAB_VMSAFE_MEM;
412 		dlcapabsetqid(&(zcopy.zerocopy_mid), sup->su_rq);
413 		bcopy(&zcopy, ptr, sizeof (dl_capab_zerocopy_t));
414 		ptr += sizeof (dl_capab_zerocopy_t);
415 	}
416 
417 	/*
418 	 * MDT
419 	 */
420 	if (mdt_capable) {
421 		dl_capab_mdt_t mdt;
422 
423 		dlsp = (dl_capability_sub_t *)ptr;
424 
425 		dlsp->dl_cap = DL_CAPAB_MDT;
426 		dlsp->dl_length = sizeof (dl_capab_mdt_t);
427 		ptr += sizeof (dl_capability_sub_t);
428 
429 		bzero(&mdt, sizeof (dl_capab_mdt_t));
430 		mdt.mdt_version = MDT_VERSION_2;
431 		mdt.mdt_flags = DL_CAPAB_MDT_ENABLE;
432 		mdt.mdt_hdr_head = softmac->smac_mdt_capab.mdt_hdr_head;
433 		mdt.mdt_hdr_tail = softmac->smac_mdt_capab.mdt_hdr_tail;
434 		mdt.mdt_max_pld = softmac->smac_mdt_capab.mdt_max_pld;
435 		mdt.mdt_span_limit = softmac->smac_mdt_capab.mdt_span_limit;
436 		dlcapabsetqid(&(mdt.mdt_mid), sup->su_rq);
437 		bcopy(&mdt, ptr, sizeof (dl_capab_mdt_t));
438 		ptr += sizeof (dl_capab_mdt_t);
439 	}
440 
441 	ASSERT(ptr == mp->b_rptr + sizeof (dl_capability_ack_t) + subsize);
442 	qreply(q, mp);
443 }
444 
445 static void
446 softmac_capability_req(softmac_upper_t *sup, mblk_t *mp)
447 {
448 	dl_capability_req_t	*dlp = (dl_capability_req_t *)mp->b_rptr;
449 	dl_capability_sub_t	*sp;
450 	size_t			size, len;
451 	offset_t		off, end;
452 	t_uscalar_t		dl_err;
453 	queue_t			*q = sup->su_wq;
454 
455 	ASSERT(sup->su_mode == SOFTMAC_FASTPATH);
456 	if (MBLKL(mp) < sizeof (dl_capability_req_t)) {
457 		dl_err = DL_BADPRIM;
458 		goto failed;
459 	}
460 
461 	if (!sup->su_bound) {
462 		dl_err = DL_OUTSTATE;
463 		goto failed;
464 	}
465 
466 	/*
467 	 * This request is overloaded. If there are no requested capabilities
468 	 * then we just want to acknowledge with all the capabilities we
469 	 * support. Otherwise we enable the set of capabilities requested.
470 	 */
471 	if (dlp->dl_sub_length == 0) {
472 		softmac_capability_advertise(sup, mp);
473 		return;
474 	}
475 
476 	if (!MBLKIN(mp, dlp->dl_sub_offset, dlp->dl_sub_length)) {
477 		dl_err = DL_BADPRIM;
478 		goto failed;
479 	}
480 
481 	dlp->dl_primitive = DL_CAPABILITY_ACK;
482 
483 	off = dlp->dl_sub_offset;
484 	len = dlp->dl_sub_length;
485 
486 	/*
487 	 * Walk the list of capabilities to be enabled.
488 	 */
489 	for (end = off + len; off < end; ) {
490 		sp = (dl_capability_sub_t *)(mp->b_rptr + off);
491 		size = sizeof (dl_capability_sub_t) + sp->dl_length;
492 
493 		if (off + size > end ||
494 		    !IS_P2ALIGNED(off, sizeof (uint32_t))) {
495 			dl_err = DL_BADPRIM;
496 			goto failed;
497 		}
498 
499 		switch (sp->dl_cap) {
500 		/*
501 		 * TCP/IP checksum offload to hardware.
502 		 */
503 		case DL_CAPAB_HCKSUM: {
504 			dl_capab_hcksum_t *hcksump;
505 			dl_capab_hcksum_t hcksum;
506 
507 			hcksump = (dl_capab_hcksum_t *)&sp[1];
508 			/*
509 			 * Copy for alignment.
510 			 */
511 			bcopy(hcksump, &hcksum, sizeof (dl_capab_hcksum_t));
512 			dlcapabsetqid(&(hcksum.hcksum_mid), sup->su_rq);
513 			bcopy(&hcksum, hcksump, sizeof (dl_capab_hcksum_t));
514 			break;
515 		}
516 
517 		default:
518 			break;
519 		}
520 
521 		off += size;
522 	}
523 	qreply(q, mp);
524 	return;
525 failed:
526 	dlerrorack(q, mp, DL_CAPABILITY_REQ, dl_err, 0);
527 }
528 
529 static void
530 softmac_bind_req(softmac_upper_t *sup, mblk_t *mp)
531 {
532 	softmac_lower_t	*slp = sup->su_slp;
533 	softmac_t	*softmac = sup->su_softmac;
534 	mblk_t		*ackmp, *mp1;
535 	int		err;
536 
537 	if (MBLKL(mp) < DL_BIND_REQ_SIZE) {
538 		freemsg(mp);
539 		return;
540 	}
541 
542 	/*
543 	 * Allocate ackmp incase the underlying driver does not ack timely.
544 	 */
545 	if ((mp1 = allocb(sizeof (dl_error_ack_t), BPRI_HI)) == NULL) {
546 		dlerrorack(sup->su_wq, mp, DL_BIND_REQ, DL_SYSERR, ENOMEM);
547 		return;
548 	}
549 
550 	err = softmac_output(slp, mp, DL_BIND_REQ, DL_BIND_ACK, &ackmp);
551 	if (ackmp != NULL) {
552 		freemsg(mp1);
553 	} else {
554 		/*
555 		 * The driver does not ack timely.
556 		 */
557 		ASSERT(err == ENOMSG);
558 		ackmp = mp1;
559 	}
560 	if (err != 0)
561 		goto failed;
562 
563 	/*
564 	 * Enable capabilities the underlying driver claims to support.
565 	 */
566 	if ((err = softmac_capab_enable(slp)) != 0)
567 		goto failed;
568 
569 	/*
570 	 * Check whether this softmac is already marked as exclusively used,
571 	 * e.g., an aggregation is created over it. Fail the BIND_REQ if so.
572 	 */
573 	mutex_enter(&softmac->smac_active_mutex);
574 	if (softmac->smac_active) {
575 		mutex_exit(&softmac->smac_active_mutex);
576 		err = EBUSY;
577 		goto failed;
578 	}
579 	softmac->smac_nactive++;
580 	sup->su_active = B_TRUE;
581 	mutex_exit(&softmac->smac_active_mutex);
582 	sup->su_bound = B_TRUE;
583 
584 	qreply(sup->su_wq, ackmp);
585 	return;
586 failed:
587 	if (err != 0) {
588 		dlerrorack(sup->su_wq, ackmp, DL_BIND_REQ, DL_SYSERR, err);
589 		return;
590 	}
591 }
592 
593 static void
594 softmac_unbind_req(softmac_upper_t *sup, mblk_t *mp)
595 {
596 	softmac_lower_t	*slp = sup->su_slp;
597 	softmac_t	*softmac = sup->su_softmac;
598 	mblk_t		*ackmp, *mp1;
599 	int		err;
600 
601 	if (MBLKL(mp) < DL_UNBIND_REQ_SIZE) {
602 		freemsg(mp);
603 		return;
604 	}
605 
606 	if (!sup->su_bound) {
607 		dlerrorack(sup->su_wq, mp, DL_UNBIND_REQ, DL_OUTSTATE, 0);
608 		return;
609 	}
610 
611 	/*
612 	 * Allocate ackmp incase the underlying driver does not ack timely.
613 	 */
614 	if ((mp1 = allocb(sizeof (dl_error_ack_t), BPRI_HI)) == NULL) {
615 		dlerrorack(sup->su_wq, mp, DL_UNBIND_REQ, DL_SYSERR, ENOMEM);
616 		return;
617 	}
618 
619 	err = softmac_output(slp, mp, DL_UNBIND_REQ, DL_OK_ACK, &ackmp);
620 	if (ackmp != NULL) {
621 		freemsg(mp1);
622 	} else {
623 		/*
624 		 * The driver does not ack timely.
625 		 */
626 		ASSERT(err == ENOMSG);
627 		ackmp = mp1;
628 	}
629 	if (err != 0) {
630 		dlerrorack(sup->su_wq, ackmp, DL_UNBIND_REQ, DL_SYSERR, err);
631 		return;
632 	}
633 
634 	sup->su_bound = B_FALSE;
635 
636 	mutex_enter(&softmac->smac_active_mutex);
637 	if (sup->su_active) {
638 		ASSERT(!softmac->smac_active);
639 		softmac->smac_nactive--;
640 		sup->su_active = B_FALSE;
641 	}
642 	mutex_exit(&softmac->smac_active_mutex);
643 
644 done:
645 	qreply(sup->su_wq, ackmp);
646 }
647 
648 /*
649  * Process the non-data mblk.
650  */
651 static void
652 softmac_wput_single_nondata(softmac_upper_t *sup, mblk_t *mp)
653 {
654 	softmac_t *softmac = sup->su_softmac;
655 	softmac_lower_t	*slp = sup->su_slp;
656 	unsigned char	dbtype;
657 	t_uscalar_t	prim;
658 
659 	dbtype = DB_TYPE(mp);
660 	switch (dbtype) {
661 	case M_IOCTL:
662 	case M_CTL: {
663 		uint32_t	expected_mode;
664 
665 		if (((struct iocblk *)(mp->b_rptr))->ioc_cmd != SIOCSLIFNAME)
666 			break;
667 
668 		/*
669 		 * Nak the M_IOCTL based on the STREAMS specification.
670 		 */
671 		if (dbtype == M_IOCTL)
672 			miocnak(sup->su_wq, mp, 0, EINVAL);
673 
674 		/*
675 		 * This stream is either IP or ARP. See whether
676 		 * we need to setup a dedicated-lower-stream for it.
677 		 */
678 		mutex_enter(&softmac->smac_fp_mutex);
679 
680 		expected_mode = DATAPATH_MODE(softmac);
681 		if (expected_mode == SOFTMAC_SLOWPATH)
682 			sup->su_mode = SOFTMAC_SLOWPATH;
683 		list_insert_head(&softmac->smac_sup_list, sup);
684 		mutex_exit(&softmac->smac_fp_mutex);
685 
686 		/*
687 		 * Setup the fast-path dedicated lower stream if fast-path
688 		 * is expected. Note that no lock is held here, and if
689 		 * smac_expected_mode is changed from SOFTMAC_FASTPATH to
690 		 * SOFTMAC_SLOWPATH, the DL_NOTE_REPLUMB message used for
691 		 * data-path switching would already be queued and will
692 		 * be processed by softmac_wput_single_nondata() later.
693 		 */
694 		if (expected_mode == SOFTMAC_FASTPATH)
695 			(void) softmac_fastpath_setup(sup);
696 		return;
697 	}
698 	case M_PROTO:
699 	case M_PCPROTO:
700 		if (MBLKL(mp) < sizeof (t_uscalar_t)) {
701 			freemsg(mp);
702 			return;
703 		}
704 		prim = ((union DL_primitives *)mp->b_rptr)->dl_primitive;
705 		switch (prim) {
706 		case DL_NOTIFY_IND:
707 			if (MBLKL(mp) < sizeof (dl_notify_ind_t) ||
708 			    ((dl_notify_ind_t *)mp->b_rptr)->dl_notification !=
709 			    DL_NOTE_REPLUMB) {
710 				freemsg(mp);
711 				return;
712 			}
713 			/*
714 			 * This DL_NOTE_REPLUMB message is initiated
715 			 * and queued by the softmac itself, when the
716 			 * sup is trying to switching its datapath mode
717 			 * between SOFTMAC_SLOWPATH and SOFTMAC_FASTPATH.
718 			 * Send this message upstream.
719 			 */
720 			qreply(sup->su_wq, mp);
721 			return;
722 		case DL_NOTIFY_CONF:
723 			if (MBLKL(mp) < sizeof (dl_notify_conf_t) ||
724 			    ((dl_notify_conf_t *)mp->b_rptr)->dl_notification !=
725 			    DL_NOTE_REPLUMB_DONE) {
726 				freemsg(mp);
727 				return;
728 			}
729 			/*
730 			 * This is an indication from IP/ARP that the
731 			 * fastpath->slowpath switch is done.
732 			 */
733 			freemsg(mp);
734 			softmac_datapath_switch_done(sup);
735 			return;
736 		}
737 		break;
738 	}
739 
740 	/*
741 	 * No need to hold lock to check su_mode, since su_mode updating only
742 	 * operation is is serialized by softmac_wput_nondata_task().
743 	 */
744 	if (sup->su_mode != SOFTMAC_FASTPATH) {
745 		dld_wput(sup->su_wq, mp);
746 		return;
747 	}
748 
749 	/*
750 	 * Fastpath non-data message processing. Most of non-data messages
751 	 * can be directly passed down to the dedicated-lower-stream, aside
752 	 * from the following M_PROTO/M_PCPROTO messages.
753 	 */
754 	switch (dbtype) {
755 	case M_PROTO:
756 	case M_PCPROTO:
757 		switch (prim) {
758 		case DL_BIND_REQ:
759 			softmac_bind_req(sup, mp);
760 			break;
761 		case DL_UNBIND_REQ:
762 			softmac_unbind_req(sup, mp);
763 			break;
764 		case DL_CAPABILITY_REQ:
765 			softmac_capability_req(sup, mp);
766 			break;
767 		default:
768 			putnext(slp->sl_wq, mp);
769 			break;
770 		}
771 		break;
772 	default:
773 		putnext(slp->sl_wq, mp);
774 		break;
775 	}
776 }
777 
778 /*
779  * The worker thread which processes non-data messages. Note we only process
780  * one message at one time in order to be able to "flush" the queued message
781  * and serialize the processing.
782  */
783 static void
784 softmac_wput_nondata_task(void *arg)
785 {
786 	softmac_upper_t	*sup = arg;
787 	mblk_t		*mp;
788 
789 	mutex_enter(&sup->su_disp_mutex);
790 
791 	while (sup->su_pending_head != NULL) {
792 		if (sup->su_closing)
793 			break;
794 
795 		SOFTMAC_DQ_PENDING(sup, &mp);
796 		mutex_exit(&sup->su_disp_mutex);
797 		softmac_wput_single_nondata(sup, mp);
798 		mutex_enter(&sup->su_disp_mutex);
799 	}
800 
801 	/*
802 	 * If the stream is closing, flush all queued messages and inform
803 	 * the stream to be closed.
804 	 */
805 	freemsgchain(sup->su_pending_head);
806 	sup->su_pending_head = sup->su_pending_tail = NULL;
807 	sup->su_dlpi_pending = B_FALSE;
808 	cv_signal(&sup->su_disp_cv);
809 	mutex_exit(&sup->su_disp_mutex);
810 }
811 
812 /*
813  * Kernel thread to handle taskq dispatch failures in softmac_wput_nondata().
814  * This thread is started when the softmac module is first loaded.
815  */
816 static void
817 softmac_taskq_dispatch(void)
818 {
819 	callb_cpr_t	cprinfo;
820 	softmac_upper_t	*sup;
821 
822 	CALLB_CPR_INIT(&cprinfo, &softmac_taskq_lock, callb_generic_cpr,
823 	    "softmac_taskq_dispatch");
824 	mutex_enter(&softmac_taskq_lock);
825 
826 	while (!softmac_taskq_quit) {
827 		sup = list_head(&softmac_taskq_list);
828 		while (sup != NULL) {
829 			list_remove(&softmac_taskq_list, sup);
830 			sup->su_taskq_scheduled = B_FALSE;
831 			mutex_exit(&softmac_taskq_lock);
832 			VERIFY(taskq_dispatch(system_taskq,
833 			    softmac_wput_nondata_task, sup, TQ_SLEEP) != NULL);
834 			mutex_enter(&softmac_taskq_lock);
835 			sup = list_head(&softmac_taskq_list);
836 		}
837 
838 		CALLB_CPR_SAFE_BEGIN(&cprinfo);
839 		cv_wait(&softmac_taskq_cv, &softmac_taskq_lock);
840 		CALLB_CPR_SAFE_END(&cprinfo, &softmac_taskq_lock);
841 	}
842 
843 	softmac_taskq_done = B_TRUE;
844 	cv_signal(&softmac_taskq_cv);
845 	CALLB_CPR_EXIT(&cprinfo);
846 	thread_exit();
847 }
848 
849 void
850 softmac_wput_nondata(softmac_upper_t *sup, mblk_t *mp)
851 {
852 	/*
853 	 * The processing of the message might block. Enqueue the
854 	 * message for later processing.
855 	 */
856 	mutex_enter(&sup->su_disp_mutex);
857 
858 	if (sup->su_closing) {
859 		mutex_exit(&sup->su_disp_mutex);
860 		freemsg(mp);
861 		return;
862 	}
863 
864 	SOFTMAC_EQ_PENDING(sup, mp);
865 
866 	if (sup->su_dlpi_pending) {
867 		mutex_exit(&sup->su_disp_mutex);
868 		return;
869 	}
870 	sup->su_dlpi_pending = B_TRUE;
871 	mutex_exit(&sup->su_disp_mutex);
872 
873 	if (taskq_dispatch(system_taskq, softmac_wput_nondata_task,
874 	    sup, TQ_NOSLEEP) != NULL) {
875 		return;
876 	}
877 
878 	mutex_enter(&softmac_taskq_lock);
879 	if (!sup->su_taskq_scheduled) {
880 		list_insert_tail(&softmac_taskq_list, sup);
881 		cv_signal(&softmac_taskq_cv);
882 	}
883 	sup->su_taskq_scheduled = B_TRUE;
884 	mutex_exit(&softmac_taskq_lock);
885 }
886 
887 /*
888  * Setup the dedicated-lower-stream (fast-path) for the IP/ARP upperstream.
889  */
890 static int
891 softmac_fastpath_setup(softmac_upper_t *sup)
892 {
893 	softmac_t	*softmac = sup->su_softmac;
894 	softmac_lower_t	*slp;
895 	int		err;
896 
897 	err = softmac_lower_setup(softmac, sup, &slp);
898 
899 	mutex_enter(&sup->su_mutex);
900 	/*
901 	 * Wait for all data messages to be processed so that we can change
902 	 * the su_mode.
903 	 */
904 	while (sup->su_tx_inprocess != 0)
905 		cv_wait(&sup->su_cv, &sup->su_mutex);
906 
907 	ASSERT(sup->su_mode != SOFTMAC_FASTPATH);
908 	ASSERT(sup->su_slp == NULL);
909 	if (err != 0) {
910 		sup->su_mode = SOFTMAC_SLOWPATH;
911 	} else {
912 		sup->su_slp = slp;
913 		sup->su_mode = SOFTMAC_FASTPATH;
914 	}
915 	mutex_exit(&sup->su_mutex);
916 	return (err);
917 }
918 
919 /*
920  * Tear down the dedicated-lower-stream (fast-path) for the IP/ARP upperstream.
921  */
922 static void
923 softmac_fastpath_tear(softmac_upper_t *sup)
924 {
925 	mutex_enter(&sup->su_mutex);
926 	/*
927 	 * Wait for all data messages in the dedicated-lower-stream
928 	 * to be processed.
929 	 */
930 	while (sup->su_tx_inprocess != 0)
931 		cv_wait(&sup->su_cv, &sup->su_mutex);
932 
933 	if (sup->su_tx_busy) {
934 		ASSERT(sup->su_tx_flow_mp == NULL);
935 		sup->su_tx_flow_mp = getq(sup->su_wq);
936 		sup->su_tx_busy = B_FALSE;
937 	}
938 
939 	sup->su_mode = SOFTMAC_SLOWPATH;
940 
941 	/*
942 	 * Destroy the dedicated-lower-stream. Note that slp is destroyed
943 	 * when lh is closed.
944 	 */
945 	(void) ldi_close(sup->su_slp->sl_lh, FREAD|FWRITE, kcred);
946 	sup->su_slp = NULL;
947 	mutex_exit(&sup->su_mutex);
948 }
949 
950 void
951 softmac_wput_data(softmac_upper_t *sup, mblk_t *mp)
952 {
953 	/*
954 	 * No lock is required to access the su_mode field since the data
955 	 * traffic is quiesce by IP when the data-path mode is in the
956 	 * process of switching.
957 	 */
958 	if (sup->su_mode != SOFTMAC_FASTPATH)
959 		dld_wput(sup->su_wq, mp);
960 	else
961 		(void) softmac_fastpath_wput_data(sup, mp, NULL, 0);
962 }
963 
964 /*ARGSUSED*/
965 static mac_tx_cookie_t
966 softmac_fastpath_wput_data(softmac_upper_t *sup, mblk_t *mp, uintptr_t f_hint,
967     uint16_t flag)
968 {
969 	queue_t		*wq = sup->su_slp->sl_wq;
970 
971 	/*
972 	 * This function is called from IP, only the MAC_DROP_ON_NO_DESC
973 	 * flag can be specified.
974 	 */
975 	ASSERT((flag & ~MAC_DROP_ON_NO_DESC) == 0);
976 	ASSERT(mp->b_next == NULL);
977 
978 	/*
979 	 * Check wether the dedicated-lower-stream is able to handle more
980 	 * messages, and enable the flow-control if it is not.
981 	 *
982 	 * Note that in order not to introduce any packet reordering, we
983 	 * always send the message down to the dedicated-lower-stream:
984 	 *
985 	 * If the flow-control is already enabled, but we still get
986 	 * the messages from the upper-stream, it means that the upper
987 	 * stream does not respect STREAMS flow-control (e.g., TCP). Simply
988 	 * pass the message down to the lower-stream in that case.
989 	 */
990 	if (SOFTMAC_CANPUTNEXT(wq)) {
991 		putnext(wq, mp);
992 		return (NULL);
993 	}
994 
995 	if ((flag & MAC_DROP_ON_NO_DESC) != 0) {
996 		freemsg(mp);
997 		return ((mac_tx_cookie_t)wq);
998 	}
999 
1000 	if (sup->su_tx_busy) {
1001 		putnext(wq, mp);
1002 		return ((mac_tx_cookie_t)wq);
1003 	}
1004 
1005 	mutex_enter(&sup->su_mutex);
1006 	if (!sup->su_tx_busy) {
1007 		ASSERT(sup->su_tx_flow_mp != NULL);
1008 		(void) putq(sup->su_wq, sup->su_tx_flow_mp);
1009 		sup->su_tx_flow_mp = NULL;
1010 		sup->su_tx_busy = B_TRUE;
1011 		qenable(wq);
1012 	}
1013 	mutex_exit(&sup->su_mutex);
1014 	putnext(wq, mp);
1015 	return ((mac_tx_cookie_t)wq);
1016 }
1017 
1018 boolean_t
1019 softmac_active_set(void *arg)
1020 {
1021 	softmac_t	*softmac = arg;
1022 
1023 	mutex_enter(&softmac->smac_active_mutex);
1024 	if (softmac->smac_nactive != 0) {
1025 		mutex_exit(&softmac->smac_active_mutex);
1026 		return (B_FALSE);
1027 	}
1028 	softmac->smac_active = B_TRUE;
1029 	mutex_exit(&softmac->smac_active_mutex);
1030 	return (B_TRUE);
1031 }
1032 
1033 void
1034 softmac_active_clear(void *arg)
1035 {
1036 	softmac_t	*softmac = arg;
1037 
1038 	mutex_enter(&softmac->smac_active_mutex);
1039 	ASSERT(softmac->smac_active && (softmac->smac_nactive == 0));
1040 	softmac->smac_active = B_FALSE;
1041 	mutex_exit(&softmac->smac_active_mutex);
1042 }
1043 
1044 /*
1045  * Disable/reenable fastpath on given softmac. This request could come from a
1046  * MAC client or directly from administrators.
1047  */
1048 int
1049 softmac_datapath_switch(softmac_t *softmac, boolean_t disable, boolean_t admin)
1050 {
1051 	softmac_upper_t		*sup;
1052 	mblk_t			*head = NULL, *tail = NULL, *mp;
1053 	list_t			reqlist;
1054 	softmac_switch_req_t	*req;
1055 	uint32_t		current_mode, expected_mode;
1056 	int			err = 0;
1057 
1058 	mutex_enter(&softmac->smac_fp_mutex);
1059 
1060 	current_mode = DATAPATH_MODE(softmac);
1061 	if (admin) {
1062 		if (softmac->smac_fastpath_admin_disabled == disable) {
1063 			mutex_exit(&softmac->smac_fp_mutex);
1064 			return (0);
1065 		}
1066 		softmac->smac_fastpath_admin_disabled = disable;
1067 	} else if (disable) {
1068 		softmac->smac_fp_disable_clients++;
1069 	} else {
1070 		ASSERT(softmac->smac_fp_disable_clients != 0);
1071 		softmac->smac_fp_disable_clients--;
1072 	}
1073 
1074 	expected_mode = DATAPATH_MODE(softmac);
1075 	if (current_mode == expected_mode) {
1076 		mutex_exit(&softmac->smac_fp_mutex);
1077 		return (0);
1078 	}
1079 
1080 	/*
1081 	 * The expected mode is different from whatever datapath mode
1082 	 * this softmac is expected from last request, enqueue the data-path
1083 	 * switch request.
1084 	 */
1085 	list_create(&reqlist, sizeof (softmac_switch_req_t),
1086 	    offsetof(softmac_switch_req_t, ssq_req_list_node));
1087 
1088 	/*
1089 	 * Allocate all DL_NOTIFY_IND messages and request structures that
1090 	 * are required to switch each IP/ARP stream to the expected mode.
1091 	 */
1092 	for (sup = list_head(&softmac->smac_sup_list); sup != NULL;
1093 	    sup = list_next(&softmac->smac_sup_list, sup)) {
1094 		dl_notify_ind_t	*dlip;
1095 
1096 		req = kmem_alloc(sizeof (softmac_switch_req_t), KM_NOSLEEP);
1097 		if (req == NULL)
1098 			break;
1099 
1100 		req->ssq_expected_mode = expected_mode;
1101 
1102 		/*
1103 		 * Allocate the DL_NOTE_REPLUMB message.
1104 		 */
1105 		if ((mp = allocb(sizeof (dl_notify_ind_t), BPRI_LO)) == NULL) {
1106 			kmem_free(req, sizeof (softmac_switch_req_t));
1107 			break;
1108 		}
1109 
1110 		list_insert_tail(&reqlist, req);
1111 
1112 		mp->b_wptr = mp->b_rptr + sizeof (dl_notify_ind_t);
1113 		mp->b_datap->db_type = M_PROTO;
1114 		bzero(mp->b_rptr, sizeof (dl_notify_ind_t));
1115 		dlip = (dl_notify_ind_t *)mp->b_rptr;
1116 		dlip->dl_primitive = DL_NOTIFY_IND;
1117 		dlip->dl_notification = DL_NOTE_REPLUMB;
1118 		if (head == NULL) {
1119 			head = tail = mp;
1120 		} else {
1121 			tail->b_next = mp;
1122 			tail = mp;
1123 		}
1124 	}
1125 
1126 	/*
1127 	 * Note that it is fine if the expected data-path mode is fast-path
1128 	 * and some of streams fails to switch. Only return failure if we
1129 	 * are expected to switch to the slow-path.
1130 	 */
1131 	if (sup != NULL && expected_mode == SOFTMAC_SLOWPATH) {
1132 		err = ENOMEM;
1133 		goto fail;
1134 	}
1135 
1136 	/*
1137 	 * Start switching for each IP/ARP stream. The switching operation
1138 	 * will eventually succeed and there is no need to wait for it
1139 	 * to finish.
1140 	 */
1141 	for (sup = list_head(&softmac->smac_sup_list); sup != NULL;
1142 	    sup = list_next(&softmac->smac_sup_list, sup)) {
1143 		mp = head->b_next;
1144 		head->b_next = NULL;
1145 
1146 		/*
1147 		 * Add the swtich request to the requests list of the stream.
1148 		 */
1149 		req = list_head(&reqlist);
1150 		ASSERT(req != NULL);
1151 		list_remove(&reqlist, req);
1152 		list_insert_tail(&sup->su_req_list, req);
1153 		softmac_wput_nondata(sup, head);
1154 		head = mp;
1155 	}
1156 
1157 	mutex_exit(&softmac->smac_fp_mutex);
1158 	ASSERT(list_is_empty(&reqlist));
1159 	list_destroy(&reqlist);
1160 	return (0);
1161 fail:
1162 	if (admin) {
1163 		softmac->smac_fastpath_admin_disabled = !disable;
1164 	} else if (disable) {
1165 		softmac->smac_fp_disable_clients--;
1166 	} else {
1167 		softmac->smac_fp_disable_clients++;
1168 	}
1169 
1170 	mutex_exit(&softmac->smac_fp_mutex);
1171 	while ((req = list_head(&reqlist)) != NULL) {
1172 		list_remove(&reqlist, req);
1173 		kmem_free(req, sizeof (softmac_switch_req_t));
1174 	}
1175 	freemsgchain(head);
1176 	list_destroy(&reqlist);
1177 	return (err);
1178 }
1179 
1180 int
1181 softmac_fastpath_disable(void *arg)
1182 {
1183 	return (softmac_datapath_switch((softmac_t *)arg, B_TRUE, B_FALSE));
1184 }
1185 
1186 void
1187 softmac_fastpath_enable(void *arg)
1188 {
1189 	VERIFY(softmac_datapath_switch((softmac_t *)arg, B_FALSE,
1190 	    B_FALSE) == 0);
1191 }
1192 
1193 void
1194 softmac_upperstream_close(softmac_upper_t *sup)
1195 {
1196 	softmac_t		*softmac = sup->su_softmac;
1197 	softmac_switch_req_t	*req;
1198 
1199 	mutex_enter(&softmac->smac_fp_mutex);
1200 
1201 	if (sup->su_mode == SOFTMAC_FASTPATH)
1202 		softmac_fastpath_tear(sup);
1203 
1204 	if (sup->su_mode != SOFTMAC_UNKNOWN) {
1205 		list_remove(&softmac->smac_sup_list, sup);
1206 		sup->su_mode = SOFTMAC_UNKNOWN;
1207 	}
1208 
1209 	/*
1210 	 * Cleanup all the switch requests queueed on this stream.
1211 	 */
1212 	while ((req = list_head(&sup->su_req_list)) != NULL) {
1213 		list_remove(&sup->su_req_list, req);
1214 		kmem_free(req, sizeof (softmac_switch_req_t));
1215 	}
1216 	mutex_exit(&softmac->smac_fp_mutex);
1217 }
1218 
1219 /*
1220  * Handle the DL_NOTE_REPLUMB_DONE indication from IP/ARP. Change the upper
1221  * stream from the fastpath mode to the slowpath mode.
1222  */
1223 static void
1224 softmac_datapath_switch_done(softmac_upper_t *sup)
1225 {
1226 	softmac_t		*softmac = sup->su_softmac;
1227 	softmac_switch_req_t	*req;
1228 	uint32_t		expected_mode;
1229 
1230 	mutex_enter(&softmac->smac_fp_mutex);
1231 	req = list_head(&sup->su_req_list);
1232 	list_remove(&sup->su_req_list, req);
1233 	expected_mode = req->ssq_expected_mode;
1234 	kmem_free(req, sizeof (softmac_switch_req_t));
1235 
1236 	if (expected_mode == sup->su_mode) {
1237 		mutex_exit(&softmac->smac_fp_mutex);
1238 		return;
1239 	}
1240 
1241 	ASSERT(!sup->su_bound);
1242 	mutex_exit(&softmac->smac_fp_mutex);
1243 
1244 	/*
1245 	 * It is fine if the expected mode is fast-path and we fail
1246 	 * to enable fastpath on this stream.
1247 	 */
1248 	if (expected_mode == SOFTMAC_SLOWPATH)
1249 		softmac_fastpath_tear(sup);
1250 	else
1251 		(void) softmac_fastpath_setup(sup);
1252 }
1253