xref: /illumos-gate/usr/src/uts/common/io/dld/dld_proto.c (revision 00954d7bf18589273e08e0d7fa824c69ce96e70b)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * Data-Link Driver
30  */
31 
32 #include <sys/types.h>
33 #include <sys/debug.h>
34 #include <sys/sysmacros.h>
35 #include <sys/stream.h>
36 #include <sys/ddi.h>
37 #include <sys/sunddi.h>
38 #include <sys/strsun.h>
39 #include <sys/cpuvar.h>
40 #include <sys/dlpi.h>
41 #include <netinet/in.h>
42 #include <sys/sdt.h>
43 #include <sys/strsubr.h>
44 #include <sys/vlan.h>
45 #include <sys/mac.h>
46 #include <sys/dls.h>
47 #include <sys/dld.h>
48 #include <sys/dld_impl.h>
49 #include <sys/dls_soft_ring.h>
50 
51 typedef boolean_t proto_reqfunc_t(dld_str_t *, union DL_primitives *, mblk_t *);
52 
53 static proto_reqfunc_t proto_info_req, proto_attach_req, proto_detach_req,
54     proto_bind_req, proto_unbind_req, proto_promiscon_req, proto_promiscoff_req,
55     proto_enabmulti_req, proto_disabmulti_req, proto_physaddr_req,
56     proto_setphysaddr_req, proto_udqos_req, proto_req, proto_capability_req,
57     proto_notify_req, proto_passive_req;
58 
59 static void proto_poll_disable(dld_str_t *);
60 static boolean_t proto_poll_enable(dld_str_t *, dl_capab_dls_t *);
61 
62 static void proto_soft_ring_disable(dld_str_t *);
63 static boolean_t proto_soft_ring_enable(dld_str_t *, dl_capab_dls_t *);
64 static boolean_t proto_capability_advertise(dld_str_t *, mblk_t *);
65 static void proto_change_soft_ring_fanout(dld_str_t *, int);
66 
/*
 * Evaluates to non-zero when the given DLPI state is one of the attach,
 * detach, bind or unbind "pending" states.  The argument is evaluated
 * more than once, so it must not have side effects.
 */
#define	DL_ACK_PENDING(state) \
	((state) == DL_ATTACH_PENDING || \
	(state) == DL_DETACH_PENDING || \
	(state) == DL_BIND_PENDING || \
	(state) == DL_UNBIND_PENDING)
72 
73 /*
74  * Process a DLPI protocol message.
75  * The primitives DL_BIND_REQ, DL_ENABMULTI_REQ, DL_PROMISCON_REQ,
76  * DL_SET_PHYS_ADDR_REQ put the data link below our dld_str_t into an
77  * 'active' state. The primitive DL_PASSIVE_REQ marks our dld_str_t
78  * as 'passive' and forbids it from being subsequently made 'active'
79  * by the above primitives.
80  */
81 void
82 dld_wput_proto_nondata(dld_str_t *dsp, mblk_t *mp)
83 {
84 	union DL_primitives	*udlp;
85 	t_uscalar_t		prim;
86 
87 	ASSERT(MBLKL(mp) >= sizeof (t_uscalar_t));
88 
89 	udlp = (union DL_primitives *)mp->b_rptr;
90 	prim = udlp->dl_primitive;
91 
92 	switch (prim) {
93 	case DL_INFO_REQ:
94 		(void) proto_info_req(dsp, udlp, mp);
95 		break;
96 	case DL_BIND_REQ:
97 		(void) proto_bind_req(dsp, udlp, mp);
98 		break;
99 	case DL_UNBIND_REQ:
100 		(void) proto_unbind_req(dsp, udlp, mp);
101 		break;
102 	case DL_UDQOS_REQ:
103 		(void) proto_udqos_req(dsp, udlp, mp);
104 		break;
105 	case DL_ATTACH_REQ:
106 		(void) proto_attach_req(dsp, udlp, mp);
107 		break;
108 	case DL_DETACH_REQ:
109 		(void) proto_detach_req(dsp, udlp, mp);
110 		break;
111 	case DL_ENABMULTI_REQ:
112 		(void) proto_enabmulti_req(dsp, udlp, mp);
113 		break;
114 	case DL_DISABMULTI_REQ:
115 		(void) proto_disabmulti_req(dsp, udlp, mp);
116 		break;
117 	case DL_PROMISCON_REQ:
118 		(void) proto_promiscon_req(dsp, udlp, mp);
119 		break;
120 	case DL_PROMISCOFF_REQ:
121 		(void) proto_promiscoff_req(dsp, udlp, mp);
122 		break;
123 	case DL_PHYS_ADDR_REQ:
124 		(void) proto_physaddr_req(dsp, udlp, mp);
125 		break;
126 	case DL_SET_PHYS_ADDR_REQ:
127 		(void) proto_setphysaddr_req(dsp, udlp, mp);
128 		break;
129 	case DL_NOTIFY_REQ:
130 		(void) proto_notify_req(dsp, udlp, mp);
131 		break;
132 	case DL_CAPABILITY_REQ:
133 		(void) proto_capability_req(dsp, udlp, mp);
134 		break;
135 	case DL_PASSIVE_REQ:
136 		(void) proto_passive_req(dsp, udlp, mp);
137 		break;
138 	default:
139 		(void) proto_req(dsp, udlp, mp);
140 		break;
141 	}
142 }
143 
/*
 * Arithmetic negation of x.  The whole expansion is parenthesized so the
 * macro always behaves as a single operand and cannot be absorbed into a
 * neighbouring expression as a binary minus.
 */
#define	NEG(x)	(-(x))
145 
/*
 * Layout of the DL_INFO_ACK reply built by proto_info_req(): the fixed
 * dl_info_ack_t header followed by the items that the header's
 * offset/length fields refer to.
 */
typedef struct dl_info_ack_wrapper {
	dl_info_ack_t		dl_info;
	/* DLSAP address: MAC address followed by a 16-bit SAP */
	uint8_t			dl_addr[MAXMACADDRLEN + sizeof (uint16_t)];
	/* media broadcast address */
	uint8_t			dl_brdcst_addr[MAXMACADDRLEN];
	/* supported QOS range */
	dl_qos_cl_range1_t	dl_qos_range1;
	/* currently selected QOS */
	dl_qos_cl_sel1_t	dl_qos_sel1;
} dl_info_ack_wrapper_t;
153 
154 /*
155  * DL_INFO_REQ
156  */
157 /*ARGSUSED*/
158 static boolean_t
159 proto_info_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp)
160 {
161 	dl_info_ack_wrapper_t	*dlwp;
162 	dl_info_ack_t		*dlp;
163 	dl_qos_cl_sel1_t	*selp;
164 	dl_qos_cl_range1_t	*rangep;
165 	uint8_t			*addr;
166 	uint8_t			*brdcst_addr;
167 	uint_t			addr_length;
168 	uint_t			sap_length;
169 	mac_info_t		minfo;
170 	mac_info_t		*minfop;
171 	queue_t			*q = dsp->ds_wq;
172 
173 	/*
174 	 * Swap the request message for one large enough to contain the
175 	 * wrapper structure defined above.
176 	 */
177 	if ((mp = mexchange(q, mp, sizeof (dl_info_ack_wrapper_t),
178 	    M_PCPROTO, 0)) == NULL)
179 		return (B_FALSE);
180 
181 	rw_enter(&dsp->ds_lock, RW_READER);
182 
183 	bzero(mp->b_rptr, sizeof (dl_info_ack_wrapper_t));
184 	dlwp = (dl_info_ack_wrapper_t *)mp->b_rptr;
185 
186 	dlp = &(dlwp->dl_info);
187 	ASSERT(dlp == (dl_info_ack_t *)mp->b_rptr);
188 
189 	dlp->dl_primitive = DL_INFO_ACK;
190 
191 	/*
192 	 * Set up the sub-structure pointers.
193 	 */
194 	addr = dlwp->dl_addr;
195 	brdcst_addr = dlwp->dl_brdcst_addr;
196 	rangep = &(dlwp->dl_qos_range1);
197 	selp = &(dlwp->dl_qos_sel1);
198 
199 	/*
200 	 * This driver supports only version 2 connectionless DLPI provider
201 	 * nodes.
202 	 */
203 	dlp->dl_service_mode = DL_CLDLS;
204 	dlp->dl_version = DL_VERSION_2;
205 
206 	/*
207 	 * Set the style of the provider
208 	 */
209 	dlp->dl_provider_style = dsp->ds_style;
210 	ASSERT(dlp->dl_provider_style == DL_STYLE1 ||
211 	    dlp->dl_provider_style == DL_STYLE2);
212 
213 	/*
214 	 * Set the current DLPI state.
215 	 */
216 	dlp->dl_current_state = dsp->ds_dlstate;
217 
218 	/*
219 	 * Gratuitously set the media type. This is to deal with modules
220 	 * that assume the media type is known prior to DL_ATTACH_REQ
221 	 * being completed.
222 	 */
223 	dlp->dl_mac_type = DL_ETHER;
224 
225 	/*
226 	 * If the stream is not at least attached we try to retrieve the
227 	 * mac_info using mac_info_get()
228 	 */
229 	if (dsp->ds_dlstate == DL_UNATTACHED ||
230 	    dsp->ds_dlstate == DL_ATTACH_PENDING ||
231 	    dsp->ds_dlstate == DL_DETACH_PENDING) {
232 		if (!mac_info_get(ddi_major_to_name(dsp->ds_major), &minfo)) {
233 			/*
234 			 * Cannot find mac_info. giving up.
235 			 */
236 			goto done;
237 		}
238 		minfop = &minfo;
239 	} else {
240 		minfop = (mac_info_t *)dsp->ds_mip;
241 	}
242 
243 	/*
244 	 * Set the media type (properly this time).
245 	 */
246 	if (dsp->ds_native)
247 		dlp->dl_mac_type = minfop->mi_nativemedia;
248 	else
249 		dlp->dl_mac_type = minfop->mi_media;
250 
251 	/*
252 	 * Set the DLSAP length. We only support 16 bit values and they
253 	 * appear after the MAC address portion of DLSAP addresses.
254 	 */
255 	sap_length = sizeof (uint16_t);
256 	dlp->dl_sap_length = NEG(sap_length);
257 
258 	/*
259 	 * Set the minimum and maximum payload sizes.
260 	 */
261 	dlp->dl_min_sdu = minfop->mi_sdu_min;
262 	dlp->dl_max_sdu = minfop->mi_sdu_max;
263 
264 	addr_length = minfop->mi_addr_length;
265 
266 	/*
267 	 * Copy in the media broadcast address.
268 	 */
269 	if (minfop->mi_brdcst_addr != NULL) {
270 		dlp->dl_brdcst_addr_offset =
271 		    (uintptr_t)brdcst_addr - (uintptr_t)dlp;
272 		bcopy(minfop->mi_brdcst_addr, brdcst_addr, addr_length);
273 		dlp->dl_brdcst_addr_length = addr_length;
274 	}
275 
276 	dlp->dl_qos_range_offset = (uintptr_t)rangep - (uintptr_t)dlp;
277 	dlp->dl_qos_range_length = sizeof (dl_qos_cl_range1_t);
278 
279 	rangep->dl_qos_type = DL_QOS_CL_RANGE1;
280 	rangep->dl_trans_delay.dl_target_value = DL_UNKNOWN;
281 	rangep->dl_trans_delay.dl_accept_value = DL_UNKNOWN;
282 	rangep->dl_protection.dl_min = DL_UNKNOWN;
283 	rangep->dl_protection.dl_max = DL_UNKNOWN;
284 	rangep->dl_residual_error = DL_UNKNOWN;
285 
286 	/*
287 	 * Specify the supported range of priorities.
288 	 */
289 	rangep->dl_priority.dl_min = 0;
290 	rangep->dl_priority.dl_max = (1 << VLAN_PRI_SIZE) - 1;
291 
292 	dlp->dl_qos_offset = (uintptr_t)selp - (uintptr_t)dlp;
293 	dlp->dl_qos_length = sizeof (dl_qos_cl_sel1_t);
294 
295 	selp->dl_qos_type = DL_QOS_CL_SEL1;
296 	selp->dl_trans_delay = DL_UNKNOWN;
297 	selp->dl_protection = DL_UNKNOWN;
298 	selp->dl_residual_error = DL_UNKNOWN;
299 
300 	/*
301 	 * Specify the current priority (which can be changed by
302 	 * the DL_UDQOS_REQ primitive).
303 	 */
304 	selp->dl_priority = dsp->ds_pri;
305 
306 	dlp->dl_addr_length = addr_length + sizeof (uint16_t);
307 	if (dsp->ds_dlstate == DL_IDLE) {
308 		/*
309 		 * The stream is bound. Therefore we can formulate a valid
310 		 * DLSAP address.
311 		 */
312 		dlp->dl_addr_offset = (uintptr_t)addr - (uintptr_t)dlp;
313 		if (addr_length > 0)
314 			bcopy(dsp->ds_curr_addr, addr, addr_length);
315 		*(uint16_t *)(addr + addr_length) = dsp->ds_sap;
316 	}
317 
318 done:
319 	ASSERT(IMPLY(dlp->dl_qos_offset != 0, dlp->dl_qos_length != 0));
320 	ASSERT(IMPLY(dlp->dl_qos_range_offset != 0,
321 	    dlp->dl_qos_range_length != 0));
322 	ASSERT(IMPLY(dlp->dl_addr_offset != 0, dlp->dl_addr_length != 0));
323 	ASSERT(IMPLY(dlp->dl_brdcst_addr_offset != 0,
324 	    dlp->dl_brdcst_addr_length != 0));
325 
326 	rw_exit(&dsp->ds_lock);
327 
328 	qreply(q, mp);
329 	return (B_TRUE);
330 }
331 
332 /*
333  * DL_ATTACH_REQ
334  */
335 static boolean_t
336 proto_attach_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp)
337 {
338 	dl_attach_req_t	*dlp = (dl_attach_req_t *)udlp;
339 	int		err = 0;
340 	t_uscalar_t	dl_err;
341 	queue_t		*q = dsp->ds_wq;
342 
343 	rw_enter(&dsp->ds_lock, RW_WRITER);
344 
345 	if (MBLKL(mp) < sizeof (dl_attach_req_t) ||
346 	    dlp->dl_ppa < 0 || dsp->ds_style == DL_STYLE1) {
347 		dl_err = DL_BADPRIM;
348 		goto failed;
349 	}
350 
351 	if (dsp->ds_dlstate != DL_UNATTACHED) {
352 		dl_err = DL_OUTSTATE;
353 		goto failed;
354 	}
355 
356 	dsp->ds_dlstate = DL_ATTACH_PENDING;
357 
358 	err = dld_str_attach(dsp, dlp->dl_ppa);
359 	if (err != 0) {
360 		switch (err) {
361 		case ENOENT:
362 			dl_err = DL_BADPPA;
363 			err = 0;
364 			break;
365 		default:
366 			dl_err = DL_SYSERR;
367 			break;
368 		}
369 		dsp->ds_dlstate = DL_UNATTACHED;
370 		goto failed;
371 	}
372 	ASSERT(dsp->ds_dlstate == DL_UNBOUND);
373 	rw_exit(&dsp->ds_lock);
374 
375 	dlokack(q, mp, DL_ATTACH_REQ);
376 	return (B_TRUE);
377 failed:
378 	rw_exit(&dsp->ds_lock);
379 	dlerrorack(q, mp, DL_ATTACH_REQ, dl_err, (t_uscalar_t)err);
380 	return (B_FALSE);
381 }
382 
383 /*ARGSUSED*/
384 static boolean_t
385 proto_detach_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp)
386 {
387 	queue_t		*q = dsp->ds_wq;
388 	t_uscalar_t	dl_err;
389 
390 	rw_enter(&dsp->ds_lock, RW_WRITER);
391 
392 	if (MBLKL(mp) < sizeof (dl_detach_req_t)) {
393 		dl_err = DL_BADPRIM;
394 		goto failed;
395 	}
396 
397 	if (dsp->ds_dlstate != DL_UNBOUND) {
398 		dl_err = DL_OUTSTATE;
399 		goto failed;
400 	}
401 
402 	if (dsp->ds_style == DL_STYLE1) {
403 		dl_err = DL_BADPRIM;
404 		goto failed;
405 	}
406 
407 	dsp->ds_dlstate = DL_DETACH_PENDING;
408 	dld_str_detach(dsp);
409 
410 	rw_exit(&dsp->ds_lock);
411 	dlokack(dsp->ds_wq, mp, DL_DETACH_REQ);
412 	return (B_TRUE);
413 failed:
414 	rw_exit(&dsp->ds_lock);
415 	dlerrorack(q, mp, DL_DETACH_REQ, dl_err, 0);
416 	return (B_FALSE);
417 }
418 
419 /*
420  * DL_BIND_REQ
421  */
422 static boolean_t
423 proto_bind_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp)
424 {
425 	dl_bind_req_t	*dlp = (dl_bind_req_t *)udlp;
426 	int		err = 0;
427 	uint8_t		dlsap_addr[MAXMACADDRLEN + sizeof (uint16_t)];
428 	uint_t		dlsap_addr_length;
429 	t_uscalar_t	dl_err;
430 	t_scalar_t	sap;
431 	queue_t		*q = dsp->ds_wq;
432 
433 	/*
434 	 * Because control message processing is serialized, we don't need
435 	 * to hold any locks to read any fields of dsp; we only need ds_lock
436 	 * to update the ds_dlstate, ds_sap and ds_passivestate fields.
437 	 */
438 	if (MBLKL(mp) < sizeof (dl_bind_req_t)) {
439 		dl_err = DL_BADPRIM;
440 		goto failed;
441 	}
442 
443 	if (dlp->dl_xidtest_flg != 0) {
444 		dl_err = DL_NOAUTO;
445 		goto failed;
446 	}
447 
448 	if (dlp->dl_service_mode != DL_CLDLS) {
449 		dl_err = DL_UNSUPPORTED;
450 		goto failed;
451 	}
452 
453 	if (dsp->ds_dlstate != DL_UNBOUND) {
454 		dl_err = DL_OUTSTATE;
455 		goto failed;
456 	}
457 
458 	if (dsp->ds_passivestate == DLD_UNINITIALIZED &&
459 	    !dls_active_set(dsp->ds_dc)) {
460 		dl_err = DL_SYSERR;
461 		err = EBUSY;
462 		goto failed;
463 	}
464 
465 	/*
466 	 * Set the receive callback.
467 	 */
468 	dls_rx_set(dsp->ds_dc, (dsp->ds_mode == DLD_RAW) ?
469 	    dld_str_rx_raw : dld_str_rx_unitdata, dsp);
470 
471 	/*
472 	 * Bind the channel such that it can receive packets.
473 	 */
474 	sap = dlp->dl_sap;
475 	err = dls_bind(dsp->ds_dc, sap);
476 	if (err != 0) {
477 		switch (err) {
478 		case EINVAL:
479 			dl_err = DL_BADADDR;
480 			err = 0;
481 			break;
482 		default:
483 			dl_err = DL_SYSERR;
484 			break;
485 		}
486 
487 		if (dsp->ds_passivestate == DLD_UNINITIALIZED)
488 			dls_active_clear(dsp->ds_dc);
489 
490 		goto failed;
491 	}
492 
493 	/*
494 	 * Copy in MAC address.
495 	 */
496 	dlsap_addr_length = dsp->ds_mip->mi_addr_length;
497 	bcopy(dsp->ds_curr_addr, dlsap_addr, dlsap_addr_length);
498 
499 	/*
500 	 * Copy in the SAP.
501 	 */
502 	*(uint16_t *)(dlsap_addr + dlsap_addr_length) = sap;
503 	dlsap_addr_length += sizeof (uint16_t);
504 
505 	rw_enter(&dsp->ds_lock, RW_WRITER);
506 
507 	dsp->ds_dlstate = DL_IDLE;
508 	if (dsp->ds_passivestate == DLD_UNINITIALIZED)
509 		dsp->ds_passivestate = DLD_ACTIVE;
510 	dsp->ds_sap = sap;
511 
512 	if (dsp->ds_mode == DLD_FASTPATH)
513 		dsp->ds_tx = str_mdata_fastpath_put;
514 	else if (dsp->ds_mode == DLD_RAW)
515 		dsp->ds_tx = str_mdata_raw_put;
516 	dsp->ds_unitdata_tx = dld_wput_proto_data;
517 
518 	rw_exit(&dsp->ds_lock);
519 
520 	dlbindack(q, mp, sap, dlsap_addr, dlsap_addr_length, 0, 0);
521 	return (B_TRUE);
522 failed:
523 	dlerrorack(q, mp, DL_BIND_REQ, dl_err, (t_uscalar_t)err);
524 	return (B_FALSE);
525 }
526 
527 /*
528  * DL_UNBIND_REQ
529  */
530 /*ARGSUSED*/
531 static boolean_t
532 proto_unbind_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp)
533 {
534 	queue_t		*q = dsp->ds_wq;
535 	t_uscalar_t	dl_err;
536 
537 	if (MBLKL(mp) < sizeof (dl_unbind_req_t)) {
538 		dl_err = DL_BADPRIM;
539 		goto failed;
540 	}
541 
542 	if (dsp->ds_dlstate != DL_IDLE) {
543 		dl_err = DL_OUTSTATE;
544 		goto failed;
545 	}
546 
547 	/*
548 	 * Flush any remaining packets scheduled for transmission.
549 	 */
550 	dld_tx_flush(dsp);
551 
552 	/*
553 	 * Unbind the channel to stop packets being received.
554 	 */
555 	dls_unbind(dsp->ds_dc);
556 
557 	/*
558 	 * Clear the receive callback.
559 	 */
560 	dls_rx_set(dsp->ds_dc, NULL, NULL);
561 
562 	rw_enter(&dsp->ds_lock, RW_WRITER);
563 
564 	/*
565 	 * Disable polling mode, if it is enabled.
566 	 */
567 	proto_poll_disable(dsp);
568 
569 	/*
570 	 * If soft rings were enabled, the workers should be quiesced.
571 	 */
572 	dls_soft_ring_disable(dsp->ds_dc);
573 
574 	/*
575 	 * Clear LSO flags.
576 	 */
577 	dsp->ds_lso = B_FALSE;
578 	dsp->ds_lso_max = 0;
579 
580 	/*
581 	 * Set the mode back to the default (unitdata).
582 	 */
583 	dsp->ds_mode = DLD_UNITDATA;
584 	dsp->ds_dlstate = DL_UNBOUND;
585 	DLD_TX_QUIESCE(dsp);
586 	rw_exit(&dsp->ds_lock);
587 
588 	dlokack(q, mp, DL_UNBIND_REQ);
589 
590 	return (B_TRUE);
591 failed:
592 	dlerrorack(q, mp, DL_UNBIND_REQ, dl_err, 0);
593 	return (B_FALSE);
594 }
595 
596 /*
597  * DL_PROMISCON_REQ
598  */
599 static boolean_t
600 proto_promiscon_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp)
601 {
602 	dl_promiscon_req_t *dlp = (dl_promiscon_req_t *)udlp;
603 	int		err = 0;
604 	t_uscalar_t	dl_err;
605 	uint32_t	promisc;
606 	queue_t		*q = dsp->ds_wq;
607 
608 	/*
609 	 * Because control message processing is serialized, we don't need
610 	 * to hold any locks to read any fields of dsp; we only need ds_lock
611 	 * to update the ds_promisc and ds_passivestate fields.
612 	 */
613 	if (MBLKL(mp) < sizeof (dl_promiscon_req_t)) {
614 		dl_err = DL_BADPRIM;
615 		goto failed;
616 	}
617 
618 	if (dsp->ds_dlstate == DL_UNATTACHED ||
619 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
620 		dl_err = DL_OUTSTATE;
621 		goto failed;
622 	}
623 
624 	switch (dlp->dl_level) {
625 	case DL_PROMISC_SAP:
626 		promisc = DLS_PROMISC_SAP;
627 		break;
628 	case DL_PROMISC_MULTI:
629 		promisc = DLS_PROMISC_MULTI;
630 		break;
631 	case DL_PROMISC_PHYS:
632 		promisc = DLS_PROMISC_PHYS;
633 		break;
634 	default:
635 		dl_err = DL_NOTSUPPORTED;
636 		goto failed;
637 	}
638 
639 	if (dsp->ds_passivestate == DLD_UNINITIALIZED &&
640 	    !dls_active_set(dsp->ds_dc)) {
641 		dl_err = DL_SYSERR;
642 		err = EBUSY;
643 		goto failed;
644 	}
645 
646 	/*
647 	 * Adjust channel promiscuity.
648 	 */
649 	promisc = (dsp->ds_promisc | promisc);
650 	err = dls_promisc(dsp->ds_dc, promisc);
651 	if (err != 0) {
652 		dl_err = DL_SYSERR;
653 		if (dsp->ds_passivestate == DLD_UNINITIALIZED)
654 			dls_active_clear(dsp->ds_dc);
655 		goto failed;
656 	}
657 
658 	rw_enter(&dsp->ds_lock, RW_WRITER);
659 	if (dsp->ds_passivestate == DLD_UNINITIALIZED)
660 		dsp->ds_passivestate = DLD_ACTIVE;
661 	dsp->ds_promisc = promisc;
662 	rw_exit(&dsp->ds_lock);
663 
664 	dlokack(q, mp, DL_PROMISCON_REQ);
665 	return (B_TRUE);
666 failed:
667 	dlerrorack(q, mp, DL_PROMISCON_REQ, dl_err, (t_uscalar_t)err);
668 	return (B_FALSE);
669 }
670 
671 /*
672  * DL_PROMISCOFF_REQ
673  */
674 static boolean_t
675 proto_promiscoff_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp)
676 {
677 	dl_promiscoff_req_t *dlp = (dl_promiscoff_req_t *)udlp;
678 	int		err = 0;
679 	t_uscalar_t	dl_err;
680 	uint32_t	promisc;
681 	queue_t		*q = dsp->ds_wq;
682 
683 	/*
684 	 * Because control messages processing is serialized, we don't need
685 	 * to hold any lock to read any field of dsp; we hold ds_lock to
686 	 * update the ds_promisc field.
687 	 */
688 	if (MBLKL(mp) < sizeof (dl_promiscoff_req_t)) {
689 		dl_err = DL_BADPRIM;
690 		goto failed;
691 	}
692 
693 	if (dsp->ds_dlstate == DL_UNATTACHED ||
694 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
695 		dl_err = DL_OUTSTATE;
696 		goto failed;
697 	}
698 
699 	switch (dlp->dl_level) {
700 	case DL_PROMISC_SAP:
701 		promisc = DLS_PROMISC_SAP;
702 		break;
703 	case DL_PROMISC_MULTI:
704 		promisc = DLS_PROMISC_MULTI;
705 		break;
706 	case DL_PROMISC_PHYS:
707 		promisc = DLS_PROMISC_PHYS;
708 		break;
709 	default:
710 		dl_err = DL_NOTSUPPORTED;
711 		goto failed;
712 	}
713 
714 	if (!(dsp->ds_promisc & promisc)) {
715 		dl_err = DL_NOTENAB;
716 		goto failed;
717 	}
718 
719 	promisc = (dsp->ds_promisc & ~promisc);
720 	err = dls_promisc(dsp->ds_dc, promisc);
721 	if (err != 0) {
722 		dl_err = DL_SYSERR;
723 		goto failed;
724 	}
725 
726 	rw_enter(&dsp->ds_lock, RW_WRITER);
727 	dsp->ds_promisc = promisc;
728 	rw_exit(&dsp->ds_lock);
729 
730 	dlokack(q, mp, DL_PROMISCOFF_REQ);
731 	return (B_TRUE);
732 failed:
733 	dlerrorack(q, mp, DL_PROMISCOFF_REQ, dl_err, (t_uscalar_t)err);
734 	return (B_FALSE);
735 }
736 
737 /*
738  * DL_ENABMULTI_REQ
739  */
740 static boolean_t
741 proto_enabmulti_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp)
742 {
743 	dl_enabmulti_req_t *dlp = (dl_enabmulti_req_t *)udlp;
744 	int		err = 0;
745 	t_uscalar_t	dl_err;
746 	queue_t		*q = dsp->ds_wq;
747 
748 	/*
749 	 * Because control messages processing is serialized, we don't need
750 	 * to hold any lock to read any field of dsp; we hold ds_lock to
751 	 * update the ds_passivestate field.
752 	 */
753 	if (dsp->ds_dlstate == DL_UNATTACHED ||
754 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
755 		dl_err = DL_OUTSTATE;
756 		goto failed;
757 	}
758 
759 	if (MBLKL(mp) < sizeof (dl_enabmulti_req_t) ||
760 	    !MBLKIN(mp, dlp->dl_addr_offset, dlp->dl_addr_length) ||
761 	    dlp->dl_addr_length != dsp->ds_mip->mi_addr_length) {
762 		dl_err = DL_BADPRIM;
763 		goto failed;
764 	}
765 
766 	if (dsp->ds_passivestate == DLD_UNINITIALIZED &&
767 	    !dls_active_set(dsp->ds_dc)) {
768 		dl_err = DL_SYSERR;
769 		err = EBUSY;
770 		goto failed;
771 	}
772 
773 	err = dls_multicst_add(dsp->ds_dc, mp->b_rptr + dlp->dl_addr_offset);
774 	if (err != 0) {
775 		switch (err) {
776 		case EINVAL:
777 			dl_err = DL_BADADDR;
778 			err = 0;
779 			break;
780 		case ENOSPC:
781 			dl_err = DL_TOOMANY;
782 			err = 0;
783 			break;
784 		default:
785 			dl_err = DL_SYSERR;
786 			break;
787 		}
788 
789 		if (dsp->ds_passivestate == DLD_UNINITIALIZED)
790 			dls_active_clear(dsp->ds_dc);
791 
792 		goto failed;
793 	}
794 
795 	rw_enter(&dsp->ds_lock, RW_WRITER);
796 	if (dsp->ds_passivestate == DLD_UNINITIALIZED)
797 		dsp->ds_passivestate = DLD_ACTIVE;
798 	rw_exit(&dsp->ds_lock);
799 
800 	dlokack(q, mp, DL_ENABMULTI_REQ);
801 	return (B_TRUE);
802 failed:
803 	dlerrorack(q, mp, DL_ENABMULTI_REQ, dl_err, (t_uscalar_t)err);
804 	return (B_FALSE);
805 }
806 
807 /*
808  * DL_DISABMULTI_REQ
809  */
810 static boolean_t
811 proto_disabmulti_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp)
812 {
813 	dl_disabmulti_req_t *dlp = (dl_disabmulti_req_t *)udlp;
814 	int		err = 0;
815 	t_uscalar_t	dl_err;
816 	queue_t		*q = dsp->ds_wq;
817 
818 	/*
819 	 * Because control messages processing is serialized, we don't need
820 	 * to hold any lock to read any field of dsp.
821 	 */
822 	if (dsp->ds_dlstate == DL_UNATTACHED ||
823 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
824 		dl_err = DL_OUTSTATE;
825 		goto failed;
826 	}
827 
828 	if (MBLKL(mp) < sizeof (dl_disabmulti_req_t) ||
829 	    !MBLKIN(mp, dlp->dl_addr_offset, dlp->dl_addr_length) ||
830 	    dlp->dl_addr_length != dsp->ds_mip->mi_addr_length) {
831 		dl_err = DL_BADPRIM;
832 		goto failed;
833 	}
834 
835 	err = dls_multicst_remove(dsp->ds_dc, mp->b_rptr + dlp->dl_addr_offset);
836 	if (err != 0) {
837 		switch (err) {
838 		case EINVAL:
839 			dl_err = DL_BADADDR;
840 			err = 0;
841 			break;
842 		case ENOENT:
843 			dl_err = DL_NOTENAB;
844 			err = 0;
845 			break;
846 		default:
847 			dl_err = DL_SYSERR;
848 			break;
849 		}
850 		goto failed;
851 	}
852 
853 	dlokack(q, mp, DL_DISABMULTI_REQ);
854 	return (B_TRUE);
855 failed:
856 	dlerrorack(q, mp, DL_DISABMULTI_REQ, dl_err, (t_uscalar_t)err);
857 	return (B_FALSE);
858 }
859 
860 /*
861  * DL_PHYS_ADDR_REQ
862  */
863 static boolean_t
864 proto_physaddr_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp)
865 {
866 	dl_phys_addr_req_t *dlp = (dl_phys_addr_req_t *)udlp;
867 	queue_t		*q = dsp->ds_wq;
868 	t_uscalar_t	dl_err;
869 	char		*addr;
870 	uint_t		addr_length;
871 
872 	rw_enter(&dsp->ds_lock, RW_READER);
873 
874 	if (MBLKL(mp) < sizeof (dl_phys_addr_req_t)) {
875 		dl_err = DL_BADPRIM;
876 		goto failed;
877 	}
878 
879 	if (dsp->ds_dlstate == DL_UNATTACHED ||
880 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
881 		dl_err = DL_OUTSTATE;
882 		goto failed;
883 	}
884 
885 	if (dlp->dl_addr_type != DL_CURR_PHYS_ADDR &&
886 	    dlp->dl_addr_type != DL_FACT_PHYS_ADDR) {
887 		dl_err = DL_UNSUPPORTED;
888 		goto failed;
889 	}
890 
891 	addr_length = dsp->ds_mip->mi_addr_length;
892 	addr = kmem_alloc(addr_length, KM_NOSLEEP);
893 	if (addr == NULL) {
894 		rw_exit(&dsp->ds_lock);
895 		merror(q, mp, ENOSR);
896 		return (B_FALSE);
897 	}
898 
899 	/*
900 	 * Copy out the address before we drop the lock; we don't
901 	 * want to call dlphysaddrack() while holding ds_lock.
902 	 */
903 	bcopy((dlp->dl_addr_type == DL_CURR_PHYS_ADDR) ?
904 	    dsp->ds_curr_addr : dsp->ds_fact_addr, addr, addr_length);
905 
906 	rw_exit(&dsp->ds_lock);
907 	dlphysaddrack(q, mp, addr, (t_uscalar_t)addr_length);
908 	kmem_free(addr, addr_length);
909 	return (B_TRUE);
910 failed:
911 	rw_exit(&dsp->ds_lock);
912 	dlerrorack(q, mp, DL_PHYS_ADDR_REQ, dl_err, 0);
913 	return (B_FALSE);
914 }
915 
916 /*
917  * DL_SET_PHYS_ADDR_REQ
918  */
919 static boolean_t
920 proto_setphysaddr_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp)
921 {
922 	dl_set_phys_addr_req_t *dlp = (dl_set_phys_addr_req_t *)udlp;
923 	int		err = 0;
924 	t_uscalar_t	dl_err;
925 	queue_t		*q = dsp->ds_wq;
926 
927 	/*
928 	 * Because control message processing is serialized, we don't need
929 	 * to hold any locks to read any fields of dsp; we only need ds_lock
930 	 * to update the ds_passivestate field.
931 	 */
932 	if (dsp->ds_dlstate == DL_UNATTACHED ||
933 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
934 		dl_err = DL_OUTSTATE;
935 		goto failed;
936 	}
937 
938 	if (MBLKL(mp) < sizeof (dl_set_phys_addr_req_t) ||
939 	    !MBLKIN(mp, dlp->dl_addr_offset, dlp->dl_addr_length) ||
940 	    dlp->dl_addr_length != dsp->ds_mip->mi_addr_length) {
941 		dl_err = DL_BADPRIM;
942 		goto failed;
943 	}
944 
945 	if (dsp->ds_passivestate == DLD_UNINITIALIZED &&
946 	    !dls_active_set(dsp->ds_dc)) {
947 		dl_err = DL_SYSERR;
948 		err = EBUSY;
949 		goto failed;
950 	}
951 
952 	err = mac_unicst_set(dsp->ds_mh, mp->b_rptr + dlp->dl_addr_offset);
953 	if (err != 0) {
954 		switch (err) {
955 		case EINVAL:
956 			dl_err = DL_BADADDR;
957 			err = 0;
958 			break;
959 
960 		default:
961 			dl_err = DL_SYSERR;
962 			break;
963 		}
964 
965 		if (dsp->ds_passivestate == DLD_UNINITIALIZED)
966 			dls_active_clear(dsp->ds_dc);
967 
968 		goto failed;
969 	}
970 
971 	rw_enter(&dsp->ds_lock, RW_WRITER);
972 	if (dsp->ds_passivestate == DLD_UNINITIALIZED)
973 		dsp->ds_passivestate = DLD_ACTIVE;
974 	rw_exit(&dsp->ds_lock);
975 
976 	dlokack(q, mp, DL_SET_PHYS_ADDR_REQ);
977 	return (B_TRUE);
978 failed:
979 	dlerrorack(q, mp, DL_SET_PHYS_ADDR_REQ, dl_err, (t_uscalar_t)err);
980 	return (B_FALSE);
981 }
982 
983 /*
984  * DL_UDQOS_REQ
985  */
986 static boolean_t
987 proto_udqos_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp)
988 {
989 	dl_udqos_req_t *dlp = (dl_udqos_req_t *)udlp;
990 	dl_qos_cl_sel1_t *selp;
991 	int		off, len;
992 	t_uscalar_t	dl_err;
993 	queue_t		*q = dsp->ds_wq;
994 
995 	off = dlp->dl_qos_offset;
996 	len = dlp->dl_qos_length;
997 
998 	if (MBLKL(mp) < sizeof (dl_udqos_req_t) || !MBLKIN(mp, off, len)) {
999 		dl_err = DL_BADPRIM;
1000 		goto failed;
1001 	}
1002 
1003 	selp = (dl_qos_cl_sel1_t *)(mp->b_rptr + off);
1004 	if (selp->dl_qos_type != DL_QOS_CL_SEL1) {
1005 		dl_err = DL_BADQOSTYPE;
1006 		goto failed;
1007 	}
1008 
1009 	if (selp->dl_priority > (1 << VLAN_PRI_SIZE) - 1 ||
1010 	    selp->dl_priority < 0) {
1011 		dl_err = DL_BADQOSPARAM;
1012 		goto failed;
1013 	}
1014 
1015 	if (dsp->ds_dlstate == DL_UNATTACHED ||
1016 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
1017 		dl_err = DL_OUTSTATE;
1018 		goto failed;
1019 	}
1020 
1021 	rw_enter(&dsp->ds_lock, RW_WRITER);
1022 	dsp->ds_pri = selp->dl_priority;
1023 	rw_exit(&dsp->ds_lock);
1024 
1025 	dlokack(q, mp, DL_UDQOS_REQ);
1026 	return (B_TRUE);
1027 failed:
1028 	dlerrorack(q, mp, DL_UDQOS_REQ, dl_err, 0);
1029 	return (B_FALSE);
1030 }
1031 
1032 static boolean_t
1033 check_ip_above(queue_t *q)
1034 {
1035 	queue_t		*next_q;
1036 	boolean_t	ret = B_TRUE;
1037 
1038 	claimstr(q);
1039 	next_q = q->q_next;
1040 	if (strcmp(next_q->q_qinfo->qi_minfo->mi_idname, "ip") != 0)
1041 		ret = B_FALSE;
1042 	releasestr(q);
1043 	return (ret);
1044 }
1045 
1046 /*
1047  * DL_CAPABILITY_REQ
1048  */
1049 /*ARGSUSED*/
1050 static boolean_t
1051 proto_capability_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp)
1052 {
1053 	dl_capability_req_t *dlp = (dl_capability_req_t *)udlp;
1054 	dl_capability_sub_t *sp;
1055 	size_t		size, len;
1056 	offset_t	off, end;
1057 	t_uscalar_t	dl_err;
1058 	queue_t		*q = dsp->ds_wq;
1059 
1060 	rw_enter(&dsp->ds_lock, RW_WRITER);
1061 
1062 	if (MBLKL(mp) < sizeof (dl_capability_req_t)) {
1063 		dl_err = DL_BADPRIM;
1064 		goto failed;
1065 	}
1066 
1067 	if (dsp->ds_dlstate == DL_UNATTACHED ||
1068 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
1069 		dl_err = DL_OUTSTATE;
1070 		goto failed;
1071 	}
1072 
1073 	/*
1074 	 * This request is overloaded. If there are no requested capabilities
1075 	 * then we just want to acknowledge with all the capabilities we
1076 	 * support. Otherwise we enable the set of capabilities requested.
1077 	 */
1078 	if (dlp->dl_sub_length == 0) {
1079 		/* callee drops lock */
1080 		return (proto_capability_advertise(dsp, mp));
1081 	}
1082 
1083 	if (!MBLKIN(mp, dlp->dl_sub_offset, dlp->dl_sub_length)) {
1084 		dl_err = DL_BADPRIM;
1085 		goto failed;
1086 	}
1087 
1088 	dlp->dl_primitive = DL_CAPABILITY_ACK;
1089 
1090 	off = dlp->dl_sub_offset;
1091 	len = dlp->dl_sub_length;
1092 
1093 	/*
1094 	 * Walk the list of capabilities to be enabled.
1095 	 */
1096 	for (end = off + len; off < end; ) {
1097 		sp = (dl_capability_sub_t *)(mp->b_rptr + off);
1098 		size = sizeof (dl_capability_sub_t) + sp->dl_length;
1099 
1100 		if (off + size > end ||
1101 		    !IS_P2ALIGNED(off, sizeof (uint32_t))) {
1102 			dl_err = DL_BADPRIM;
1103 			goto failed;
1104 		}
1105 
1106 		switch (sp->dl_cap) {
1107 		/*
1108 		 * TCP/IP checksum offload to hardware.
1109 		 */
1110 		case DL_CAPAB_HCKSUM: {
1111 			dl_capab_hcksum_t *hcksump;
1112 			dl_capab_hcksum_t hcksum;
1113 
1114 			hcksump = (dl_capab_hcksum_t *)&sp[1];
1115 			/*
1116 			 * Copy for alignment.
1117 			 */
1118 			bcopy(hcksump, &hcksum, sizeof (dl_capab_hcksum_t));
1119 			dlcapabsetqid(&(hcksum.hcksum_mid), dsp->ds_rq);
1120 			bcopy(&hcksum, hcksump, sizeof (dl_capab_hcksum_t));
1121 			break;
1122 		}
1123 
1124 		/*
1125 		 * Large segment offload. (LSO)
1126 		 */
1127 		case DL_CAPAB_LSO: {
1128 			dl_capab_lso_t *lsop;
1129 			dl_capab_lso_t lso;
1130 
1131 			lsop = (dl_capab_lso_t *)&sp[1];
1132 			/*
1133 			 * Copy for alignment.
1134 			 */
1135 			bcopy(lsop, &lso, sizeof (dl_capab_lso_t));
1136 			dlcapabsetqid(&(lso.lso_mid), dsp->ds_rq);
1137 			bcopy(&lso, lsop, sizeof (dl_capab_lso_t));
1138 			break;
1139 		}
1140 
1141 		/*
1142 		 * IP polling interface.
1143 		 */
1144 		case DL_CAPAB_POLL: {
1145 			dl_capab_dls_t *pollp;
1146 			dl_capab_dls_t	poll;
1147 
1148 			pollp = (dl_capab_dls_t *)&sp[1];
1149 			/*
1150 			 * Copy for alignment.
1151 			 */
1152 			bcopy(pollp, &poll, sizeof (dl_capab_dls_t));
1153 
1154 			switch (poll.dls_flags) {
1155 			default:
1156 				/*FALLTHRU*/
1157 			case POLL_DISABLE:
1158 				proto_poll_disable(dsp);
1159 				break;
1160 
1161 			case POLL_ENABLE:
1162 				ASSERT(!(dld_opt & DLD_OPT_NO_POLL));
1163 
1164 				/*
1165 				 * Make sure polling is disabled.
1166 				 */
1167 				proto_poll_disable(dsp);
1168 
1169 				/*
1170 				 * Note that only IP should enable POLL.
1171 				 */
1172 				if (check_ip_above(dsp->ds_rq) &&
1173 				    proto_poll_enable(dsp, &poll)) {
1174 					bzero(&poll, sizeof (dl_capab_dls_t));
1175 					poll.dls_flags = POLL_ENABLE;
1176 				} else {
1177 					bzero(&poll, sizeof (dl_capab_dls_t));
1178 					poll.dls_flags = POLL_DISABLE;
1179 				}
1180 				break;
1181 			}
1182 
1183 			dlcapabsetqid(&(poll.dls_mid), dsp->ds_rq);
1184 			bcopy(&poll, pollp, sizeof (dl_capab_dls_t));
1185 			break;
1186 		}
1187 		case DL_CAPAB_SOFT_RING: {
1188 			dl_capab_dls_t *soft_ringp;
1189 			dl_capab_dls_t soft_ring;
1190 
1191 			soft_ringp = (dl_capab_dls_t *)&sp[1];
1192 			/*
1193 			 * Copy for alignment.
1194 			 */
1195 			bcopy(soft_ringp, &soft_ring,
1196 			    sizeof (dl_capab_dls_t));
1197 
1198 			switch (soft_ring.dls_flags) {
1199 			default:
1200 				/*FALLTHRU*/
1201 			case SOFT_RING_DISABLE:
1202 				proto_soft_ring_disable(dsp);
1203 				break;
1204 
1205 			case SOFT_RING_ENABLE:
1206 				ASSERT(!(dld_opt & DLD_OPT_NO_SOFTRING));
1207 				/*
1208 				 * Make sure soft_ring is disabled.
1209 				 */
1210 				proto_soft_ring_disable(dsp);
1211 
1212 				/*
1213 				 * Note that only IP can enable soft ring.
1214 				 */
1215 				if (check_ip_above(dsp->ds_rq) &&
1216 				    proto_soft_ring_enable(dsp, &soft_ring)) {
1217 					bzero(&soft_ring,
1218 					    sizeof (dl_capab_dls_t));
1219 					soft_ring.dls_flags = SOFT_RING_ENABLE;
1220 				} else {
1221 					bzero(&soft_ring,
1222 					    sizeof (dl_capab_dls_t));
1223 					soft_ring.dls_flags = SOFT_RING_DISABLE;
1224 				}
1225 				break;
1226 			}
1227 
1228 			dlcapabsetqid(&(soft_ring.dls_mid), dsp->ds_rq);
1229 			bcopy(&soft_ring, soft_ringp,
1230 			    sizeof (dl_capab_dls_t));
1231 			break;
1232 		}
1233 		default:
1234 			break;
1235 		}
1236 
1237 		off += size;
1238 	}
1239 	rw_exit(&dsp->ds_lock);
1240 	qreply(q, mp);
1241 	return (B_TRUE);
1242 failed:
1243 	rw_exit(&dsp->ds_lock);
1244 	dlerrorack(q, mp, DL_CAPABILITY_REQ, dl_err, 0);
1245 	return (B_FALSE);
1246 }
1247 
1248 /*
1249  * DL_NOTIFY_REQ
1250  */
1251 static boolean_t
1252 proto_notify_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp)
1253 {
1254 	dl_notify_req_t	*dlp = (dl_notify_req_t *)udlp;
1255 	t_uscalar_t	dl_err;
1256 	queue_t		*q = dsp->ds_wq;
1257 	uint_t		note =
1258 	    DL_NOTE_PROMISC_ON_PHYS |
1259 	    DL_NOTE_PROMISC_OFF_PHYS |
1260 	    DL_NOTE_PHYS_ADDR |
1261 	    DL_NOTE_LINK_UP |
1262 	    DL_NOTE_LINK_DOWN |
1263 	    DL_NOTE_CAPAB_RENEG |
1264 	    DL_NOTE_SPEED;
1265 
1266 	rw_enter(&dsp->ds_lock, RW_WRITER);
1267 
1268 	if (MBLKL(mp) < sizeof (dl_notify_req_t)) {
1269 		dl_err = DL_BADPRIM;
1270 		goto failed;
1271 	}
1272 
1273 	if (dsp->ds_dlstate == DL_UNATTACHED ||
1274 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
1275 		dl_err = DL_OUTSTATE;
1276 		goto failed;
1277 	}
1278 
1279 	note &= ~(mac_no_notification(dsp->ds_mh));
1280 
1281 	/*
1282 	 * Cache the notifications that are being enabled.
1283 	 */
1284 	dsp->ds_notifications = dlp->dl_notifications & note;
1285 	rw_exit(&dsp->ds_lock);
1286 	/*
1287 	 * The ACK carries all notifications regardless of which set is
1288 	 * being enabled.
1289 	 */
1290 	dlnotifyack(q, mp, note);
1291 
1292 	/*
1293 	 * Solicit DL_NOTIFY_IND messages for each enabled notification.
1294 	 */
1295 	rw_enter(&dsp->ds_lock, RW_READER);
1296 	if (dsp->ds_notifications != 0) {
1297 		rw_exit(&dsp->ds_lock);
1298 		dld_str_notify_ind(dsp);
1299 	} else {
1300 		rw_exit(&dsp->ds_lock);
1301 	}
1302 	return (B_TRUE);
1303 failed:
1304 	rw_exit(&dsp->ds_lock);
1305 	dlerrorack(q, mp, DL_NOTIFY_REQ, dl_err, 0);
1306 	return (B_FALSE);
1307 }
1308 
1309 /*
1310  * DL_UNITDATA_REQ
1311  */
1312 void
1313 dld_wput_proto_data(dld_str_t *dsp, mblk_t *mp)
1314 {
1315 	queue_t			*q = dsp->ds_wq;
1316 	dl_unitdata_req_t	*dlp = (dl_unitdata_req_t *)mp->b_rptr;
1317 	off_t			off;
1318 	size_t			len, size;
1319 	const uint8_t		*addr;
1320 	uint16_t		sap;
1321 	uint_t			addr_length;
1322 	mblk_t			*bp, *payload;
1323 	uint32_t		start, stuff, end, value, flags;
1324 	t_uscalar_t		dl_err;
1325 
1326 	if (MBLKL(mp) < sizeof (dl_unitdata_req_t) || mp->b_cont == NULL) {
1327 		dl_err = DL_BADPRIM;
1328 		goto failed;
1329 	}
1330 
1331 	addr_length = dsp->ds_mip->mi_addr_length;
1332 
1333 	off = dlp->dl_dest_addr_offset;
1334 	len = dlp->dl_dest_addr_length;
1335 
1336 	if (!MBLKIN(mp, off, len) || !IS_P2ALIGNED(off, sizeof (uint16_t))) {
1337 		dl_err = DL_BADPRIM;
1338 		goto failed;
1339 	}
1340 
1341 	if (len != addr_length + sizeof (uint16_t)) {
1342 		dl_err = DL_BADADDR;
1343 		goto failed;
1344 	}
1345 
1346 	addr = mp->b_rptr + off;
1347 	sap = *(uint16_t *)(mp->b_rptr + off + addr_length);
1348 
1349 	/*
1350 	 * Check the length of the packet and the block types.
1351 	 */
1352 	size = 0;
1353 	payload = mp->b_cont;
1354 	for (bp = payload; bp != NULL; bp = bp->b_cont) {
1355 		if (DB_TYPE(bp) != M_DATA)
1356 			goto baddata;
1357 
1358 		size += MBLKL(bp);
1359 	}
1360 
1361 	if (size > dsp->ds_mip->mi_sdu_max)
1362 		goto baddata;
1363 
1364 	/*
1365 	 * Build a packet header.
1366 	 */
1367 	if ((bp = dls_header(dsp->ds_dc, addr, sap, dlp->dl_priority.dl_max,
1368 	    &payload)) == NULL) {
1369 		dl_err = DL_BADADDR;
1370 		goto failed;
1371 	}
1372 
1373 	/*
1374 	 * We no longer need the M_PROTO header, so free it.
1375 	 */
1376 	freeb(mp);
1377 
1378 	/*
1379 	 * Transfer the checksum offload information if it is present.
1380 	 */
1381 	hcksum_retrieve(payload, NULL, NULL, &start, &stuff, &end, &value,
1382 	    &flags);
1383 	(void) hcksum_assoc(bp, NULL, NULL, start, stuff, end, value, flags, 0);
1384 
1385 	/*
1386 	 * Link the payload onto the new header.
1387 	 */
1388 	ASSERT(bp->b_cont == NULL);
1389 	bp->b_cont = payload;
1390 	dld_tx_single(dsp, bp);
1391 	return;
1392 failed:
1393 	dlerrorack(q, mp, DL_UNITDATA_REQ, dl_err, 0);
1394 	return;
1395 
1396 baddata:
1397 	dluderrorind(q, mp, (void *)addr, len, DL_BADDATA, 0);
1398 }
1399 
1400 /*
1401  * DL_PASSIVE_REQ
1402  */
1403 /* ARGSUSED */
1404 static boolean_t
1405 proto_passive_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp)
1406 {
1407 	t_uscalar_t dl_err;
1408 
1409 	/*
1410 	 * READER lock is enough because ds_passivestate can only be changed
1411 	 * as the result of non-data message processing.
1412 	 */
1413 	rw_enter(&dsp->ds_lock, RW_READER);
1414 
1415 	/*
1416 	 * If we've already become active by issuing an active primitive,
1417 	 * then it's too late to try to become passive.
1418 	 */
1419 	if (dsp->ds_passivestate == DLD_ACTIVE) {
1420 		dl_err = DL_OUTSTATE;
1421 		goto failed;
1422 	}
1423 
1424 	if (MBLKL(mp) < sizeof (dl_passive_req_t)) {
1425 		dl_err = DL_BADPRIM;
1426 		goto failed;
1427 	}
1428 
1429 	dsp->ds_passivestate = DLD_PASSIVE;
1430 	rw_exit(&dsp->ds_lock);
1431 	dlokack(dsp->ds_wq, mp, DL_PASSIVE_REQ);
1432 	return (B_TRUE);
1433 failed:
1434 	rw_exit(&dsp->ds_lock);
1435 	dlerrorack(dsp->ds_wq, mp, DL_PASSIVE_REQ, dl_err, 0);
1436 	return (B_FALSE);
1437 }
1438 
1439 /*
1440  * Catch-all handler.
1441  */
1442 static boolean_t
1443 proto_req(dld_str_t *dsp, union DL_primitives *dlp, mblk_t *mp)
1444 {
1445 	dlerrorack(dsp->ds_wq, mp, dlp->dl_primitive, DL_UNSUPPORTED, 0);
1446 	return (B_FALSE);
1447 }
1448 
1449 static void
1450 proto_poll_disable(dld_str_t *dsp)
1451 {
1452 	mac_handle_t	mh;
1453 
1454 	ASSERT(RW_WRITE_HELD(&dsp->ds_lock));
1455 
1456 	if (!dsp->ds_polling)
1457 		return;
1458 
1459 	/*
1460 	 * It should be impossible to enable raw mode if polling is turned on.
1461 	 */
1462 	ASSERT(dsp->ds_mode != DLD_RAW);
1463 
1464 	/*
1465 	 * Reset the resource_add callback.
1466 	 */
1467 	mh = dls_mac(dsp->ds_dc);
1468 	mac_resource_set(mh, NULL, NULL);
1469 	mac_resources(mh);
1470 
1471 	/*
1472 	 * Set receive function back to default.
1473 	 */
1474 	dls_rx_set(dsp->ds_dc, (dsp->ds_mode == DLD_FASTPATH) ?
1475 	    dld_str_rx_fastpath : dld_str_rx_unitdata, dsp);
1476 
1477 	/*
1478 	 * Note that polling is disabled.
1479 	 */
1480 	dsp->ds_polling = B_FALSE;
1481 }
1482 
1483 static boolean_t
1484 proto_poll_enable(dld_str_t *dsp, dl_capab_dls_t *pollp)
1485 {
1486 	mac_handle_t	mh;
1487 
1488 	ASSERT(RW_WRITE_HELD(&dsp->ds_lock));
1489 	ASSERT(!dsp->ds_polling);
1490 
1491 	/*
1492 	 * We cannot enable polling if raw mode
1493 	 * has been enabled.
1494 	 */
1495 	if (dsp->ds_mode == DLD_RAW)
1496 		return (B_FALSE);
1497 
1498 	mh = dls_mac(dsp->ds_dc);
1499 
1500 	/*
1501 	 * Register resources.
1502 	 */
1503 	mac_resource_set(mh, (mac_resource_add_t)pollp->dls_ring_add,
1504 	    (void *)pollp->dls_rx_handle);
1505 
1506 	mac_resources(mh);
1507 
1508 	/*
1509 	 * Set the upstream receive function.
1510 	 */
1511 	dls_rx_set(dsp->ds_dc, (dls_rx_t)pollp->dls_rx,
1512 	    (void *)pollp->dls_rx_handle);
1513 
1514 	/*
1515 	 * Note that polling is enabled. This prevents further DLIOCHDRINFO
1516 	 * ioctls from overwriting the receive function pointer.
1517 	 */
1518 	dsp->ds_polling = B_TRUE;
1519 	return (B_TRUE);
1520 }
1521 
1522 static void
1523 proto_soft_ring_disable(dld_str_t *dsp)
1524 {
1525 	ASSERT(RW_WRITE_HELD(&dsp->ds_lock));
1526 
1527 	if (!dsp->ds_soft_ring)
1528 		return;
1529 
1530 	/*
1531 	 * It should be impossible to enable raw mode if soft_ring is turned on.
1532 	 */
1533 	ASSERT(dsp->ds_mode != DLD_RAW);
1534 	proto_change_soft_ring_fanout(dsp, SOFT_RING_NONE);
1535 	/*
1536 	 * Note that fanout is disabled.
1537 	 */
1538 	dsp->ds_soft_ring = B_FALSE;
1539 }
1540 
1541 static boolean_t
1542 proto_soft_ring_enable(dld_str_t *dsp, dl_capab_dls_t *soft_ringp)
1543 {
1544 	ASSERT(RW_WRITE_HELD(&dsp->ds_lock));
1545 	ASSERT(!dsp->ds_soft_ring);
1546 
1547 	/*
1548 	 * We cannot enable soft_ring if raw mode
1549 	 * has been enabled.
1550 	 */
1551 	if (dsp->ds_mode == DLD_RAW)
1552 		return (B_FALSE);
1553 
1554 	if (dls_soft_ring_enable(dsp->ds_dc, soft_ringp) == B_FALSE)
1555 		return (B_FALSE);
1556 
1557 	dsp->ds_soft_ring = B_TRUE;
1558 	return (B_TRUE);
1559 }
1560 
1561 static void
1562 proto_change_soft_ring_fanout(dld_str_t *dsp, int type)
1563 {
1564 	dls_channel_t	dc = dsp->ds_dc;
1565 
1566 	if (type == SOFT_RING_NONE) {
1567 		dls_rx_set(dc, (dsp->ds_mode == DLD_FASTPATH) ?
1568 		    dld_str_rx_fastpath : dld_str_rx_unitdata, dsp);
1569 	} else if (type != SOFT_RING_NONE) {
1570 		dls_rx_set(dc, (dls_rx_t)dls_soft_ring_fanout, dc);
1571 	}
1572 }
1573 
1574 /*
1575  * DL_CAPABILITY_ACK/DL_ERROR_ACK
1576  */
1577 static boolean_t
1578 proto_capability_advertise(dld_str_t *dsp, mblk_t *mp)
1579 {
1580 	dl_capability_ack_t	*dlap;
1581 	dl_capability_sub_t	*dlsp;
1582 	size_t			subsize;
1583 	dl_capab_dls_t		poll;
1584 	dl_capab_dls_t		soft_ring;
1585 	dl_capab_hcksum_t	hcksum;
1586 	dl_capab_lso_t		lso;
1587 	dl_capab_zerocopy_t	zcopy;
1588 	uint8_t			*ptr;
1589 	queue_t			*q = dsp->ds_wq;
1590 	mblk_t			*mp1;
1591 	boolean_t		is_vlan = (dsp->ds_vid != VLAN_ID_NONE);
1592 	boolean_t		poll_capable = B_FALSE;
1593 	boolean_t		soft_ring_capable = B_FALSE;
1594 	boolean_t		hcksum_capable = B_FALSE;
1595 	boolean_t		zcopy_capable = B_FALSE;
1596 	boolean_t		lso_capable = B_FALSE;
1597 	mac_capab_lso_t		mac_lso;
1598 
1599 	ASSERT(RW_WRITE_HELD(&dsp->ds_lock));
1600 
1601 	/*
1602 	 * Initially assume no capabilities.
1603 	 */
1604 	subsize = 0;
1605 
1606 	/*
1607 	 * Check if soft ring can be enabled on this interface. Note that we
1608 	 * do not enable softring on any legacy drivers, because doing that
1609 	 * would hurt the performance if the legacy driver has its own taskq
1610 	 * implementation. Further, most high-performance legacy drivers do
1611 	 * have their own taskq implementation.
1612 	 *
1613 	 * If advertising DL_CAPAB_SOFT_RING has not been explicitly disabled,
1614 	 * reserve space for that capability.
1615 	 */
1616 	if (!mac_is_legacy(dsp->ds_mh) && !(dld_opt & DLD_OPT_NO_SOFTRING)) {
1617 		soft_ring_capable = B_TRUE;
1618 		subsize += sizeof (dl_capability_sub_t) +
1619 		    sizeof (dl_capab_dls_t);
1620 	}
1621 
1622 	/*
1623 	 * Check if polling can be enabled on this interface.
1624 	 * If advertising DL_CAPAB_POLL has not been explicitly disabled
1625 	 * then reserve space for that capability.
1626 	 */
1627 	if (mac_capab_get(dsp->ds_mh, MAC_CAPAB_POLL, NULL) &&
1628 	    !(dld_opt & DLD_OPT_NO_POLL) && !is_vlan) {
1629 		poll_capable = B_TRUE;
1630 		subsize += sizeof (dl_capability_sub_t) +
1631 		    sizeof (dl_capab_dls_t);
1632 	}
1633 
1634 	/*
1635 	 * Check if checksum offload is supported on this MAC.  Don't
1636 	 * advertise DL_CAPAB_HCKSUM if the underlying MAC is VLAN incapable,
1637 	 * since it might not be able to do the hardware checksum offload
1638 	 * with the correct offset.
1639 	 */
1640 	bzero(&hcksum, sizeof (dl_capab_hcksum_t));
1641 	if ((!is_vlan || (!mac_capab_get(dsp->ds_mh, MAC_CAPAB_NO_NATIVEVLAN,
1642 	    NULL))) && mac_capab_get(dsp->ds_mh, MAC_CAPAB_HCKSUM,
1643 	    &hcksum.hcksum_txflags)) {
1644 		if (hcksum.hcksum_txflags != 0) {
1645 			hcksum_capable = B_TRUE;
1646 			subsize += sizeof (dl_capability_sub_t) +
1647 			    sizeof (dl_capab_hcksum_t);
1648 		}
1649 	}
1650 
1651 	/*
1652 	 * Check if LSO is supported on this MAC, then reserve space for
1653 	 * the DL_CAPAB_LSO capability.
1654 	 */
1655 	if (mac_capab_get(dsp->ds_mh, MAC_CAPAB_LSO, &mac_lso)) {
1656 		lso_capable = B_TRUE;
1657 		subsize += sizeof (dl_capability_sub_t) +
1658 		    sizeof (dl_capab_lso_t);
1659 	}
1660 
1661 	/*
1662 	 * Check if zerocopy is supported on this interface.
1663 	 * If advertising DL_CAPAB_ZEROCOPY has not been explicitly disabled
1664 	 * then reserve space for that capability.
1665 	 */
1666 	if (!mac_capab_get(dsp->ds_mh, MAC_CAPAB_NO_ZCOPY, NULL) &&
1667 	    !(dld_opt & DLD_OPT_NO_ZEROCOPY)) {
1668 		zcopy_capable = B_TRUE;
1669 		subsize += sizeof (dl_capability_sub_t) +
1670 		    sizeof (dl_capab_zerocopy_t);
1671 	}
1672 
1673 	/*
1674 	 * If there are no capabilities to advertise or if we
1675 	 * can't allocate a response, send a DL_ERROR_ACK.
1676 	 */
1677 	if ((mp1 = reallocb(mp,
1678 	    sizeof (dl_capability_ack_t) + subsize, 0)) == NULL) {
1679 		rw_exit(&dsp->ds_lock);
1680 		dlerrorack(q, mp, DL_CAPABILITY_REQ, DL_NOTSUPPORTED, 0);
1681 		return (B_FALSE);
1682 	}
1683 
1684 	mp = mp1;
1685 	DB_TYPE(mp) = M_PROTO;
1686 	mp->b_wptr = mp->b_rptr + sizeof (dl_capability_ack_t) + subsize;
1687 	bzero(mp->b_rptr, MBLKL(mp));
1688 	dlap = (dl_capability_ack_t *)mp->b_rptr;
1689 	dlap->dl_primitive = DL_CAPABILITY_ACK;
1690 	dlap->dl_sub_offset = sizeof (dl_capability_ack_t);
1691 	dlap->dl_sub_length = subsize;
1692 	ptr = (uint8_t *)&dlap[1];
1693 
1694 	/*
1695 	 * IP polling interface.
1696 	 */
1697 	if (poll_capable) {
1698 		/*
1699 		 * Attempt to disable just in case this is a re-negotiation;
1700 		 * READER lock is enough because ds_polling can only be
1701 		 * changed as the result of non-data message processing.
1702 		 */
1703 		proto_poll_disable(dsp);
1704 
1705 		dlsp = (dl_capability_sub_t *)ptr;
1706 
1707 		dlsp->dl_cap = DL_CAPAB_POLL;
1708 		dlsp->dl_length = sizeof (dl_capab_dls_t);
1709 		ptr += sizeof (dl_capability_sub_t);
1710 
1711 		bzero(&poll, sizeof (dl_capab_dls_t));
1712 		poll.dls_version = POLL_VERSION_1;
1713 		poll.dls_flags = POLL_CAPABLE;
1714 		poll.dls_tx_handle = (uintptr_t)dsp;
1715 		poll.dls_tx = (uintptr_t)str_mdata_fastpath_put;
1716 		dlcapabsetqid(&(poll.dls_mid), dsp->ds_rq);
1717 		bcopy(&poll, ptr, sizeof (dl_capab_dls_t));
1718 		ptr += sizeof (dl_capab_dls_t);
1719 	}
1720 
1721 
1722 	if (soft_ring_capable) {
1723 		dlsp = (dl_capability_sub_t *)ptr;
1724 
1725 		dlsp->dl_cap = DL_CAPAB_SOFT_RING;
1726 		dlsp->dl_length = sizeof (dl_capab_dls_t);
1727 		ptr += sizeof (dl_capability_sub_t);
1728 
1729 		bzero(&soft_ring, sizeof (dl_capab_dls_t));
1730 		soft_ring.dls_version = SOFT_RING_VERSION_1;
1731 		soft_ring.dls_flags = SOFT_RING_CAPABLE;
1732 		soft_ring.dls_tx_handle = (uintptr_t)dsp;
1733 		soft_ring.dls_tx = (uintptr_t)str_mdata_fastpath_put;
1734 		soft_ring.dls_ring_change_status =
1735 		    (uintptr_t)proto_change_soft_ring_fanout;
1736 		soft_ring.dls_ring_bind = (uintptr_t)soft_ring_bind;
1737 		soft_ring.dls_ring_unbind = (uintptr_t)soft_ring_unbind;
1738 
1739 		dlcapabsetqid(&(soft_ring.dls_mid), dsp->ds_rq);
1740 		bcopy(&soft_ring, ptr, sizeof (dl_capab_dls_t));
1741 		ptr += sizeof (dl_capab_dls_t);
1742 	}
1743 
1744 	/*
1745 	 * TCP/IP checksum offload.
1746 	 */
1747 	if (hcksum_capable) {
1748 		dlsp = (dl_capability_sub_t *)ptr;
1749 
1750 		dlsp->dl_cap = DL_CAPAB_HCKSUM;
1751 		dlsp->dl_length = sizeof (dl_capab_hcksum_t);
1752 		ptr += sizeof (dl_capability_sub_t);
1753 
1754 		hcksum.hcksum_version = HCKSUM_VERSION_1;
1755 		dlcapabsetqid(&(hcksum.hcksum_mid), dsp->ds_rq);
1756 		bcopy(&hcksum, ptr, sizeof (dl_capab_hcksum_t));
1757 		ptr += sizeof (dl_capab_hcksum_t);
1758 	}
1759 
1760 	/*
1761 	 * Large segment offload. (LSO)
1762 	 */
1763 	if (lso_capable) {
1764 		dlsp = (dl_capability_sub_t *)ptr;
1765 
1766 		dlsp->dl_cap = DL_CAPAB_LSO;
1767 		dlsp->dl_length = sizeof (dl_capab_lso_t);
1768 		ptr += sizeof (dl_capability_sub_t);
1769 
1770 		lso.lso_version = LSO_VERSION_1;
1771 		lso.lso_flags = mac_lso.lso_flags;
1772 		lso.lso_max = mac_lso.lso_basic_tcp_ipv4.lso_max;
1773 
1774 		/* Simply enable LSO with DLD */
1775 		dsp->ds_lso = B_TRUE;
1776 		dsp->ds_lso_max = lso.lso_max;
1777 
1778 		dlcapabsetqid(&(lso.lso_mid), dsp->ds_rq);
1779 		bcopy(&lso, ptr, sizeof (dl_capab_lso_t));
1780 		ptr += sizeof (dl_capab_lso_t);
1781 	} else {
1782 		dsp->ds_lso = B_FALSE;
1783 		dsp->ds_lso_max = 0;
1784 	}
1785 
1786 	/*
1787 	 * Zero copy
1788 	 */
1789 	if (zcopy_capable) {
1790 		dlsp = (dl_capability_sub_t *)ptr;
1791 
1792 		dlsp->dl_cap = DL_CAPAB_ZEROCOPY;
1793 		dlsp->dl_length = sizeof (dl_capab_zerocopy_t);
1794 		ptr += sizeof (dl_capability_sub_t);
1795 
1796 		bzero(&zcopy, sizeof (dl_capab_zerocopy_t));
1797 		zcopy.zerocopy_version = ZEROCOPY_VERSION_1;
1798 		zcopy.zerocopy_flags = DL_CAPAB_VMSAFE_MEM;
1799 
1800 		dlcapabsetqid(&(zcopy.zerocopy_mid), dsp->ds_rq);
1801 		bcopy(&zcopy, ptr, sizeof (dl_capab_zerocopy_t));
1802 		ptr += sizeof (dl_capab_zerocopy_t);
1803 	}
1804 
1805 	ASSERT(ptr == mp->b_rptr + sizeof (dl_capability_ack_t) + subsize);
1806 
1807 	rw_exit(&dsp->ds_lock);
1808 	qreply(q, mp);
1809 	return (B_TRUE);
1810 }
1811 
1812 /*
1813  * Disable any enabled capabilities.
1814  */
1815 void
1816 dld_capabilities_disable(dld_str_t *dsp)
1817 {
1818 	if (dsp->ds_polling)
1819 		proto_poll_disable(dsp);
1820 
1821 	if (dsp->ds_soft_ring)
1822 		proto_soft_ring_disable(dsp);
1823 }
1824