xref: /titanic_41/usr/src/uts/common/io/dld/dld_proto.c (revision 4ebb14b236958cfe1ef4ff3b7a50216d9e51f997)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * Data-Link Driver
30  */
31 
32 #include <sys/types.h>
33 #include <sys/debug.h>
34 #include <sys/sysmacros.h>
35 #include <sys/stream.h>
36 #include <sys/ddi.h>
37 #include <sys/sunddi.h>
38 #include <sys/strsun.h>
39 #include <sys/cpuvar.h>
40 #include <sys/dlpi.h>
41 #include <netinet/in.h>
42 #include <sys/sdt.h>
43 #include <sys/strsubr.h>
44 #include <sys/vlan.h>
45 #include <sys/mac.h>
46 #include <sys/dls.h>
47 #include <sys/dld.h>
48 #include <sys/dld_impl.h>
49 #include <sys/dls_soft_ring.h>
50 
51 typedef boolean_t proto_reqfunc_t(dld_str_t *, union DL_primitives *, mblk_t *);
52 
53 static proto_reqfunc_t proto_info_req, proto_attach_req, proto_detach_req,
54     proto_bind_req, proto_unbind_req, proto_promiscon_req, proto_promiscoff_req,
55     proto_enabmulti_req, proto_disabmulti_req, proto_physaddr_req,
56     proto_setphysaddr_req, proto_udqos_req, proto_req, proto_capability_req,
57     proto_notify_req, proto_unitdata_req, proto_passive_req;
58 
59 static void proto_poll_disable(dld_str_t *);
60 static boolean_t proto_poll_enable(dld_str_t *, dl_capab_dls_t *);
61 static boolean_t proto_capability_advertise(dld_str_t *, mblk_t *);
62 
63 static task_func_t proto_process_unbind_req, proto_process_detach_req;
64 
65 static void proto_soft_ring_disable(dld_str_t *);
66 static boolean_t proto_soft_ring_enable(dld_str_t *, dl_capab_dls_t *);
67 static boolean_t proto_capability_advertise(dld_str_t *, mblk_t *);
68 static void proto_change_soft_ring_fanout(dld_str_t *, int);
69 
/*
 * True when 'state' is one of the transitional DLPI states in which an
 * acknowledgement for an attach, detach, bind or unbind is still outstanding.
 * Note that 'state' is evaluated more than once.
 */
#define	DL_ACK_PENDING(state)			\
	(((state) == DL_ATTACH_PENDING) ||	\
	((state) == DL_DETACH_PENDING) ||	\
	((state) == DL_BIND_PENDING) ||		\
	((state) == DL_UNBIND_PENDING))
75 
76 /*
77  * Process a DLPI protocol message.
78  * The primitives DL_BIND_REQ, DL_ENABMULTI_REQ, DL_PROMISCON_REQ,
79  * DL_SET_PHYS_ADDR_REQ put the data link below our dld_str_t into an
80  * 'active' state. The primitive DL_PASSIVE_REQ marks our dld_str_t
81  * as 'passive' and forbids it from being subsequently made 'active'
82  * by the above primitives.
83  */
84 void
85 dld_proto(dld_str_t *dsp, mblk_t *mp)
86 {
87 	union DL_primitives	*udlp;
88 	t_uscalar_t		prim;
89 
90 	if (MBLKL(mp) < sizeof (t_uscalar_t)) {
91 		freemsg(mp);
92 		return;
93 	}
94 
95 	udlp = (union DL_primitives *)mp->b_rptr;
96 	prim = udlp->dl_primitive;
97 
98 	switch (prim) {
99 	case DL_INFO_REQ:
100 		(void) proto_info_req(dsp, udlp, mp);
101 		break;
102 	case DL_BIND_REQ:
103 		(void) proto_bind_req(dsp, udlp, mp);
104 		break;
105 	case DL_UNBIND_REQ:
106 		(void) proto_unbind_req(dsp, udlp, mp);
107 		break;
108 	case DL_UNITDATA_REQ:
109 		(void) proto_unitdata_req(dsp, udlp, mp);
110 		break;
111 	case DL_UDQOS_REQ:
112 		(void) proto_udqos_req(dsp, udlp, mp);
113 		break;
114 	case DL_ATTACH_REQ:
115 		(void) proto_attach_req(dsp, udlp, mp);
116 		break;
117 	case DL_DETACH_REQ:
118 		(void) proto_detach_req(dsp, udlp, mp);
119 		break;
120 	case DL_ENABMULTI_REQ:
121 		(void) proto_enabmulti_req(dsp, udlp, mp);
122 		break;
123 	case DL_DISABMULTI_REQ:
124 		(void) proto_disabmulti_req(dsp, udlp, mp);
125 		break;
126 	case DL_PROMISCON_REQ:
127 		(void) proto_promiscon_req(dsp, udlp, mp);
128 		break;
129 	case DL_PROMISCOFF_REQ:
130 		(void) proto_promiscoff_req(dsp, udlp, mp);
131 		break;
132 	case DL_PHYS_ADDR_REQ:
133 		(void) proto_physaddr_req(dsp, udlp, mp);
134 		break;
135 	case DL_SET_PHYS_ADDR_REQ:
136 		(void) proto_setphysaddr_req(dsp, udlp, mp);
137 		break;
138 	case DL_NOTIFY_REQ:
139 		(void) proto_notify_req(dsp, udlp, mp);
140 		break;
141 	case DL_CAPABILITY_REQ:
142 		(void) proto_capability_req(dsp, udlp, mp);
143 		break;
144 	case DL_PASSIVE_REQ:
145 		(void) proto_passive_req(dsp, udlp, mp);
146 		break;
147 	default:
148 		(void) proto_req(dsp, udlp, mp);
149 		break;
150 	}
151 }
152 
153 /*
154  * Finish any pending operations.
155  * Requests that need to be processed asynchronously will be handled
156  * by a separate thread. After this function returns, other threads
157  * will be allowed to enter dld; they will not be able to do anything
158  * until ds_dlstate transitions to a non-pending state.
159  */
160 void
161 dld_finish_pending_ops(dld_str_t *dsp)
162 {
163 	task_func_t *op = NULL;
164 
165 	ASSERT(MUTEX_HELD(&dsp->ds_thr_lock));
166 	ASSERT(dsp->ds_thr == 0);
167 
168 	op = dsp->ds_pending_op;
169 	dsp->ds_pending_op = NULL;
170 	mutex_exit(&dsp->ds_thr_lock);
171 	if (op != NULL)
172 		(void) taskq_dispatch(system_taskq, op, dsp, TQ_SLEEP);
173 }
174 
/*
 * Arithmetic negation of 'x'.  The whole expansion is parenthesized so the
 * macro behaves as a single operand wherever it is used.
 */
#define	NEG(x)	(-(x))
176 
177 typedef struct dl_info_ack_wrapper {
178 	dl_info_ack_t		dl_info;
179 	uint8_t			dl_addr[MAXMACADDRLEN + sizeof (uint16_t)];
180 	uint8_t			dl_brdcst_addr[MAXMACADDRLEN];
181 	dl_qos_cl_range1_t	dl_qos_range1;
182 	dl_qos_cl_sel1_t	dl_qos_sel1;
183 } dl_info_ack_wrapper_t;
184 
185 /*
186  * DL_INFO_REQ
187  */
188 /*ARGSUSED*/
189 static boolean_t
190 proto_info_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp)
191 {
192 	dl_info_ack_wrapper_t	*dlwp;
193 	dl_info_ack_t		*dlp;
194 	dl_qos_cl_sel1_t	*selp;
195 	dl_qos_cl_range1_t	*rangep;
196 	uint8_t			*addr;
197 	uint8_t			*brdcst_addr;
198 	uint_t			addr_length;
199 	uint_t			sap_length;
200 	mac_info_t		minfo;
201 	mac_info_t		*minfop;
202 	queue_t			*q = dsp->ds_wq;
203 
204 	/*
205 	 * Swap the request message for one large enough to contain the
206 	 * wrapper structure defined above.
207 	 */
208 	if ((mp = mexchange(q, mp, sizeof (dl_info_ack_wrapper_t),
209 	    M_PCPROTO, 0)) == NULL)
210 		return (B_FALSE);
211 
212 	rw_enter(&dsp->ds_lock, RW_READER);
213 
214 	bzero(mp->b_rptr, sizeof (dl_info_ack_wrapper_t));
215 	dlwp = (dl_info_ack_wrapper_t *)mp->b_rptr;
216 
217 	dlp = &(dlwp->dl_info);
218 	ASSERT(dlp == (dl_info_ack_t *)mp->b_rptr);
219 
220 	dlp->dl_primitive = DL_INFO_ACK;
221 
222 	/*
223 	 * Set up the sub-structure pointers.
224 	 */
225 	addr = dlwp->dl_addr;
226 	brdcst_addr = dlwp->dl_brdcst_addr;
227 	rangep = &(dlwp->dl_qos_range1);
228 	selp = &(dlwp->dl_qos_sel1);
229 
230 	/*
231 	 * This driver supports only version 2 connectionless DLPI provider
232 	 * nodes.
233 	 */
234 	dlp->dl_service_mode = DL_CLDLS;
235 	dlp->dl_version = DL_VERSION_2;
236 
237 	/*
238 	 * Set the style of the provider
239 	 */
240 	dlp->dl_provider_style = dsp->ds_style;
241 	ASSERT(dlp->dl_provider_style == DL_STYLE1 ||
242 	    dlp->dl_provider_style == DL_STYLE2);
243 
244 	/*
245 	 * Set the current DLPI state.
246 	 */
247 	dlp->dl_current_state = dsp->ds_dlstate;
248 
249 	/*
250 	 * Gratuitously set the media type. This is to deal with modules
251 	 * that assume the media type is known prior to DL_ATTACH_REQ
252 	 * being completed.
253 	 */
254 	dlp->dl_mac_type = DL_ETHER;
255 
256 	/*
257 	 * If the stream is not at least attached we try to retrieve the
258 	 * mac_info using mac_info_get()
259 	 */
260 	if (dsp->ds_dlstate == DL_UNATTACHED ||
261 	    dsp->ds_dlstate == DL_ATTACH_PENDING ||
262 	    dsp->ds_dlstate == DL_DETACH_PENDING) {
263 		if (!mac_info_get(ddi_major_to_name(dsp->ds_major), &minfo)) {
264 			/*
265 			 * Cannot find mac_info. giving up.
266 			 */
267 			goto done;
268 		}
269 		minfop = &minfo;
270 	} else {
271 		minfop = (mac_info_t *)dsp->ds_mip;
272 	}
273 
274 	/*
275 	 * Set the media type (properly this time).
276 	 */
277 	dlp->dl_mac_type = minfop->mi_media;
278 
279 	/*
280 	 * Set the DLSAP length. We only support 16 bit values and they
281 	 * appear after the MAC address portion of DLSAP addresses.
282 	 */
283 	sap_length = sizeof (uint16_t);
284 	dlp->dl_sap_length = NEG(sap_length);
285 
286 	/*
287 	 * Set the minimum and maximum payload sizes.
288 	 */
289 	dlp->dl_min_sdu = minfop->mi_sdu_min;
290 	dlp->dl_max_sdu = minfop->mi_sdu_max;
291 
292 	addr_length = minfop->mi_addr_length;
293 
294 	/*
295 	 * Copy in the media broadcast address.
296 	 */
297 	if (minfop->mi_brdcst_addr != NULL) {
298 		dlp->dl_brdcst_addr_offset =
299 		    (uintptr_t)brdcst_addr - (uintptr_t)dlp;
300 		bcopy(minfop->mi_brdcst_addr, brdcst_addr, addr_length);
301 		dlp->dl_brdcst_addr_length = addr_length;
302 	}
303 
304 	dlp->dl_qos_range_offset = (uintptr_t)rangep - (uintptr_t)dlp;
305 	dlp->dl_qos_range_length = sizeof (dl_qos_cl_range1_t);
306 
307 	rangep->dl_qos_type = DL_QOS_CL_RANGE1;
308 	rangep->dl_trans_delay.dl_target_value = DL_UNKNOWN;
309 	rangep->dl_trans_delay.dl_accept_value = DL_UNKNOWN;
310 	rangep->dl_protection.dl_min = DL_UNKNOWN;
311 	rangep->dl_protection.dl_max = DL_UNKNOWN;
312 	rangep->dl_residual_error = DL_UNKNOWN;
313 
314 	/*
315 	 * Specify the supported range of priorities.
316 	 */
317 	rangep->dl_priority.dl_min = 0;
318 	rangep->dl_priority.dl_max = (1 << VLAN_PRI_SIZE) - 1;
319 
320 	dlp->dl_qos_offset = (uintptr_t)selp - (uintptr_t)dlp;
321 	dlp->dl_qos_length = sizeof (dl_qos_cl_sel1_t);
322 
323 	selp->dl_qos_type = DL_QOS_CL_SEL1;
324 	selp->dl_trans_delay = DL_UNKNOWN;
325 	selp->dl_protection = DL_UNKNOWN;
326 	selp->dl_residual_error = DL_UNKNOWN;
327 
328 	/*
329 	 * Specify the current priority (which can be changed by
330 	 * the DL_UDQOS_REQ primitive).
331 	 */
332 	selp->dl_priority = dsp->ds_pri;
333 
334 	dlp->dl_addr_length = addr_length + sizeof (uint16_t);
335 	if (dsp->ds_dlstate == DL_IDLE) {
336 		/*
337 		 * The stream is bound. Therefore we can formulate a valid
338 		 * DLSAP address.
339 		 */
340 		dlp->dl_addr_offset = (uintptr_t)addr - (uintptr_t)dlp;
341 		if (addr_length > 0)
342 			bcopy(dsp->ds_curr_addr, addr, addr_length);
343 		*(uint16_t *)(addr + addr_length) = dsp->ds_sap;
344 	}
345 
346 done:
347 	ASSERT(IMPLY(dlp->dl_qos_offset != 0, dlp->dl_qos_length != 0));
348 	ASSERT(IMPLY(dlp->dl_qos_range_offset != 0,
349 	    dlp->dl_qos_range_length != 0));
350 	ASSERT(IMPLY(dlp->dl_addr_offset != 0, dlp->dl_addr_length != 0));
351 	ASSERT(IMPLY(dlp->dl_brdcst_addr_offset != 0,
352 	    dlp->dl_brdcst_addr_length != 0));
353 
354 	rw_exit(&dsp->ds_lock);
355 
356 	qreply(q, mp);
357 	return (B_TRUE);
358 }
359 
360 /*
361  * DL_ATTACH_REQ
362  */
363 static boolean_t
364 proto_attach_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp)
365 {
366 	dl_attach_req_t	*dlp = (dl_attach_req_t *)udlp;
367 	int		err = 0;
368 	t_uscalar_t	dl_err;
369 	queue_t		*q = dsp->ds_wq;
370 
371 	rw_enter(&dsp->ds_lock, RW_WRITER);
372 
373 	if (MBLKL(mp) < sizeof (dl_attach_req_t) ||
374 	    dlp->dl_ppa < 0 || dsp->ds_style == DL_STYLE1) {
375 		dl_err = DL_BADPRIM;
376 		goto failed;
377 	}
378 
379 	if (dsp->ds_dlstate != DL_UNATTACHED) {
380 		dl_err = DL_OUTSTATE;
381 		goto failed;
382 	}
383 
384 	dsp->ds_dlstate = DL_ATTACH_PENDING;
385 
386 	err = dld_str_attach(dsp, dlp->dl_ppa);
387 	if (err != 0) {
388 		switch (err) {
389 		case ENOENT:
390 			dl_err = DL_BADPPA;
391 			err = 0;
392 			break;
393 		default:
394 			dl_err = DL_SYSERR;
395 			break;
396 		}
397 		dsp->ds_dlstate = DL_UNATTACHED;
398 		goto failed;
399 	}
400 	ASSERT(dsp->ds_dlstate == DL_UNBOUND);
401 	rw_exit(&dsp->ds_lock);
402 
403 	dlokack(q, mp, DL_ATTACH_REQ);
404 	return (B_TRUE);
405 failed:
406 	rw_exit(&dsp->ds_lock);
407 	dlerrorack(q, mp, DL_ATTACH_REQ, dl_err, (t_uscalar_t)err);
408 	return (B_FALSE);
409 }
410 
411 /*
412  * DL_DETACH_REQ
413  */
414 static void
415 proto_process_detach_req(void *arg)
416 {
417 	dld_str_t	*dsp = arg;
418 	mblk_t		*mp;
419 
420 	/*
421 	 * We don't need to hold locks because no other thread
422 	 * would manipulate dsp while it is in a PENDING state.
423 	 */
424 	ASSERT(dsp->ds_pending_req != NULL);
425 	ASSERT(dsp->ds_dlstate == DL_DETACH_PENDING);
426 
427 	mp = dsp->ds_pending_req;
428 	dsp->ds_pending_req = NULL;
429 	dld_str_detach(dsp);
430 	dlokack(dsp->ds_wq, mp, DL_DETACH_REQ);
431 
432 	DLD_WAKEUP(dsp);
433 }
434 
435 /*ARGSUSED*/
436 static boolean_t
437 proto_detach_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp)
438 {
439 	queue_t		*q = dsp->ds_wq;
440 	t_uscalar_t	dl_err;
441 
442 	rw_enter(&dsp->ds_lock, RW_WRITER);
443 
444 	if (MBLKL(mp) < sizeof (dl_detach_req_t)) {
445 		dl_err = DL_BADPRIM;
446 		goto failed;
447 	}
448 
449 	if (dsp->ds_dlstate != DL_UNBOUND) {
450 		dl_err = DL_OUTSTATE;
451 		goto failed;
452 	}
453 
454 	if (dsp->ds_style == DL_STYLE1) {
455 		dl_err = DL_BADPRIM;
456 		goto failed;
457 	}
458 
459 	dsp->ds_dlstate = DL_DETACH_PENDING;
460 
461 	/*
462 	 * Complete the detach when the driver is single-threaded.
463 	 */
464 	mutex_enter(&dsp->ds_thr_lock);
465 	ASSERT(dsp->ds_pending_req == NULL);
466 	dsp->ds_pending_req = mp;
467 	dsp->ds_pending_op = proto_process_detach_req;
468 	dsp->ds_pending_cnt++;
469 	mutex_exit(&dsp->ds_thr_lock);
470 	rw_exit(&dsp->ds_lock);
471 
472 	return (B_TRUE);
473 failed:
474 	rw_exit(&dsp->ds_lock);
475 	dlerrorack(q, mp, DL_DETACH_REQ, dl_err, 0);
476 	return (B_FALSE);
477 }
478 
479 /*
480  * DL_BIND_REQ
481  */
482 static boolean_t
483 proto_bind_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp)
484 {
485 	dl_bind_req_t	*dlp = (dl_bind_req_t *)udlp;
486 	int		err = 0;
487 	uint8_t		dlsap_addr[MAXMACADDRLEN + sizeof (uint16_t)];
488 	uint_t		dlsap_addr_length;
489 	t_uscalar_t	dl_err;
490 	t_scalar_t	sap;
491 	queue_t		*q = dsp->ds_wq;
492 
493 	rw_enter(&dsp->ds_lock, RW_WRITER);
494 
495 	if (MBLKL(mp) < sizeof (dl_bind_req_t)) {
496 		dl_err = DL_BADPRIM;
497 		goto failed;
498 	}
499 
500 	if (dlp->dl_xidtest_flg != 0) {
501 		dl_err = DL_NOAUTO;
502 		goto failed;
503 	}
504 
505 	if (dlp->dl_service_mode != DL_CLDLS) {
506 		dl_err = DL_UNSUPPORTED;
507 		goto failed;
508 	}
509 
510 	if (dsp->ds_dlstate != DL_UNBOUND) {
511 		dl_err = DL_OUTSTATE;
512 		goto failed;
513 	}
514 
515 	if (dsp->ds_passivestate == DLD_UNINITIALIZED &&
516 	    !dls_active_set(dsp->ds_dc)) {
517 		dl_err = DL_SYSERR;
518 		err = EBUSY;
519 		goto failed;
520 	}
521 
522 	dsp->ds_dlstate = DL_BIND_PENDING;
523 	/*
524 	 * Set the receive callback.
525 	 */
526 	dls_rx_set(dsp->ds_dc, (dsp->ds_mode == DLD_RAW) ?
527 	    dld_str_rx_raw : dld_str_rx_unitdata, dsp);
528 
529 	/*
530 	 * Bind the channel such that it can receive packets.
531 	 */
532 	sap = dsp->ds_sap = dlp->dl_sap;
533 	err = dls_bind(dsp->ds_dc, dlp->dl_sap);
534 	if (err != 0) {
535 		switch (err) {
536 		case EINVAL:
537 			dl_err = DL_BADADDR;
538 			err = 0;
539 			break;
540 		default:
541 			dl_err = DL_SYSERR;
542 			break;
543 		}
544 		dsp->ds_dlstate = DL_UNBOUND;
545 		if (dsp->ds_passivestate == DLD_UNINITIALIZED)
546 			dls_active_clear(dsp->ds_dc);
547 
548 		goto failed;
549 	}
550 
551 	/*
552 	 * Copy in MAC address.
553 	 */
554 	dlsap_addr_length = dsp->ds_mip->mi_addr_length;
555 	bcopy(dsp->ds_curr_addr, dlsap_addr, dlsap_addr_length);
556 
557 	/*
558 	 * Copy in the SAP.
559 	 */
560 	*(uint16_t *)(dlsap_addr + dlsap_addr_length) = dsp->ds_sap;
561 	dlsap_addr_length += sizeof (uint16_t);
562 
563 	dsp->ds_dlstate = DL_IDLE;
564 	if (dsp->ds_passivestate == DLD_UNINITIALIZED)
565 		dsp->ds_passivestate = DLD_ACTIVE;
566 
567 	rw_exit(&dsp->ds_lock);
568 
569 	dlbindack(q, mp, sap, dlsap_addr, dlsap_addr_length, 0, 0);
570 	return (B_TRUE);
571 failed:
572 	rw_exit(&dsp->ds_lock);
573 	dlerrorack(q, mp, DL_BIND_REQ, dl_err, (t_uscalar_t)err);
574 	return (B_FALSE);
575 }
576 
577 /*
578  * DL_UNBIND_REQ
579  */
580 /*ARGSUSED*/
581 static void
582 proto_process_unbind_req(void *arg)
583 {
584 	dld_str_t	*dsp = arg;
585 	mblk_t		*mp;
586 
587 	/*
588 	 * We don't need to hold locks because no other thread
589 	 * would manipulate dsp while it is in a PENDING state.
590 	 */
591 	ASSERT(dsp->ds_pending_req != NULL);
592 	ASSERT(dsp->ds_dlstate == DL_UNBIND_PENDING);
593 
594 	/*
595 	 * Flush any remaining packets scheduled for transmission.
596 	 */
597 	dld_tx_flush(dsp);
598 
599 	/*
600 	 * Unbind the channel to stop packets being received.
601 	 */
602 	dls_unbind(dsp->ds_dc);
603 
604 	/*
605 	 * Disable polling mode, if it is enabled.
606 	 */
607 	proto_poll_disable(dsp);
608 
609 	/*
610 	 * Clear LSO flags.
611 	 */
612 	dsp->ds_lso = B_FALSE;
613 	dsp->ds_lso_max = 0;
614 
615 	/*
616 	 * Clear the receive callback.
617 	 */
618 	dls_rx_set(dsp->ds_dc, NULL, NULL);
619 
620 	/*
621 	 * Set the mode back to the default (unitdata).
622 	 */
623 	dsp->ds_mode = DLD_UNITDATA;
624 
625 	/*
626 	 * If soft rings were enabled, the workers
627 	 * should be quiesced. We cannot check for
628 	 * ds_soft_ring flag because
629 	 * proto_soft_ring_disable() called from
630 	 * proto_capability_req() would have reset it.
631 	 */
632 	if (dls_soft_ring_workers(dsp->ds_dc))
633 		dls_soft_ring_disable(dsp->ds_dc);
634 
635 	mp = dsp->ds_pending_req;
636 	dsp->ds_pending_req = NULL;
637 	dsp->ds_dlstate = DL_UNBOUND;
638 	dlokack(dsp->ds_wq, mp, DL_UNBIND_REQ);
639 
640 	DLD_WAKEUP(dsp);
641 }
642 
643 /*ARGSUSED*/
644 static boolean_t
645 proto_unbind_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp)
646 {
647 	queue_t		*q = dsp->ds_wq;
648 	t_uscalar_t	dl_err;
649 
650 	rw_enter(&dsp->ds_lock, RW_WRITER);
651 
652 	if (MBLKL(mp) < sizeof (dl_unbind_req_t)) {
653 		dl_err = DL_BADPRIM;
654 		goto failed;
655 	}
656 
657 	if (dsp->ds_dlstate != DL_IDLE) {
658 		dl_err = DL_OUTSTATE;
659 		goto failed;
660 	}
661 
662 	dsp->ds_dlstate = DL_UNBIND_PENDING;
663 
664 	mutex_enter(&dsp->ds_thr_lock);
665 	ASSERT(dsp->ds_pending_req == NULL);
666 	dsp->ds_pending_req = mp;
667 	dsp->ds_pending_op = proto_process_unbind_req;
668 	dsp->ds_pending_cnt++;
669 	mutex_exit(&dsp->ds_thr_lock);
670 	rw_exit(&dsp->ds_lock);
671 
672 	return (B_TRUE);
673 failed:
674 	rw_exit(&dsp->ds_lock);
675 	dlerrorack(q, mp, DL_UNBIND_REQ, dl_err, 0);
676 	return (B_FALSE);
677 }
678 
679 /*
680  * DL_PROMISCON_REQ
681  */
682 static boolean_t
683 proto_promiscon_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp)
684 {
685 	dl_promiscon_req_t *dlp = (dl_promiscon_req_t *)udlp;
686 	int		err = 0;
687 	t_uscalar_t	dl_err;
688 	uint32_t	promisc_saved;
689 	queue_t		*q = dsp->ds_wq;
690 
691 	rw_enter(&dsp->ds_lock, RW_WRITER);
692 
693 	if (MBLKL(mp) < sizeof (dl_promiscon_req_t)) {
694 		dl_err = DL_BADPRIM;
695 		goto failed;
696 	}
697 
698 	if (dsp->ds_dlstate == DL_UNATTACHED ||
699 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
700 		dl_err = DL_OUTSTATE;
701 		goto failed;
702 	}
703 
704 	promisc_saved = dsp->ds_promisc;
705 	switch (dlp->dl_level) {
706 	case DL_PROMISC_SAP:
707 		dsp->ds_promisc |= DLS_PROMISC_SAP;
708 		break;
709 
710 	case DL_PROMISC_MULTI:
711 		dsp->ds_promisc |= DLS_PROMISC_MULTI;
712 		break;
713 
714 	case DL_PROMISC_PHYS:
715 		dsp->ds_promisc |= DLS_PROMISC_PHYS;
716 		break;
717 
718 	default:
719 		dl_err = DL_NOTSUPPORTED;
720 		goto failed;
721 	}
722 
723 	if (dsp->ds_passivestate == DLD_UNINITIALIZED &&
724 	    !dls_active_set(dsp->ds_dc)) {
725 		dsp->ds_promisc = promisc_saved;
726 		dl_err = DL_SYSERR;
727 		err = EBUSY;
728 		goto failed;
729 	}
730 
731 	/*
732 	 * Adjust channel promiscuity.
733 	 */
734 	err = dls_promisc(dsp->ds_dc, dsp->ds_promisc);
735 	if (err != 0) {
736 		dl_err = DL_SYSERR;
737 		dsp->ds_promisc = promisc_saved;
738 		if (dsp->ds_passivestate == DLD_UNINITIALIZED)
739 			dls_active_clear(dsp->ds_dc);
740 
741 		goto failed;
742 	}
743 
744 	if (dsp->ds_passivestate == DLD_UNINITIALIZED)
745 		dsp->ds_passivestate = DLD_ACTIVE;
746 
747 	rw_exit(&dsp->ds_lock);
748 	dlokack(q, mp, DL_PROMISCON_REQ);
749 	return (B_TRUE);
750 failed:
751 	rw_exit(&dsp->ds_lock);
752 	dlerrorack(q, mp, DL_PROMISCON_REQ, dl_err, (t_uscalar_t)err);
753 	return (B_FALSE);
754 }
755 
756 /*
757  * DL_PROMISCOFF_REQ
758  */
759 static boolean_t
760 proto_promiscoff_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp)
761 {
762 	dl_promiscoff_req_t *dlp = (dl_promiscoff_req_t *)udlp;
763 	int		err = 0;
764 	t_uscalar_t	dl_err;
765 	uint32_t	promisc_saved;
766 	queue_t		*q = dsp->ds_wq;
767 
768 	rw_enter(&dsp->ds_lock, RW_WRITER);
769 
770 	if (MBLKL(mp) < sizeof (dl_promiscoff_req_t)) {
771 		dl_err = DL_BADPRIM;
772 		goto failed;
773 	}
774 
775 	if (dsp->ds_dlstate == DL_UNATTACHED ||
776 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
777 		dl_err = DL_OUTSTATE;
778 		goto failed;
779 	}
780 
781 	promisc_saved = dsp->ds_promisc;
782 	switch (dlp->dl_level) {
783 	case DL_PROMISC_SAP:
784 		if (!(dsp->ds_promisc & DLS_PROMISC_SAP)) {
785 			dl_err = DL_NOTENAB;
786 			goto failed;
787 		}
788 		dsp->ds_promisc &= ~DLS_PROMISC_SAP;
789 		break;
790 
791 	case DL_PROMISC_MULTI:
792 		if (!(dsp->ds_promisc & DLS_PROMISC_MULTI)) {
793 			dl_err = DL_NOTENAB;
794 			goto failed;
795 		}
796 		dsp->ds_promisc &= ~DLS_PROMISC_MULTI;
797 		break;
798 
799 	case DL_PROMISC_PHYS:
800 		if (!(dsp->ds_promisc & DLS_PROMISC_PHYS)) {
801 			dl_err = DL_NOTENAB;
802 			goto failed;
803 		}
804 		dsp->ds_promisc &= ~DLS_PROMISC_PHYS;
805 		break;
806 
807 	default:
808 		dl_err = DL_NOTSUPPORTED;
809 		goto failed;
810 	}
811 
812 	/*
813 	 * Adjust channel promiscuity.
814 	 */
815 	err = dls_promisc(dsp->ds_dc, dsp->ds_promisc);
816 	if (err != 0) {
817 		dsp->ds_promisc = promisc_saved;
818 		dl_err = DL_SYSERR;
819 		goto failed;
820 	}
821 
822 	rw_exit(&dsp->ds_lock);
823 	dlokack(q, mp, DL_PROMISCOFF_REQ);
824 	return (B_TRUE);
825 failed:
826 	rw_exit(&dsp->ds_lock);
827 	dlerrorack(q, mp, DL_PROMISCOFF_REQ, dl_err, (t_uscalar_t)err);
828 	return (B_FALSE);
829 }
830 
831 /*
832  * DL_ENABMULTI_REQ
833  */
834 static boolean_t
835 proto_enabmulti_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp)
836 {
837 	dl_enabmulti_req_t *dlp = (dl_enabmulti_req_t *)udlp;
838 	int		err = 0;
839 	t_uscalar_t	dl_err;
840 	queue_t		*q = dsp->ds_wq;
841 
842 	rw_enter(&dsp->ds_lock, RW_WRITER);
843 
844 	if (dsp->ds_dlstate == DL_UNATTACHED ||
845 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
846 		dl_err = DL_OUTSTATE;
847 		goto failed;
848 	}
849 
850 	if (MBLKL(mp) < sizeof (dl_enabmulti_req_t) ||
851 	    !MBLKIN(mp, dlp->dl_addr_offset, dlp->dl_addr_length) ||
852 	    dlp->dl_addr_length != dsp->ds_mip->mi_addr_length) {
853 		dl_err = DL_BADPRIM;
854 		goto failed;
855 	}
856 
857 	if (dsp->ds_passivestate == DLD_UNINITIALIZED &&
858 	    !dls_active_set(dsp->ds_dc)) {
859 		dl_err = DL_SYSERR;
860 		err = EBUSY;
861 		goto failed;
862 	}
863 
864 	err = dls_multicst_add(dsp->ds_dc, mp->b_rptr + dlp->dl_addr_offset);
865 	if (err != 0) {
866 		switch (err) {
867 		case EINVAL:
868 			dl_err = DL_BADADDR;
869 			err = 0;
870 			break;
871 		case ENOSPC:
872 			dl_err = DL_TOOMANY;
873 			err = 0;
874 			break;
875 		default:
876 			dl_err = DL_SYSERR;
877 			break;
878 		}
879 		if (dsp->ds_passivestate == DLD_UNINITIALIZED)
880 			dls_active_clear(dsp->ds_dc);
881 
882 		goto failed;
883 	}
884 
885 	if (dsp->ds_passivestate == DLD_UNINITIALIZED)
886 		dsp->ds_passivestate = DLD_ACTIVE;
887 
888 	rw_exit(&dsp->ds_lock);
889 	dlokack(q, mp, DL_ENABMULTI_REQ);
890 	return (B_TRUE);
891 failed:
892 	rw_exit(&dsp->ds_lock);
893 	dlerrorack(q, mp, DL_ENABMULTI_REQ, dl_err, (t_uscalar_t)err);
894 	return (B_FALSE);
895 }
896 
897 /*
898  * DL_DISABMULTI_REQ
899  */
900 static boolean_t
901 proto_disabmulti_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp)
902 {
903 	dl_disabmulti_req_t *dlp = (dl_disabmulti_req_t *)udlp;
904 	int		err = 0;
905 	t_uscalar_t	dl_err;
906 	queue_t		*q = dsp->ds_wq;
907 
908 	rw_enter(&dsp->ds_lock, RW_READER);
909 
910 	if (dsp->ds_dlstate == DL_UNATTACHED ||
911 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
912 		dl_err = DL_OUTSTATE;
913 		goto failed;
914 	}
915 
916 	if (MBLKL(mp) < sizeof (dl_disabmulti_req_t) ||
917 	    !MBLKIN(mp, dlp->dl_addr_offset, dlp->dl_addr_length) ||
918 	    dlp->dl_addr_length != dsp->ds_mip->mi_addr_length) {
919 		dl_err = DL_BADPRIM;
920 		goto failed;
921 	}
922 
923 	err = dls_multicst_remove(dsp->ds_dc, mp->b_rptr + dlp->dl_addr_offset);
924 	if (err != 0) {
925 	switch (err) {
926 		case EINVAL:
927 			dl_err = DL_BADADDR;
928 			err = 0;
929 			break;
930 
931 		case ENOENT:
932 			dl_err = DL_NOTENAB;
933 			err = 0;
934 			break;
935 
936 		default:
937 			dl_err = DL_SYSERR;
938 			break;
939 		}
940 		goto failed;
941 	}
942 
943 	rw_exit(&dsp->ds_lock);
944 	dlokack(q, mp, DL_DISABMULTI_REQ);
945 	return (B_TRUE);
946 failed:
947 	rw_exit(&dsp->ds_lock);
948 	dlerrorack(q, mp, DL_DISABMULTI_REQ, dl_err, (t_uscalar_t)err);
949 	return (B_FALSE);
950 }
951 
952 /*
953  * DL_PHYS_ADDR_REQ
954  */
955 static boolean_t
956 proto_physaddr_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp)
957 {
958 	dl_phys_addr_req_t *dlp = (dl_phys_addr_req_t *)udlp;
959 	queue_t		*q = dsp->ds_wq;
960 	t_uscalar_t	dl_err;
961 	char		*addr;
962 	uint_t		addr_length;
963 
964 	rw_enter(&dsp->ds_lock, RW_READER);
965 
966 	if (MBLKL(mp) < sizeof (dl_phys_addr_req_t)) {
967 		dl_err = DL_BADPRIM;
968 		goto failed;
969 	}
970 
971 	if (dsp->ds_dlstate == DL_UNATTACHED ||
972 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
973 		dl_err = DL_OUTSTATE;
974 		goto failed;
975 	}
976 
977 	if (dlp->dl_addr_type != DL_CURR_PHYS_ADDR &&
978 	    dlp->dl_addr_type != DL_FACT_PHYS_ADDR) {
979 		dl_err = DL_UNSUPPORTED;
980 		goto failed;
981 	}
982 
983 	addr_length = dsp->ds_mip->mi_addr_length;
984 	addr = kmem_alloc(addr_length, KM_NOSLEEP);
985 	if (addr == NULL) {
986 		rw_exit(&dsp->ds_lock);
987 		merror(q, mp, ENOSR);
988 		return (B_FALSE);
989 	}
990 
991 	/*
992 	 * Copy out the address before we drop the lock; we don't
993 	 * want to call dlphysaddrack() while holding ds_lock.
994 	 */
995 	bcopy((dlp->dl_addr_type == DL_CURR_PHYS_ADDR) ?
996 	    dsp->ds_curr_addr : dsp->ds_fact_addr, addr, addr_length);
997 
998 	rw_exit(&dsp->ds_lock);
999 	dlphysaddrack(q, mp, addr, (t_uscalar_t)addr_length);
1000 	kmem_free(addr, addr_length);
1001 	return (B_TRUE);
1002 failed:
1003 	rw_exit(&dsp->ds_lock);
1004 	dlerrorack(q, mp, DL_PHYS_ADDR_REQ, dl_err, 0);
1005 	return (B_FALSE);
1006 }
1007 
1008 /*
1009  * DL_SET_PHYS_ADDR_REQ
1010  */
1011 static boolean_t
1012 proto_setphysaddr_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp)
1013 {
1014 	dl_set_phys_addr_req_t *dlp = (dl_set_phys_addr_req_t *)udlp;
1015 	int		err = 0;
1016 	t_uscalar_t	dl_err;
1017 	queue_t		*q = dsp->ds_wq;
1018 
1019 	rw_enter(&dsp->ds_lock, RW_WRITER);
1020 
1021 	if (dsp->ds_dlstate == DL_UNATTACHED ||
1022 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
1023 		dl_err = DL_OUTSTATE;
1024 		goto failed;
1025 	}
1026 
1027 	if (MBLKL(mp) < sizeof (dl_set_phys_addr_req_t) ||
1028 	    !MBLKIN(mp, dlp->dl_addr_offset, dlp->dl_addr_length) ||
1029 	    dlp->dl_addr_length != dsp->ds_mip->mi_addr_length) {
1030 		dl_err = DL_BADPRIM;
1031 		goto failed;
1032 	}
1033 
1034 	if (dsp->ds_passivestate == DLD_UNINITIALIZED &&
1035 	    !dls_active_set(dsp->ds_dc)) {
1036 		dl_err = DL_SYSERR;
1037 		err = EBUSY;
1038 		goto failed;
1039 	}
1040 
1041 	err = mac_unicst_set(dsp->ds_mh, mp->b_rptr + dlp->dl_addr_offset);
1042 	if (err != 0) {
1043 		switch (err) {
1044 		case EINVAL:
1045 			dl_err = DL_BADADDR;
1046 			err = 0;
1047 			break;
1048 
1049 		default:
1050 			dl_err = DL_SYSERR;
1051 			break;
1052 		}
1053 		if (dsp->ds_passivestate == DLD_UNINITIALIZED)
1054 			dls_active_clear(dsp->ds_dc);
1055 
1056 		goto failed;
1057 	}
1058 	if (dsp->ds_passivestate == DLD_UNINITIALIZED)
1059 		dsp->ds_passivestate = DLD_ACTIVE;
1060 
1061 	rw_exit(&dsp->ds_lock);
1062 	dlokack(q, mp, DL_SET_PHYS_ADDR_REQ);
1063 	return (B_TRUE);
1064 failed:
1065 	rw_exit(&dsp->ds_lock);
1066 	dlerrorack(q, mp, DL_SET_PHYS_ADDR_REQ, dl_err, (t_uscalar_t)err);
1067 	return (B_FALSE);
1068 }
1069 
1070 /*
1071  * DL_UDQOS_REQ
1072  */
1073 static boolean_t
1074 proto_udqos_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp)
1075 {
1076 	dl_udqos_req_t *dlp = (dl_udqos_req_t *)udlp;
1077 	dl_qos_cl_sel1_t *selp;
1078 	int		off, len;
1079 	t_uscalar_t	dl_err;
1080 	queue_t		*q = dsp->ds_wq;
1081 
1082 	off = dlp->dl_qos_offset;
1083 	len = dlp->dl_qos_length;
1084 
1085 	rw_enter(&dsp->ds_lock, RW_WRITER);
1086 
1087 	if (MBLKL(mp) < sizeof (dl_udqos_req_t) || !MBLKIN(mp, off, len)) {
1088 		dl_err = DL_BADPRIM;
1089 		goto failed;
1090 	}
1091 
1092 	selp = (dl_qos_cl_sel1_t *)(mp->b_rptr + off);
1093 	if (selp->dl_qos_type != DL_QOS_CL_SEL1) {
1094 		dl_err = DL_BADQOSTYPE;
1095 		goto failed;
1096 	}
1097 
1098 	if (selp->dl_priority > (1 << VLAN_PRI_SIZE) - 1 ||
1099 	    selp->dl_priority < 0) {
1100 		dl_err = DL_BADQOSPARAM;
1101 		goto failed;
1102 	}
1103 
1104 	dsp->ds_pri = selp->dl_priority;
1105 
1106 	rw_exit(&dsp->ds_lock);
1107 	dlokack(q, mp, DL_UDQOS_REQ);
1108 	return (B_TRUE);
1109 failed:
1110 	rw_exit(&dsp->ds_lock);
1111 	dlerrorack(q, mp, DL_UDQOS_REQ, dl_err, 0);
1112 	return (B_FALSE);
1113 }
1114 
1115 static boolean_t
1116 check_ip_above(queue_t *q)
1117 {
1118 	queue_t		*next_q;
1119 	boolean_t	ret = B_TRUE;
1120 
1121 	claimstr(q);
1122 	next_q = q->q_next;
1123 	if (strcmp(next_q->q_qinfo->qi_minfo->mi_idname, "ip") != 0)
1124 		ret = B_FALSE;
1125 	releasestr(q);
1126 	return (ret);
1127 }
1128 
1129 /*
1130  * DL_CAPABILITY_REQ
1131  */
1132 /*ARGSUSED*/
1133 static boolean_t
1134 proto_capability_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp)
1135 {
1136 	dl_capability_req_t *dlp = (dl_capability_req_t *)udlp;
1137 	dl_capability_sub_t *sp;
1138 	size_t		size, len;
1139 	offset_t	off, end;
1140 	t_uscalar_t	dl_err;
1141 	queue_t		*q = dsp->ds_wq;
1142 	boolean_t	upgraded;
1143 
1144 	rw_enter(&dsp->ds_lock, RW_READER);
1145 
1146 	if (MBLKL(mp) < sizeof (dl_capability_req_t)) {
1147 		dl_err = DL_BADPRIM;
1148 		goto failed;
1149 	}
1150 
1151 	if (dsp->ds_dlstate == DL_UNATTACHED ||
1152 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
1153 		dl_err = DL_OUTSTATE;
1154 		goto failed;
1155 	}
1156 
1157 	/*
1158 	 * This request is overloaded. If there are no requested capabilities
1159 	 * then we just want to acknowledge with all the capabilities we
1160 	 * support. Otherwise we enable the set of capabilities requested.
1161 	 */
1162 	if (dlp->dl_sub_length == 0) {
1163 		/* callee drops lock */
1164 		return (proto_capability_advertise(dsp, mp));
1165 	}
1166 
1167 	if (!MBLKIN(mp, dlp->dl_sub_offset, dlp->dl_sub_length)) {
1168 		dl_err = DL_BADPRIM;
1169 		goto failed;
1170 	}
1171 
1172 	dlp->dl_primitive = DL_CAPABILITY_ACK;
1173 
1174 	off = dlp->dl_sub_offset;
1175 	len = dlp->dl_sub_length;
1176 
1177 	/*
1178 	 * Walk the list of capabilities to be enabled.
1179 	 */
1180 	upgraded = B_FALSE;
1181 	for (end = off + len; off < end; ) {
1182 		sp = (dl_capability_sub_t *)(mp->b_rptr + off);
1183 		size = sizeof (dl_capability_sub_t) + sp->dl_length;
1184 
1185 		if (off + size > end ||
1186 		    !IS_P2ALIGNED(off, sizeof (uint32_t))) {
1187 			dl_err = DL_BADPRIM;
1188 			goto failed;
1189 		}
1190 
1191 		switch (sp->dl_cap) {
1192 		/*
1193 		 * TCP/IP checksum offload to hardware.
1194 		 */
1195 		case DL_CAPAB_HCKSUM: {
1196 			dl_capab_hcksum_t *hcksump;
1197 			dl_capab_hcksum_t hcksum;
1198 
1199 			hcksump = (dl_capab_hcksum_t *)&sp[1];
1200 			/*
1201 			 * Copy for alignment.
1202 			 */
1203 			bcopy(hcksump, &hcksum, sizeof (dl_capab_hcksum_t));
1204 			dlcapabsetqid(&(hcksum.hcksum_mid), dsp->ds_rq);
1205 			bcopy(&hcksum, hcksump, sizeof (dl_capab_hcksum_t));
1206 			break;
1207 		}
1208 
1209 		/*
1210 		 * Large segment offload. (LSO)
1211 		 */
1212 		case DL_CAPAB_LSO: {
1213 			dl_capab_lso_t *lsop;
1214 			dl_capab_lso_t lso;
1215 
1216 			lsop = (dl_capab_lso_t *)&sp[1];
1217 			/*
1218 			 * Copy for alignment.
1219 			 */
1220 			bcopy(lsop, &lso, sizeof (dl_capab_lso_t));
1221 			dlcapabsetqid(&(lso.lso_mid), dsp->ds_rq);
1222 			bcopy(&lso, lsop, sizeof (dl_capab_lso_t));
1223 			break;
1224 		}
1225 
1226 		/*
1227 		 * IP polling interface.
1228 		 */
1229 		case DL_CAPAB_POLL: {
1230 			dl_capab_dls_t *pollp;
1231 			dl_capab_dls_t	poll;
1232 
1233 			pollp = (dl_capab_dls_t *)&sp[1];
1234 			/*
1235 			 * Copy for alignment.
1236 			 */
1237 			bcopy(pollp, &poll, sizeof (dl_capab_dls_t));
1238 
1239 			/*
1240 			 * We need to become writer before enabling and/or
1241 			 * disabling the polling interface.  If we couldn'
1242 			 * upgrade, check state again after re-acquiring the
1243 			 * lock to make sure we can proceed.
1244 			 */
1245 			if (!upgraded && !rw_tryupgrade(&dsp->ds_lock)) {
1246 				rw_exit(&dsp->ds_lock);
1247 				rw_enter(&dsp->ds_lock, RW_WRITER);
1248 
1249 				if (dsp->ds_dlstate == DL_UNATTACHED ||
1250 				    DL_ACK_PENDING(dsp->ds_dlstate)) {
1251 					dl_err = DL_OUTSTATE;
1252 					goto failed;
1253 				}
1254 			}
1255 			upgraded = B_TRUE;
1256 
1257 			switch (poll.dls_flags) {
1258 			default:
1259 				/*FALLTHRU*/
1260 			case POLL_DISABLE:
1261 				proto_poll_disable(dsp);
1262 				break;
1263 
1264 			case POLL_ENABLE:
1265 				ASSERT(!(dld_opt & DLD_OPT_NO_POLL));
1266 
1267 				/*
1268 				 * Make sure polling is disabled.
1269 				 */
1270 				proto_poll_disable(dsp);
1271 
1272 				/*
1273 				 * Now attempt enable it.
1274 				 */
1275 				if (check_ip_above(dsp->ds_rq) &&
1276 				    proto_poll_enable(dsp, &poll)) {
1277 					bzero(&poll, sizeof (dl_capab_dls_t));
1278 					poll.dls_flags = POLL_ENABLE;
1279 				}
1280 				break;
1281 			}
1282 
1283 			dlcapabsetqid(&(poll.dls_mid), dsp->ds_rq);
1284 			bcopy(&poll, pollp, sizeof (dl_capab_dls_t));
1285 			break;
1286 		}
1287 		case DL_CAPAB_SOFT_RING: {
1288 			dl_capab_dls_t *soft_ringp;
1289 			dl_capab_dls_t soft_ring;
1290 
1291 			soft_ringp = (dl_capab_dls_t *)&sp[1];
1292 			/*
1293 			 * Copy for alignment.
1294 			 */
1295 			bcopy(soft_ringp, &soft_ring,
1296 			    sizeof (dl_capab_dls_t));
1297 
1298 			/*
1299 			 * We need to become writer before enabling and/or
1300 			 * disabling the soft_ring interface.  If we couldn'
1301 			 * upgrade, check state again after re-acquiring the
1302 			 * lock to make sure we can proceed.
1303 			 */
1304 			if (!upgraded && !rw_tryupgrade(&dsp->ds_lock)) {
1305 				rw_exit(&dsp->ds_lock);
1306 				rw_enter(&dsp->ds_lock, RW_WRITER);
1307 
1308 				if (dsp->ds_dlstate == DL_UNATTACHED ||
1309 				    DL_ACK_PENDING(dsp->ds_dlstate)) {
1310 					dl_err = DL_OUTSTATE;
1311 					goto failed;
1312 				}
1313 			}
1314 			upgraded = B_TRUE;
1315 
1316 			switch (soft_ring.dls_flags) {
1317 			default:
1318 				/*FALLTHRU*/
1319 			case SOFT_RING_DISABLE:
1320 				proto_soft_ring_disable(dsp);
1321 				break;
1322 
1323 			case SOFT_RING_ENABLE:
1324 				/*
1325 				 * Make sure soft_ring is disabled.
1326 				 */
1327 				proto_soft_ring_disable(dsp);
1328 
1329 				/*
1330 				 * Now attempt enable it.
1331 				 */
1332 				if (check_ip_above(dsp->ds_rq) &&
1333 				    proto_soft_ring_enable(dsp, &soft_ring)) {
1334 					bzero(&soft_ring,
1335 					    sizeof (dl_capab_dls_t));
1336 					soft_ring.dls_flags =
1337 					    SOFT_RING_ENABLE;
1338 				} else {
1339 					bzero(&soft_ring,
1340 					    sizeof (dl_capab_dls_t));
1341 					soft_ring.dls_flags =
1342 					    SOFT_RING_DISABLE;
1343 				}
1344 				break;
1345 			}
1346 
1347 			dlcapabsetqid(&(soft_ring.dls_mid), dsp->ds_rq);
1348 			bcopy(&soft_ring, soft_ringp,
1349 			    sizeof (dl_capab_dls_t));
1350 			break;
1351 		}
1352 		default:
1353 			break;
1354 		}
1355 
1356 		off += size;
1357 	}
1358 	rw_exit(&dsp->ds_lock);
1359 	qreply(q, mp);
1360 	return (B_TRUE);
1361 failed:
1362 	rw_exit(&dsp->ds_lock);
1363 	dlerrorack(q, mp, DL_CAPABILITY_REQ, dl_err, 0);
1364 	return (B_FALSE);
1365 }
1366 
1367 /*
1368  * DL_NOTIFY_REQ
1369  */
1370 static boolean_t
1371 proto_notify_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp)
1372 {
1373 	dl_notify_req_t	*dlp = (dl_notify_req_t *)udlp;
1374 	t_uscalar_t	dl_err;
1375 	queue_t		*q = dsp->ds_wq;
1376 	uint_t		note =
1377 	    DL_NOTE_PROMISC_ON_PHYS |
1378 	    DL_NOTE_PROMISC_OFF_PHYS |
1379 	    DL_NOTE_PHYS_ADDR |
1380 	    DL_NOTE_LINK_UP |
1381 	    DL_NOTE_LINK_DOWN |
1382 	    DL_NOTE_CAPAB_RENEG |
1383 	    DL_NOTE_SPEED;
1384 
1385 	rw_enter(&dsp->ds_lock, RW_WRITER);
1386 
1387 	if (MBLKL(mp) < sizeof (dl_notify_req_t)) {
1388 		dl_err = DL_BADPRIM;
1389 		goto failed;
1390 	}
1391 
1392 	if (dsp->ds_dlstate == DL_UNATTACHED ||
1393 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
1394 		dl_err = DL_OUTSTATE;
1395 		goto failed;
1396 	}
1397 
1398 	/*
1399 	 * Cache the notifications that are being enabled.
1400 	 */
1401 	dsp->ds_notifications = dlp->dl_notifications & note;
1402 	rw_exit(&dsp->ds_lock);
1403 	/*
1404 	 * The ACK carries all notifications regardless of which set is
1405 	 * being enabled.
1406 	 */
1407 	dlnotifyack(q, mp, note);
1408 
1409 	/*
1410 	 * Solicit DL_NOTIFY_IND messages for each enabled notification.
1411 	 */
1412 	rw_enter(&dsp->ds_lock, RW_READER);
1413 	if (dsp->ds_notifications != 0) {
1414 		rw_exit(&dsp->ds_lock);
1415 		dld_str_notify_ind(dsp);
1416 	} else {
1417 		rw_exit(&dsp->ds_lock);
1418 	}
1419 	return (B_TRUE);
1420 failed:
1421 	rw_exit(&dsp->ds_lock);
1422 	dlerrorack(q, mp, DL_NOTIFY_REQ, dl_err, 0);
1423 	return (B_FALSE);
1424 }
1425 
1426 /*
1427  * DL_UINTDATA_REQ
1428  */
1429 static boolean_t
1430 proto_unitdata_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp)
1431 {
1432 	queue_t			*q = dsp->ds_wq;
1433 	dl_unitdata_req_t	*dlp = (dl_unitdata_req_t *)udlp;
1434 	off_t			off;
1435 	size_t			len, size;
1436 	const uint8_t		*addr;
1437 	uint16_t		sap;
1438 	uint_t			addr_length;
1439 	mblk_t			*bp, *payload;
1440 	uint32_t		start, stuff, end, value, flags;
1441 	t_uscalar_t		dl_err;
1442 
1443 	rw_enter(&dsp->ds_lock, RW_READER);
1444 
1445 	if (MBLKL(mp) < sizeof (dl_unitdata_req_t) || mp->b_cont == NULL) {
1446 		dl_err = DL_BADPRIM;
1447 		goto failed;
1448 	}
1449 
1450 	if (dsp->ds_dlstate != DL_IDLE) {
1451 		dl_err = DL_OUTSTATE;
1452 		goto failed;
1453 	}
1454 	addr_length = dsp->ds_mip->mi_addr_length;
1455 
1456 	off = dlp->dl_dest_addr_offset;
1457 	len = dlp->dl_dest_addr_length;
1458 
1459 	if (!MBLKIN(mp, off, len) || !IS_P2ALIGNED(off, sizeof (uint16_t))) {
1460 		dl_err = DL_BADPRIM;
1461 		goto failed;
1462 	}
1463 
1464 	if (len != addr_length + sizeof (uint16_t)) {
1465 		dl_err = DL_BADADDR;
1466 		goto failed;
1467 	}
1468 
1469 	addr = mp->b_rptr + off;
1470 	sap = *(uint16_t *)(mp->b_rptr + off + addr_length);
1471 
1472 	/*
1473 	 * Check the length of the packet and the block types.
1474 	 */
1475 	size = 0;
1476 	payload = mp->b_cont;
1477 	for (bp = payload; bp != NULL; bp = bp->b_cont) {
1478 		if (DB_TYPE(bp) != M_DATA)
1479 			goto baddata;
1480 
1481 		size += MBLKL(bp);
1482 	}
1483 
1484 	if (size > dsp->ds_mip->mi_sdu_max)
1485 		goto baddata;
1486 
1487 	/*
1488 	 * Build a packet header.
1489 	 */
1490 	if ((bp = dls_header(dsp->ds_dc, addr, sap, dlp->dl_priority.dl_max,
1491 	    &payload)) == NULL) {
1492 		dl_err = DL_BADADDR;
1493 		goto failed;
1494 	}
1495 
1496 	/*
1497 	 * We no longer need the M_PROTO header, so free it.
1498 	 */
1499 	freeb(mp);
1500 
1501 	/*
1502 	 * Transfer the checksum offload information if it is present.
1503 	 */
1504 	hcksum_retrieve(payload, NULL, NULL, &start, &stuff, &end, &value,
1505 	    &flags);
1506 	(void) hcksum_assoc(bp, NULL, NULL, start, stuff, end, value, flags, 0);
1507 
1508 	/*
1509 	 * Link the payload onto the new header.
1510 	 */
1511 	ASSERT(bp->b_cont == NULL);
1512 	bp->b_cont = payload;
1513 
1514 	dld_tx_single(dsp, bp);
1515 	rw_exit(&dsp->ds_lock);
1516 	return (B_TRUE);
1517 failed:
1518 	rw_exit(&dsp->ds_lock);
1519 	dlerrorack(q, mp, DL_UNITDATA_REQ, dl_err, 0);
1520 	return (B_FALSE);
1521 
1522 baddata:
1523 	rw_exit(&dsp->ds_lock);
1524 	dluderrorind(q, mp, (void *)addr, len, DL_BADDATA, 0);
1525 	return (B_FALSE);
1526 }
1527 
1528 /*
1529  * DL_PASSIVE_REQ
1530  */
1531 /* ARGSUSED */
1532 static boolean_t
1533 proto_passive_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp)
1534 {
1535 	t_uscalar_t dl_err;
1536 
1537 	rw_enter(&dsp->ds_lock, RW_WRITER);
1538 	/*
1539 	 * If we've already become active by issuing an active primitive,
1540 	 * then it's too late to try to become passive.
1541 	 */
1542 	if (dsp->ds_passivestate == DLD_ACTIVE) {
1543 		dl_err = DL_OUTSTATE;
1544 		goto failed;
1545 	}
1546 
1547 	if (MBLKL(mp) < sizeof (dl_passive_req_t)) {
1548 		dl_err = DL_BADPRIM;
1549 		goto failed;
1550 	}
1551 
1552 	dsp->ds_passivestate = DLD_PASSIVE;
1553 	rw_exit(&dsp->ds_lock);
1554 	dlokack(dsp->ds_wq, mp, DL_PASSIVE_REQ);
1555 	return (B_TRUE);
1556 failed:
1557 	rw_exit(&dsp->ds_lock);
1558 	dlerrorack(dsp->ds_wq, mp, DL_PASSIVE_REQ, dl_err, 0);
1559 	return (B_FALSE);
1560 }
1561 
1562 
1563 /*
1564  * Catch-all handler.
1565  */
1566 static boolean_t
1567 proto_req(dld_str_t *dsp, union DL_primitives *dlp, mblk_t *mp)
1568 {
1569 	dlerrorack(dsp->ds_wq, mp, dlp->dl_primitive, DL_UNSUPPORTED, 0);
1570 	return (B_FALSE);
1571 }
1572 
1573 static void
1574 proto_poll_disable(dld_str_t *dsp)
1575 {
1576 	mac_handle_t	mh;
1577 
1578 	ASSERT(dsp->ds_pending_req != NULL || RW_WRITE_HELD(&dsp->ds_lock));
1579 
1580 	if (!dsp->ds_polling)
1581 		return;
1582 
1583 	/*
1584 	 * It should be impossible to enable raw mode if polling is turned on.
1585 	 */
1586 	ASSERT(dsp->ds_mode != DLD_RAW);
1587 
1588 	/*
1589 	 * Reset the resource_add callback.
1590 	 */
1591 	mh = dls_mac(dsp->ds_dc);
1592 	mac_resource_set(mh, NULL, NULL);
1593 	mac_resources(mh);
1594 
1595 	/*
1596 	 * Set receive function back to default.
1597 	 */
1598 	dls_rx_set(dsp->ds_dc, (dsp->ds_mode == DLD_FASTPATH) ?
1599 	    dld_str_rx_fastpath : dld_str_rx_unitdata, (void *)dsp);
1600 
1601 	/*
1602 	 * Note that polling is disabled.
1603 	 */
1604 	dsp->ds_polling = B_FALSE;
1605 }
1606 
1607 static boolean_t
1608 proto_poll_enable(dld_str_t *dsp, dl_capab_dls_t *pollp)
1609 {
1610 	mac_handle_t	mh;
1611 
1612 	ASSERT(RW_WRITE_HELD(&dsp->ds_lock));
1613 	ASSERT(!dsp->ds_polling);
1614 
1615 	/*
1616 	 * We cannot enable polling if raw mode
1617 	 * has been enabled.
1618 	 */
1619 	if (dsp->ds_mode == DLD_RAW)
1620 		return (B_FALSE);
1621 
1622 	mh = dls_mac(dsp->ds_dc);
1623 
1624 	/*
1625 	 * Register resources.
1626 	 */
1627 	mac_resource_set(mh, (mac_resource_add_t)pollp->dls_ring_add,
1628 	    (void *)pollp->dls_rx_handle);
1629 	mac_resources(mh);
1630 
1631 	/*
1632 	 * Set the receive function.
1633 	 */
1634 	dls_rx_set(dsp->ds_dc, (dls_rx_t)pollp->dls_rx,
1635 	    (void *)pollp->dls_rx_handle);
1636 
1637 	/*
1638 	 * Note that polling is enabled. This prevents further DLIOCHDRINFO
1639 	 * ioctls from overwriting the receive function pointer.
1640 	 */
1641 	dsp->ds_polling = B_TRUE;
1642 	return (B_TRUE);
1643 }
1644 
1645 static void
1646 proto_soft_ring_disable(dld_str_t *dsp)
1647 {
1648 	ASSERT(RW_WRITE_HELD(&dsp->ds_lock));
1649 
1650 	if (!dsp->ds_soft_ring)
1651 		return;
1652 
1653 	/*
1654 	 * It should be impossible to enable raw mode if soft_ring is turned on.
1655 	 */
1656 	ASSERT(dsp->ds_mode != DLD_RAW);
1657 	proto_change_soft_ring_fanout(dsp, SOFT_RING_NONE);
1658 	/*
1659 	 * Note that fanout is disabled.
1660 	 */
1661 	dsp->ds_soft_ring = B_FALSE;
1662 }
1663 
1664 static boolean_t
1665 proto_soft_ring_enable(dld_str_t *dsp, dl_capab_dls_t *soft_ringp)
1666 {
1667 	ASSERT(RW_WRITE_HELD(&dsp->ds_lock));
1668 	ASSERT(!dsp->ds_soft_ring);
1669 
1670 	/*
1671 	 * We cannot enable soft_ring if raw mode
1672 	 * has been enabled.
1673 	 */
1674 	if (dsp->ds_mode == DLD_RAW)
1675 		return (B_FALSE);
1676 
1677 	if (dls_soft_ring_enable(dsp->ds_dc, soft_ringp) == B_FALSE)
1678 		return (B_FALSE);
1679 
1680 	dsp->ds_soft_ring = B_TRUE;
1681 	return (B_TRUE);
1682 }
1683 
1684 static void
1685 proto_change_soft_ring_fanout(dld_str_t *dsp, int type)
1686 {
1687 	dls_rx_t	rx;
1688 
1689 	if (type == SOFT_RING_NONE) {
1690 		rx = (dsp->ds_mode == DLD_FASTPATH) ?
1691 			    dld_str_rx_fastpath : dld_str_rx_unitdata;
1692 	} else {
1693 		rx = (dls_rx_t)dls_ether_soft_ring_fanout;
1694 	}
1695 	dls_soft_ring_rx_set(dsp->ds_dc, rx, dsp, type);
1696 }
1697 
1698 /*
1699  * DL_CAPABILITY_ACK/DL_ERROR_ACK
1700  */
1701 static boolean_t
1702 proto_capability_advertise(dld_str_t *dsp, mblk_t *mp)
1703 {
1704 	dl_capability_ack_t	*dlap;
1705 	dl_capability_sub_t	*dlsp;
1706 	size_t			subsize;
1707 	dl_capab_dls_t		poll;
1708 	dl_capab_dls_t		soft_ring;
1709 	dl_capab_hcksum_t	hcksum;
1710 	dl_capab_lso_t		lso;
1711 	dl_capab_zerocopy_t	zcopy;
1712 	uint8_t			*ptr;
1713 	boolean_t		cksum_cap;
1714 	boolean_t		poll_cap;
1715 	boolean_t		lso_cap;
1716 	mac_capab_lso_t		mac_lso;
1717 	queue_t			*q = dsp->ds_wq;
1718 	mblk_t			*mp1;
1719 
1720 	ASSERT(RW_READ_HELD(&dsp->ds_lock));
1721 
1722 	/*
1723 	 * Initially assume no capabilities.
1724 	 */
1725 	subsize = 0;
1726 
1727 	/*
1728 	 * Advertize soft ring capability if
1729 	 * VLAN_ID_NONE for GLDv3 drivers
1730 	 */
1731 	if (dsp->ds_vid == VLAN_ID_NONE)
1732 		subsize += sizeof (dl_capability_sub_t) +
1733 				    sizeof (dl_capab_dls_t);
1734 
1735 	/*
1736 	 * Check if polling can be enabled on this interface.
1737 	 * If advertising DL_CAPAB_POLL has not been explicitly disabled
1738 	 * then reserve space for that capability.
1739 	 */
1740 	poll_cap = (mac_capab_get(dsp->ds_mh, MAC_CAPAB_POLL, NULL) &&
1741 	    !(dld_opt & DLD_OPT_NO_POLL) && (dsp->ds_vid == VLAN_ID_NONE));
1742 	if (poll_cap) {
1743 		subsize += sizeof (dl_capability_sub_t) +
1744 		    sizeof (dl_capab_dls_t);
1745 	}
1746 
1747 	/*
1748 	 * If the MAC interface supports checksum offload then reserve
1749 	 * space for the DL_CAPAB_HCKSUM capability.
1750 	 */
1751 	if (cksum_cap = mac_capab_get(dsp->ds_mh, MAC_CAPAB_HCKSUM,
1752 	    &hcksum.hcksum_txflags)) {
1753 		subsize += sizeof (dl_capability_sub_t) +
1754 		    sizeof (dl_capab_hcksum_t);
1755 	}
1756 
1757 	/*
1758 	 * If LSO is usable for MAC, reserve space for the DL_CAPAB_LSO
1759 	 * capability.
1760 	 */
1761 	if (lso_cap = mac_capab_get(dsp->ds_mh, MAC_CAPAB_LSO, &mac_lso)) {
1762 		subsize += sizeof (dl_capability_sub_t) +
1763 		    sizeof (dl_capab_lso_t);
1764 	}
1765 
1766 	/*
1767 	 * If DL_CAPAB_ZEROCOPY has not be explicitly disabled then
1768 	 * reserve space for it.
1769 	 */
1770 	if (!(dld_opt & DLD_OPT_NO_ZEROCOPY)) {
1771 		subsize += sizeof (dl_capability_sub_t) +
1772 		    sizeof (dl_capab_zerocopy_t);
1773 	}
1774 
1775 	/*
1776 	 * If there are no capabilities to advertise or if we
1777 	 * can't allocate a response, send a DL_ERROR_ACK.
1778 	 */
1779 	if ((mp1 = reallocb(mp,
1780 	    sizeof (dl_capability_ack_t) + subsize, 0)) == NULL) {
1781 		rw_exit(&dsp->ds_lock);
1782 		dlerrorack(q, mp, DL_CAPABILITY_REQ, DL_NOTSUPPORTED, 0);
1783 		return (B_FALSE);
1784 	}
1785 
1786 	mp = mp1;
1787 	DB_TYPE(mp) = M_PROTO;
1788 	mp->b_wptr = mp->b_rptr + sizeof (dl_capability_ack_t) + subsize;
1789 	bzero(mp->b_rptr, MBLKL(mp));
1790 	dlap = (dl_capability_ack_t *)mp->b_rptr;
1791 	dlap->dl_primitive = DL_CAPABILITY_ACK;
1792 	dlap->dl_sub_offset = sizeof (dl_capability_ack_t);
1793 	dlap->dl_sub_length = subsize;
1794 	ptr = (uint8_t *)&dlap[1];
1795 
1796 	/*
1797 	 * IP polling interface.
1798 	 */
1799 	if (poll_cap) {
1800 		/*
1801 		 * Attempt to disable just in case this is a re-negotiation;
1802 		 * we need to become writer before doing so.
1803 		 */
1804 		if (!rw_tryupgrade(&dsp->ds_lock)) {
1805 			rw_exit(&dsp->ds_lock);
1806 			rw_enter(&dsp->ds_lock, RW_WRITER);
1807 		}
1808 
1809 		/*
1810 		 * Check if polling state has changed after we re-acquired
1811 		 * the lock above, so that we don't mis-advertise it.
1812 		 */
1813 		poll_cap = !(dld_opt & DLD_OPT_NO_POLL) &&
1814 		    (dsp->ds_vid == VLAN_ID_NONE);
1815 
1816 		if (!poll_cap) {
1817 			int poll_capab_size;
1818 
1819 			rw_downgrade(&dsp->ds_lock);
1820 
1821 			poll_capab_size = sizeof (dl_capability_sub_t) +
1822 			    sizeof (dl_capab_dls_t);
1823 
1824 			mp->b_wptr -= poll_capab_size;
1825 			subsize -= poll_capab_size;
1826 			dlap->dl_sub_length = subsize;
1827 		} else {
1828 			proto_poll_disable(dsp);
1829 
1830 			rw_downgrade(&dsp->ds_lock);
1831 
1832 			dlsp = (dl_capability_sub_t *)ptr;
1833 
1834 			dlsp->dl_cap = DL_CAPAB_POLL;
1835 			dlsp->dl_length = sizeof (dl_capab_dls_t);
1836 			ptr += sizeof (dl_capability_sub_t);
1837 
1838 			bzero(&poll, sizeof (dl_capab_dls_t));
1839 			poll.dls_version = POLL_VERSION_1;
1840 			poll.dls_flags = POLL_CAPABLE;
1841 			poll.dls_tx_handle = (uintptr_t)dsp;
1842 			poll.dls_tx = (uintptr_t)str_mdata_fastpath_put;
1843 
1844 			dlcapabsetqid(&(poll.dls_mid), dsp->ds_rq);
1845 			bcopy(&poll, ptr, sizeof (dl_capab_dls_t));
1846 			ptr += sizeof (dl_capab_dls_t);
1847 		}
1848 	}
1849 
1850 	ASSERT(RW_READ_HELD(&dsp->ds_lock));
1851 
1852 	if (dsp->ds_vid == VLAN_ID_NONE) {
1853 		dlsp = (dl_capability_sub_t *)ptr;
1854 
1855 		dlsp->dl_cap = DL_CAPAB_SOFT_RING;
1856 		dlsp->dl_length = sizeof (dl_capab_dls_t);
1857 		ptr += sizeof (dl_capability_sub_t);
1858 
1859 		bzero(&soft_ring, sizeof (dl_capab_dls_t));
1860 		soft_ring.dls_version = SOFT_RING_VERSION_1;
1861 		soft_ring.dls_flags = SOFT_RING_CAPABLE;
1862 		soft_ring.dls_tx_handle = (uintptr_t)dsp;
1863 		soft_ring.dls_tx = (uintptr_t)str_mdata_fastpath_put;
1864 		soft_ring.dls_ring_change_status =
1865 		    (uintptr_t)proto_change_soft_ring_fanout;
1866 		soft_ring.dls_ring_bind = (uintptr_t)soft_ring_bind;
1867 		soft_ring.dls_ring_unbind = (uintptr_t)soft_ring_unbind;
1868 
1869 		dlcapabsetqid(&(soft_ring.dls_mid), dsp->ds_rq);
1870 		bcopy(&soft_ring, ptr, sizeof (dl_capab_dls_t));
1871 		ptr += sizeof (dl_capab_dls_t);
1872 	}
1873 
1874 	/*
1875 	 * TCP/IP checksum offload.
1876 	 */
1877 	if (cksum_cap) {
1878 		dlsp = (dl_capability_sub_t *)ptr;
1879 
1880 		dlsp->dl_cap = DL_CAPAB_HCKSUM;
1881 		dlsp->dl_length = sizeof (dl_capab_hcksum_t);
1882 		ptr += sizeof (dl_capability_sub_t);
1883 
1884 		hcksum.hcksum_version = HCKSUM_VERSION_1;
1885 		dlcapabsetqid(&(hcksum.hcksum_mid), dsp->ds_rq);
1886 		bcopy(&hcksum, ptr, sizeof (dl_capab_hcksum_t));
1887 		ptr += sizeof (dl_capab_hcksum_t);
1888 	}
1889 
1890 	/*
1891 	 * Large segment offload. (LSO)
1892 	 */
1893 	if (lso_cap) {
1894 		dlsp = (dl_capability_sub_t *)ptr;
1895 
1896 		dlsp->dl_cap = DL_CAPAB_LSO;
1897 		dlsp->dl_length = sizeof (dl_capab_lso_t);
1898 		ptr += sizeof (dl_capability_sub_t);
1899 
1900 		lso.lso_version = LSO_VERSION_1;
1901 		lso.lso_flags = mac_lso.lso_flags;
1902 		lso.lso_max = mac_lso.lso_basic_tcp_ipv4.lso_max;
1903 
1904 		/* Simply enable LSO with DLD */
1905 		dsp->ds_lso = B_TRUE;
1906 		dsp->ds_lso_max = lso.lso_max;
1907 
1908 		dlcapabsetqid(&(lso.lso_mid), dsp->ds_rq);
1909 		bcopy(&lso, ptr, sizeof (dl_capab_lso_t));
1910 		ptr += sizeof (dl_capab_lso_t);
1911 	} else {
1912 		dsp->ds_lso = B_FALSE;
1913 		dsp->ds_lso_max = 0;
1914 	}
1915 
1916 	/*
1917 	 * Zero copy
1918 	 */
1919 	if (!(dld_opt & DLD_OPT_NO_ZEROCOPY)) {
1920 		dlsp = (dl_capability_sub_t *)ptr;
1921 
1922 		dlsp->dl_cap = DL_CAPAB_ZEROCOPY;
1923 		dlsp->dl_length = sizeof (dl_capab_zerocopy_t);
1924 		ptr += sizeof (dl_capability_sub_t);
1925 
1926 		bzero(&zcopy, sizeof (dl_capab_zerocopy_t));
1927 		zcopy.zerocopy_version = ZEROCOPY_VERSION_1;
1928 		zcopy.zerocopy_flags = DL_CAPAB_VMSAFE_MEM;
1929 
1930 		dlcapabsetqid(&(zcopy.zerocopy_mid), dsp->ds_rq);
1931 		bcopy(&zcopy, ptr, sizeof (dl_capab_zerocopy_t));
1932 		ptr += sizeof (dl_capab_zerocopy_t);
1933 	}
1934 
1935 	ASSERT(ptr == mp->b_rptr + sizeof (dl_capability_ack_t) + subsize);
1936 
1937 	rw_exit(&dsp->ds_lock);
1938 	qreply(q, mp);
1939 	return (B_TRUE);
1940 }
1941