xref: /titanic_52/usr/src/uts/common/io/dld/dld_proto.c (revision fec509a05ddbf645268fe2e537314def7d1b67c8)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * Data-Link Driver
30  */
31 
32 #include <sys/types.h>
33 #include <sys/debug.h>
34 #include <sys/sysmacros.h>
35 #include <sys/stream.h>
36 #include <sys/ddi.h>
37 #include <sys/sunddi.h>
38 #include <sys/strsun.h>
39 #include <sys/cpuvar.h>
40 #include <sys/dlpi.h>
41 #include <netinet/in.h>
42 #include <sys/sdt.h>
43 #include <sys/strsubr.h>
44 #include <sys/vlan.h>
45 #include <sys/mac.h>
46 #include <sys/dls.h>
47 #include <sys/dld.h>
48 #include <sys/dld_impl.h>
49 #include <sys/dls_soft_ring.h>
50 
51 typedef boolean_t proto_reqfunc_t(dld_str_t *, union DL_primitives *, mblk_t *);
52 
53 static proto_reqfunc_t proto_info_req, proto_attach_req, proto_detach_req,
54     proto_bind_req, proto_unbind_req, proto_promiscon_req, proto_promiscoff_req,
55     proto_enabmulti_req, proto_disabmulti_req, proto_physaddr_req,
56     proto_setphysaddr_req, proto_udqos_req, proto_req, proto_capability_req,
57     proto_notify_req, proto_unitdata_req, proto_passive_req;
58 
59 static void proto_poll_disable(dld_str_t *);
60 static boolean_t proto_poll_enable(dld_str_t *, dl_capab_dls_t *);
61 static boolean_t proto_capability_advertise(dld_str_t *, mblk_t *);
62 
63 static task_func_t proto_process_unbind_req, proto_process_detach_req;
64 
65 static void proto_soft_ring_disable(dld_str_t *);
66 static boolean_t proto_soft_ring_enable(dld_str_t *, dl_capab_dls_t *);
67 static boolean_t proto_capability_advertise(dld_str_t *, mblk_t *);
68 static void proto_change_soft_ring_fanout(dld_str_t *, int);
69 
70 #define	DL_ACK_PENDING(state) \
71 	((state) == DL_ATTACH_PENDING || \
72 	(state) == DL_DETACH_PENDING || \
73 	(state) == DL_BIND_PENDING || \
74 	(state) == DL_UNBIND_PENDING)
75 
76 /*
77  * Process a DLPI protocol message.
78  * The primitives DL_BIND_REQ, DL_ENABMULTI_REQ, DL_PROMISCON_REQ,
79  * DL_SET_PHYS_ADDR_REQ put the data link below our dld_str_t into an
80  * 'active' state. The primitive DL_PASSIVE_REQ marks our dld_str_t
81  * as 'passive' and forbids it from being subsequently made 'active'
82  * by the above primitives.
83  */
84 void
85 dld_proto(dld_str_t *dsp, mblk_t *mp)
86 {
87 	union DL_primitives	*udlp;
88 	t_uscalar_t		prim;
89 
90 	if (MBLKL(mp) < sizeof (t_uscalar_t)) {
91 		freemsg(mp);
92 		return;
93 	}
94 
95 	udlp = (union DL_primitives *)mp->b_rptr;
96 	prim = udlp->dl_primitive;
97 
98 	switch (prim) {
99 	case DL_INFO_REQ:
100 		(void) proto_info_req(dsp, udlp, mp);
101 		break;
102 	case DL_BIND_REQ:
103 		(void) proto_bind_req(dsp, udlp, mp);
104 		break;
105 	case DL_UNBIND_REQ:
106 		(void) proto_unbind_req(dsp, udlp, mp);
107 		break;
108 	case DL_UNITDATA_REQ:
109 		(void) proto_unitdata_req(dsp, udlp, mp);
110 		break;
111 	case DL_UDQOS_REQ:
112 		(void) proto_udqos_req(dsp, udlp, mp);
113 		break;
114 	case DL_ATTACH_REQ:
115 		(void) proto_attach_req(dsp, udlp, mp);
116 		break;
117 	case DL_DETACH_REQ:
118 		(void) proto_detach_req(dsp, udlp, mp);
119 		break;
120 	case DL_ENABMULTI_REQ:
121 		(void) proto_enabmulti_req(dsp, udlp, mp);
122 		break;
123 	case DL_DISABMULTI_REQ:
124 		(void) proto_disabmulti_req(dsp, udlp, mp);
125 		break;
126 	case DL_PROMISCON_REQ:
127 		(void) proto_promiscon_req(dsp, udlp, mp);
128 		break;
129 	case DL_PROMISCOFF_REQ:
130 		(void) proto_promiscoff_req(dsp, udlp, mp);
131 		break;
132 	case DL_PHYS_ADDR_REQ:
133 		(void) proto_physaddr_req(dsp, udlp, mp);
134 		break;
135 	case DL_SET_PHYS_ADDR_REQ:
136 		(void) proto_setphysaddr_req(dsp, udlp, mp);
137 		break;
138 	case DL_NOTIFY_REQ:
139 		(void) proto_notify_req(dsp, udlp, mp);
140 		break;
141 	case DL_CAPABILITY_REQ:
142 		(void) proto_capability_req(dsp, udlp, mp);
143 		break;
144 	case DL_PASSIVE_REQ:
145 		(void) proto_passive_req(dsp, udlp, mp);
146 		break;
147 	default:
148 		(void) proto_req(dsp, udlp, mp);
149 		break;
150 	}
151 }
152 
153 /*
154  * Finish any pending operations.
155  * Requests that need to be processed asynchronously will be handled
156  * by a separate thread. After this function returns, other threads
157  * will be allowed to enter dld; they will not be able to do anything
158  * until ds_dlstate transitions to a non-pending state.
159  */
160 void
161 dld_finish_pending_ops(dld_str_t *dsp)
162 {
163 	task_func_t *op = NULL;
164 
165 	ASSERT(MUTEX_HELD(&dsp->ds_thr_lock));
166 	ASSERT(dsp->ds_thr == 0);
167 
168 	op = dsp->ds_pending_op;
169 	dsp->ds_pending_op = NULL;
170 	mutex_exit(&dsp->ds_thr_lock);
171 	if (op != NULL)
172 		(void) taskq_dispatch(system_taskq, op, dsp, TQ_SLEEP);
173 }
174 
/*
 * Arithmetic negation of x.  Both the argument and the complete expansion
 * are parenthesized so the macro behaves as a single operand wherever it
 * is used.
 */
#define	NEG(x)	(-(x))
176 
177 typedef struct dl_info_ack_wrapper {
178 	dl_info_ack_t		dl_info;
179 	uint8_t			dl_addr[MAXMACADDRLEN + sizeof (uint16_t)];
180 	uint8_t			dl_brdcst_addr[MAXMACADDRLEN];
181 	dl_qos_cl_range1_t	dl_qos_range1;
182 	dl_qos_cl_sel1_t	dl_qos_sel1;
183 } dl_info_ack_wrapper_t;
184 
185 /*
186  * DL_INFO_REQ
187  */
188 /*ARGSUSED*/
189 static boolean_t
190 proto_info_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp)
191 {
192 	dl_info_ack_wrapper_t	*dlwp;
193 	dl_info_ack_t		*dlp;
194 	dl_qos_cl_sel1_t	*selp;
195 	dl_qos_cl_range1_t	*rangep;
196 	uint8_t			*addr;
197 	uint8_t			*brdcst_addr;
198 	uint_t			addr_length;
199 	uint_t			sap_length;
200 	mac_info_t		minfo;
201 	mac_info_t		*minfop;
202 	queue_t			*q = dsp->ds_wq;
203 
204 	/*
205 	 * Swap the request message for one large enough to contain the
206 	 * wrapper structure defined above.
207 	 */
208 	if ((mp = mexchange(q, mp, sizeof (dl_info_ack_wrapper_t),
209 	    M_PCPROTO, 0)) == NULL)
210 		return (B_FALSE);
211 
212 	rw_enter(&dsp->ds_lock, RW_READER);
213 
214 	bzero(mp->b_rptr, sizeof (dl_info_ack_wrapper_t));
215 	dlwp = (dl_info_ack_wrapper_t *)mp->b_rptr;
216 
217 	dlp = &(dlwp->dl_info);
218 	ASSERT(dlp == (dl_info_ack_t *)mp->b_rptr);
219 
220 	dlp->dl_primitive = DL_INFO_ACK;
221 
222 	/*
223 	 * Set up the sub-structure pointers.
224 	 */
225 	addr = dlwp->dl_addr;
226 	brdcst_addr = dlwp->dl_brdcst_addr;
227 	rangep = &(dlwp->dl_qos_range1);
228 	selp = &(dlwp->dl_qos_sel1);
229 
230 	/*
231 	 * This driver supports only version 2 connectionless DLPI provider
232 	 * nodes.
233 	 */
234 	dlp->dl_service_mode = DL_CLDLS;
235 	dlp->dl_version = DL_VERSION_2;
236 
237 	/*
238 	 * Set the style of the provider
239 	 */
240 	dlp->dl_provider_style = dsp->ds_style;
241 	ASSERT(dlp->dl_provider_style == DL_STYLE1 ||
242 	    dlp->dl_provider_style == DL_STYLE2);
243 
244 	/*
245 	 * Set the current DLPI state.
246 	 */
247 	dlp->dl_current_state = dsp->ds_dlstate;
248 
249 	/*
250 	 * Gratuitously set the media type. This is to deal with modules
251 	 * that assume the media type is known prior to DL_ATTACH_REQ
252 	 * being completed.
253 	 */
254 	dlp->dl_mac_type = DL_ETHER;
255 
256 	/*
257 	 * If the stream is not at least attached we try to retrieve the
258 	 * mac_info using mac_info_get()
259 	 */
260 	if (dsp->ds_dlstate == DL_UNATTACHED ||
261 	    dsp->ds_dlstate == DL_ATTACH_PENDING ||
262 	    dsp->ds_dlstate == DL_DETACH_PENDING) {
263 		if (!mac_info_get(ddi_major_to_name(dsp->ds_major), &minfo)) {
264 			/*
265 			 * Cannot find mac_info. giving up.
266 			 */
267 			goto done;
268 		}
269 		minfop = &minfo;
270 	} else {
271 		minfop = (mac_info_t *)dsp->ds_mip;
272 	}
273 
274 	/*
275 	 * Set the media type (properly this time).
276 	 */
277 	if (dsp->ds_native)
278 		dlp->dl_mac_type = minfop->mi_nativemedia;
279 	else
280 		dlp->dl_mac_type = minfop->mi_media;
281 
282 	/*
283 	 * Set the DLSAP length. We only support 16 bit values and they
284 	 * appear after the MAC address portion of DLSAP addresses.
285 	 */
286 	sap_length = sizeof (uint16_t);
287 	dlp->dl_sap_length = NEG(sap_length);
288 
289 	/*
290 	 * Set the minimum and maximum payload sizes.
291 	 */
292 	dlp->dl_min_sdu = minfop->mi_sdu_min;
293 	dlp->dl_max_sdu = minfop->mi_sdu_max;
294 
295 	addr_length = minfop->mi_addr_length;
296 
297 	/*
298 	 * Copy in the media broadcast address.
299 	 */
300 	if (minfop->mi_brdcst_addr != NULL) {
301 		dlp->dl_brdcst_addr_offset =
302 		    (uintptr_t)brdcst_addr - (uintptr_t)dlp;
303 		bcopy(minfop->mi_brdcst_addr, brdcst_addr, addr_length);
304 		dlp->dl_brdcst_addr_length = addr_length;
305 	}
306 
307 	dlp->dl_qos_range_offset = (uintptr_t)rangep - (uintptr_t)dlp;
308 	dlp->dl_qos_range_length = sizeof (dl_qos_cl_range1_t);
309 
310 	rangep->dl_qos_type = DL_QOS_CL_RANGE1;
311 	rangep->dl_trans_delay.dl_target_value = DL_UNKNOWN;
312 	rangep->dl_trans_delay.dl_accept_value = DL_UNKNOWN;
313 	rangep->dl_protection.dl_min = DL_UNKNOWN;
314 	rangep->dl_protection.dl_max = DL_UNKNOWN;
315 	rangep->dl_residual_error = DL_UNKNOWN;
316 
317 	/*
318 	 * Specify the supported range of priorities.
319 	 */
320 	rangep->dl_priority.dl_min = 0;
321 	rangep->dl_priority.dl_max = (1 << VLAN_PRI_SIZE) - 1;
322 
323 	dlp->dl_qos_offset = (uintptr_t)selp - (uintptr_t)dlp;
324 	dlp->dl_qos_length = sizeof (dl_qos_cl_sel1_t);
325 
326 	selp->dl_qos_type = DL_QOS_CL_SEL1;
327 	selp->dl_trans_delay = DL_UNKNOWN;
328 	selp->dl_protection = DL_UNKNOWN;
329 	selp->dl_residual_error = DL_UNKNOWN;
330 
331 	/*
332 	 * Specify the current priority (which can be changed by
333 	 * the DL_UDQOS_REQ primitive).
334 	 */
335 	selp->dl_priority = dsp->ds_pri;
336 
337 	dlp->dl_addr_length = addr_length + sizeof (uint16_t);
338 	if (dsp->ds_dlstate == DL_IDLE) {
339 		/*
340 		 * The stream is bound. Therefore we can formulate a valid
341 		 * DLSAP address.
342 		 */
343 		dlp->dl_addr_offset = (uintptr_t)addr - (uintptr_t)dlp;
344 		if (addr_length > 0)
345 			bcopy(dsp->ds_curr_addr, addr, addr_length);
346 		*(uint16_t *)(addr + addr_length) = dsp->ds_sap;
347 	}
348 
349 done:
350 	ASSERT(IMPLY(dlp->dl_qos_offset != 0, dlp->dl_qos_length != 0));
351 	ASSERT(IMPLY(dlp->dl_qos_range_offset != 0,
352 	    dlp->dl_qos_range_length != 0));
353 	ASSERT(IMPLY(dlp->dl_addr_offset != 0, dlp->dl_addr_length != 0));
354 	ASSERT(IMPLY(dlp->dl_brdcst_addr_offset != 0,
355 	    dlp->dl_brdcst_addr_length != 0));
356 
357 	rw_exit(&dsp->ds_lock);
358 
359 	qreply(q, mp);
360 	return (B_TRUE);
361 }
362 
363 /*
364  * DL_ATTACH_REQ
365  */
366 static boolean_t
367 proto_attach_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp)
368 {
369 	dl_attach_req_t	*dlp = (dl_attach_req_t *)udlp;
370 	int		err = 0;
371 	t_uscalar_t	dl_err;
372 	queue_t		*q = dsp->ds_wq;
373 
374 	rw_enter(&dsp->ds_lock, RW_WRITER);
375 
376 	if (MBLKL(mp) < sizeof (dl_attach_req_t) ||
377 	    dlp->dl_ppa < 0 || dsp->ds_style == DL_STYLE1) {
378 		dl_err = DL_BADPRIM;
379 		goto failed;
380 	}
381 
382 	if (dsp->ds_dlstate != DL_UNATTACHED) {
383 		dl_err = DL_OUTSTATE;
384 		goto failed;
385 	}
386 
387 	dsp->ds_dlstate = DL_ATTACH_PENDING;
388 
389 	err = dld_str_attach(dsp, dlp->dl_ppa);
390 	if (err != 0) {
391 		switch (err) {
392 		case ENOENT:
393 			dl_err = DL_BADPPA;
394 			err = 0;
395 			break;
396 		default:
397 			dl_err = DL_SYSERR;
398 			break;
399 		}
400 		dsp->ds_dlstate = DL_UNATTACHED;
401 		goto failed;
402 	}
403 	ASSERT(dsp->ds_dlstate == DL_UNBOUND);
404 	rw_exit(&dsp->ds_lock);
405 
406 	dlokack(q, mp, DL_ATTACH_REQ);
407 	return (B_TRUE);
408 failed:
409 	rw_exit(&dsp->ds_lock);
410 	dlerrorack(q, mp, DL_ATTACH_REQ, dl_err, (t_uscalar_t)err);
411 	return (B_FALSE);
412 }
413 
414 /*
415  * DL_DETACH_REQ
416  */
417 static void
418 proto_process_detach_req(void *arg)
419 {
420 	dld_str_t	*dsp = arg;
421 	mblk_t		*mp;
422 
423 	/*
424 	 * We don't need to hold locks because no other thread
425 	 * would manipulate dsp while it is in a PENDING state.
426 	 */
427 	ASSERT(dsp->ds_pending_req != NULL);
428 	ASSERT(dsp->ds_dlstate == DL_DETACH_PENDING);
429 
430 	mp = dsp->ds_pending_req;
431 	dsp->ds_pending_req = NULL;
432 	dld_str_detach(dsp);
433 	dlokack(dsp->ds_wq, mp, DL_DETACH_REQ);
434 
435 	DLD_WAKEUP(dsp);
436 }
437 
438 /*ARGSUSED*/
439 static boolean_t
440 proto_detach_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp)
441 {
442 	queue_t		*q = dsp->ds_wq;
443 	t_uscalar_t	dl_err;
444 
445 	rw_enter(&dsp->ds_lock, RW_WRITER);
446 
447 	if (MBLKL(mp) < sizeof (dl_detach_req_t)) {
448 		dl_err = DL_BADPRIM;
449 		goto failed;
450 	}
451 
452 	if (dsp->ds_dlstate != DL_UNBOUND) {
453 		dl_err = DL_OUTSTATE;
454 		goto failed;
455 	}
456 
457 	if (dsp->ds_style == DL_STYLE1) {
458 		dl_err = DL_BADPRIM;
459 		goto failed;
460 	}
461 
462 	dsp->ds_dlstate = DL_DETACH_PENDING;
463 
464 	/*
465 	 * Complete the detach when the driver is single-threaded.
466 	 */
467 	mutex_enter(&dsp->ds_thr_lock);
468 	ASSERT(dsp->ds_pending_req == NULL);
469 	dsp->ds_pending_req = mp;
470 	dsp->ds_pending_op = proto_process_detach_req;
471 	dsp->ds_pending_cnt++;
472 	mutex_exit(&dsp->ds_thr_lock);
473 	rw_exit(&dsp->ds_lock);
474 
475 	return (B_TRUE);
476 failed:
477 	rw_exit(&dsp->ds_lock);
478 	dlerrorack(q, mp, DL_DETACH_REQ, dl_err, 0);
479 	return (B_FALSE);
480 }
481 
482 /*
483  * DL_BIND_REQ
484  */
485 static boolean_t
486 proto_bind_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp)
487 {
488 	dl_bind_req_t	*dlp = (dl_bind_req_t *)udlp;
489 	int		err = 0;
490 	uint8_t		dlsap_addr[MAXMACADDRLEN + sizeof (uint16_t)];
491 	uint_t		dlsap_addr_length;
492 	t_uscalar_t	dl_err;
493 	t_scalar_t	sap;
494 	queue_t		*q = dsp->ds_wq;
495 
496 	rw_enter(&dsp->ds_lock, RW_WRITER);
497 
498 	if (MBLKL(mp) < sizeof (dl_bind_req_t)) {
499 		dl_err = DL_BADPRIM;
500 		goto failed;
501 	}
502 
503 	if (dlp->dl_xidtest_flg != 0) {
504 		dl_err = DL_NOAUTO;
505 		goto failed;
506 	}
507 
508 	if (dlp->dl_service_mode != DL_CLDLS) {
509 		dl_err = DL_UNSUPPORTED;
510 		goto failed;
511 	}
512 
513 	if (dsp->ds_dlstate != DL_UNBOUND) {
514 		dl_err = DL_OUTSTATE;
515 		goto failed;
516 	}
517 
518 	if (dsp->ds_passivestate == DLD_UNINITIALIZED &&
519 	    !dls_active_set(dsp->ds_dc)) {
520 		dl_err = DL_SYSERR;
521 		err = EBUSY;
522 		goto failed;
523 	}
524 
525 	dsp->ds_dlstate = DL_BIND_PENDING;
526 	/*
527 	 * Set the receive callback.
528 	 */
529 	dls_rx_set(dsp->ds_dc, (dsp->ds_mode == DLD_RAW) ?
530 	    dld_str_rx_raw : dld_str_rx_unitdata, dsp);
531 
532 	/*
533 	 * Bind the channel such that it can receive packets.
534 	 */
535 	sap = dsp->ds_sap = dlp->dl_sap;
536 	err = dls_bind(dsp->ds_dc, dlp->dl_sap);
537 	if (err != 0) {
538 		switch (err) {
539 		case EINVAL:
540 			dl_err = DL_BADADDR;
541 			err = 0;
542 			break;
543 		default:
544 			dl_err = DL_SYSERR;
545 			break;
546 		}
547 		dsp->ds_dlstate = DL_UNBOUND;
548 		if (dsp->ds_passivestate == DLD_UNINITIALIZED)
549 			dls_active_clear(dsp->ds_dc);
550 
551 		goto failed;
552 	}
553 
554 	/*
555 	 * Copy in MAC address.
556 	 */
557 	dlsap_addr_length = dsp->ds_mip->mi_addr_length;
558 	bcopy(dsp->ds_curr_addr, dlsap_addr, dlsap_addr_length);
559 
560 	/*
561 	 * Copy in the SAP.
562 	 */
563 	*(uint16_t *)(dlsap_addr + dlsap_addr_length) = dsp->ds_sap;
564 	dlsap_addr_length += sizeof (uint16_t);
565 
566 	dsp->ds_dlstate = DL_IDLE;
567 	if (dsp->ds_passivestate == DLD_UNINITIALIZED)
568 		dsp->ds_passivestate = DLD_ACTIVE;
569 
570 	rw_exit(&dsp->ds_lock);
571 
572 	dlbindack(q, mp, sap, dlsap_addr, dlsap_addr_length, 0, 0);
573 	return (B_TRUE);
574 failed:
575 	rw_exit(&dsp->ds_lock);
576 	dlerrorack(q, mp, DL_BIND_REQ, dl_err, (t_uscalar_t)err);
577 	return (B_FALSE);
578 }
579 
580 /*
581  * DL_UNBIND_REQ
582  */
583 /*ARGSUSED*/
584 static void
585 proto_process_unbind_req(void *arg)
586 {
587 	dld_str_t	*dsp = arg;
588 	mblk_t		*mp;
589 
590 	/*
591 	 * We don't need to hold locks because no other thread
592 	 * would manipulate dsp while it is in a PENDING state.
593 	 */
594 	ASSERT(dsp->ds_pending_req != NULL);
595 	ASSERT(dsp->ds_dlstate == DL_UNBIND_PENDING);
596 
597 	/*
598 	 * Flush any remaining packets scheduled for transmission.
599 	 */
600 	dld_tx_flush(dsp);
601 
602 	/*
603 	 * Unbind the channel to stop packets being received.
604 	 */
605 	dls_unbind(dsp->ds_dc);
606 
607 	/*
608 	 * Disable polling mode, if it is enabled.
609 	 */
610 	proto_poll_disable(dsp);
611 
612 	/*
613 	 * Clear LSO flags.
614 	 */
615 	dsp->ds_lso = B_FALSE;
616 	dsp->ds_lso_max = 0;
617 
618 	/*
619 	 * Clear the receive callback.
620 	 */
621 	dls_rx_set(dsp->ds_dc, NULL, NULL);
622 
623 	/*
624 	 * Set the mode back to the default (unitdata).
625 	 */
626 	dsp->ds_mode = DLD_UNITDATA;
627 
628 	/*
629 	 * If soft rings were enabled, the workers
630 	 * should be quiesced. We cannot check for
631 	 * ds_soft_ring flag because
632 	 * proto_soft_ring_disable() called from
633 	 * proto_capability_req() would have reset it.
634 	 */
635 	if (dls_soft_ring_workers(dsp->ds_dc))
636 		dls_soft_ring_disable(dsp->ds_dc);
637 
638 	mp = dsp->ds_pending_req;
639 	dsp->ds_pending_req = NULL;
640 	dsp->ds_dlstate = DL_UNBOUND;
641 	dlokack(dsp->ds_wq, mp, DL_UNBIND_REQ);
642 
643 	DLD_WAKEUP(dsp);
644 }
645 
646 /*ARGSUSED*/
647 static boolean_t
648 proto_unbind_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp)
649 {
650 	queue_t		*q = dsp->ds_wq;
651 	t_uscalar_t	dl_err;
652 
653 	rw_enter(&dsp->ds_lock, RW_WRITER);
654 
655 	if (MBLKL(mp) < sizeof (dl_unbind_req_t)) {
656 		dl_err = DL_BADPRIM;
657 		goto failed;
658 	}
659 
660 	if (dsp->ds_dlstate != DL_IDLE) {
661 		dl_err = DL_OUTSTATE;
662 		goto failed;
663 	}
664 
665 	dsp->ds_dlstate = DL_UNBIND_PENDING;
666 
667 	mutex_enter(&dsp->ds_thr_lock);
668 	ASSERT(dsp->ds_pending_req == NULL);
669 	dsp->ds_pending_req = mp;
670 	dsp->ds_pending_op = proto_process_unbind_req;
671 	dsp->ds_pending_cnt++;
672 	mutex_exit(&dsp->ds_thr_lock);
673 	rw_exit(&dsp->ds_lock);
674 
675 	return (B_TRUE);
676 failed:
677 	rw_exit(&dsp->ds_lock);
678 	dlerrorack(q, mp, DL_UNBIND_REQ, dl_err, 0);
679 	return (B_FALSE);
680 }
681 
682 /*
683  * DL_PROMISCON_REQ
684  */
685 static boolean_t
686 proto_promiscon_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp)
687 {
688 	dl_promiscon_req_t *dlp = (dl_promiscon_req_t *)udlp;
689 	int		err = 0;
690 	t_uscalar_t	dl_err;
691 	uint32_t	promisc_saved;
692 	queue_t		*q = dsp->ds_wq;
693 
694 	rw_enter(&dsp->ds_lock, RW_WRITER);
695 
696 	if (MBLKL(mp) < sizeof (dl_promiscon_req_t)) {
697 		dl_err = DL_BADPRIM;
698 		goto failed;
699 	}
700 
701 	if (dsp->ds_dlstate == DL_UNATTACHED ||
702 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
703 		dl_err = DL_OUTSTATE;
704 		goto failed;
705 	}
706 
707 	promisc_saved = dsp->ds_promisc;
708 	switch (dlp->dl_level) {
709 	case DL_PROMISC_SAP:
710 		dsp->ds_promisc |= DLS_PROMISC_SAP;
711 		break;
712 
713 	case DL_PROMISC_MULTI:
714 		dsp->ds_promisc |= DLS_PROMISC_MULTI;
715 		break;
716 
717 	case DL_PROMISC_PHYS:
718 		dsp->ds_promisc |= DLS_PROMISC_PHYS;
719 		break;
720 
721 	default:
722 		dl_err = DL_NOTSUPPORTED;
723 		goto failed;
724 	}
725 
726 	if (dsp->ds_passivestate == DLD_UNINITIALIZED &&
727 	    !dls_active_set(dsp->ds_dc)) {
728 		dsp->ds_promisc = promisc_saved;
729 		dl_err = DL_SYSERR;
730 		err = EBUSY;
731 		goto failed;
732 	}
733 
734 	/*
735 	 * Adjust channel promiscuity.
736 	 */
737 	err = dls_promisc(dsp->ds_dc, dsp->ds_promisc);
738 	if (err != 0) {
739 		dl_err = DL_SYSERR;
740 		dsp->ds_promisc = promisc_saved;
741 		if (dsp->ds_passivestate == DLD_UNINITIALIZED)
742 			dls_active_clear(dsp->ds_dc);
743 
744 		goto failed;
745 	}
746 
747 	if (dsp->ds_passivestate == DLD_UNINITIALIZED)
748 		dsp->ds_passivestate = DLD_ACTIVE;
749 
750 	rw_exit(&dsp->ds_lock);
751 	dlokack(q, mp, DL_PROMISCON_REQ);
752 	return (B_TRUE);
753 failed:
754 	rw_exit(&dsp->ds_lock);
755 	dlerrorack(q, mp, DL_PROMISCON_REQ, dl_err, (t_uscalar_t)err);
756 	return (B_FALSE);
757 }
758 
759 /*
760  * DL_PROMISCOFF_REQ
761  */
762 static boolean_t
763 proto_promiscoff_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp)
764 {
765 	dl_promiscoff_req_t *dlp = (dl_promiscoff_req_t *)udlp;
766 	int		err = 0;
767 	t_uscalar_t	dl_err;
768 	uint32_t	promisc_saved;
769 	queue_t		*q = dsp->ds_wq;
770 
771 	rw_enter(&dsp->ds_lock, RW_WRITER);
772 
773 	if (MBLKL(mp) < sizeof (dl_promiscoff_req_t)) {
774 		dl_err = DL_BADPRIM;
775 		goto failed;
776 	}
777 
778 	if (dsp->ds_dlstate == DL_UNATTACHED ||
779 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
780 		dl_err = DL_OUTSTATE;
781 		goto failed;
782 	}
783 
784 	promisc_saved = dsp->ds_promisc;
785 	switch (dlp->dl_level) {
786 	case DL_PROMISC_SAP:
787 		if (!(dsp->ds_promisc & DLS_PROMISC_SAP)) {
788 			dl_err = DL_NOTENAB;
789 			goto failed;
790 		}
791 		dsp->ds_promisc &= ~DLS_PROMISC_SAP;
792 		break;
793 
794 	case DL_PROMISC_MULTI:
795 		if (!(dsp->ds_promisc & DLS_PROMISC_MULTI)) {
796 			dl_err = DL_NOTENAB;
797 			goto failed;
798 		}
799 		dsp->ds_promisc &= ~DLS_PROMISC_MULTI;
800 		break;
801 
802 	case DL_PROMISC_PHYS:
803 		if (!(dsp->ds_promisc & DLS_PROMISC_PHYS)) {
804 			dl_err = DL_NOTENAB;
805 			goto failed;
806 		}
807 		dsp->ds_promisc &= ~DLS_PROMISC_PHYS;
808 		break;
809 
810 	default:
811 		dl_err = DL_NOTSUPPORTED;
812 		goto failed;
813 	}
814 
815 	/*
816 	 * Adjust channel promiscuity.
817 	 */
818 	err = dls_promisc(dsp->ds_dc, dsp->ds_promisc);
819 	if (err != 0) {
820 		dsp->ds_promisc = promisc_saved;
821 		dl_err = DL_SYSERR;
822 		goto failed;
823 	}
824 
825 	rw_exit(&dsp->ds_lock);
826 	dlokack(q, mp, DL_PROMISCOFF_REQ);
827 	return (B_TRUE);
828 failed:
829 	rw_exit(&dsp->ds_lock);
830 	dlerrorack(q, mp, DL_PROMISCOFF_REQ, dl_err, (t_uscalar_t)err);
831 	return (B_FALSE);
832 }
833 
834 /*
835  * DL_ENABMULTI_REQ
836  */
837 static boolean_t
838 proto_enabmulti_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp)
839 {
840 	dl_enabmulti_req_t *dlp = (dl_enabmulti_req_t *)udlp;
841 	int		err = 0;
842 	t_uscalar_t	dl_err;
843 	queue_t		*q = dsp->ds_wq;
844 
845 	rw_enter(&dsp->ds_lock, RW_WRITER);
846 
847 	if (dsp->ds_dlstate == DL_UNATTACHED ||
848 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
849 		dl_err = DL_OUTSTATE;
850 		goto failed;
851 	}
852 
853 	if (MBLKL(mp) < sizeof (dl_enabmulti_req_t) ||
854 	    !MBLKIN(mp, dlp->dl_addr_offset, dlp->dl_addr_length) ||
855 	    dlp->dl_addr_length != dsp->ds_mip->mi_addr_length) {
856 		dl_err = DL_BADPRIM;
857 		goto failed;
858 	}
859 
860 	if (dsp->ds_passivestate == DLD_UNINITIALIZED &&
861 	    !dls_active_set(dsp->ds_dc)) {
862 		dl_err = DL_SYSERR;
863 		err = EBUSY;
864 		goto failed;
865 	}
866 
867 	err = dls_multicst_add(dsp->ds_dc, mp->b_rptr + dlp->dl_addr_offset);
868 	if (err != 0) {
869 		switch (err) {
870 		case EINVAL:
871 			dl_err = DL_BADADDR;
872 			err = 0;
873 			break;
874 		case ENOSPC:
875 			dl_err = DL_TOOMANY;
876 			err = 0;
877 			break;
878 		default:
879 			dl_err = DL_SYSERR;
880 			break;
881 		}
882 		if (dsp->ds_passivestate == DLD_UNINITIALIZED)
883 			dls_active_clear(dsp->ds_dc);
884 
885 		goto failed;
886 	}
887 
888 	if (dsp->ds_passivestate == DLD_UNINITIALIZED)
889 		dsp->ds_passivestate = DLD_ACTIVE;
890 
891 	rw_exit(&dsp->ds_lock);
892 	dlokack(q, mp, DL_ENABMULTI_REQ);
893 	return (B_TRUE);
894 failed:
895 	rw_exit(&dsp->ds_lock);
896 	dlerrorack(q, mp, DL_ENABMULTI_REQ, dl_err, (t_uscalar_t)err);
897 	return (B_FALSE);
898 }
899 
900 /*
901  * DL_DISABMULTI_REQ
902  */
903 static boolean_t
904 proto_disabmulti_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp)
905 {
906 	dl_disabmulti_req_t *dlp = (dl_disabmulti_req_t *)udlp;
907 	int		err = 0;
908 	t_uscalar_t	dl_err;
909 	queue_t		*q = dsp->ds_wq;
910 
911 	rw_enter(&dsp->ds_lock, RW_READER);
912 
913 	if (dsp->ds_dlstate == DL_UNATTACHED ||
914 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
915 		dl_err = DL_OUTSTATE;
916 		goto failed;
917 	}
918 
919 	if (MBLKL(mp) < sizeof (dl_disabmulti_req_t) ||
920 	    !MBLKIN(mp, dlp->dl_addr_offset, dlp->dl_addr_length) ||
921 	    dlp->dl_addr_length != dsp->ds_mip->mi_addr_length) {
922 		dl_err = DL_BADPRIM;
923 		goto failed;
924 	}
925 
926 	err = dls_multicst_remove(dsp->ds_dc, mp->b_rptr + dlp->dl_addr_offset);
927 	if (err != 0) {
928 	switch (err) {
929 		case EINVAL:
930 			dl_err = DL_BADADDR;
931 			err = 0;
932 			break;
933 
934 		case ENOENT:
935 			dl_err = DL_NOTENAB;
936 			err = 0;
937 			break;
938 
939 		default:
940 			dl_err = DL_SYSERR;
941 			break;
942 		}
943 		goto failed;
944 	}
945 
946 	rw_exit(&dsp->ds_lock);
947 	dlokack(q, mp, DL_DISABMULTI_REQ);
948 	return (B_TRUE);
949 failed:
950 	rw_exit(&dsp->ds_lock);
951 	dlerrorack(q, mp, DL_DISABMULTI_REQ, dl_err, (t_uscalar_t)err);
952 	return (B_FALSE);
953 }
954 
955 /*
956  * DL_PHYS_ADDR_REQ
957  */
958 static boolean_t
959 proto_physaddr_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp)
960 {
961 	dl_phys_addr_req_t *dlp = (dl_phys_addr_req_t *)udlp;
962 	queue_t		*q = dsp->ds_wq;
963 	t_uscalar_t	dl_err;
964 	char		*addr;
965 	uint_t		addr_length;
966 
967 	rw_enter(&dsp->ds_lock, RW_READER);
968 
969 	if (MBLKL(mp) < sizeof (dl_phys_addr_req_t)) {
970 		dl_err = DL_BADPRIM;
971 		goto failed;
972 	}
973 
974 	if (dsp->ds_dlstate == DL_UNATTACHED ||
975 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
976 		dl_err = DL_OUTSTATE;
977 		goto failed;
978 	}
979 
980 	if (dlp->dl_addr_type != DL_CURR_PHYS_ADDR &&
981 	    dlp->dl_addr_type != DL_FACT_PHYS_ADDR) {
982 		dl_err = DL_UNSUPPORTED;
983 		goto failed;
984 	}
985 
986 	addr_length = dsp->ds_mip->mi_addr_length;
987 	addr = kmem_alloc(addr_length, KM_NOSLEEP);
988 	if (addr == NULL) {
989 		rw_exit(&dsp->ds_lock);
990 		merror(q, mp, ENOSR);
991 		return (B_FALSE);
992 	}
993 
994 	/*
995 	 * Copy out the address before we drop the lock; we don't
996 	 * want to call dlphysaddrack() while holding ds_lock.
997 	 */
998 	bcopy((dlp->dl_addr_type == DL_CURR_PHYS_ADDR) ?
999 	    dsp->ds_curr_addr : dsp->ds_fact_addr, addr, addr_length);
1000 
1001 	rw_exit(&dsp->ds_lock);
1002 	dlphysaddrack(q, mp, addr, (t_uscalar_t)addr_length);
1003 	kmem_free(addr, addr_length);
1004 	return (B_TRUE);
1005 failed:
1006 	rw_exit(&dsp->ds_lock);
1007 	dlerrorack(q, mp, DL_PHYS_ADDR_REQ, dl_err, 0);
1008 	return (B_FALSE);
1009 }
1010 
1011 /*
1012  * DL_SET_PHYS_ADDR_REQ
1013  */
1014 static boolean_t
1015 proto_setphysaddr_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp)
1016 {
1017 	dl_set_phys_addr_req_t *dlp = (dl_set_phys_addr_req_t *)udlp;
1018 	int		err = 0;
1019 	t_uscalar_t	dl_err;
1020 	queue_t		*q = dsp->ds_wq;
1021 
1022 	rw_enter(&dsp->ds_lock, RW_WRITER);
1023 
1024 	if (dsp->ds_dlstate == DL_UNATTACHED ||
1025 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
1026 		dl_err = DL_OUTSTATE;
1027 		goto failed;
1028 	}
1029 
1030 	if (MBLKL(mp) < sizeof (dl_set_phys_addr_req_t) ||
1031 	    !MBLKIN(mp, dlp->dl_addr_offset, dlp->dl_addr_length) ||
1032 	    dlp->dl_addr_length != dsp->ds_mip->mi_addr_length) {
1033 		dl_err = DL_BADPRIM;
1034 		goto failed;
1035 	}
1036 
1037 	if (dsp->ds_passivestate == DLD_UNINITIALIZED &&
1038 	    !dls_active_set(dsp->ds_dc)) {
1039 		dl_err = DL_SYSERR;
1040 		err = EBUSY;
1041 		goto failed;
1042 	}
1043 
1044 	err = mac_unicst_set(dsp->ds_mh, mp->b_rptr + dlp->dl_addr_offset);
1045 	if (err != 0) {
1046 		switch (err) {
1047 		case EINVAL:
1048 			dl_err = DL_BADADDR;
1049 			err = 0;
1050 			break;
1051 
1052 		default:
1053 			dl_err = DL_SYSERR;
1054 			break;
1055 		}
1056 		if (dsp->ds_passivestate == DLD_UNINITIALIZED)
1057 			dls_active_clear(dsp->ds_dc);
1058 
1059 		goto failed;
1060 	}
1061 	if (dsp->ds_passivestate == DLD_UNINITIALIZED)
1062 		dsp->ds_passivestate = DLD_ACTIVE;
1063 
1064 	rw_exit(&dsp->ds_lock);
1065 	dlokack(q, mp, DL_SET_PHYS_ADDR_REQ);
1066 	return (B_TRUE);
1067 failed:
1068 	rw_exit(&dsp->ds_lock);
1069 	dlerrorack(q, mp, DL_SET_PHYS_ADDR_REQ, dl_err, (t_uscalar_t)err);
1070 	return (B_FALSE);
1071 }
1072 
1073 /*
1074  * DL_UDQOS_REQ
1075  */
1076 static boolean_t
1077 proto_udqos_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp)
1078 {
1079 	dl_udqos_req_t *dlp = (dl_udqos_req_t *)udlp;
1080 	dl_qos_cl_sel1_t *selp;
1081 	int		off, len;
1082 	t_uscalar_t	dl_err;
1083 	queue_t		*q = dsp->ds_wq;
1084 
1085 	off = dlp->dl_qos_offset;
1086 	len = dlp->dl_qos_length;
1087 
1088 	rw_enter(&dsp->ds_lock, RW_WRITER);
1089 
1090 	if (MBLKL(mp) < sizeof (dl_udqos_req_t) || !MBLKIN(mp, off, len)) {
1091 		dl_err = DL_BADPRIM;
1092 		goto failed;
1093 	}
1094 
1095 	selp = (dl_qos_cl_sel1_t *)(mp->b_rptr + off);
1096 	if (selp->dl_qos_type != DL_QOS_CL_SEL1) {
1097 		dl_err = DL_BADQOSTYPE;
1098 		goto failed;
1099 	}
1100 
1101 	if (selp->dl_priority > (1 << VLAN_PRI_SIZE) - 1 ||
1102 	    selp->dl_priority < 0) {
1103 		dl_err = DL_BADQOSPARAM;
1104 		goto failed;
1105 	}
1106 
1107 	dsp->ds_pri = selp->dl_priority;
1108 
1109 	rw_exit(&dsp->ds_lock);
1110 	dlokack(q, mp, DL_UDQOS_REQ);
1111 	return (B_TRUE);
1112 failed:
1113 	rw_exit(&dsp->ds_lock);
1114 	dlerrorack(q, mp, DL_UDQOS_REQ, dl_err, 0);
1115 	return (B_FALSE);
1116 }
1117 
1118 static boolean_t
1119 check_ip_above(queue_t *q)
1120 {
1121 	queue_t		*next_q;
1122 	boolean_t	ret = B_TRUE;
1123 
1124 	claimstr(q);
1125 	next_q = q->q_next;
1126 	if (strcmp(next_q->q_qinfo->qi_minfo->mi_idname, "ip") != 0)
1127 		ret = B_FALSE;
1128 	releasestr(q);
1129 	return (ret);
1130 }
1131 
1132 /*
1133  * DL_CAPABILITY_REQ
1134  */
1135 /*ARGSUSED*/
1136 static boolean_t
1137 proto_capability_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp)
1138 {
1139 	dl_capability_req_t *dlp = (dl_capability_req_t *)udlp;
1140 	dl_capability_sub_t *sp;
1141 	size_t		size, len;
1142 	offset_t	off, end;
1143 	t_uscalar_t	dl_err;
1144 	queue_t		*q = dsp->ds_wq;
1145 	boolean_t	upgraded;
1146 
1147 	rw_enter(&dsp->ds_lock, RW_READER);
1148 
1149 	if (MBLKL(mp) < sizeof (dl_capability_req_t)) {
1150 		dl_err = DL_BADPRIM;
1151 		goto failed;
1152 	}
1153 
1154 	if (dsp->ds_dlstate == DL_UNATTACHED ||
1155 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
1156 		dl_err = DL_OUTSTATE;
1157 		goto failed;
1158 	}
1159 
1160 	/*
1161 	 * This request is overloaded. If there are no requested capabilities
1162 	 * then we just want to acknowledge with all the capabilities we
1163 	 * support. Otherwise we enable the set of capabilities requested.
1164 	 */
1165 	if (dlp->dl_sub_length == 0) {
1166 		/* callee drops lock */
1167 		return (proto_capability_advertise(dsp, mp));
1168 	}
1169 
1170 	if (!MBLKIN(mp, dlp->dl_sub_offset, dlp->dl_sub_length)) {
1171 		dl_err = DL_BADPRIM;
1172 		goto failed;
1173 	}
1174 
1175 	dlp->dl_primitive = DL_CAPABILITY_ACK;
1176 
1177 	off = dlp->dl_sub_offset;
1178 	len = dlp->dl_sub_length;
1179 
1180 	/*
1181 	 * Walk the list of capabilities to be enabled.
1182 	 */
1183 	upgraded = B_FALSE;
1184 	for (end = off + len; off < end; ) {
1185 		sp = (dl_capability_sub_t *)(mp->b_rptr + off);
1186 		size = sizeof (dl_capability_sub_t) + sp->dl_length;
1187 
1188 		if (off + size > end ||
1189 		    !IS_P2ALIGNED(off, sizeof (uint32_t))) {
1190 			dl_err = DL_BADPRIM;
1191 			goto failed;
1192 		}
1193 
1194 		switch (sp->dl_cap) {
1195 		/*
1196 		 * TCP/IP checksum offload to hardware.
1197 		 */
1198 		case DL_CAPAB_HCKSUM: {
1199 			dl_capab_hcksum_t *hcksump;
1200 			dl_capab_hcksum_t hcksum;
1201 
1202 			hcksump = (dl_capab_hcksum_t *)&sp[1];
1203 			/*
1204 			 * Copy for alignment.
1205 			 */
1206 			bcopy(hcksump, &hcksum, sizeof (dl_capab_hcksum_t));
1207 			dlcapabsetqid(&(hcksum.hcksum_mid), dsp->ds_rq);
1208 			bcopy(&hcksum, hcksump, sizeof (dl_capab_hcksum_t));
1209 			break;
1210 		}
1211 
1212 		/*
1213 		 * Large segment offload. (LSO)
1214 		 */
1215 		case DL_CAPAB_LSO: {
1216 			dl_capab_lso_t *lsop;
1217 			dl_capab_lso_t lso;
1218 
1219 			lsop = (dl_capab_lso_t *)&sp[1];
1220 			/*
1221 			 * Copy for alignment.
1222 			 */
1223 			bcopy(lsop, &lso, sizeof (dl_capab_lso_t));
1224 			dlcapabsetqid(&(lso.lso_mid), dsp->ds_rq);
1225 			bcopy(&lso, lsop, sizeof (dl_capab_lso_t));
1226 			break;
1227 		}
1228 
1229 		/*
1230 		 * IP polling interface.
1231 		 */
1232 		case DL_CAPAB_POLL: {
1233 			dl_capab_dls_t *pollp;
1234 			dl_capab_dls_t	poll;
1235 
1236 			pollp = (dl_capab_dls_t *)&sp[1];
1237 			/*
1238 			 * Copy for alignment.
1239 			 */
1240 			bcopy(pollp, &poll, sizeof (dl_capab_dls_t));
1241 
1242 			/*
1243 			 * We need to become writer before enabling and/or
1244 			 * disabling the polling interface.  If we couldn'
1245 			 * upgrade, check state again after re-acquiring the
1246 			 * lock to make sure we can proceed.
1247 			 */
1248 			if (!upgraded && !rw_tryupgrade(&dsp->ds_lock)) {
1249 				rw_exit(&dsp->ds_lock);
1250 				rw_enter(&dsp->ds_lock, RW_WRITER);
1251 
1252 				if (dsp->ds_dlstate == DL_UNATTACHED ||
1253 				    DL_ACK_PENDING(dsp->ds_dlstate)) {
1254 					dl_err = DL_OUTSTATE;
1255 					goto failed;
1256 				}
1257 			}
1258 			upgraded = B_TRUE;
1259 
1260 			switch (poll.dls_flags) {
1261 			default:
1262 				/*FALLTHRU*/
1263 			case POLL_DISABLE:
1264 				proto_poll_disable(dsp);
1265 				break;
1266 
1267 			case POLL_ENABLE:
1268 				ASSERT(!(dld_opt & DLD_OPT_NO_POLL));
1269 
1270 				/*
1271 				 * Make sure polling is disabled.
1272 				 */
1273 				proto_poll_disable(dsp);
1274 
1275 				/*
1276 				 * Now attempt enable it.
1277 				 */
1278 				if (check_ip_above(dsp->ds_rq) &&
1279 				    proto_poll_enable(dsp, &poll)) {
1280 					bzero(&poll, sizeof (dl_capab_dls_t));
1281 					poll.dls_flags = POLL_ENABLE;
1282 				}
1283 				break;
1284 			}
1285 
1286 			dlcapabsetqid(&(poll.dls_mid), dsp->ds_rq);
1287 			bcopy(&poll, pollp, sizeof (dl_capab_dls_t));
1288 			break;
1289 		}
1290 		case DL_CAPAB_SOFT_RING: {
1291 			dl_capab_dls_t *soft_ringp;
1292 			dl_capab_dls_t soft_ring;
1293 
1294 			soft_ringp = (dl_capab_dls_t *)&sp[1];
1295 			/*
1296 			 * Copy for alignment.
1297 			 */
1298 			bcopy(soft_ringp, &soft_ring,
1299 			    sizeof (dl_capab_dls_t));
1300 
1301 			/*
1302 			 * We need to become writer before enabling and/or
1303 			 * disabling the soft_ring interface.  If we couldn'
1304 			 * upgrade, check state again after re-acquiring the
1305 			 * lock to make sure we can proceed.
1306 			 */
1307 			if (!upgraded && !rw_tryupgrade(&dsp->ds_lock)) {
1308 				rw_exit(&dsp->ds_lock);
1309 				rw_enter(&dsp->ds_lock, RW_WRITER);
1310 
1311 				if (dsp->ds_dlstate == DL_UNATTACHED ||
1312 				    DL_ACK_PENDING(dsp->ds_dlstate)) {
1313 					dl_err = DL_OUTSTATE;
1314 					goto failed;
1315 				}
1316 			}
1317 			upgraded = B_TRUE;
1318 
1319 			switch (soft_ring.dls_flags) {
1320 			default:
1321 				/*FALLTHRU*/
1322 			case SOFT_RING_DISABLE:
1323 				proto_soft_ring_disable(dsp);
1324 				break;
1325 
1326 			case SOFT_RING_ENABLE:
1327 				ASSERT(!(dld_opt & DLD_OPT_NO_SOFTRING));
1328 				/*
1329 				 * Make sure soft_ring is disabled.
1330 				 */
1331 				proto_soft_ring_disable(dsp);
1332 
1333 				/*
1334 				 * Now attempt enable it.
1335 				 */
1336 				if (check_ip_above(dsp->ds_rq) &&
1337 				    proto_soft_ring_enable(dsp, &soft_ring)) {
1338 					bzero(&soft_ring,
1339 					    sizeof (dl_capab_dls_t));
1340 					soft_ring.dls_flags =
1341 					    SOFT_RING_ENABLE;
1342 				} else {
1343 					bzero(&soft_ring,
1344 					    sizeof (dl_capab_dls_t));
1345 					soft_ring.dls_flags =
1346 					    SOFT_RING_DISABLE;
1347 				}
1348 				break;
1349 			}
1350 
1351 			dlcapabsetqid(&(soft_ring.dls_mid), dsp->ds_rq);
1352 			bcopy(&soft_ring, soft_ringp,
1353 			    sizeof (dl_capab_dls_t));
1354 			break;
1355 		}
1356 		default:
1357 			break;
1358 		}
1359 
1360 		off += size;
1361 	}
1362 	rw_exit(&dsp->ds_lock);
1363 	qreply(q, mp);
1364 	return (B_TRUE);
1365 failed:
1366 	rw_exit(&dsp->ds_lock);
1367 	dlerrorack(q, mp, DL_CAPABILITY_REQ, dl_err, 0);
1368 	return (B_FALSE);
1369 }
1370 
1371 /*
1372  * DL_NOTIFY_REQ
1373  */
1374 static boolean_t
1375 proto_notify_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp)
1376 {
1377 	dl_notify_req_t	*dlp = (dl_notify_req_t *)udlp;
1378 	t_uscalar_t	dl_err;
1379 	queue_t		*q = dsp->ds_wq;
1380 	uint_t		note =
1381 	    DL_NOTE_PROMISC_ON_PHYS |
1382 	    DL_NOTE_PROMISC_OFF_PHYS |
1383 	    DL_NOTE_PHYS_ADDR |
1384 	    DL_NOTE_LINK_UP |
1385 	    DL_NOTE_LINK_DOWN |
1386 	    DL_NOTE_CAPAB_RENEG |
1387 	    DL_NOTE_SPEED;
1388 
1389 	rw_enter(&dsp->ds_lock, RW_WRITER);
1390 
1391 	if (MBLKL(mp) < sizeof (dl_notify_req_t)) {
1392 		dl_err = DL_BADPRIM;
1393 		goto failed;
1394 	}
1395 
1396 	if (dsp->ds_dlstate == DL_UNATTACHED ||
1397 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
1398 		dl_err = DL_OUTSTATE;
1399 		goto failed;
1400 	}
1401 
1402 	/*
1403 	 * Cache the notifications that are being enabled.
1404 	 */
1405 	dsp->ds_notifications = dlp->dl_notifications & note;
1406 	rw_exit(&dsp->ds_lock);
1407 	/*
1408 	 * The ACK carries all notifications regardless of which set is
1409 	 * being enabled.
1410 	 */
1411 	dlnotifyack(q, mp, note);
1412 
1413 	/*
1414 	 * Solicit DL_NOTIFY_IND messages for each enabled notification.
1415 	 */
1416 	rw_enter(&dsp->ds_lock, RW_READER);
1417 	if (dsp->ds_notifications != 0) {
1418 		rw_exit(&dsp->ds_lock);
1419 		dld_str_notify_ind(dsp);
1420 	} else {
1421 		rw_exit(&dsp->ds_lock);
1422 	}
1423 	return (B_TRUE);
1424 failed:
1425 	rw_exit(&dsp->ds_lock);
1426 	dlerrorack(q, mp, DL_NOTIFY_REQ, dl_err, 0);
1427 	return (B_FALSE);
1428 }
1429 
1430 /*
1431  * DL_UINTDATA_REQ
1432  */
1433 static boolean_t
1434 proto_unitdata_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp)
1435 {
1436 	queue_t			*q = dsp->ds_wq;
1437 	dl_unitdata_req_t	*dlp = (dl_unitdata_req_t *)udlp;
1438 	off_t			off;
1439 	size_t			len, size;
1440 	const uint8_t		*addr;
1441 	uint16_t		sap;
1442 	uint_t			addr_length;
1443 	mblk_t			*bp, *payload;
1444 	uint32_t		start, stuff, end, value, flags;
1445 	t_uscalar_t		dl_err;
1446 
1447 	rw_enter(&dsp->ds_lock, RW_READER);
1448 
1449 	if (MBLKL(mp) < sizeof (dl_unitdata_req_t) || mp->b_cont == NULL) {
1450 		dl_err = DL_BADPRIM;
1451 		goto failed;
1452 	}
1453 
1454 	if (dsp->ds_dlstate != DL_IDLE) {
1455 		dl_err = DL_OUTSTATE;
1456 		goto failed;
1457 	}
1458 	addr_length = dsp->ds_mip->mi_addr_length;
1459 
1460 	off = dlp->dl_dest_addr_offset;
1461 	len = dlp->dl_dest_addr_length;
1462 
1463 	if (!MBLKIN(mp, off, len) || !IS_P2ALIGNED(off, sizeof (uint16_t))) {
1464 		dl_err = DL_BADPRIM;
1465 		goto failed;
1466 	}
1467 
1468 	if (len != addr_length + sizeof (uint16_t)) {
1469 		dl_err = DL_BADADDR;
1470 		goto failed;
1471 	}
1472 
1473 	addr = mp->b_rptr + off;
1474 	sap = *(uint16_t *)(mp->b_rptr + off + addr_length);
1475 
1476 	/*
1477 	 * Check the length of the packet and the block types.
1478 	 */
1479 	size = 0;
1480 	payload = mp->b_cont;
1481 	for (bp = payload; bp != NULL; bp = bp->b_cont) {
1482 		if (DB_TYPE(bp) != M_DATA)
1483 			goto baddata;
1484 
1485 		size += MBLKL(bp);
1486 	}
1487 
1488 	if (size > dsp->ds_mip->mi_sdu_max)
1489 		goto baddata;
1490 
1491 	/*
1492 	 * Build a packet header.
1493 	 */
1494 	if ((bp = dls_header(dsp->ds_dc, addr, sap, dlp->dl_priority.dl_max,
1495 	    &payload)) == NULL) {
1496 		dl_err = DL_BADADDR;
1497 		goto failed;
1498 	}
1499 
1500 	/*
1501 	 * We no longer need the M_PROTO header, so free it.
1502 	 */
1503 	freeb(mp);
1504 
1505 	/*
1506 	 * Transfer the checksum offload information if it is present.
1507 	 */
1508 	hcksum_retrieve(payload, NULL, NULL, &start, &stuff, &end, &value,
1509 	    &flags);
1510 	(void) hcksum_assoc(bp, NULL, NULL, start, stuff, end, value, flags, 0);
1511 
1512 	/*
1513 	 * Link the payload onto the new header.
1514 	 */
1515 	ASSERT(bp->b_cont == NULL);
1516 	bp->b_cont = payload;
1517 
1518 	dld_tx_single(dsp, bp);
1519 	rw_exit(&dsp->ds_lock);
1520 	return (B_TRUE);
1521 failed:
1522 	rw_exit(&dsp->ds_lock);
1523 	dlerrorack(q, mp, DL_UNITDATA_REQ, dl_err, 0);
1524 	return (B_FALSE);
1525 
1526 baddata:
1527 	rw_exit(&dsp->ds_lock);
1528 	dluderrorind(q, mp, (void *)addr, len, DL_BADDATA, 0);
1529 	return (B_FALSE);
1530 }
1531 
1532 /*
1533  * DL_PASSIVE_REQ
1534  */
1535 /* ARGSUSED */
1536 static boolean_t
1537 proto_passive_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp)
1538 {
1539 	t_uscalar_t dl_err;
1540 
1541 	rw_enter(&dsp->ds_lock, RW_WRITER);
1542 	/*
1543 	 * If we've already become active by issuing an active primitive,
1544 	 * then it's too late to try to become passive.
1545 	 */
1546 	if (dsp->ds_passivestate == DLD_ACTIVE) {
1547 		dl_err = DL_OUTSTATE;
1548 		goto failed;
1549 	}
1550 
1551 	if (MBLKL(mp) < sizeof (dl_passive_req_t)) {
1552 		dl_err = DL_BADPRIM;
1553 		goto failed;
1554 	}
1555 
1556 	dsp->ds_passivestate = DLD_PASSIVE;
1557 	rw_exit(&dsp->ds_lock);
1558 	dlokack(dsp->ds_wq, mp, DL_PASSIVE_REQ);
1559 	return (B_TRUE);
1560 failed:
1561 	rw_exit(&dsp->ds_lock);
1562 	dlerrorack(dsp->ds_wq, mp, DL_PASSIVE_REQ, dl_err, 0);
1563 	return (B_FALSE);
1564 }
1565 
1566 
1567 /*
1568  * Catch-all handler.
1569  */
1570 static boolean_t
1571 proto_req(dld_str_t *dsp, union DL_primitives *dlp, mblk_t *mp)
1572 {
1573 	dlerrorack(dsp->ds_wq, mp, dlp->dl_primitive, DL_UNSUPPORTED, 0);
1574 	return (B_FALSE);
1575 }
1576 
1577 static void
1578 proto_poll_disable(dld_str_t *dsp)
1579 {
1580 	mac_handle_t	mh;
1581 
1582 	ASSERT(dsp->ds_pending_req != NULL || RW_WRITE_HELD(&dsp->ds_lock));
1583 
1584 	if (!dsp->ds_polling)
1585 		return;
1586 
1587 	/*
1588 	 * It should be impossible to enable raw mode if polling is turned on.
1589 	 */
1590 	ASSERT(dsp->ds_mode != DLD_RAW);
1591 
1592 	/*
1593 	 * Reset the resource_add callback.
1594 	 */
1595 	mh = dls_mac(dsp->ds_dc);
1596 	mac_resource_set(mh, NULL, NULL);
1597 	mac_resources(mh);
1598 
1599 	/*
1600 	 * Set receive function back to default.
1601 	 */
1602 	dls_rx_set(dsp->ds_dc, (dsp->ds_mode == DLD_FASTPATH) ?
1603 	    dld_str_rx_fastpath : dld_str_rx_unitdata, (void *)dsp);
1604 
1605 	/*
1606 	 * Note that polling is disabled.
1607 	 */
1608 	dsp->ds_polling = B_FALSE;
1609 }
1610 
1611 static boolean_t
1612 proto_poll_enable(dld_str_t *dsp, dl_capab_dls_t *pollp)
1613 {
1614 	mac_handle_t	mh;
1615 
1616 	ASSERT(RW_WRITE_HELD(&dsp->ds_lock));
1617 	ASSERT(!dsp->ds_polling);
1618 
1619 	/*
1620 	 * We cannot enable polling if raw mode
1621 	 * has been enabled.
1622 	 */
1623 	if (dsp->ds_mode == DLD_RAW)
1624 		return (B_FALSE);
1625 
1626 	mh = dls_mac(dsp->ds_dc);
1627 
1628 	/*
1629 	 * Register resources.
1630 	 */
1631 	mac_resource_set(mh, (mac_resource_add_t)pollp->dls_ring_add,
1632 	    (void *)pollp->dls_rx_handle);
1633 	mac_resources(mh);
1634 
1635 	/*
1636 	 * Set the receive function.
1637 	 */
1638 	dls_rx_set(dsp->ds_dc, (dls_rx_t)pollp->dls_rx,
1639 	    (void *)pollp->dls_rx_handle);
1640 
1641 	/*
1642 	 * Note that polling is enabled. This prevents further DLIOCHDRINFO
1643 	 * ioctls from overwriting the receive function pointer.
1644 	 */
1645 	dsp->ds_polling = B_TRUE;
1646 	return (B_TRUE);
1647 }
1648 
1649 static void
1650 proto_soft_ring_disable(dld_str_t *dsp)
1651 {
1652 	ASSERT(RW_WRITE_HELD(&dsp->ds_lock));
1653 
1654 	if (!dsp->ds_soft_ring)
1655 		return;
1656 
1657 	/*
1658 	 * It should be impossible to enable raw mode if soft_ring is turned on.
1659 	 */
1660 	ASSERT(dsp->ds_mode != DLD_RAW);
1661 	proto_change_soft_ring_fanout(dsp, SOFT_RING_NONE);
1662 	/*
1663 	 * Note that fanout is disabled.
1664 	 */
1665 	dsp->ds_soft_ring = B_FALSE;
1666 }
1667 
1668 static boolean_t
1669 proto_soft_ring_enable(dld_str_t *dsp, dl_capab_dls_t *soft_ringp)
1670 {
1671 	ASSERT(RW_WRITE_HELD(&dsp->ds_lock));
1672 	ASSERT(!dsp->ds_soft_ring);
1673 
1674 	/*
1675 	 * We cannot enable soft_ring if raw mode
1676 	 * has been enabled.
1677 	 */
1678 	if (dsp->ds_mode == DLD_RAW)
1679 		return (B_FALSE);
1680 
1681 	if (dls_soft_ring_enable(dsp->ds_dc, soft_ringp) == B_FALSE)
1682 		return (B_FALSE);
1683 
1684 	dsp->ds_soft_ring = B_TRUE;
1685 	return (B_TRUE);
1686 }
1687 
1688 static void
1689 proto_change_soft_ring_fanout(dld_str_t *dsp, int type)
1690 {
1691 	dls_rx_t	rx;
1692 
1693 	if (type == SOFT_RING_NONE) {
1694 		rx = (dsp->ds_mode == DLD_FASTPATH) ?
1695 			    dld_str_rx_fastpath : dld_str_rx_unitdata;
1696 	} else {
1697 		rx = (dls_rx_t)dls_soft_ring_fanout;
1698 	}
1699 	dls_soft_ring_rx_set(dsp->ds_dc, rx, dsp, type);
1700 }
1701 
1702 /*
1703  * DL_CAPABILITY_ACK/DL_ERROR_ACK
1704  */
1705 static boolean_t
1706 proto_capability_advertise(dld_str_t *dsp, mblk_t *mp)
1707 {
1708 	dl_capability_ack_t	*dlap;
1709 	dl_capability_sub_t	*dlsp;
1710 	size_t			subsize;
1711 	dl_capab_dls_t		poll;
1712 	dl_capab_dls_t		soft_ring;
1713 	dl_capab_hcksum_t	hcksum;
1714 	dl_capab_lso_t		lso;
1715 	dl_capab_zerocopy_t	zcopy;
1716 	uint8_t			*ptr;
1717 	boolean_t		cksum_cap;
1718 	boolean_t		poll_cap;
1719 	boolean_t		lso_cap;
1720 	mac_capab_lso_t		mac_lso;
1721 	queue_t			*q = dsp->ds_wq;
1722 	mblk_t			*mp1;
1723 
1724 	ASSERT(RW_READ_HELD(&dsp->ds_lock));
1725 
1726 	/*
1727 	 * Initially assume no capabilities.
1728 	 */
1729 	subsize = 0;
1730 
1731 	/*
1732 	 * Advertize soft ring capability unless it has been explicitly
1733 	 * disabled.
1734 	 */
1735 	if (!(dld_opt & DLD_OPT_NO_SOFTRING)) {
1736 		subsize += sizeof (dl_capability_sub_t) +
1737 				    sizeof (dl_capab_dls_t);
1738 	}
1739 
1740 	/*
1741 	 * Check if polling can be enabled on this interface.
1742 	 * If advertising DL_CAPAB_POLL has not been explicitly disabled
1743 	 * then reserve space for that capability.
1744 	 */
1745 	poll_cap = (mac_capab_get(dsp->ds_mh, MAC_CAPAB_POLL, NULL) &&
1746 	    !(dld_opt & DLD_OPT_NO_POLL) && (dsp->ds_vid == VLAN_ID_NONE));
1747 	if (poll_cap) {
1748 		subsize += sizeof (dl_capability_sub_t) +
1749 		    sizeof (dl_capab_dls_t);
1750 	}
1751 
1752 	/*
1753 	 * If the MAC interface supports checksum offload then reserve
1754 	 * space for the DL_CAPAB_HCKSUM capability.
1755 	 */
1756 	if (cksum_cap = mac_capab_get(dsp->ds_mh, MAC_CAPAB_HCKSUM,
1757 	    &hcksum.hcksum_txflags)) {
1758 		subsize += sizeof (dl_capability_sub_t) +
1759 		    sizeof (dl_capab_hcksum_t);
1760 	}
1761 
1762 	/*
1763 	 * If LSO is usable for MAC, reserve space for the DL_CAPAB_LSO
1764 	 * capability.
1765 	 */
1766 	if (lso_cap = mac_capab_get(dsp->ds_mh, MAC_CAPAB_LSO, &mac_lso)) {
1767 		subsize += sizeof (dl_capability_sub_t) +
1768 		    sizeof (dl_capab_lso_t);
1769 	}
1770 
1771 	/*
1772 	 * If DL_CAPAB_ZEROCOPY has not be explicitly disabled then
1773 	 * reserve space for it.
1774 	 */
1775 	if (!(dld_opt & DLD_OPT_NO_ZEROCOPY)) {
1776 		subsize += sizeof (dl_capability_sub_t) +
1777 		    sizeof (dl_capab_zerocopy_t);
1778 	}
1779 
1780 	/*
1781 	 * If there are no capabilities to advertise or if we
1782 	 * can't allocate a response, send a DL_ERROR_ACK.
1783 	 */
1784 	if ((mp1 = reallocb(mp,
1785 	    sizeof (dl_capability_ack_t) + subsize, 0)) == NULL) {
1786 		rw_exit(&dsp->ds_lock);
1787 		dlerrorack(q, mp, DL_CAPABILITY_REQ, DL_NOTSUPPORTED, 0);
1788 		return (B_FALSE);
1789 	}
1790 
1791 	mp = mp1;
1792 	DB_TYPE(mp) = M_PROTO;
1793 	mp->b_wptr = mp->b_rptr + sizeof (dl_capability_ack_t) + subsize;
1794 	bzero(mp->b_rptr, MBLKL(mp));
1795 	dlap = (dl_capability_ack_t *)mp->b_rptr;
1796 	dlap->dl_primitive = DL_CAPABILITY_ACK;
1797 	dlap->dl_sub_offset = sizeof (dl_capability_ack_t);
1798 	dlap->dl_sub_length = subsize;
1799 	ptr = (uint8_t *)&dlap[1];
1800 
1801 	/*
1802 	 * IP polling interface.
1803 	 */
1804 	if (poll_cap) {
1805 		/*
1806 		 * Attempt to disable just in case this is a re-negotiation;
1807 		 * we need to become writer before doing so.
1808 		 */
1809 		if (!rw_tryupgrade(&dsp->ds_lock)) {
1810 			rw_exit(&dsp->ds_lock);
1811 			rw_enter(&dsp->ds_lock, RW_WRITER);
1812 		}
1813 
1814 		/*
1815 		 * Check if polling state has changed after we re-acquired
1816 		 * the lock above, so that we don't mis-advertise it.
1817 		 */
1818 		poll_cap = !(dld_opt & DLD_OPT_NO_POLL) &&
1819 		    (dsp->ds_vid == VLAN_ID_NONE);
1820 
1821 		if (!poll_cap) {
1822 			int poll_capab_size;
1823 
1824 			rw_downgrade(&dsp->ds_lock);
1825 
1826 			poll_capab_size = sizeof (dl_capability_sub_t) +
1827 			    sizeof (dl_capab_dls_t);
1828 
1829 			mp->b_wptr -= poll_capab_size;
1830 			subsize -= poll_capab_size;
1831 			dlap->dl_sub_length = subsize;
1832 		} else {
1833 			proto_poll_disable(dsp);
1834 
1835 			rw_downgrade(&dsp->ds_lock);
1836 
1837 			dlsp = (dl_capability_sub_t *)ptr;
1838 
1839 			dlsp->dl_cap = DL_CAPAB_POLL;
1840 			dlsp->dl_length = sizeof (dl_capab_dls_t);
1841 			ptr += sizeof (dl_capability_sub_t);
1842 
1843 			bzero(&poll, sizeof (dl_capab_dls_t));
1844 			poll.dls_version = POLL_VERSION_1;
1845 			poll.dls_flags = POLL_CAPABLE;
1846 			poll.dls_tx_handle = (uintptr_t)dsp;
1847 			poll.dls_tx = (uintptr_t)str_mdata_fastpath_put;
1848 
1849 			dlcapabsetqid(&(poll.dls_mid), dsp->ds_rq);
1850 			bcopy(&poll, ptr, sizeof (dl_capab_dls_t));
1851 			ptr += sizeof (dl_capab_dls_t);
1852 		}
1853 	}
1854 
1855 	ASSERT(RW_READ_HELD(&dsp->ds_lock));
1856 
1857 	if (!(dld_opt & DLD_OPT_NO_SOFTRING)) {
1858 		dlsp = (dl_capability_sub_t *)ptr;
1859 
1860 		dlsp->dl_cap = DL_CAPAB_SOFT_RING;
1861 		dlsp->dl_length = sizeof (dl_capab_dls_t);
1862 		ptr += sizeof (dl_capability_sub_t);
1863 
1864 		bzero(&soft_ring, sizeof (dl_capab_dls_t));
1865 		soft_ring.dls_version = SOFT_RING_VERSION_1;
1866 		soft_ring.dls_flags = SOFT_RING_CAPABLE;
1867 		soft_ring.dls_tx_handle = (uintptr_t)dsp;
1868 		soft_ring.dls_tx = (uintptr_t)str_mdata_fastpath_put;
1869 		soft_ring.dls_ring_change_status =
1870 		    (uintptr_t)proto_change_soft_ring_fanout;
1871 		soft_ring.dls_ring_bind = (uintptr_t)soft_ring_bind;
1872 		soft_ring.dls_ring_unbind = (uintptr_t)soft_ring_unbind;
1873 
1874 		dlcapabsetqid(&(soft_ring.dls_mid), dsp->ds_rq);
1875 		bcopy(&soft_ring, ptr, sizeof (dl_capab_dls_t));
1876 		ptr += sizeof (dl_capab_dls_t);
1877 	}
1878 
1879 	/*
1880 	 * TCP/IP checksum offload.
1881 	 */
1882 	if (cksum_cap) {
1883 		dlsp = (dl_capability_sub_t *)ptr;
1884 
1885 		dlsp->dl_cap = DL_CAPAB_HCKSUM;
1886 		dlsp->dl_length = sizeof (dl_capab_hcksum_t);
1887 		ptr += sizeof (dl_capability_sub_t);
1888 
1889 		hcksum.hcksum_version = HCKSUM_VERSION_1;
1890 		dlcapabsetqid(&(hcksum.hcksum_mid), dsp->ds_rq);
1891 		bcopy(&hcksum, ptr, sizeof (dl_capab_hcksum_t));
1892 		ptr += sizeof (dl_capab_hcksum_t);
1893 	}
1894 
1895 	/*
1896 	 * Large segment offload. (LSO)
1897 	 */
1898 	if (lso_cap) {
1899 		dlsp = (dl_capability_sub_t *)ptr;
1900 
1901 		dlsp->dl_cap = DL_CAPAB_LSO;
1902 		dlsp->dl_length = sizeof (dl_capab_lso_t);
1903 		ptr += sizeof (dl_capability_sub_t);
1904 
1905 		lso.lso_version = LSO_VERSION_1;
1906 		lso.lso_flags = mac_lso.lso_flags;
1907 		lso.lso_max = mac_lso.lso_basic_tcp_ipv4.lso_max;
1908 
1909 		/* Simply enable LSO with DLD */
1910 		dsp->ds_lso = B_TRUE;
1911 		dsp->ds_lso_max = lso.lso_max;
1912 
1913 		dlcapabsetqid(&(lso.lso_mid), dsp->ds_rq);
1914 		bcopy(&lso, ptr, sizeof (dl_capab_lso_t));
1915 		ptr += sizeof (dl_capab_lso_t);
1916 	} else {
1917 		dsp->ds_lso = B_FALSE;
1918 		dsp->ds_lso_max = 0;
1919 	}
1920 
1921 	/*
1922 	 * Zero copy
1923 	 */
1924 	if (!(dld_opt & DLD_OPT_NO_ZEROCOPY)) {
1925 		dlsp = (dl_capability_sub_t *)ptr;
1926 
1927 		dlsp->dl_cap = DL_CAPAB_ZEROCOPY;
1928 		dlsp->dl_length = sizeof (dl_capab_zerocopy_t);
1929 		ptr += sizeof (dl_capability_sub_t);
1930 
1931 		bzero(&zcopy, sizeof (dl_capab_zerocopy_t));
1932 		zcopy.zerocopy_version = ZEROCOPY_VERSION_1;
1933 		zcopy.zerocopy_flags = DL_CAPAB_VMSAFE_MEM;
1934 
1935 		dlcapabsetqid(&(zcopy.zerocopy_mid), dsp->ds_rq);
1936 		bcopy(&zcopy, ptr, sizeof (dl_capab_zerocopy_t));
1937 		ptr += sizeof (dl_capab_zerocopy_t);
1938 	}
1939 
1940 	ASSERT(ptr == mp->b_rptr + sizeof (dl_capability_ack_t) + subsize);
1941 
1942 	rw_exit(&dsp->ds_lock);
1943 	qreply(q, mp);
1944 	return (B_TRUE);
1945 }
1946