xref: /illumos-gate/usr/src/uts/common/io/dld/dld_proto.c (revision 2264ca7f5db194583c672cb5779a67f52bcd92a9)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * Data-Link Driver
28  */
29 #include <sys/sysmacros.h>
30 #include <sys/strsubr.h>
31 #include <sys/strsun.h>
32 #include <sys/vlan.h>
33 #include <sys/dld_impl.h>
34 #include <sys/mac_client.h>
35 #include <sys/mac_client_impl.h>
36 #include <sys/mac_client_priv.h>
37 
/*
 * Common signature shared by the DLPI primitive handlers in this file:
 * each one receives the stream's dld_str_t and the request message.
 */
typedef void proto_reqfunc_t(dld_str_t *, mblk_t *);

/* Per-primitive handlers, declared through the shared function type. */
static proto_reqfunc_t proto_info_req, proto_attach_req, proto_detach_req,
    proto_bind_req, proto_unbind_req, proto_promiscon_req, proto_promiscoff_req,
    proto_enabmulti_req, proto_disabmulti_req, proto_physaddr_req,
    proto_setphysaddr_req, proto_udqos_req, proto_req, proto_capability_req,
    proto_notify_req, proto_passive_req;

/* Helpers used by the capability and unbind handlers. */
static void proto_capability_advertise(dld_str_t *, mblk_t *);
static int dld_capab_poll_disable(dld_str_t *, dld_capab_poll_t *);
48 
/*
 * Evaluates to non-zero when `state' is one of the four transitional DLPI
 * states (attach, detach, bind or unbind pending).  The argument is
 * expanded once per comparison, so it must be free of side effects.
 */
#define	DL_ACK_PENDING(state) \
	((state) == DL_ATTACH_PENDING || \
	(state) == DL_DETACH_PENDING || \
	(state) == DL_BIND_PENDING || \
	(state) == DL_UNBIND_PENDING)
54 
55 /*
56  * Process a DLPI protocol message.
57  * The primitives DL_BIND_REQ, DL_ENABMULTI_REQ, DL_PROMISCON_REQ,
58  * DL_SET_PHYS_ADDR_REQ put the data link below our dld_str_t into an
59  * 'active' state. The primitive DL_PASSIVE_REQ marks our dld_str_t
60  * as 'passive' and forbids it from being subsequently made 'active'
61  * by the above primitives.
62  */
63 void
64 dld_proto(dld_str_t *dsp, mblk_t *mp)
65 {
66 	t_uscalar_t		prim;
67 
68 	if (MBLKL(mp) < sizeof (t_uscalar_t)) {
69 		freemsg(mp);
70 		return;
71 	}
72 	prim = ((union DL_primitives *)mp->b_rptr)->dl_primitive;
73 
74 	switch (prim) {
75 	case DL_INFO_REQ:
76 		proto_info_req(dsp, mp);
77 		break;
78 	case DL_BIND_REQ:
79 		proto_bind_req(dsp, mp);
80 		break;
81 	case DL_UNBIND_REQ:
82 		proto_unbind_req(dsp, mp);
83 		break;
84 	case DL_UNITDATA_REQ:
85 		proto_unitdata_req(dsp, mp);
86 		break;
87 	case DL_UDQOS_REQ:
88 		proto_udqos_req(dsp, mp);
89 		break;
90 	case DL_ATTACH_REQ:
91 		proto_attach_req(dsp, mp);
92 		break;
93 	case DL_DETACH_REQ:
94 		proto_detach_req(dsp, mp);
95 		break;
96 	case DL_ENABMULTI_REQ:
97 		proto_enabmulti_req(dsp, mp);
98 		break;
99 	case DL_DISABMULTI_REQ:
100 		proto_disabmulti_req(dsp, mp);
101 		break;
102 	case DL_PROMISCON_REQ:
103 		proto_promiscon_req(dsp, mp);
104 		break;
105 	case DL_PROMISCOFF_REQ:
106 		proto_promiscoff_req(dsp, mp);
107 		break;
108 	case DL_PHYS_ADDR_REQ:
109 		proto_physaddr_req(dsp, mp);
110 		break;
111 	case DL_SET_PHYS_ADDR_REQ:
112 		proto_setphysaddr_req(dsp, mp);
113 		break;
114 	case DL_NOTIFY_REQ:
115 		proto_notify_req(dsp, mp);
116 		break;
117 	case DL_CAPABILITY_REQ:
118 		proto_capability_req(dsp, mp);
119 		break;
120 	case DL_PASSIVE_REQ:
121 		proto_passive_req(dsp, mp);
122 		break;
123 	default:
124 		proto_req(dsp, mp);
125 		break;
126 	}
127 }
128 
/*
 * Arithmetic negation of `x'.  The whole expansion is parenthesized so the
 * result behaves as a single operand wherever the macro is used.
 */
#define	NEG(x)	(-(x))
/*
 * Layout of the DL_INFO_ACK reply built by proto_info_req().  The fixed
 * dl_info_ack_t comes first (the handler asserts that it sits at the start
 * of the message) and is followed by the variable parts that its
 * offset/length fields point at: the DLSAP address (MAC address plus a
 * 16-bit SAP), the broadcast address, and the QoS range and selection
 * structures.
 */
typedef struct dl_info_ack_wrapper {
	dl_info_ack_t		dl_info;
	uint8_t			dl_addr[MAXMACADDRLEN + sizeof (uint16_t)];
	uint8_t			dl_brdcst_addr[MAXMACADDRLEN];
	dl_qos_cl_range1_t	dl_qos_range1;
	dl_qos_cl_sel1_t	dl_qos_sel1;
} dl_info_ack_wrapper_t;
137 
138 /*
139  * DL_INFO_REQ
140  */
141 static void
142 proto_info_req(dld_str_t *dsp, mblk_t *mp)
143 {
144 	dl_info_ack_wrapper_t	*dlwp;
145 	dl_info_ack_t		*dlp;
146 	dl_qos_cl_sel1_t	*selp;
147 	dl_qos_cl_range1_t	*rangep;
148 	uint8_t			*addr;
149 	uint8_t			*brdcst_addr;
150 	uint_t			addr_length;
151 	uint_t			sap_length;
152 	mac_info_t		minfo;
153 	mac_info_t		*minfop;
154 	queue_t			*q = dsp->ds_wq;
155 
156 	/*
157 	 * Swap the request message for one large enough to contain the
158 	 * wrapper structure defined above.
159 	 */
160 	if ((mp = mexchange(q, mp, sizeof (dl_info_ack_wrapper_t),
161 	    M_PCPROTO, 0)) == NULL)
162 		return;
163 
164 	bzero(mp->b_rptr, sizeof (dl_info_ack_wrapper_t));
165 	dlwp = (dl_info_ack_wrapper_t *)mp->b_rptr;
166 
167 	dlp = &(dlwp->dl_info);
168 	ASSERT(dlp == (dl_info_ack_t *)mp->b_rptr);
169 
170 	dlp->dl_primitive = DL_INFO_ACK;
171 
172 	/*
173 	 * Set up the sub-structure pointers.
174 	 */
175 	addr = dlwp->dl_addr;
176 	brdcst_addr = dlwp->dl_brdcst_addr;
177 	rangep = &(dlwp->dl_qos_range1);
178 	selp = &(dlwp->dl_qos_sel1);
179 
180 	/*
181 	 * This driver supports only version 2 connectionless DLPI provider
182 	 * nodes.
183 	 */
184 	dlp->dl_service_mode = DL_CLDLS;
185 	dlp->dl_version = DL_VERSION_2;
186 
187 	/*
188 	 * Set the style of the provider
189 	 */
190 	dlp->dl_provider_style = dsp->ds_style;
191 	ASSERT(dlp->dl_provider_style == DL_STYLE1 ||
192 	    dlp->dl_provider_style == DL_STYLE2);
193 
194 	/*
195 	 * Set the current DLPI state.
196 	 */
197 	dlp->dl_current_state = dsp->ds_dlstate;
198 
199 	/*
200 	 * Gratuitously set the media type. This is to deal with modules
201 	 * that assume the media type is known prior to DL_ATTACH_REQ
202 	 * being completed.
203 	 */
204 	dlp->dl_mac_type = DL_ETHER;
205 
206 	/*
207 	 * If the stream is not at least attached we try to retrieve the
208 	 * mac_info using mac_info_get()
209 	 */
210 	if (dsp->ds_dlstate == DL_UNATTACHED ||
211 	    dsp->ds_dlstate == DL_ATTACH_PENDING ||
212 	    dsp->ds_dlstate == DL_DETACH_PENDING) {
213 		if (!mac_info_get(ddi_major_to_name(dsp->ds_major), &minfo)) {
214 			/*
215 			 * Cannot find mac_info. giving up.
216 			 */
217 			goto done;
218 		}
219 		minfop = &minfo;
220 	} else {
221 		minfop = (mac_info_t *)dsp->ds_mip;
222 		/* We can only get the sdu if we're attached. */
223 		mac_sdu_get(dsp->ds_mh, &dlp->dl_min_sdu, &dlp->dl_max_sdu);
224 	}
225 
226 	/*
227 	 * Set the media type (properly this time).
228 	 */
229 	if (dsp->ds_native)
230 		dlp->dl_mac_type = minfop->mi_nativemedia;
231 	else
232 		dlp->dl_mac_type = minfop->mi_media;
233 
234 	/*
235 	 * Set the DLSAP length. We only support 16 bit values and they
236 	 * appear after the MAC address portion of DLSAP addresses.
237 	 */
238 	sap_length = sizeof (uint16_t);
239 	dlp->dl_sap_length = NEG(sap_length);
240 
241 	addr_length = minfop->mi_addr_length;
242 
243 	/*
244 	 * Copy in the media broadcast address.
245 	 */
246 	if (minfop->mi_brdcst_addr != NULL) {
247 		dlp->dl_brdcst_addr_offset =
248 		    (uintptr_t)brdcst_addr - (uintptr_t)dlp;
249 		bcopy(minfop->mi_brdcst_addr, brdcst_addr, addr_length);
250 		dlp->dl_brdcst_addr_length = addr_length;
251 	}
252 
253 	/* Only VLAN links and links that have a normal tag mode support QOS. */
254 	if (mac_client_vid(dsp->ds_mch) != VLAN_ID_NONE ||
255 	    dsp->ds_dlp->dl_tagmode == LINK_TAGMODE_NORMAL) {
256 		dlp->dl_qos_range_offset = (uintptr_t)rangep - (uintptr_t)dlp;
257 		dlp->dl_qos_range_length = sizeof (dl_qos_cl_range1_t);
258 
259 		rangep->dl_qos_type = DL_QOS_CL_RANGE1;
260 		rangep->dl_trans_delay.dl_target_value = DL_UNKNOWN;
261 		rangep->dl_trans_delay.dl_accept_value = DL_UNKNOWN;
262 		rangep->dl_protection.dl_min = DL_UNKNOWN;
263 		rangep->dl_protection.dl_max = DL_UNKNOWN;
264 		rangep->dl_residual_error = DL_UNKNOWN;
265 
266 		/*
267 		 * Specify the supported range of priorities.
268 		 */
269 		rangep->dl_priority.dl_min = 0;
270 		rangep->dl_priority.dl_max = (1 << VLAN_PRI_SIZE) - 1;
271 
272 		dlp->dl_qos_offset = (uintptr_t)selp - (uintptr_t)dlp;
273 		dlp->dl_qos_length = sizeof (dl_qos_cl_sel1_t);
274 
275 		selp->dl_qos_type = DL_QOS_CL_SEL1;
276 		selp->dl_trans_delay = DL_UNKNOWN;
277 		selp->dl_protection = DL_UNKNOWN;
278 		selp->dl_residual_error = DL_UNKNOWN;
279 
280 		/*
281 		 * Specify the current priority (which can be changed by
282 		 * the DL_UDQOS_REQ primitive).
283 		 */
284 		selp->dl_priority = dsp->ds_pri;
285 	}
286 
287 	dlp->dl_addr_length = addr_length + sizeof (uint16_t);
288 	if (dsp->ds_dlstate == DL_IDLE) {
289 		/*
290 		 * The stream is bound. Therefore we can formulate a valid
291 		 * DLSAP address.
292 		 */
293 		dlp->dl_addr_offset = (uintptr_t)addr - (uintptr_t)dlp;
294 		if (addr_length > 0)
295 			mac_unicast_primary_get(dsp->ds_mh, addr);
296 
297 		*(uint16_t *)(addr + addr_length) = dsp->ds_sap;
298 	}
299 
300 done:
301 	ASSERT(IMPLY(dlp->dl_qos_offset != 0, dlp->dl_qos_length != 0));
302 	ASSERT(IMPLY(dlp->dl_qos_range_offset != 0,
303 	    dlp->dl_qos_range_length != 0));
304 	ASSERT(IMPLY(dlp->dl_addr_offset != 0, dlp->dl_addr_length != 0));
305 	ASSERT(IMPLY(dlp->dl_brdcst_addr_offset != 0,
306 	    dlp->dl_brdcst_addr_length != 0));
307 
308 	qreply(q, mp);
309 }
310 
311 /*
312  * DL_ATTACH_REQ
313  */
314 static void
315 proto_attach_req(dld_str_t *dsp, mblk_t *mp)
316 {
317 	dl_attach_req_t	*dlp = (dl_attach_req_t *)mp->b_rptr;
318 	int		err = 0;
319 	t_uscalar_t	dl_err;
320 	queue_t		*q = dsp->ds_wq;
321 
322 	if (MBLKL(mp) < sizeof (dl_attach_req_t) ||
323 	    dlp->dl_ppa < 0 || dsp->ds_style == DL_STYLE1) {
324 		dl_err = DL_BADPRIM;
325 		goto failed;
326 	}
327 
328 	if (dsp->ds_dlstate != DL_UNATTACHED) {
329 		dl_err = DL_OUTSTATE;
330 		goto failed;
331 	}
332 
333 	dsp->ds_dlstate = DL_ATTACH_PENDING;
334 
335 	err = dld_str_attach(dsp, dlp->dl_ppa);
336 	if (err != 0) {
337 		switch (err) {
338 		case ENOENT:
339 			dl_err = DL_BADPPA;
340 			err = 0;
341 			break;
342 		default:
343 			dl_err = DL_SYSERR;
344 			break;
345 		}
346 		dsp->ds_dlstate = DL_UNATTACHED;
347 		goto failed;
348 	}
349 	ASSERT(dsp->ds_dlstate == DL_UNBOUND);
350 	dlokack(q, mp, DL_ATTACH_REQ);
351 	return;
352 
353 failed:
354 	dlerrorack(q, mp, DL_ATTACH_REQ, dl_err, (t_uscalar_t)err);
355 }
356 
357 /*
358  * DL_DETACH_REQ
359  */
360 static void
361 proto_detach_req(dld_str_t *dsp, mblk_t *mp)
362 {
363 	queue_t		*q = dsp->ds_wq;
364 	t_uscalar_t	dl_err;
365 
366 	if (MBLKL(mp) < sizeof (dl_detach_req_t)) {
367 		dl_err = DL_BADPRIM;
368 		goto failed;
369 	}
370 
371 	if (dsp->ds_dlstate != DL_UNBOUND) {
372 		dl_err = DL_OUTSTATE;
373 		goto failed;
374 	}
375 
376 	if (dsp->ds_style == DL_STYLE1) {
377 		dl_err = DL_BADPRIM;
378 		goto failed;
379 	}
380 
381 	ASSERT(dsp->ds_datathr_cnt == 0);
382 	dsp->ds_dlstate = DL_DETACH_PENDING;
383 
384 	dld_str_detach(dsp);
385 	dlokack(dsp->ds_wq, mp, DL_DETACH_REQ);
386 	return;
387 
388 failed:
389 	dlerrorack(q, mp, DL_DETACH_REQ, dl_err, 0);
390 }
391 
392 /*
393  * DL_BIND_REQ
394  */
395 static void
396 proto_bind_req(dld_str_t *dsp, mblk_t *mp)
397 {
398 	dl_bind_req_t	*dlp = (dl_bind_req_t *)mp->b_rptr;
399 	int		err = 0;
400 	uint8_t		dlsap_addr[MAXMACADDRLEN + sizeof (uint16_t)];
401 	uint_t		dlsap_addr_length;
402 	t_uscalar_t	dl_err;
403 	t_scalar_t	sap;
404 	queue_t		*q = dsp->ds_wq;
405 	mac_perim_handle_t	mph;
406 	void		*mdip;
407 	int32_t		intr_cpu;
408 
409 	if (MBLKL(mp) < sizeof (dl_bind_req_t)) {
410 		dl_err = DL_BADPRIM;
411 		goto failed;
412 	}
413 
414 	if (dlp->dl_xidtest_flg != 0) {
415 		dl_err = DL_NOAUTO;
416 		goto failed;
417 	}
418 
419 	if (dlp->dl_service_mode != DL_CLDLS) {
420 		dl_err = DL_UNSUPPORTED;
421 		goto failed;
422 	}
423 
424 	if (dsp->ds_dlstate != DL_UNBOUND) {
425 		dl_err = DL_OUTSTATE;
426 		goto failed;
427 	}
428 
429 	mac_perim_enter_by_mh(dsp->ds_mh, &mph);
430 
431 	if (dsp->ds_passivestate == DLD_UNINITIALIZED &&
432 	    ((err = dls_active_set(dsp)) != 0)) {
433 		dl_err = DL_SYSERR;
434 		goto failed2;
435 	}
436 
437 	dsp->ds_dlstate = DL_BIND_PENDING;
438 	/*
439 	 * Set the receive callback.
440 	 */
441 	dls_rx_set(dsp, (dsp->ds_mode == DLD_RAW) ?
442 	    dld_str_rx_raw : dld_str_rx_unitdata, dsp);
443 
444 	/*
445 	 * Bind the channel such that it can receive packets.
446 	 */
447 	sap = dlp->dl_sap;
448 	err = dls_bind(dsp, sap);
449 	if (err != 0) {
450 		switch (err) {
451 		case EINVAL:
452 			dl_err = DL_BADADDR;
453 			err = 0;
454 			break;
455 		default:
456 			dl_err = DL_SYSERR;
457 			break;
458 		}
459 
460 		dsp->ds_dlstate = DL_UNBOUND;
461 		if (dsp->ds_passivestate == DLD_UNINITIALIZED)
462 			dls_active_clear(dsp);
463 		goto failed2;
464 	}
465 
466 	intr_cpu = mac_client_intr_cpu(dsp->ds_mch);
467 	mdip = mac_get_devinfo(dsp->ds_mh);
468 	mac_perim_exit(mph);
469 
470 	/*
471 	 * We do this after we get out of the perim to avoid deadlocks
472 	 * etc. since part of mac_client_retarget_intr is to walk the
473 	 * device tree in order to find and retarget the interrupts.
474 	 */
475 	mac_client_set_intr_cpu(mdip, dsp->ds_mch, intr_cpu);
476 
477 	/*
478 	 * Copy in MAC address.
479 	 */
480 	dlsap_addr_length = dsp->ds_mip->mi_addr_length;
481 	mac_unicast_primary_get(dsp->ds_mh, dlsap_addr);
482 
483 	/*
484 	 * Copy in the SAP.
485 	 */
486 	*(uint16_t *)(dlsap_addr + dlsap_addr_length) = sap;
487 	dlsap_addr_length += sizeof (uint16_t);
488 
489 	dsp->ds_dlstate = DL_IDLE;
490 	if (dsp->ds_passivestate == DLD_UNINITIALIZED)
491 		dsp->ds_passivestate = DLD_ACTIVE;
492 
493 	dlbindack(q, mp, sap, dlsap_addr, dlsap_addr_length, 0, 0);
494 	return;
495 
496 failed2:
497 	mac_perim_exit(mph);
498 failed:
499 	dlerrorack(q, mp, DL_BIND_REQ, dl_err, (t_uscalar_t)err);
500 }
501 
502 /*
503  * DL_UNBIND_REQ
504  */
505 static void
506 proto_unbind_req(dld_str_t *dsp, mblk_t *mp)
507 {
508 	queue_t		*q = dsp->ds_wq;
509 	t_uscalar_t	dl_err;
510 	mac_perim_handle_t	mph;
511 
512 	if (MBLKL(mp) < sizeof (dl_unbind_req_t)) {
513 		dl_err = DL_BADPRIM;
514 		goto failed;
515 	}
516 
517 	if (dsp->ds_dlstate != DL_IDLE) {
518 		dl_err = DL_OUTSTATE;
519 		goto failed;
520 	}
521 
522 	mutex_enter(&dsp->ds_lock);
523 	while (dsp->ds_datathr_cnt != 0)
524 		cv_wait(&dsp->ds_datathr_cv, &dsp->ds_lock);
525 
526 	dsp->ds_dlstate = DL_UNBIND_PENDING;
527 	mutex_exit(&dsp->ds_lock);
528 
529 	mac_perim_enter_by_mh(dsp->ds_mh, &mph);
530 	/*
531 	 * Unbind the channel to stop packets being received.
532 	 */
533 	if (dls_unbind(dsp) != 0) {
534 		dl_err = DL_OUTSTATE;
535 		mac_perim_exit(mph);
536 		goto failed;
537 	}
538 
539 	/*
540 	 * Disable polling mode, if it is enabled.
541 	 */
542 	(void) dld_capab_poll_disable(dsp, NULL);
543 
544 	/*
545 	 * Clear LSO flags.
546 	 */
547 	dsp->ds_lso = B_FALSE;
548 	dsp->ds_lso_max = 0;
549 
550 	/*
551 	 * Clear the receive callback.
552 	 */
553 	dls_rx_set(dsp, NULL, NULL);
554 	dsp->ds_direct = B_FALSE;
555 
556 	/*
557 	 * Set the mode back to the default (unitdata).
558 	 */
559 	dsp->ds_mode = DLD_UNITDATA;
560 	dsp->ds_dlstate = DL_UNBOUND;
561 
562 	mac_perim_exit(mph);
563 	dlokack(dsp->ds_wq, mp, DL_UNBIND_REQ);
564 	return;
565 failed:
566 	dlerrorack(q, mp, DL_UNBIND_REQ, dl_err, 0);
567 }
568 
569 /*
570  * DL_PROMISCON_REQ
571  */
572 static void
573 proto_promiscon_req(dld_str_t *dsp, mblk_t *mp)
574 {
575 	dl_promiscon_req_t *dlp = (dl_promiscon_req_t *)mp->b_rptr;
576 	int		err = 0;
577 	t_uscalar_t	dl_err;
578 	uint32_t	promisc_saved;
579 	queue_t		*q = dsp->ds_wq;
580 	mac_perim_handle_t	mph;
581 
582 	if (MBLKL(mp) < sizeof (dl_promiscon_req_t)) {
583 		dl_err = DL_BADPRIM;
584 		goto failed;
585 	}
586 
587 	if (dsp->ds_dlstate == DL_UNATTACHED ||
588 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
589 		dl_err = DL_OUTSTATE;
590 		goto failed;
591 	}
592 
593 	promisc_saved = dsp->ds_promisc;
594 	switch (dlp->dl_level) {
595 	case DL_PROMISC_SAP:
596 		dsp->ds_promisc |= DLS_PROMISC_SAP;
597 		break;
598 
599 	case DL_PROMISC_MULTI:
600 		dsp->ds_promisc |= DLS_PROMISC_MULTI;
601 		break;
602 
603 	case DL_PROMISC_PHYS:
604 		dsp->ds_promisc |= DLS_PROMISC_PHYS;
605 		break;
606 
607 	default:
608 		dl_err = DL_NOTSUPPORTED;
609 		goto failed;
610 	}
611 
612 	mac_perim_enter_by_mh(dsp->ds_mh, &mph);
613 
614 	if (dsp->ds_passivestate == DLD_UNINITIALIZED &&
615 	    ((err = dls_active_set(dsp)) != 0)) {
616 		dsp->ds_promisc = promisc_saved;
617 		dl_err = DL_SYSERR;
618 		goto failed2;
619 	}
620 
621 	/*
622 	 * Adjust channel promiscuity.
623 	 */
624 	err = dls_promisc(dsp, promisc_saved);
625 
626 	if (err != 0) {
627 		dl_err = DL_SYSERR;
628 		dsp->ds_promisc = promisc_saved;
629 		if (dsp->ds_passivestate == DLD_UNINITIALIZED)
630 			dls_active_clear(dsp);
631 		goto failed2;
632 	}
633 
634 	mac_perim_exit(mph);
635 
636 	if (dsp->ds_passivestate == DLD_UNINITIALIZED)
637 		dsp->ds_passivestate = DLD_ACTIVE;
638 	dlokack(q, mp, DL_PROMISCON_REQ);
639 	return;
640 
641 failed2:
642 	mac_perim_exit(mph);
643 failed:
644 	dlerrorack(q, mp, DL_PROMISCON_REQ, dl_err, (t_uscalar_t)err);
645 }
646 
647 /*
648  * DL_PROMISCOFF_REQ
649  */
650 static void
651 proto_promiscoff_req(dld_str_t *dsp, mblk_t *mp)
652 {
653 	dl_promiscoff_req_t *dlp = (dl_promiscoff_req_t *)mp->b_rptr;
654 	int		err = 0;
655 	t_uscalar_t	dl_err;
656 	uint32_t	promisc_saved;
657 	queue_t		*q = dsp->ds_wq;
658 	mac_perim_handle_t	mph;
659 
660 	if (MBLKL(mp) < sizeof (dl_promiscoff_req_t)) {
661 		dl_err = DL_BADPRIM;
662 		goto failed;
663 	}
664 
665 	if (dsp->ds_dlstate == DL_UNATTACHED ||
666 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
667 		dl_err = DL_OUTSTATE;
668 		goto failed;
669 	}
670 
671 	promisc_saved = dsp->ds_promisc;
672 	switch (dlp->dl_level) {
673 	case DL_PROMISC_SAP:
674 		if (!(dsp->ds_promisc & DLS_PROMISC_SAP)) {
675 			dl_err = DL_NOTENAB;
676 			goto failed;
677 		}
678 		dsp->ds_promisc &= ~DLS_PROMISC_SAP;
679 		break;
680 
681 	case DL_PROMISC_MULTI:
682 		if (!(dsp->ds_promisc & DLS_PROMISC_MULTI)) {
683 			dl_err = DL_NOTENAB;
684 			goto failed;
685 		}
686 		dsp->ds_promisc &= ~DLS_PROMISC_MULTI;
687 		break;
688 
689 	case DL_PROMISC_PHYS:
690 		if (!(dsp->ds_promisc & DLS_PROMISC_PHYS)) {
691 			dl_err = DL_NOTENAB;
692 			goto failed;
693 		}
694 		dsp->ds_promisc &= ~DLS_PROMISC_PHYS;
695 		break;
696 
697 	default:
698 		dl_err = DL_NOTSUPPORTED;
699 		goto failed;
700 	}
701 
702 	mac_perim_enter_by_mh(dsp->ds_mh, &mph);
703 	/*
704 	 * Adjust channel promiscuity.
705 	 */
706 	err = dls_promisc(dsp, promisc_saved);
707 	mac_perim_exit(mph);
708 
709 	if (err != 0) {
710 		dl_err = DL_SYSERR;
711 		goto failed;
712 	}
713 	dlokack(q, mp, DL_PROMISCOFF_REQ);
714 	return;
715 failed:
716 	dlerrorack(q, mp, DL_PROMISCOFF_REQ, dl_err, (t_uscalar_t)err);
717 }
718 
719 /*
720  * DL_ENABMULTI_REQ
721  */
722 static void
723 proto_enabmulti_req(dld_str_t *dsp, mblk_t *mp)
724 {
725 	dl_enabmulti_req_t *dlp = (dl_enabmulti_req_t *)mp->b_rptr;
726 	int		err = 0;
727 	t_uscalar_t	dl_err;
728 	queue_t		*q = dsp->ds_wq;
729 	mac_perim_handle_t	mph;
730 
731 	if (dsp->ds_dlstate == DL_UNATTACHED ||
732 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
733 		dl_err = DL_OUTSTATE;
734 		goto failed;
735 	}
736 
737 	if (MBLKL(mp) < sizeof (dl_enabmulti_req_t) ||
738 	    !MBLKIN(mp, dlp->dl_addr_offset, dlp->dl_addr_length) ||
739 	    dlp->dl_addr_length != dsp->ds_mip->mi_addr_length) {
740 		dl_err = DL_BADPRIM;
741 		goto failed;
742 	}
743 
744 	mac_perim_enter_by_mh(dsp->ds_mh, &mph);
745 
746 	if (dsp->ds_passivestate == DLD_UNINITIALIZED &&
747 	    ((err = dls_active_set(dsp)) != 0)) {
748 		dl_err = DL_SYSERR;
749 		goto failed2;
750 	}
751 
752 	err = dls_multicst_add(dsp, mp->b_rptr + dlp->dl_addr_offset);
753 
754 	if (err != 0) {
755 		switch (err) {
756 		case EINVAL:
757 			dl_err = DL_BADADDR;
758 			err = 0;
759 			break;
760 		case ENOSPC:
761 			dl_err = DL_TOOMANY;
762 			err = 0;
763 			break;
764 		default:
765 			dl_err = DL_SYSERR;
766 			break;
767 		}
768 		if (dsp->ds_passivestate == DLD_UNINITIALIZED)
769 			dls_active_clear(dsp);
770 
771 		goto failed2;
772 	}
773 
774 	mac_perim_exit(mph);
775 
776 	if (dsp->ds_passivestate == DLD_UNINITIALIZED)
777 		dsp->ds_passivestate = DLD_ACTIVE;
778 	dlokack(q, mp, DL_ENABMULTI_REQ);
779 	return;
780 
781 failed2:
782 	mac_perim_exit(mph);
783 failed:
784 	dlerrorack(q, mp, DL_ENABMULTI_REQ, dl_err, (t_uscalar_t)err);
785 }
786 
787 /*
788  * DL_DISABMULTI_REQ
789  */
790 static void
791 proto_disabmulti_req(dld_str_t *dsp, mblk_t *mp)
792 {
793 	dl_disabmulti_req_t *dlp = (dl_disabmulti_req_t *)mp->b_rptr;
794 	int		err = 0;
795 	t_uscalar_t	dl_err;
796 	queue_t		*q = dsp->ds_wq;
797 	mac_perim_handle_t	mph;
798 
799 	if (dsp->ds_dlstate == DL_UNATTACHED ||
800 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
801 		dl_err = DL_OUTSTATE;
802 		goto failed;
803 	}
804 
805 	if (MBLKL(mp) < sizeof (dl_disabmulti_req_t) ||
806 	    !MBLKIN(mp, dlp->dl_addr_offset, dlp->dl_addr_length) ||
807 	    dlp->dl_addr_length != dsp->ds_mip->mi_addr_length) {
808 		dl_err = DL_BADPRIM;
809 		goto failed;
810 	}
811 
812 	mac_perim_enter_by_mh(dsp->ds_mh, &mph);
813 	err = dls_multicst_remove(dsp, mp->b_rptr + dlp->dl_addr_offset);
814 	mac_perim_exit(mph);
815 
816 	if (err != 0) {
817 	switch (err) {
818 		case EINVAL:
819 			dl_err = DL_BADADDR;
820 			err = 0;
821 			break;
822 
823 		case ENOENT:
824 			dl_err = DL_NOTENAB;
825 			err = 0;
826 			break;
827 
828 		default:
829 			dl_err = DL_SYSERR;
830 			break;
831 		}
832 		goto failed;
833 	}
834 	dlokack(q, mp, DL_DISABMULTI_REQ);
835 	return;
836 failed:
837 	dlerrorack(q, mp, DL_DISABMULTI_REQ, dl_err, (t_uscalar_t)err);
838 }
839 
840 /*
841  * DL_PHYS_ADDR_REQ
842  */
843 static void
844 proto_physaddr_req(dld_str_t *dsp, mblk_t *mp)
845 {
846 	dl_phys_addr_req_t *dlp = (dl_phys_addr_req_t *)mp->b_rptr;
847 	queue_t		*q = dsp->ds_wq;
848 	t_uscalar_t	dl_err;
849 	char		*addr;
850 	uint_t		addr_length;
851 
852 	if (MBLKL(mp) < sizeof (dl_phys_addr_req_t)) {
853 		dl_err = DL_BADPRIM;
854 		goto failed;
855 	}
856 
857 	if (dsp->ds_dlstate == DL_UNATTACHED ||
858 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
859 		dl_err = DL_OUTSTATE;
860 		goto failed;
861 	}
862 
863 	if (dlp->dl_addr_type != DL_CURR_PHYS_ADDR &&
864 	    dlp->dl_addr_type != DL_FACT_PHYS_ADDR) {
865 		dl_err = DL_UNSUPPORTED;
866 		goto failed;
867 	}
868 
869 	addr_length = dsp->ds_mip->mi_addr_length;
870 	if (addr_length > 0) {
871 		addr = kmem_alloc(addr_length, KM_SLEEP);
872 		if (dlp->dl_addr_type == DL_CURR_PHYS_ADDR)
873 			mac_unicast_primary_get(dsp->ds_mh, (uint8_t *)addr);
874 		else
875 			bcopy(dsp->ds_mip->mi_unicst_addr, addr, addr_length);
876 
877 		dlphysaddrack(q, mp, addr, (t_uscalar_t)addr_length);
878 		kmem_free(addr, addr_length);
879 	} else {
880 		dlphysaddrack(q, mp, NULL, 0);
881 	}
882 	return;
883 failed:
884 	dlerrorack(q, mp, DL_PHYS_ADDR_REQ, dl_err, 0);
885 }
886 
887 /*
888  * DL_SET_PHYS_ADDR_REQ
889  */
890 static void
891 proto_setphysaddr_req(dld_str_t *dsp, mblk_t *mp)
892 {
893 	dl_set_phys_addr_req_t *dlp = (dl_set_phys_addr_req_t *)mp->b_rptr;
894 	int		err = 0;
895 	t_uscalar_t	dl_err;
896 	queue_t		*q = dsp->ds_wq;
897 	mac_perim_handle_t	mph;
898 
899 	if (dsp->ds_dlstate == DL_UNATTACHED ||
900 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
901 		dl_err = DL_OUTSTATE;
902 		goto failed;
903 	}
904 
905 	if (MBLKL(mp) < sizeof (dl_set_phys_addr_req_t) ||
906 	    !MBLKIN(mp, dlp->dl_addr_offset, dlp->dl_addr_length) ||
907 	    dlp->dl_addr_length != dsp->ds_mip->mi_addr_length) {
908 		dl_err = DL_BADPRIM;
909 		goto failed;
910 	}
911 
912 	mac_perim_enter_by_mh(dsp->ds_mh, &mph);
913 
914 	if (dsp->ds_passivestate == DLD_UNINITIALIZED &&
915 	    ((err = dls_active_set(dsp)) != 0)) {
916 		dl_err = DL_SYSERR;
917 		goto failed2;
918 	}
919 
920 	err = mac_unicast_primary_set(dsp->ds_mh,
921 	    mp->b_rptr + dlp->dl_addr_offset);
922 	if (err != 0) {
923 		switch (err) {
924 		case EINVAL:
925 			dl_err = DL_BADADDR;
926 			err = 0;
927 			break;
928 
929 		default:
930 			dl_err = DL_SYSERR;
931 			break;
932 		}
933 		if (dsp->ds_passivestate == DLD_UNINITIALIZED)
934 			dls_active_clear(dsp);
935 
936 		goto failed2;
937 
938 	}
939 
940 	mac_perim_exit(mph);
941 
942 	if (dsp->ds_passivestate == DLD_UNINITIALIZED)
943 		dsp->ds_passivestate = DLD_ACTIVE;
944 	dlokack(q, mp, DL_SET_PHYS_ADDR_REQ);
945 	return;
946 
947 failed2:
948 	mac_perim_exit(mph);
949 failed:
950 	dlerrorack(q, mp, DL_SET_PHYS_ADDR_REQ, dl_err, (t_uscalar_t)err);
951 }
952 
953 /*
954  * DL_UDQOS_REQ
955  */
956 static void
957 proto_udqos_req(dld_str_t *dsp, mblk_t *mp)
958 {
959 	dl_udqos_req_t *dlp = (dl_udqos_req_t *)mp->b_rptr;
960 	dl_qos_cl_sel1_t *selp;
961 	int		off, len;
962 	t_uscalar_t	dl_err;
963 	queue_t		*q = dsp->ds_wq;
964 
965 	off = dlp->dl_qos_offset;
966 	len = dlp->dl_qos_length;
967 
968 	if (MBLKL(mp) < sizeof (dl_udqos_req_t) || !MBLKIN(mp, off, len)) {
969 		dl_err = DL_BADPRIM;
970 		goto failed;
971 	}
972 
973 	selp = (dl_qos_cl_sel1_t *)(mp->b_rptr + off);
974 	if (selp->dl_qos_type != DL_QOS_CL_SEL1) {
975 		dl_err = DL_BADQOSTYPE;
976 		goto failed;
977 	}
978 
979 	if (selp->dl_priority > (1 << VLAN_PRI_SIZE) - 1 ||
980 	    selp->dl_priority < 0) {
981 		dl_err = DL_BADQOSPARAM;
982 		goto failed;
983 	}
984 
985 	dsp->ds_pri = selp->dl_priority;
986 	dlokack(q, mp, DL_UDQOS_REQ);
987 	return;
988 failed:
989 	dlerrorack(q, mp, DL_UDQOS_REQ, dl_err, 0);
990 }
991 
992 static boolean_t
993 check_ip_above(queue_t *q)
994 {
995 	queue_t		*next_q;
996 	boolean_t	ret = B_TRUE;
997 
998 	claimstr(q);
999 	next_q = q->q_next;
1000 	if (strcmp(next_q->q_qinfo->qi_minfo->mi_idname, "ip") != 0)
1001 		ret = B_FALSE;
1002 	releasestr(q);
1003 	return (ret);
1004 }
1005 
1006 /*
1007  * DL_CAPABILITY_REQ
1008  */
1009 static void
1010 proto_capability_req(dld_str_t *dsp, mblk_t *mp)
1011 {
1012 	dl_capability_req_t *dlp = (dl_capability_req_t *)mp->b_rptr;
1013 	dl_capability_sub_t *sp;
1014 	size_t		size, len;
1015 	offset_t	off, end;
1016 	t_uscalar_t	dl_err;
1017 	queue_t		*q = dsp->ds_wq;
1018 
1019 	if (MBLKL(mp) < sizeof (dl_capability_req_t)) {
1020 		dl_err = DL_BADPRIM;
1021 		goto failed;
1022 	}
1023 
1024 	if (dsp->ds_dlstate == DL_UNATTACHED ||
1025 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
1026 		dl_err = DL_OUTSTATE;
1027 		goto failed;
1028 	}
1029 
1030 	/*
1031 	 * This request is overloaded. If there are no requested capabilities
1032 	 * then we just want to acknowledge with all the capabilities we
1033 	 * support. Otherwise we enable the set of capabilities requested.
1034 	 */
1035 	if (dlp->dl_sub_length == 0) {
1036 		proto_capability_advertise(dsp, mp);
1037 		return;
1038 	}
1039 
1040 	if (!MBLKIN(mp, dlp->dl_sub_offset, dlp->dl_sub_length)) {
1041 		dl_err = DL_BADPRIM;
1042 		goto failed;
1043 	}
1044 
1045 	dlp->dl_primitive = DL_CAPABILITY_ACK;
1046 
1047 	off = dlp->dl_sub_offset;
1048 	len = dlp->dl_sub_length;
1049 
1050 	/*
1051 	 * Walk the list of capabilities to be enabled.
1052 	 */
1053 	for (end = off + len; off < end; ) {
1054 		sp = (dl_capability_sub_t *)(mp->b_rptr + off);
1055 		size = sizeof (dl_capability_sub_t) + sp->dl_length;
1056 
1057 		if (off + size > end ||
1058 		    !IS_P2ALIGNED(off, sizeof (uint32_t))) {
1059 			dl_err = DL_BADPRIM;
1060 			goto failed;
1061 		}
1062 
1063 		switch (sp->dl_cap) {
1064 		/*
1065 		 * TCP/IP checksum offload to hardware.
1066 		 */
1067 		case DL_CAPAB_HCKSUM: {
1068 			dl_capab_hcksum_t *hcksump;
1069 			dl_capab_hcksum_t hcksum;
1070 
1071 			hcksump = (dl_capab_hcksum_t *)&sp[1];
1072 			/*
1073 			 * Copy for alignment.
1074 			 */
1075 			bcopy(hcksump, &hcksum, sizeof (dl_capab_hcksum_t));
1076 			dlcapabsetqid(&(hcksum.hcksum_mid), dsp->ds_rq);
1077 			bcopy(&hcksum, hcksump, sizeof (dl_capab_hcksum_t));
1078 			break;
1079 		}
1080 
1081 		case DL_CAPAB_DLD: {
1082 			dl_capab_dld_t	*dldp;
1083 			dl_capab_dld_t	dld;
1084 
1085 			dldp = (dl_capab_dld_t *)&sp[1];
1086 			/*
1087 			 * Copy for alignment.
1088 			 */
1089 			bcopy(dldp, &dld, sizeof (dl_capab_dld_t));
1090 			dlcapabsetqid(&(dld.dld_mid), dsp->ds_rq);
1091 			bcopy(&dld, dldp, sizeof (dl_capab_dld_t));
1092 			break;
1093 		}
1094 		default:
1095 			break;
1096 		}
1097 		off += size;
1098 	}
1099 	qreply(q, mp);
1100 	return;
1101 failed:
1102 	dlerrorack(q, mp, DL_CAPABILITY_REQ, dl_err, 0);
1103 }
1104 
1105 /*
1106  * DL_NOTIFY_REQ
1107  */
1108 static void
1109 proto_notify_req(dld_str_t *dsp, mblk_t *mp)
1110 {
1111 	dl_notify_req_t	*dlp = (dl_notify_req_t *)mp->b_rptr;
1112 	t_uscalar_t	dl_err;
1113 	queue_t		*q = dsp->ds_wq;
1114 	uint_t		note =
1115 	    DL_NOTE_PROMISC_ON_PHYS |
1116 	    DL_NOTE_PROMISC_OFF_PHYS |
1117 	    DL_NOTE_PHYS_ADDR |
1118 	    DL_NOTE_LINK_UP |
1119 	    DL_NOTE_LINK_DOWN |
1120 	    DL_NOTE_CAPAB_RENEG |
1121 	    DL_NOTE_SPEED;
1122 
1123 	if (MBLKL(mp) < sizeof (dl_notify_req_t)) {
1124 		dl_err = DL_BADPRIM;
1125 		goto failed;
1126 	}
1127 
1128 	if (dsp->ds_dlstate == DL_UNATTACHED ||
1129 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
1130 		dl_err = DL_OUTSTATE;
1131 		goto failed;
1132 	}
1133 
1134 	note &= ~(mac_no_notification(dsp->ds_mh));
1135 
1136 	/*
1137 	 * Cache the notifications that are being enabled.
1138 	 */
1139 	dsp->ds_notifications = dlp->dl_notifications & note;
1140 	/*
1141 	 * The ACK carries all notifications regardless of which set is
1142 	 * being enabled.
1143 	 */
1144 	dlnotifyack(q, mp, note);
1145 
1146 	/*
1147 	 * Generate DL_NOTIFY_IND messages for each enabled notification.
1148 	 */
1149 	if (dsp->ds_notifications != 0) {
1150 		dld_str_notify_ind(dsp);
1151 	}
1152 	return;
1153 failed:
1154 	dlerrorack(q, mp, DL_NOTIFY_REQ, dl_err, 0);
1155 }
1156 
1157 /*
1158  * DL_UINTDATA_REQ
1159  */
1160 void
1161 proto_unitdata_req(dld_str_t *dsp, mblk_t *mp)
1162 {
1163 	queue_t			*q = dsp->ds_wq;
1164 	dl_unitdata_req_t	*dlp = (dl_unitdata_req_t *)mp->b_rptr;
1165 	off_t			off;
1166 	size_t			len, size;
1167 	const uint8_t		*addr;
1168 	uint16_t		sap;
1169 	uint_t			addr_length;
1170 	mblk_t			*bp, *payload;
1171 	uint32_t		start, stuff, end, value, flags;
1172 	t_uscalar_t		dl_err;
1173 	uint_t			max_sdu;
1174 
1175 	if (MBLKL(mp) < sizeof (dl_unitdata_req_t) || mp->b_cont == NULL) {
1176 		dlerrorack(q, mp, DL_UNITDATA_REQ, DL_BADPRIM, 0);
1177 		return;
1178 	}
1179 
1180 	mutex_enter(&dsp->ds_lock);
1181 	if (dsp->ds_dlstate != DL_IDLE) {
1182 		mutex_exit(&dsp->ds_lock);
1183 		dlerrorack(q, mp, DL_UNITDATA_REQ, DL_OUTSTATE, 0);
1184 		return;
1185 	}
1186 	DLD_DATATHR_INC(dsp);
1187 	mutex_exit(&dsp->ds_lock);
1188 
1189 	addr_length = dsp->ds_mip->mi_addr_length;
1190 
1191 	off = dlp->dl_dest_addr_offset;
1192 	len = dlp->dl_dest_addr_length;
1193 
1194 	if (!MBLKIN(mp, off, len) || !IS_P2ALIGNED(off, sizeof (uint16_t))) {
1195 		dl_err = DL_BADPRIM;
1196 		goto failed;
1197 	}
1198 
1199 	if (len != addr_length + sizeof (uint16_t)) {
1200 		dl_err = DL_BADADDR;
1201 		goto failed;
1202 	}
1203 
1204 	addr = mp->b_rptr + off;
1205 	sap = *(uint16_t *)(mp->b_rptr + off + addr_length);
1206 
1207 	/*
1208 	 * Check the length of the packet and the block types.
1209 	 */
1210 	size = 0;
1211 	payload = mp->b_cont;
1212 	for (bp = payload; bp != NULL; bp = bp->b_cont) {
1213 		if (DB_TYPE(bp) != M_DATA)
1214 			goto baddata;
1215 
1216 		size += MBLKL(bp);
1217 	}
1218 
1219 	mac_sdu_get(dsp->ds_mh, NULL, &max_sdu);
1220 	if (size > max_sdu)
1221 		goto baddata;
1222 
1223 	/*
1224 	 * Build a packet header.
1225 	 */
1226 	if ((bp = dls_header(dsp, addr, sap, dlp->dl_priority.dl_max,
1227 	    &payload)) == NULL) {
1228 		dl_err = DL_BADADDR;
1229 		goto failed;
1230 	}
1231 
1232 	/*
1233 	 * We no longer need the M_PROTO header, so free it.
1234 	 */
1235 	freeb(mp);
1236 
1237 	/*
1238 	 * Transfer the checksum offload information if it is present.
1239 	 */
1240 	hcksum_retrieve(payload, NULL, NULL, &start, &stuff, &end, &value,
1241 	    &flags);
1242 	(void) hcksum_assoc(bp, NULL, NULL, start, stuff, end, value, flags, 0);
1243 
1244 	/*
1245 	 * Link the payload onto the new header.
1246 	 */
1247 	ASSERT(bp->b_cont == NULL);
1248 	bp->b_cont = payload;
1249 
1250 	/*
1251 	 * No lock can be held across modules and putnext()'s,
1252 	 * which can happen here with the call from DLD_TX().
1253 	 */
1254 	if (DLD_TX(dsp, bp, 0, 0) != NULL) {
1255 		/* flow-controlled */
1256 		DLD_SETQFULL(dsp);
1257 	}
1258 	DLD_DATATHR_DCR(dsp);
1259 	return;
1260 
1261 failed:
1262 	dlerrorack(q, mp, DL_UNITDATA_REQ, dl_err, 0);
1263 	DLD_DATATHR_DCR(dsp);
1264 	return;
1265 
1266 baddata:
1267 	dluderrorind(q, mp, (void *)addr, len, DL_BADDATA, 0);
1268 	DLD_DATATHR_DCR(dsp);
1269 }
1270 
1271 /*
1272  * DL_PASSIVE_REQ
1273  */
1274 static void
1275 proto_passive_req(dld_str_t *dsp, mblk_t *mp)
1276 {
1277 	t_uscalar_t dl_err;
1278 
1279 	/*
1280 	 * If we've already become active by issuing an active primitive,
1281 	 * then it's too late to try to become passive.
1282 	 */
1283 	if (dsp->ds_passivestate == DLD_ACTIVE) {
1284 		dl_err = DL_OUTSTATE;
1285 		goto failed;
1286 	}
1287 
1288 	if (MBLKL(mp) < sizeof (dl_passive_req_t)) {
1289 		dl_err = DL_BADPRIM;
1290 		goto failed;
1291 	}
1292 
1293 	dsp->ds_passivestate = DLD_PASSIVE;
1294 	dlokack(dsp->ds_wq, mp, DL_PASSIVE_REQ);
1295 	return;
1296 failed:
1297 	dlerrorack(dsp->ds_wq, mp, DL_PASSIVE_REQ, dl_err, 0);
1298 }
1299 
1300 
1301 /*
1302  * Catch-all handler.
1303  */
1304 static void
1305 proto_req(dld_str_t *dsp, mblk_t *mp)
1306 {
1307 	union DL_primitives	*dlp = (union DL_primitives *)mp->b_rptr;
1308 
1309 	dlerrorack(dsp->ds_wq, mp, dlp->dl_primitive, DL_UNSUPPORTED, 0);
1310 }
1311 
1312 static int
1313 dld_capab_perim(dld_str_t *dsp, void *data, uint_t flags)
1314 {
1315 	switch (flags) {
1316 	case DLD_ENABLE:
1317 		mac_perim_enter_by_mh(dsp->ds_mh, (mac_perim_handle_t *)data);
1318 		return (0);
1319 
1320 	case DLD_DISABLE:
1321 		mac_perim_exit((mac_perim_handle_t)data);
1322 		return (0);
1323 
1324 	case DLD_QUERY:
1325 		return (mac_perim_held(dsp->ds_mh));
1326 	}
1327 	return (0);
1328 }
1329 
1330 static int
1331 dld_capab_direct(dld_str_t *dsp, void *data, uint_t flags)
1332 {
1333 	dld_capab_direct_t	*direct = data;
1334 
1335 	ASSERT(MAC_PERIM_HELD(dsp->ds_mh));
1336 
1337 	switch (flags) {
1338 	case DLD_ENABLE:
1339 		dls_rx_set(dsp, (dls_rx_t)direct->di_rx_cf,
1340 		    direct->di_rx_ch);
1341 
1342 		direct->di_tx_df = (uintptr_t)str_mdata_fastpath_put;
1343 		direct->di_tx_dh = dsp;
1344 		direct->di_tx_cb_df = (uintptr_t)mac_client_tx_notify;
1345 		direct->di_tx_cb_dh = dsp->ds_mch;
1346 		direct->di_tx_fctl_df = (uintptr_t)mac_tx_is_flow_blocked;
1347 		direct->di_tx_fctl_dh = dsp->ds_mch;
1348 
1349 		dsp->ds_direct = B_TRUE;
1350 
1351 		return (0);
1352 
1353 	case DLD_DISABLE:
1354 		dls_rx_set(dsp, (dsp->ds_mode == DLD_FASTPATH) ?
1355 		    dld_str_rx_fastpath : dld_str_rx_unitdata, (void *)dsp);
1356 		dsp->ds_direct = B_FALSE;
1357 
1358 		return (0);
1359 	}
1360 	return (ENOTSUP);
1361 }
1362 
1363 /*
1364  * dld_capab_poll_enable()
1365  *
1366  * This function is misnamed. All polling  and fanouts are run out of the
1367  * lower mac (in case of VNIC and the only mac in case of NICs). The
1368  * availability of Rx ring and promiscous mode is all taken care between
1369  * the soft ring set (mac_srs), the Rx ring, and S/W classifier. Any
1370  * fanout necessary is done by the soft rings that are part of the
1371  * mac_srs (by default mac_srs sends the packets up via a TCP and
1372  * non TCP soft ring).
1373  *
1374  * The mac_srs (or its associated soft rings) always store the ill_rx_ring
1375  * (the cookie returned when they registered with IP during plumb) as their
1376  * 2nd argument which is passed up as mac_resource_handle_t. The upcall
1377  * function and 1st argument is what the caller registered when they
1378  * called mac_rx_classify_flow_add() to register the flow. For VNIC,
1379  * the function is vnic_rx and argument is vnic_t. For regular NIC
1380  * case, it mac_rx_default and mac_handle_t. As explained above, the
1381  * mac_srs (or its soft ring) will add the ill_rx_ring (mac_resource_handle_t)
1382  * from its stored 2nd argument.
1383  */
1384 static int
1385 dld_capab_poll_enable(dld_str_t *dsp, dld_capab_poll_t *poll)
1386 {
1387 	if (dsp->ds_polling)
1388 		return (EINVAL);
1389 
1390 	if ((dld_opt & DLD_OPT_NO_POLL) != 0 || dsp->ds_mode == DLD_RAW)
1391 		return (ENOTSUP);
1392 
1393 	/*
1394 	 * Enable client polling if and only if DLS bypass is possible.
1395 	 * Special cases like VLANs need DLS processing in the Rx data path.
1396 	 * In such a case we can neither allow the client (IP) to directly
1397 	 * poll the softring (since DLS processing hasn't been done) nor can
1398 	 * we allow DLS bypass.
1399 	 */
1400 	if (!mac_rx_bypass_set(dsp->ds_mch, dsp->ds_rx, dsp->ds_rx_arg))
1401 		return (ENOTSUP);
1402 
1403 	/*
1404 	 * Register soft ring resources. This will come in handy later if
1405 	 * the user decides to modify CPU bindings to use more CPUs for the
1406 	 * device in which case we will switch to fanout using soft rings.
1407 	 */
1408 	mac_resource_set_common(dsp->ds_mch,
1409 	    (mac_resource_add_t)poll->poll_ring_add_cf,
1410 	    (mac_resource_remove_t)poll->poll_ring_remove_cf,
1411 	    (mac_resource_quiesce_t)poll->poll_ring_quiesce_cf,
1412 	    (mac_resource_restart_t)poll->poll_ring_restart_cf,
1413 	    (mac_resource_bind_t)poll->poll_ring_bind_cf,
1414 	    poll->poll_ring_ch);
1415 
1416 	mac_client_poll_enable(dsp->ds_mch);
1417 
1418 	dsp->ds_polling = B_TRUE;
1419 	return (0);
1420 }
1421 
1422 /* ARGSUSED */
1423 static int
1424 dld_capab_poll_disable(dld_str_t *dsp, dld_capab_poll_t *poll)
1425 {
1426 	if (!dsp->ds_polling)
1427 		return (EINVAL);
1428 
1429 	mac_client_poll_disable(dsp->ds_mch);
1430 	mac_resource_set(dsp->ds_mch, NULL, NULL);
1431 
1432 	dsp->ds_polling = B_FALSE;
1433 	return (0);
1434 }
1435 
1436 static int
1437 dld_capab_poll(dld_str_t *dsp, void *data, uint_t flags)
1438 {
1439 	dld_capab_poll_t	*poll = data;
1440 
1441 	ASSERT(MAC_PERIM_HELD(dsp->ds_mh));
1442 
1443 	switch (flags) {
1444 	case DLD_ENABLE:
1445 		return (dld_capab_poll_enable(dsp, poll));
1446 	case DLD_DISABLE:
1447 		return (dld_capab_poll_disable(dsp, poll));
1448 	}
1449 	return (ENOTSUP);
1450 }
1451 
1452 static int
1453 dld_capab_lso(dld_str_t *dsp, void *data, uint_t flags)
1454 {
1455 	dld_capab_lso_t		*lso = data;
1456 
1457 	ASSERT(MAC_PERIM_HELD(dsp->ds_mh));
1458 
1459 	switch (flags) {
1460 	case DLD_ENABLE: {
1461 		mac_capab_lso_t		mac_lso;
1462 
1463 		/*
1464 		 * Check if LSO is supported on this MAC & enable LSO
1465 		 * accordingly.
1466 		 */
1467 		if (mac_capab_get(dsp->ds_mh, MAC_CAPAB_LSO, &mac_lso)) {
1468 			lso->lso_max = mac_lso.lso_basic_tcp_ipv4.lso_max;
1469 			lso->lso_flags = 0;
1470 			/* translate the flag for mac clients */
1471 			if ((mac_lso.lso_flags & LSO_TX_BASIC_TCP_IPV4) != 0)
1472 				lso->lso_flags |= DLD_LSO_TX_BASIC_TCP_IPV4;
1473 			dsp->ds_lso = B_TRUE;
1474 			dsp->ds_lso_max = lso->lso_max;
1475 		} else {
1476 			dsp->ds_lso = B_FALSE;
1477 			dsp->ds_lso_max = 0;
1478 			return (ENOTSUP);
1479 		}
1480 		return (0);
1481 	}
1482 	case DLD_DISABLE: {
1483 		dsp->ds_lso = B_FALSE;
1484 		dsp->ds_lso_max = 0;
1485 		return (0);
1486 	}
1487 	}
1488 	return (ENOTSUP);
1489 }
1490 
1491 static int
1492 dld_capab(dld_str_t *dsp, uint_t type, void *data, uint_t flags)
1493 {
1494 	int	err;
1495 
1496 	/*
1497 	 * Don't enable direct callback capabilities unless the caller is
1498 	 * the IP client. When a module is inserted in a stream (_I_INSERT)
1499 	 * the stack initiates capability disable, but due to races, the
1500 	 * module insertion may complete before the capability disable
1501 	 * completes. So we limit the check to DLD_ENABLE case.
1502 	 */
1503 	if ((flags == DLD_ENABLE && type != DLD_CAPAB_PERIM) &&
1504 	    (dsp->ds_sap != ETHERTYPE_IP || !check_ip_above(dsp->ds_rq))) {
1505 		return (ENOTSUP);
1506 	}
1507 
1508 	switch (type) {
1509 	case DLD_CAPAB_DIRECT:
1510 		err = dld_capab_direct(dsp, data, flags);
1511 		break;
1512 
1513 	case DLD_CAPAB_POLL:
1514 		err =  dld_capab_poll(dsp, data, flags);
1515 		break;
1516 
1517 	case DLD_CAPAB_PERIM:
1518 		err = dld_capab_perim(dsp, data, flags);
1519 		break;
1520 
1521 	case DLD_CAPAB_LSO:
1522 		err = dld_capab_lso(dsp, data, flags);
1523 		break;
1524 
1525 	default:
1526 		err = ENOTSUP;
1527 		break;
1528 	}
1529 
1530 	return (err);
1531 }
1532 
1533 /*
1534  * DL_CAPABILITY_ACK/DL_ERROR_ACK
1535  */
1536 static void
1537 proto_capability_advertise(dld_str_t *dsp, mblk_t *mp)
1538 {
1539 	dl_capability_ack_t	*dlap;
1540 	dl_capability_sub_t	*dlsp;
1541 	size_t			subsize;
1542 	dl_capab_dld_t		dld;
1543 	dl_capab_hcksum_t	hcksum;
1544 	dl_capab_zerocopy_t	zcopy;
1545 	uint8_t			*ptr;
1546 	queue_t			*q = dsp->ds_wq;
1547 	mblk_t			*mp1;
1548 	boolean_t		is_vlan;
1549 	boolean_t		hcksum_capable = B_FALSE;
1550 	boolean_t		zcopy_capable = B_FALSE;
1551 	boolean_t		dld_capable = B_FALSE;
1552 
1553 	/*
1554 	 * Initially assume no capabilities.
1555 	 */
1556 	subsize = 0;
1557 	is_vlan = (mac_client_vid(dsp->ds_mch) != VLAN_ID_NONE);
1558 
1559 	/*
1560 	 * Check if checksum offload is supported on this MAC.  Don't
1561 	 * advertise DL_CAPAB_HCKSUM if the underlying MAC is VLAN incapable,
1562 	 * since it might not be able to do the hardware checksum offload
1563 	 * with the correct offset.
1564 	 */
1565 	bzero(&hcksum, sizeof (dl_capab_hcksum_t));
1566 	if ((!is_vlan || (!mac_capab_get(dsp->ds_mh, MAC_CAPAB_NO_NATIVEVLAN,
1567 	    NULL))) && mac_capab_get(dsp->ds_mh, MAC_CAPAB_HCKSUM,
1568 	    &hcksum.hcksum_txflags)) {
1569 		if (hcksum.hcksum_txflags != 0) {
1570 			hcksum_capable = B_TRUE;
1571 			subsize += sizeof (dl_capability_sub_t) +
1572 			    sizeof (dl_capab_hcksum_t);
1573 		}
1574 	}
1575 
1576 	/*
1577 	 * Check if zerocopy is supported on this interface.
1578 	 * If advertising DL_CAPAB_ZEROCOPY has not been explicitly disabled
1579 	 * then reserve space for that capability.
1580 	 */
1581 	if (!mac_capab_get(dsp->ds_mh, MAC_CAPAB_NO_ZCOPY, NULL) &&
1582 	    !(dld_opt & DLD_OPT_NO_ZEROCOPY)) {
1583 		zcopy_capable = B_TRUE;
1584 		subsize += sizeof (dl_capability_sub_t) +
1585 		    sizeof (dl_capab_zerocopy_t);
1586 	}
1587 
1588 	/*
1589 	 * Direct capability negotiation interface between IP and DLD
1590 	 */
1591 	if (dsp->ds_sap == ETHERTYPE_IP && check_ip_above(dsp->ds_rq)) {
1592 		dld_capable = B_TRUE;
1593 		subsize += sizeof (dl_capability_sub_t) +
1594 		    sizeof (dl_capab_dld_t);
1595 	}
1596 
1597 	/*
1598 	 * If there are no capabilities to advertise or if we
1599 	 * can't allocate a response, send a DL_ERROR_ACK.
1600 	 */
1601 	if ((mp1 = reallocb(mp,
1602 	    sizeof (dl_capability_ack_t) + subsize, 0)) == NULL) {
1603 		dlerrorack(q, mp, DL_CAPABILITY_REQ, DL_NOTSUPPORTED, 0);
1604 		return;
1605 	}
1606 
1607 	mp = mp1;
1608 	DB_TYPE(mp) = M_PROTO;
1609 	mp->b_wptr = mp->b_rptr + sizeof (dl_capability_ack_t) + subsize;
1610 	bzero(mp->b_rptr, MBLKL(mp));
1611 	dlap = (dl_capability_ack_t *)mp->b_rptr;
1612 	dlap->dl_primitive = DL_CAPABILITY_ACK;
1613 	dlap->dl_sub_offset = sizeof (dl_capability_ack_t);
1614 	dlap->dl_sub_length = subsize;
1615 	ptr = (uint8_t *)&dlap[1];
1616 
1617 	/*
1618 	 * TCP/IP checksum offload.
1619 	 */
1620 	if (hcksum_capable) {
1621 		dlsp = (dl_capability_sub_t *)ptr;
1622 
1623 		dlsp->dl_cap = DL_CAPAB_HCKSUM;
1624 		dlsp->dl_length = sizeof (dl_capab_hcksum_t);
1625 		ptr += sizeof (dl_capability_sub_t);
1626 
1627 		hcksum.hcksum_version = HCKSUM_VERSION_1;
1628 		dlcapabsetqid(&(hcksum.hcksum_mid), dsp->ds_rq);
1629 		bcopy(&hcksum, ptr, sizeof (dl_capab_hcksum_t));
1630 		ptr += sizeof (dl_capab_hcksum_t);
1631 	}
1632 
1633 	/*
1634 	 * Zero copy
1635 	 */
1636 	if (zcopy_capable) {
1637 		dlsp = (dl_capability_sub_t *)ptr;
1638 
1639 		dlsp->dl_cap = DL_CAPAB_ZEROCOPY;
1640 		dlsp->dl_length = sizeof (dl_capab_zerocopy_t);
1641 		ptr += sizeof (dl_capability_sub_t);
1642 
1643 		bzero(&zcopy, sizeof (dl_capab_zerocopy_t));
1644 		zcopy.zerocopy_version = ZEROCOPY_VERSION_1;
1645 		zcopy.zerocopy_flags = DL_CAPAB_VMSAFE_MEM;
1646 
1647 		dlcapabsetqid(&(zcopy.zerocopy_mid), dsp->ds_rq);
1648 		bcopy(&zcopy, ptr, sizeof (dl_capab_zerocopy_t));
1649 		ptr += sizeof (dl_capab_zerocopy_t);
1650 	}
1651 
1652 	/*
1653 	 * Direct capability negotiation interface between IP and DLD.
1654 	 * Refer to dld.h for details.
1655 	 */
1656 	if (dld_capable) {
1657 		dlsp = (dl_capability_sub_t *)ptr;
1658 		dlsp->dl_cap = DL_CAPAB_DLD;
1659 		dlsp->dl_length = sizeof (dl_capab_dld_t);
1660 		ptr += sizeof (dl_capability_sub_t);
1661 
1662 		bzero(&dld, sizeof (dl_capab_dld_t));
1663 		dld.dld_version = DLD_CURRENT_VERSION;
1664 		dld.dld_capab = (uintptr_t)dld_capab;
1665 		dld.dld_capab_handle = (uintptr_t)dsp;
1666 
1667 		dlcapabsetqid(&(dld.dld_mid), dsp->ds_rq);
1668 		bcopy(&dld, ptr, sizeof (dl_capab_dld_t));
1669 		ptr += sizeof (dl_capab_dld_t);
1670 	}
1671 
1672 	ASSERT(ptr == mp->b_rptr + sizeof (dl_capability_ack_t) + subsize);
1673 	qreply(q, mp);
1674 }
1675 
1676 /*
1677  * Disable any enabled capabilities.
1678  */
1679 void
1680 dld_capabilities_disable(dld_str_t *dsp)
1681 {
1682 	if (dsp->ds_polling)
1683 		(void) dld_capab_poll_disable(dsp, NULL);
1684 }
1685