xref: /illumos-gate/usr/src/uts/common/io/dld/dld_proto.c (revision 7a15b0ec33c685e4e6b096454b077a52604acf9b)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright 2012, Nexenta Systems, Inc. All rights reserved.
24  * Copyright (c) 2018, Joyent, Inc.
25  */
26 
27 /*
28  * Data-Link Driver
29  */
30 #include <sys/sysmacros.h>
31 #include <sys/strsubr.h>
32 #include <sys/strsun.h>
33 #include <sys/vlan.h>
34 #include <sys/dld_impl.h>
35 #include <sys/mac_client.h>
36 #include <sys/mac_client_impl.h>
37 #include <sys/mac_client_priv.h>
38 
39 typedef void proto_reqfunc_t(dld_str_t *, mblk_t *);
40 
41 static proto_reqfunc_t proto_info_req, proto_attach_req, proto_detach_req,
42     proto_bind_req, proto_unbind_req, proto_promiscon_req, proto_promiscoff_req,
43     proto_enabmulti_req, proto_disabmulti_req, proto_physaddr_req,
44     proto_setphysaddr_req, proto_udqos_req, proto_req, proto_capability_req,
45     proto_notify_req, proto_passive_req;
46 
47 static void proto_capability_advertise(dld_str_t *, mblk_t *);
48 static int dld_capab_poll_disable(dld_str_t *, dld_capab_poll_t *);
49 static boolean_t check_mod_above(queue_t *, const char *);
50 
/*
 * True when the given DLPI state is one of the transient "pending"
 * states (attach, detach, bind or unbind in progress).  Note that the
 * argument may be evaluated more than once.
 */
#define	DL_ACK_PENDING(state)				\
	(((state) == DL_ATTACH_PENDING) ||		\
	((state) == DL_DETACH_PENDING) ||		\
	((state) == DL_BIND_PENDING) ||			\
	((state) == DL_UNBIND_PENDING))
56 
57 /*
58  * Process a DLPI protocol message.
59  * The primitives DL_BIND_REQ, DL_ENABMULTI_REQ, DL_PROMISCON_REQ,
60  * DL_SET_PHYS_ADDR_REQ put the data link below our dld_str_t into an
61  * 'active' state. The primitive DL_PASSIVE_REQ marks our dld_str_t
62  * as 'passive' and forbids it from being subsequently made 'active'
63  * by the above primitives.
64  */
65 void
66 dld_proto(dld_str_t *dsp, mblk_t *mp)
67 {
68 	t_uscalar_t		prim;
69 
70 	if (MBLKL(mp) < sizeof (t_uscalar_t)) {
71 		freemsg(mp);
72 		return;
73 	}
74 	prim = ((union DL_primitives *)mp->b_rptr)->dl_primitive;
75 
76 	switch (prim) {
77 	case DL_INFO_REQ:
78 		proto_info_req(dsp, mp);
79 		break;
80 	case DL_BIND_REQ:
81 		proto_bind_req(dsp, mp);
82 		break;
83 	case DL_UNBIND_REQ:
84 		proto_unbind_req(dsp, mp);
85 		break;
86 	case DL_UNITDATA_REQ:
87 		proto_unitdata_req(dsp, mp);
88 		break;
89 	case DL_UDQOS_REQ:
90 		proto_udqos_req(dsp, mp);
91 		break;
92 	case DL_ATTACH_REQ:
93 		proto_attach_req(dsp, mp);
94 		break;
95 	case DL_DETACH_REQ:
96 		proto_detach_req(dsp, mp);
97 		break;
98 	case DL_ENABMULTI_REQ:
99 		proto_enabmulti_req(dsp, mp);
100 		break;
101 	case DL_DISABMULTI_REQ:
102 		proto_disabmulti_req(dsp, mp);
103 		break;
104 	case DL_PROMISCON_REQ:
105 		proto_promiscon_req(dsp, mp);
106 		break;
107 	case DL_PROMISCOFF_REQ:
108 		proto_promiscoff_req(dsp, mp);
109 		break;
110 	case DL_PHYS_ADDR_REQ:
111 		proto_physaddr_req(dsp, mp);
112 		break;
113 	case DL_SET_PHYS_ADDR_REQ:
114 		proto_setphysaddr_req(dsp, mp);
115 		break;
116 	case DL_NOTIFY_REQ:
117 		proto_notify_req(dsp, mp);
118 		break;
119 	case DL_CAPABILITY_REQ:
120 		proto_capability_req(dsp, mp);
121 		break;
122 	case DL_PASSIVE_REQ:
123 		proto_passive_req(dsp, mp);
124 		break;
125 	default:
126 		proto_req(dsp, mp);
127 		break;
128 	}
129 }
130 
/*
 * Arithmetic negation of x.  The whole expansion is parenthesized so the
 * macro behaves as a single operand wherever it is used.
 */
#define	NEG(x)	(-(x))
132 typedef struct dl_info_ack_wrapper {
133 	dl_info_ack_t		dl_info;
134 	uint8_t			dl_addr[MAXMACADDRLEN + sizeof (uint16_t)];
135 	uint8_t			dl_brdcst_addr[MAXMACADDRLEN];
136 	dl_qos_cl_range1_t	dl_qos_range1;
137 	dl_qos_cl_sel1_t	dl_qos_sel1;
138 } dl_info_ack_wrapper_t;
139 
140 /*
141  * DL_INFO_REQ
142  */
143 static void
144 proto_info_req(dld_str_t *dsp, mblk_t *mp)
145 {
146 	dl_info_ack_wrapper_t	*dlwp;
147 	dl_info_ack_t		*dlp;
148 	dl_qos_cl_sel1_t	*selp;
149 	dl_qos_cl_range1_t	*rangep;
150 	uint8_t			*addr;
151 	uint8_t			*brdcst_addr;
152 	uint_t			addr_length;
153 	uint_t			sap_length;
154 	mac_info_t		minfo;
155 	mac_info_t		*minfop;
156 	queue_t			*q = dsp->ds_wq;
157 
158 	/*
159 	 * Swap the request message for one large enough to contain the
160 	 * wrapper structure defined above.
161 	 */
162 	if ((mp = mexchange(q, mp, sizeof (dl_info_ack_wrapper_t),
163 	    M_PCPROTO, 0)) == NULL)
164 		return;
165 
166 	bzero(mp->b_rptr, sizeof (dl_info_ack_wrapper_t));
167 	dlwp = (dl_info_ack_wrapper_t *)mp->b_rptr;
168 
169 	dlp = &(dlwp->dl_info);
170 	ASSERT(dlp == (dl_info_ack_t *)mp->b_rptr);
171 
172 	dlp->dl_primitive = DL_INFO_ACK;
173 
174 	/*
175 	 * Set up the sub-structure pointers.
176 	 */
177 	addr = dlwp->dl_addr;
178 	brdcst_addr = dlwp->dl_brdcst_addr;
179 	rangep = &(dlwp->dl_qos_range1);
180 	selp = &(dlwp->dl_qos_sel1);
181 
182 	/*
183 	 * This driver supports only version 2 connectionless DLPI provider
184 	 * nodes.
185 	 */
186 	dlp->dl_service_mode = DL_CLDLS;
187 	dlp->dl_version = DL_VERSION_2;
188 
189 	/*
190 	 * Set the style of the provider
191 	 */
192 	dlp->dl_provider_style = dsp->ds_style;
193 	ASSERT(dlp->dl_provider_style == DL_STYLE1 ||
194 	    dlp->dl_provider_style == DL_STYLE2);
195 
196 	/*
197 	 * Set the current DLPI state.
198 	 */
199 	dlp->dl_current_state = dsp->ds_dlstate;
200 
201 	/*
202 	 * Gratuitously set the media type. This is to deal with modules
203 	 * that assume the media type is known prior to DL_ATTACH_REQ
204 	 * being completed.
205 	 */
206 	dlp->dl_mac_type = DL_ETHER;
207 
208 	/*
209 	 * If the stream is not at least attached we try to retrieve the
210 	 * mac_info using mac_info_get()
211 	 */
212 	if (dsp->ds_dlstate == DL_UNATTACHED ||
213 	    dsp->ds_dlstate == DL_ATTACH_PENDING ||
214 	    dsp->ds_dlstate == DL_DETACH_PENDING) {
215 		if (!mac_info_get(ddi_major_to_name(dsp->ds_major), &minfo)) {
216 			/*
217 			 * Cannot find mac_info. giving up.
218 			 */
219 			goto done;
220 		}
221 		minfop = &minfo;
222 	} else {
223 		minfop = (mac_info_t *)dsp->ds_mip;
224 		/* We can only get the sdu if we're attached. */
225 		mac_sdu_get(dsp->ds_mh, &dlp->dl_min_sdu, &dlp->dl_max_sdu);
226 	}
227 
228 	/*
229 	 * Set the media type (properly this time).
230 	 */
231 	if (dsp->ds_native)
232 		dlp->dl_mac_type = minfop->mi_nativemedia;
233 	else
234 		dlp->dl_mac_type = minfop->mi_media;
235 
236 	/*
237 	 * Set the DLSAP length. We only support 16 bit values and they
238 	 * appear after the MAC address portion of DLSAP addresses.
239 	 */
240 	sap_length = sizeof (uint16_t);
241 	dlp->dl_sap_length = NEG(sap_length);
242 
243 	addr_length = minfop->mi_addr_length;
244 
245 	/*
246 	 * Copy in the media broadcast address.
247 	 */
248 	if (minfop->mi_brdcst_addr != NULL) {
249 		dlp->dl_brdcst_addr_offset =
250 		    (uintptr_t)brdcst_addr - (uintptr_t)dlp;
251 		bcopy(minfop->mi_brdcst_addr, brdcst_addr, addr_length);
252 		dlp->dl_brdcst_addr_length = addr_length;
253 	}
254 
255 	/* Only VLAN links and links that have a normal tag mode support QOS. */
256 	if ((dsp->ds_mch != NULL &&
257 	    mac_client_vid(dsp->ds_mch) != VLAN_ID_NONE) ||
258 	    (dsp->ds_dlp != NULL &&
259 	    dsp->ds_dlp->dl_tagmode == LINK_TAGMODE_NORMAL)) {
260 		dlp->dl_qos_range_offset = (uintptr_t)rangep - (uintptr_t)dlp;
261 		dlp->dl_qos_range_length = sizeof (dl_qos_cl_range1_t);
262 
263 		rangep->dl_qos_type = DL_QOS_CL_RANGE1;
264 		rangep->dl_trans_delay.dl_target_value = DL_UNKNOWN;
265 		rangep->dl_trans_delay.dl_accept_value = DL_UNKNOWN;
266 		rangep->dl_protection.dl_min = DL_UNKNOWN;
267 		rangep->dl_protection.dl_max = DL_UNKNOWN;
268 		rangep->dl_residual_error = DL_UNKNOWN;
269 
270 		/*
271 		 * Specify the supported range of priorities.
272 		 */
273 		rangep->dl_priority.dl_min = 0;
274 		rangep->dl_priority.dl_max = (1 << VLAN_PRI_SIZE) - 1;
275 
276 		dlp->dl_qos_offset = (uintptr_t)selp - (uintptr_t)dlp;
277 		dlp->dl_qos_length = sizeof (dl_qos_cl_sel1_t);
278 
279 		selp->dl_qos_type = DL_QOS_CL_SEL1;
280 		selp->dl_trans_delay = DL_UNKNOWN;
281 		selp->dl_protection = DL_UNKNOWN;
282 		selp->dl_residual_error = DL_UNKNOWN;
283 
284 		/*
285 		 * Specify the current priority (which can be changed by
286 		 * the DL_UDQOS_REQ primitive).
287 		 */
288 		selp->dl_priority = dsp->ds_pri;
289 	}
290 
291 	dlp->dl_addr_length = addr_length + sizeof (uint16_t);
292 	if (dsp->ds_dlstate == DL_IDLE) {
293 		/*
294 		 * The stream is bound. Therefore we can formulate a valid
295 		 * DLSAP address.
296 		 */
297 		dlp->dl_addr_offset = (uintptr_t)addr - (uintptr_t)dlp;
298 		if (addr_length > 0)
299 			mac_unicast_primary_get(dsp->ds_mh, addr);
300 
301 		*(uint16_t *)(addr + addr_length) = dsp->ds_sap;
302 	}
303 
304 done:
305 	IMPLY(dlp->dl_qos_offset != 0, dlp->dl_qos_length != 0);
306 	IMPLY(dlp->dl_qos_range_offset != 0,
307 	    dlp->dl_qos_range_length != 0);
308 	IMPLY(dlp->dl_addr_offset != 0, dlp->dl_addr_length != 0);
309 	IMPLY(dlp->dl_brdcst_addr_offset != 0,
310 	    dlp->dl_brdcst_addr_length != 0);
311 
312 	qreply(q, mp);
313 }
314 
315 /*
316  * DL_ATTACH_REQ
317  */
318 static void
319 proto_attach_req(dld_str_t *dsp, mblk_t *mp)
320 {
321 	dl_attach_req_t	*dlp = (dl_attach_req_t *)mp->b_rptr;
322 	int		err = 0;
323 	t_uscalar_t	dl_err;
324 	queue_t		*q = dsp->ds_wq;
325 
326 	if (MBLKL(mp) < sizeof (dl_attach_req_t) ||
327 	    dlp->dl_ppa < 0 || dsp->ds_style == DL_STYLE1) {
328 		dl_err = DL_BADPRIM;
329 		goto failed;
330 	}
331 
332 	if (dsp->ds_dlstate != DL_UNATTACHED) {
333 		dl_err = DL_OUTSTATE;
334 		goto failed;
335 	}
336 
337 	dsp->ds_dlstate = DL_ATTACH_PENDING;
338 
339 	err = dld_str_attach(dsp, dlp->dl_ppa);
340 	if (err != 0) {
341 		switch (err) {
342 		case ENOENT:
343 			dl_err = DL_BADPPA;
344 			err = 0;
345 			break;
346 		default:
347 			dl_err = DL_SYSERR;
348 			break;
349 		}
350 		dsp->ds_dlstate = DL_UNATTACHED;
351 		goto failed;
352 	}
353 	ASSERT(dsp->ds_dlstate == DL_UNBOUND);
354 	dlokack(q, mp, DL_ATTACH_REQ);
355 	return;
356 
357 failed:
358 	dlerrorack(q, mp, DL_ATTACH_REQ, dl_err, (t_uscalar_t)err);
359 }
360 
361 /*
362  * DL_DETACH_REQ
363  */
364 static void
365 proto_detach_req(dld_str_t *dsp, mblk_t *mp)
366 {
367 	queue_t		*q = dsp->ds_wq;
368 	t_uscalar_t	dl_err;
369 
370 	if (MBLKL(mp) < sizeof (dl_detach_req_t)) {
371 		dl_err = DL_BADPRIM;
372 		goto failed;
373 	}
374 
375 	if (dsp->ds_dlstate != DL_UNBOUND) {
376 		dl_err = DL_OUTSTATE;
377 		goto failed;
378 	}
379 
380 	if (dsp->ds_style == DL_STYLE1) {
381 		dl_err = DL_BADPRIM;
382 		goto failed;
383 	}
384 
385 	ASSERT(dsp->ds_datathr_cnt == 0);
386 	dsp->ds_dlstate = DL_DETACH_PENDING;
387 
388 	dld_str_detach(dsp);
389 	dlokack(dsp->ds_wq, mp, DL_DETACH_REQ);
390 	return;
391 
392 failed:
393 	dlerrorack(q, mp, DL_DETACH_REQ, dl_err, 0);
394 }
395 
396 /*
397  * DL_BIND_REQ
398  */
399 static void
400 proto_bind_req(dld_str_t *dsp, mblk_t *mp)
401 {
402 	dl_bind_req_t	*dlp = (dl_bind_req_t *)mp->b_rptr;
403 	int		err = 0;
404 	uint8_t		dlsap_addr[MAXMACADDRLEN + sizeof (uint16_t)];
405 	uint_t		dlsap_addr_length;
406 	t_uscalar_t	dl_err;
407 	t_scalar_t	sap;
408 	queue_t		*q = dsp->ds_wq;
409 	mac_perim_handle_t	mph;
410 	void		*mdip;
411 	int32_t		intr_cpu;
412 
413 	if (MBLKL(mp) < sizeof (dl_bind_req_t)) {
414 		dl_err = DL_BADPRIM;
415 		goto failed;
416 	}
417 
418 	if (dlp->dl_xidtest_flg != 0) {
419 		dl_err = DL_NOAUTO;
420 		goto failed;
421 	}
422 
423 	if (dlp->dl_service_mode != DL_CLDLS) {
424 		dl_err = DL_UNSUPPORTED;
425 		goto failed;
426 	}
427 
428 	if (dsp->ds_dlstate != DL_UNBOUND) {
429 		dl_err = DL_OUTSTATE;
430 		goto failed;
431 	}
432 
433 	mac_perim_enter_by_mh(dsp->ds_mh, &mph);
434 
435 	if ((err = dls_active_set(dsp)) != 0) {
436 		dl_err = DL_SYSERR;
437 		goto failed2;
438 	}
439 
440 	dsp->ds_dlstate = DL_BIND_PENDING;
441 	/*
442 	 * Set the receive callback.
443 	 */
444 	dls_rx_set(dsp, (dsp->ds_mode == DLD_RAW) ?
445 	    dld_str_rx_raw : dld_str_rx_unitdata, dsp);
446 
447 	/*
448 	 * Bind the channel such that it can receive packets.
449 	 */
450 	sap = dlp->dl_sap;
451 	dsp->ds_nonip = !check_mod_above(dsp->ds_rq, "ip") &&
452 	    !check_mod_above(dsp->ds_rq, "arp");
453 
454 	err = dls_bind(dsp, sap);
455 	if (err != 0) {
456 		switch (err) {
457 		case EINVAL:
458 			dl_err = DL_BADADDR;
459 			err = 0;
460 			break;
461 		default:
462 			dl_err = DL_SYSERR;
463 			break;
464 		}
465 
466 		dsp->ds_dlstate = DL_UNBOUND;
467 		dls_active_clear(dsp, B_FALSE);
468 		goto failed2;
469 	}
470 
471 	intr_cpu = mac_client_intr_cpu(dsp->ds_mch);
472 	mdip = mac_get_devinfo(dsp->ds_mh);
473 	mac_perim_exit(mph);
474 
475 	/*
476 	 * We do this after we get out of the perim to avoid deadlocks
477 	 * etc. since part of mac_client_retarget_intr is to walk the
478 	 * device tree in order to find and retarget the interrupts.
479 	 */
480 	if (intr_cpu != -1)
481 		mac_client_set_intr_cpu(mdip, dsp->ds_mch, intr_cpu);
482 
483 	/*
484 	 * Copy in MAC address.
485 	 */
486 	dlsap_addr_length = dsp->ds_mip->mi_addr_length;
487 	mac_unicast_primary_get(dsp->ds_mh, dlsap_addr);
488 
489 	/*
490 	 * Copy in the SAP.
491 	 */
492 	*(uint16_t *)(dlsap_addr + dlsap_addr_length) = sap;
493 	dlsap_addr_length += sizeof (uint16_t);
494 
495 	dsp->ds_dlstate = DL_IDLE;
496 	dlbindack(q, mp, sap, dlsap_addr, dlsap_addr_length, 0, 0);
497 	return;
498 
499 failed2:
500 	mac_perim_exit(mph);
501 failed:
502 	dlerrorack(q, mp, DL_BIND_REQ, dl_err, (t_uscalar_t)err);
503 }
504 
505 /*
506  * DL_UNBIND_REQ
507  */
508 static void
509 proto_unbind_req(dld_str_t *dsp, mblk_t *mp)
510 {
511 	queue_t		*q = dsp->ds_wq;
512 	t_uscalar_t	dl_err;
513 	mac_perim_handle_t	mph;
514 
515 	if (MBLKL(mp) < sizeof (dl_unbind_req_t)) {
516 		dl_err = DL_BADPRIM;
517 		goto failed;
518 	}
519 
520 	if (dsp->ds_dlstate != DL_IDLE) {
521 		dl_err = DL_OUTSTATE;
522 		goto failed;
523 	}
524 
525 	mutex_enter(&dsp->ds_lock);
526 	while (dsp->ds_datathr_cnt != 0)
527 		cv_wait(&dsp->ds_datathr_cv, &dsp->ds_lock);
528 
529 	dsp->ds_dlstate = DL_UNBIND_PENDING;
530 	mutex_exit(&dsp->ds_lock);
531 
532 	mac_perim_enter_by_mh(dsp->ds_mh, &mph);
533 	/*
534 	 * Unbind the channel to stop packets being received.
535 	 */
536 	dls_unbind(dsp);
537 
538 	/*
539 	 * Disable polling mode, if it is enabled.
540 	 */
541 	(void) dld_capab_poll_disable(dsp, NULL);
542 
543 	/*
544 	 * Clear LSO flags.
545 	 */
546 	dsp->ds_lso = B_FALSE;
547 	dsp->ds_lso_max = 0;
548 
549 	/*
550 	 * Clear the receive callback.
551 	 */
552 	dls_rx_set(dsp, NULL, NULL);
553 	dsp->ds_direct = B_FALSE;
554 
555 	/*
556 	 * Set the mode back to the default (unitdata).
557 	 */
558 	dsp->ds_mode = DLD_UNITDATA;
559 	dsp->ds_dlstate = DL_UNBOUND;
560 
561 	dls_active_clear(dsp, B_FALSE);
562 	mac_perim_exit(mph);
563 	dlokack(dsp->ds_wq, mp, DL_UNBIND_REQ);
564 	return;
565 failed:
566 	dlerrorack(q, mp, DL_UNBIND_REQ, dl_err, 0);
567 }
568 
569 /*
570  * DL_PROMISCON_REQ
571  */
572 static void
573 proto_promiscon_req(dld_str_t *dsp, mblk_t *mp)
574 {
575 	dl_promiscon_req_t *dlp = (dl_promiscon_req_t *)mp->b_rptr;
576 	int		err = 0;
577 	t_uscalar_t	dl_err;
578 	uint32_t	new_flags, promisc_saved;
579 	queue_t		*q = dsp->ds_wq;
580 	mac_perim_handle_t	mph;
581 
582 	if (MBLKL(mp) < sizeof (dl_promiscon_req_t)) {
583 		dl_err = DL_BADPRIM;
584 		goto failed;
585 	}
586 
587 	if (dsp->ds_dlstate == DL_UNATTACHED ||
588 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
589 		dl_err = DL_OUTSTATE;
590 		goto failed;
591 	}
592 
593 	mac_perim_enter_by_mh(dsp->ds_mh, &mph);
594 
595 	new_flags = promisc_saved = dsp->ds_promisc;
596 	switch (dlp->dl_level) {
597 	case DL_PROMISC_SAP:
598 		new_flags |= DLS_PROMISC_SAP;
599 		break;
600 
601 	case DL_PROMISC_MULTI:
602 		new_flags |= DLS_PROMISC_MULTI;
603 		break;
604 
605 	case DL_PROMISC_PHYS:
606 		new_flags |= DLS_PROMISC_PHYS;
607 		break;
608 
609 	case DL_PROMISC_RX_ONLY:
610 		new_flags |= DLS_PROMISC_RX_ONLY;
611 		break;
612 
613 	default:
614 		dl_err = DL_NOTSUPPORTED;
615 		goto failed2;
616 	}
617 
618 	if ((promisc_saved == 0) && (err = dls_active_set(dsp)) != 0) {
619 		ASSERT(dsp->ds_promisc == promisc_saved);
620 		dl_err = DL_SYSERR;
621 		goto failed2;
622 	}
623 
624 	/*
625 	 * Adjust channel promiscuity.
626 	 */
627 	err = dls_promisc(dsp, new_flags);
628 
629 	if (err != 0) {
630 		dl_err = DL_SYSERR;
631 		dsp->ds_promisc = promisc_saved;
632 		if (promisc_saved == 0)
633 			dls_active_clear(dsp, B_FALSE);
634 		goto failed2;
635 	}
636 
637 	mac_perim_exit(mph);
638 
639 	dlokack(q, mp, DL_PROMISCON_REQ);
640 	return;
641 
642 failed2:
643 	mac_perim_exit(mph);
644 failed:
645 	dlerrorack(q, mp, DL_PROMISCON_REQ, dl_err, (t_uscalar_t)err);
646 }
647 
648 /*
649  * DL_PROMISCOFF_REQ
650  */
651 static void
652 proto_promiscoff_req(dld_str_t *dsp, mblk_t *mp)
653 {
654 	dl_promiscoff_req_t *dlp = (dl_promiscoff_req_t *)mp->b_rptr;
655 	int		err = 0;
656 	t_uscalar_t	dl_err;
657 	uint32_t	new_flags;
658 	queue_t		*q = dsp->ds_wq;
659 	mac_perim_handle_t	mph;
660 
661 	if (MBLKL(mp) < sizeof (dl_promiscoff_req_t)) {
662 		dl_err = DL_BADPRIM;
663 		goto failed;
664 	}
665 
666 	if (dsp->ds_dlstate == DL_UNATTACHED ||
667 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
668 		dl_err = DL_OUTSTATE;
669 		goto failed;
670 	}
671 
672 	mac_perim_enter_by_mh(dsp->ds_mh, &mph);
673 
674 	new_flags = dsp->ds_promisc;
675 	switch (dlp->dl_level) {
676 	case DL_PROMISC_SAP:
677 		if (!(dsp->ds_promisc & DLS_PROMISC_SAP)) {
678 			dl_err = DL_NOTENAB;
679 			goto failed2;
680 		}
681 		new_flags &= ~DLS_PROMISC_SAP;
682 		break;
683 
684 	case DL_PROMISC_MULTI:
685 		if (!(dsp->ds_promisc & DLS_PROMISC_MULTI)) {
686 			dl_err = DL_NOTENAB;
687 			goto failed2;
688 		}
689 		new_flags &= ~DLS_PROMISC_MULTI;
690 		break;
691 
692 	case DL_PROMISC_PHYS:
693 		if (!(dsp->ds_promisc & DLS_PROMISC_PHYS)) {
694 			dl_err = DL_NOTENAB;
695 			goto failed2;
696 		}
697 		new_flags &= ~DLS_PROMISC_PHYS;
698 		break;
699 
700 	case DL_PROMISC_RX_ONLY:
701 		if (!(dsp->ds_promisc & DLS_PROMISC_RX_ONLY)) {
702 			dl_err = DL_NOTENAB;
703 			goto failed2;
704 		}
705 		new_flags &= ~DLS_PROMISC_RX_ONLY;
706 		break;
707 
708 	default:
709 		dl_err = DL_NOTSUPPORTED;
710 		goto failed2;
711 	}
712 
713 	/*
714 	 * Adjust channel promiscuity.
715 	 */
716 	err = dls_promisc(dsp, new_flags);
717 
718 	if (err != 0) {
719 		dl_err = DL_SYSERR;
720 		goto failed2;
721 	}
722 
723 	ASSERT(dsp->ds_promisc == new_flags);
724 	if (dsp->ds_promisc == 0)
725 		dls_active_clear(dsp, B_FALSE);
726 
727 	mac_perim_exit(mph);
728 
729 	dlokack(q, mp, DL_PROMISCOFF_REQ);
730 	return;
731 failed2:
732 	mac_perim_exit(mph);
733 failed:
734 	dlerrorack(q, mp, DL_PROMISCOFF_REQ, dl_err, (t_uscalar_t)err);
735 }
736 
737 /*
738  * DL_ENABMULTI_REQ
739  */
740 static void
741 proto_enabmulti_req(dld_str_t *dsp, mblk_t *mp)
742 {
743 	dl_enabmulti_req_t *dlp = (dl_enabmulti_req_t *)mp->b_rptr;
744 	int		err = 0;
745 	t_uscalar_t	dl_err;
746 	queue_t		*q = dsp->ds_wq;
747 	mac_perim_handle_t	mph;
748 
749 	if (dsp->ds_dlstate == DL_UNATTACHED ||
750 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
751 		dl_err = DL_OUTSTATE;
752 		goto failed;
753 	}
754 
755 	if (MBLKL(mp) < sizeof (dl_enabmulti_req_t) ||
756 	    !MBLKIN(mp, dlp->dl_addr_offset, dlp->dl_addr_length) ||
757 	    dlp->dl_addr_length != dsp->ds_mip->mi_addr_length) {
758 		dl_err = DL_BADPRIM;
759 		goto failed;
760 	}
761 
762 	mac_perim_enter_by_mh(dsp->ds_mh, &mph);
763 
764 	if ((dsp->ds_dmap == NULL) && (err = dls_active_set(dsp)) != 0) {
765 		dl_err = DL_SYSERR;
766 		goto failed2;
767 	}
768 
769 	err = dls_multicst_add(dsp, mp->b_rptr + dlp->dl_addr_offset);
770 	if (err != 0) {
771 		switch (err) {
772 		case EINVAL:
773 			dl_err = DL_BADADDR;
774 			err = 0;
775 			break;
776 		case ENOSPC:
777 			dl_err = DL_TOOMANY;
778 			err = 0;
779 			break;
780 		default:
781 			dl_err = DL_SYSERR;
782 			break;
783 		}
784 		if (dsp->ds_dmap == NULL)
785 			dls_active_clear(dsp, B_FALSE);
786 		goto failed2;
787 	}
788 
789 	mac_perim_exit(mph);
790 
791 	dlokack(q, mp, DL_ENABMULTI_REQ);
792 	return;
793 
794 failed2:
795 	mac_perim_exit(mph);
796 failed:
797 	dlerrorack(q, mp, DL_ENABMULTI_REQ, dl_err, (t_uscalar_t)err);
798 }
799 
800 /*
801  * DL_DISABMULTI_REQ
802  */
803 static void
804 proto_disabmulti_req(dld_str_t *dsp, mblk_t *mp)
805 {
806 	dl_disabmulti_req_t *dlp = (dl_disabmulti_req_t *)mp->b_rptr;
807 	int		err = 0;
808 	t_uscalar_t	dl_err;
809 	queue_t		*q = dsp->ds_wq;
810 	mac_perim_handle_t	mph;
811 
812 	if (dsp->ds_dlstate == DL_UNATTACHED ||
813 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
814 		dl_err = DL_OUTSTATE;
815 		goto failed;
816 	}
817 
818 	if (MBLKL(mp) < sizeof (dl_disabmulti_req_t) ||
819 	    !MBLKIN(mp, dlp->dl_addr_offset, dlp->dl_addr_length) ||
820 	    dlp->dl_addr_length != dsp->ds_mip->mi_addr_length) {
821 		dl_err = DL_BADPRIM;
822 		goto failed;
823 	}
824 
825 	mac_perim_enter_by_mh(dsp->ds_mh, &mph);
826 	err = dls_multicst_remove(dsp, mp->b_rptr + dlp->dl_addr_offset);
827 	if ((err == 0) && (dsp->ds_dmap == NULL))
828 		dls_active_clear(dsp, B_FALSE);
829 	mac_perim_exit(mph);
830 
831 	if (err != 0) {
832 		switch (err) {
833 		case EINVAL:
834 			dl_err = DL_BADADDR;
835 			err = 0;
836 			break;
837 
838 		case ENOENT:
839 			dl_err = DL_NOTENAB;
840 			err = 0;
841 			break;
842 
843 		default:
844 			dl_err = DL_SYSERR;
845 			break;
846 		}
847 		goto failed;
848 	}
849 	dlokack(q, mp, DL_DISABMULTI_REQ);
850 	return;
851 failed:
852 	dlerrorack(q, mp, DL_DISABMULTI_REQ, dl_err, (t_uscalar_t)err);
853 }
854 
855 /*
856  * DL_PHYS_ADDR_REQ
857  */
858 static void
859 proto_physaddr_req(dld_str_t *dsp, mblk_t *mp)
860 {
861 	dl_phys_addr_req_t *dlp = (dl_phys_addr_req_t *)mp->b_rptr;
862 	queue_t		*q = dsp->ds_wq;
863 	t_uscalar_t	dl_err = 0;
864 	char		*addr = NULL;
865 	uint_t		addr_length;
866 
867 	if (MBLKL(mp) < sizeof (dl_phys_addr_req_t)) {
868 		dl_err = DL_BADPRIM;
869 		goto done;
870 	}
871 
872 	if (dsp->ds_dlstate == DL_UNATTACHED ||
873 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
874 		dl_err = DL_OUTSTATE;
875 		goto done;
876 	}
877 
878 	addr_length = dsp->ds_mip->mi_addr_length;
879 	if (addr_length > 0) {
880 		addr = kmem_alloc(addr_length, KM_SLEEP);
881 		switch (dlp->dl_addr_type) {
882 		case DL_CURR_PHYS_ADDR:
883 			mac_unicast_primary_get(dsp->ds_mh, (uint8_t *)addr);
884 			break;
885 		case DL_FACT_PHYS_ADDR:
886 			bcopy(dsp->ds_mip->mi_unicst_addr, addr, addr_length);
887 			break;
888 		case DL_CURR_DEST_ADDR:
889 			if (!mac_dst_get(dsp->ds_mh, (uint8_t *)addr))
890 				dl_err = DL_NOTSUPPORTED;
891 			break;
892 		default:
893 			dl_err = DL_UNSUPPORTED;
894 		}
895 	}
896 done:
897 	if (dl_err == 0)
898 		dlphysaddrack(q, mp, addr, (t_uscalar_t)addr_length);
899 	else
900 		dlerrorack(q, mp, DL_PHYS_ADDR_REQ, dl_err, 0);
901 	if (addr != NULL)
902 		kmem_free(addr, addr_length);
903 }
904 
905 /*
906  * DL_SET_PHYS_ADDR_REQ
907  */
908 static void
909 proto_setphysaddr_req(dld_str_t *dsp, mblk_t *mp)
910 {
911 	dl_set_phys_addr_req_t *dlp = (dl_set_phys_addr_req_t *)mp->b_rptr;
912 	int		err = 0;
913 	t_uscalar_t	dl_err;
914 	queue_t		*q = dsp->ds_wq;
915 	mac_perim_handle_t	mph;
916 
917 	if (dsp->ds_dlstate == DL_UNATTACHED ||
918 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
919 		dl_err = DL_OUTSTATE;
920 		goto failed;
921 	}
922 
923 	if (MBLKL(mp) < sizeof (dl_set_phys_addr_req_t) ||
924 	    !MBLKIN(mp, dlp->dl_addr_offset, dlp->dl_addr_length) ||
925 	    dlp->dl_addr_length != dsp->ds_mip->mi_addr_length) {
926 		dl_err = DL_BADPRIM;
927 		goto failed;
928 	}
929 
930 	mac_perim_enter_by_mh(dsp->ds_mh, &mph);
931 
932 	if ((err = dls_active_set(dsp)) != 0) {
933 		dl_err = DL_SYSERR;
934 		goto failed2;
935 	}
936 
937 	/*
938 	 * If mac-nospoof is enabled and the link is owned by a
939 	 * non-global zone, changing the mac address is not allowed.
940 	 */
941 	if (dsp->ds_dlp->dl_zid != GLOBAL_ZONEID &&
942 	    mac_protect_enabled(dsp->ds_mch, MPT_MACNOSPOOF)) {
943 		dls_active_clear(dsp, B_FALSE);
944 		err = EACCES;
945 		goto failed2;
946 	}
947 
948 	err = mac_unicast_primary_set(dsp->ds_mh,
949 	    mp->b_rptr + dlp->dl_addr_offset);
950 	if (err != 0) {
951 		switch (err) {
952 		case EINVAL:
953 			dl_err = DL_BADADDR;
954 			err = 0;
955 			break;
956 
957 		default:
958 			dl_err = DL_SYSERR;
959 			break;
960 		}
961 		dls_active_clear(dsp, B_FALSE);
962 		goto failed2;
963 
964 	}
965 
966 	mac_perim_exit(mph);
967 
968 	dlokack(q, mp, DL_SET_PHYS_ADDR_REQ);
969 	return;
970 
971 failed2:
972 	mac_perim_exit(mph);
973 failed:
974 	dlerrorack(q, mp, DL_SET_PHYS_ADDR_REQ, dl_err, (t_uscalar_t)err);
975 }
976 
977 /*
978  * DL_UDQOS_REQ
979  */
980 static void
981 proto_udqos_req(dld_str_t *dsp, mblk_t *mp)
982 {
983 	dl_udqos_req_t *dlp = (dl_udqos_req_t *)mp->b_rptr;
984 	dl_qos_cl_sel1_t *selp;
985 	int		off, len;
986 	t_uscalar_t	dl_err;
987 	queue_t		*q = dsp->ds_wq;
988 
989 	off = dlp->dl_qos_offset;
990 	len = dlp->dl_qos_length;
991 
992 	if (MBLKL(mp) < sizeof (dl_udqos_req_t) || !MBLKIN(mp, off, len)) {
993 		dl_err = DL_BADPRIM;
994 		goto failed;
995 	}
996 
997 	selp = (dl_qos_cl_sel1_t *)(mp->b_rptr + off);
998 	if (selp->dl_qos_type != DL_QOS_CL_SEL1) {
999 		dl_err = DL_BADQOSTYPE;
1000 		goto failed;
1001 	}
1002 
1003 	if (selp->dl_priority > (1 << VLAN_PRI_SIZE) - 1 ||
1004 	    selp->dl_priority < 0) {
1005 		dl_err = DL_BADQOSPARAM;
1006 		goto failed;
1007 	}
1008 
1009 	dsp->ds_pri = selp->dl_priority;
1010 	dlokack(q, mp, DL_UDQOS_REQ);
1011 	return;
1012 failed:
1013 	dlerrorack(q, mp, DL_UDQOS_REQ, dl_err, 0);
1014 }
1015 
1016 static boolean_t
1017 check_mod_above(queue_t *q, const char *mod)
1018 {
1019 	queue_t		*next_q;
1020 	boolean_t	ret = B_TRUE;
1021 
1022 	claimstr(q);
1023 	next_q = q->q_next;
1024 	if (strcmp(next_q->q_qinfo->qi_minfo->mi_idname, mod) != 0)
1025 		ret = B_FALSE;
1026 	releasestr(q);
1027 	return (ret);
1028 }
1029 
1030 /*
1031  * DL_CAPABILITY_REQ
1032  */
1033 static void
1034 proto_capability_req(dld_str_t *dsp, mblk_t *mp)
1035 {
1036 	dl_capability_req_t *dlp = (dl_capability_req_t *)mp->b_rptr;
1037 	dl_capability_sub_t *sp;
1038 	size_t		size, len;
1039 	offset_t	off, end;
1040 	t_uscalar_t	dl_err;
1041 	queue_t		*q = dsp->ds_wq;
1042 
1043 	if (MBLKL(mp) < sizeof (dl_capability_req_t)) {
1044 		dl_err = DL_BADPRIM;
1045 		goto failed;
1046 	}
1047 
1048 	if (dsp->ds_dlstate == DL_UNATTACHED ||
1049 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
1050 		dl_err = DL_OUTSTATE;
1051 		goto failed;
1052 	}
1053 
1054 	/*
1055 	 * This request is overloaded. If there are no requested capabilities
1056 	 * then we just want to acknowledge with all the capabilities we
1057 	 * support. Otherwise we enable the set of capabilities requested.
1058 	 */
1059 	if (dlp->dl_sub_length == 0) {
1060 		proto_capability_advertise(dsp, mp);
1061 		return;
1062 	}
1063 
1064 	if (!MBLKIN(mp, dlp->dl_sub_offset, dlp->dl_sub_length)) {
1065 		dl_err = DL_BADPRIM;
1066 		goto failed;
1067 	}
1068 
1069 	dlp->dl_primitive = DL_CAPABILITY_ACK;
1070 
1071 	off = dlp->dl_sub_offset;
1072 	len = dlp->dl_sub_length;
1073 
1074 	/*
1075 	 * Walk the list of capabilities to be enabled.
1076 	 */
1077 	for (end = off + len; off < end; ) {
1078 		sp = (dl_capability_sub_t *)(mp->b_rptr + off);
1079 		size = sizeof (dl_capability_sub_t) + sp->dl_length;
1080 
1081 		if (off + size > end ||
1082 		    !IS_P2ALIGNED(off, sizeof (uint32_t))) {
1083 			dl_err = DL_BADPRIM;
1084 			goto failed;
1085 		}
1086 
1087 		switch (sp->dl_cap) {
1088 		/*
1089 		 * TCP/IP checksum offload to hardware.
1090 		 */
1091 		case DL_CAPAB_HCKSUM: {
1092 			dl_capab_hcksum_t *hcksump;
1093 			dl_capab_hcksum_t hcksum;
1094 
1095 			hcksump = (dl_capab_hcksum_t *)&sp[1];
1096 			/*
1097 			 * Copy for alignment.
1098 			 */
1099 			bcopy(hcksump, &hcksum, sizeof (dl_capab_hcksum_t));
1100 			dlcapabsetqid(&(hcksum.hcksum_mid), dsp->ds_rq);
1101 			bcopy(&hcksum, hcksump, sizeof (dl_capab_hcksum_t));
1102 			break;
1103 		}
1104 
1105 		case DL_CAPAB_DLD: {
1106 			dl_capab_dld_t	*dldp;
1107 			dl_capab_dld_t	dld;
1108 
1109 			dldp = (dl_capab_dld_t *)&sp[1];
1110 			/*
1111 			 * Copy for alignment.
1112 			 */
1113 			bcopy(dldp, &dld, sizeof (dl_capab_dld_t));
1114 			dlcapabsetqid(&(dld.dld_mid), dsp->ds_rq);
1115 			bcopy(&dld, dldp, sizeof (dl_capab_dld_t));
1116 			break;
1117 		}
1118 		default:
1119 			break;
1120 		}
1121 		off += size;
1122 	}
1123 	qreply(q, mp);
1124 	return;
1125 failed:
1126 	dlerrorack(q, mp, DL_CAPABILITY_REQ, dl_err, 0);
1127 }
1128 
1129 /*
1130  * DL_NOTIFY_REQ
1131  */
1132 static void
1133 proto_notify_req(dld_str_t *dsp, mblk_t *mp)
1134 {
1135 	dl_notify_req_t	*dlp = (dl_notify_req_t *)mp->b_rptr;
1136 	t_uscalar_t	dl_err;
1137 	queue_t		*q = dsp->ds_wq;
1138 	uint_t		note =
1139 	    DL_NOTE_PROMISC_ON_PHYS |
1140 	    DL_NOTE_PROMISC_OFF_PHYS |
1141 	    DL_NOTE_PHYS_ADDR |
1142 	    DL_NOTE_LINK_UP |
1143 	    DL_NOTE_LINK_DOWN |
1144 	    DL_NOTE_CAPAB_RENEG |
1145 	    DL_NOTE_FASTPATH_FLUSH |
1146 	    DL_NOTE_SPEED |
1147 	    DL_NOTE_SDU_SIZE|
1148 	    DL_NOTE_SDU_SIZE2|
1149 	    DL_NOTE_ALLOWED_IPS;
1150 
1151 	if (MBLKL(mp) < sizeof (dl_notify_req_t)) {
1152 		dl_err = DL_BADPRIM;
1153 		goto failed;
1154 	}
1155 
1156 	if (dsp->ds_dlstate == DL_UNATTACHED ||
1157 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
1158 		dl_err = DL_OUTSTATE;
1159 		goto failed;
1160 	}
1161 
1162 	note &= ~(mac_no_notification(dsp->ds_mh));
1163 
1164 	/*
1165 	 * Cache the notifications that are being enabled.
1166 	 */
1167 	dsp->ds_notifications = dlp->dl_notifications & note;
1168 	/*
1169 	 * The ACK carries all notifications regardless of which set is
1170 	 * being enabled.
1171 	 */
1172 	dlnotifyack(q, mp, note);
1173 
1174 	/*
1175 	 * Generate DL_NOTIFY_IND messages for each enabled notification.
1176 	 */
1177 	if (dsp->ds_notifications != 0) {
1178 		dld_str_notify_ind(dsp);
1179 	}
1180 	return;
1181 failed:
1182 	dlerrorack(q, mp, DL_NOTIFY_REQ, dl_err, 0);
1183 }
1184 
1185 /*
1186  * DL_UNITDATA_REQ
1187  */
1188 void
1189 proto_unitdata_req(dld_str_t *dsp, mblk_t *mp)
1190 {
1191 	queue_t			*q = dsp->ds_wq;
1192 	dl_unitdata_req_t	*dlp = (dl_unitdata_req_t *)mp->b_rptr;
1193 	off_t			off;
1194 	size_t			len, size;
1195 	const uint8_t		*addr;
1196 	uint16_t		sap;
1197 	uint_t			addr_length;
1198 	mblk_t			*bp, *payload;
1199 	t_uscalar_t		dl_err;
1200 	uint_t			max_sdu;
1201 
1202 	if (MBLKL(mp) < sizeof (dl_unitdata_req_t) || mp->b_cont == NULL) {
1203 		dlerrorack(q, mp, DL_UNITDATA_REQ, DL_BADPRIM, 0);
1204 		return;
1205 	}
1206 
1207 	mutex_enter(&dsp->ds_lock);
1208 	if (dsp->ds_dlstate != DL_IDLE) {
1209 		mutex_exit(&dsp->ds_lock);
1210 		dlerrorack(q, mp, DL_UNITDATA_REQ, DL_OUTSTATE, 0);
1211 		return;
1212 	}
1213 	DLD_DATATHR_INC(dsp);
1214 	mutex_exit(&dsp->ds_lock);
1215 
1216 	addr_length = dsp->ds_mip->mi_addr_length;
1217 
1218 	off = dlp->dl_dest_addr_offset;
1219 	len = dlp->dl_dest_addr_length;
1220 
1221 	if (!MBLKIN(mp, off, len) || !IS_P2ALIGNED(off, sizeof (uint16_t))) {
1222 		dl_err = DL_BADPRIM;
1223 		goto failed;
1224 	}
1225 
1226 	if (len != addr_length + sizeof (uint16_t)) {
1227 		dl_err = DL_BADADDR;
1228 		goto failed;
1229 	}
1230 
1231 	addr = mp->b_rptr + off;
1232 	sap = *(uint16_t *)(mp->b_rptr + off + addr_length);
1233 
1234 	/*
1235 	 * Check the length of the packet and the block types.
1236 	 */
1237 	size = 0;
1238 	payload = mp->b_cont;
1239 	for (bp = payload; bp != NULL; bp = bp->b_cont) {
1240 		if (DB_TYPE(bp) != M_DATA)
1241 			goto baddata;
1242 
1243 		size += MBLKL(bp);
1244 	}
1245 
1246 	mac_sdu_get(dsp->ds_mh, NULL, &max_sdu);
1247 	if (size > max_sdu)
1248 		goto baddata;
1249 
1250 	/*
1251 	 * Build a packet header.
1252 	 */
1253 	if ((bp = dls_header(dsp, addr, sap, dlp->dl_priority.dl_max,
1254 	    &payload)) == NULL) {
1255 		dl_err = DL_BADADDR;
1256 		goto failed;
1257 	}
1258 
1259 	/*
1260 	 * We no longer need the M_PROTO header, so free it.
1261 	 */
1262 	freeb(mp);
1263 
1264 	/*
1265 	 * Transfer the checksum offload information if it is present.
1266 	 */
1267 	mac_hcksum_clone(payload, bp);
1268 
1269 	/*
1270 	 * Link the payload onto the new header.
1271 	 */
1272 	ASSERT(bp->b_cont == NULL);
1273 	bp->b_cont = payload;
1274 
1275 	/*
1276 	 * No lock can be held across modules and putnext()'s,
1277 	 * which can happen here with the call from DLD_TX().
1278 	 */
1279 	if (DLD_TX(dsp, bp, 0, 0) != 0) {
1280 		/* flow-controlled */
1281 		DLD_SETQFULL(dsp);
1282 	}
1283 	DLD_DATATHR_DCR(dsp);
1284 	return;
1285 
1286 failed:
1287 	dlerrorack(q, mp, DL_UNITDATA_REQ, dl_err, 0);
1288 	DLD_DATATHR_DCR(dsp);
1289 	return;
1290 
1291 baddata:
1292 	dluderrorind(q, mp, (void *)addr, len, DL_BADDATA, 0);
1293 	DLD_DATATHR_DCR(dsp);
1294 }
1295 
1296 /*
1297  * DL_PASSIVE_REQ
1298  */
1299 static void
1300 proto_passive_req(dld_str_t *dsp, mblk_t *mp)
1301 {
1302 	t_uscalar_t dl_err;
1303 
1304 	/*
1305 	 * If we've already become active by issuing an active primitive,
1306 	 * then it's too late to try to become passive.
1307 	 */
1308 	if (dsp->ds_passivestate == DLD_ACTIVE) {
1309 		dl_err = DL_OUTSTATE;
1310 		goto failed;
1311 	}
1312 
1313 	if (MBLKL(mp) < sizeof (dl_passive_req_t)) {
1314 		dl_err = DL_BADPRIM;
1315 		goto failed;
1316 	}
1317 
1318 	dsp->ds_passivestate = DLD_PASSIVE;
1319 	dlokack(dsp->ds_wq, mp, DL_PASSIVE_REQ);
1320 	return;
1321 failed:
1322 	dlerrorack(dsp->ds_wq, mp, DL_PASSIVE_REQ, dl_err, 0);
1323 }
1324 
1325 
1326 /*
1327  * Catch-all handler.
1328  */
1329 static void
1330 proto_req(dld_str_t *dsp, mblk_t *mp)
1331 {
1332 	union DL_primitives	*dlp = (union DL_primitives *)mp->b_rptr;
1333 
1334 	dlerrorack(dsp->ds_wq, mp, dlp->dl_primitive, DL_UNSUPPORTED, 0);
1335 }
1336 
1337 static int
1338 dld_capab_perim(dld_str_t *dsp, void *data, uint_t flags)
1339 {
1340 	switch (flags) {
1341 	case DLD_ENABLE:
1342 		mac_perim_enter_by_mh(dsp->ds_mh, (mac_perim_handle_t *)data);
1343 		return (0);
1344 
1345 	case DLD_DISABLE:
1346 		mac_perim_exit((mac_perim_handle_t)data);
1347 		return (0);
1348 
1349 	case DLD_QUERY:
1350 		return (mac_perim_held(dsp->ds_mh));
1351 	}
1352 	return (0);
1353 }
1354 
1355 static int
1356 dld_capab_direct(dld_str_t *dsp, void *data, uint_t flags)
1357 {
1358 	dld_capab_direct_t	*direct = data;
1359 
1360 	ASSERT(MAC_PERIM_HELD(dsp->ds_mh));
1361 
1362 	switch (flags) {
1363 	case DLD_ENABLE:
1364 		dls_rx_set(dsp, (dls_rx_t)direct->di_rx_cf,
1365 		    direct->di_rx_ch);
1366 
1367 		direct->di_tx_df = (uintptr_t)str_mdata_fastpath_put;
1368 		direct->di_tx_dh = dsp;
1369 		direct->di_tx_cb_df = (uintptr_t)mac_client_tx_notify;
1370 		direct->di_tx_cb_dh = dsp->ds_mch;
1371 		direct->di_tx_fctl_df = (uintptr_t)mac_tx_is_flow_blocked;
1372 		direct->di_tx_fctl_dh = dsp->ds_mch;
1373 
1374 		dsp->ds_direct = B_TRUE;
1375 
1376 		return (0);
1377 
1378 	case DLD_DISABLE:
1379 		dls_rx_set(dsp, (dsp->ds_mode == DLD_FASTPATH) ?
1380 		    dld_str_rx_fastpath : dld_str_rx_unitdata, (void *)dsp);
1381 		dsp->ds_direct = B_FALSE;
1382 
1383 		return (0);
1384 	}
1385 	return (ENOTSUP);
1386 }
1387 
1388 /*
1389  * This function is misnamed. All polling and fanouts are run out of
1390  * the lower MAC for VNICs and out of the MAC for NICs. The
1391  * availability of Rx rings and promiscous mode is taken care of
1392  * between the soft ring set (mac_srs), the Rx ring, and the SW
1393  * classifier. Fanout, if necessary, is done by the soft rings that
1394  * are part of the SRS. By default the SRS divvies up the packets
1395  * based on protocol: TCP, UDP, or Other (OTH).
1396  *
1397  * The SRS (or its associated soft rings) always store the ill_rx_ring
1398  * (the cookie returned when they registered with IP during plumb) as their
1399  * 2nd argument which is passed up as mac_resource_handle_t. The upcall
1400  * function and 1st argument is what the caller registered when they
1401  * called mac_rx_classify_flow_add() to register the flow. For VNIC,
1402  * the function is vnic_rx and argument is vnic_t. For regular NIC
1403  * case, it mac_rx_default and mac_handle_t. As explained above, the
1404  * SRS (or its soft ring) will add the ill_rx_ring (mac_resource_handle_t)
1405  * from its stored 2nd argument.
1406  */
1407 static int
1408 dld_capab_poll_enable(dld_str_t *dsp, dld_capab_poll_t *poll)
1409 {
1410 	if (dsp->ds_polling)
1411 		return (EINVAL);
1412 
1413 	if ((dld_opt & DLD_OPT_NO_POLL) != 0 || dsp->ds_mode == DLD_RAW)
1414 		return (ENOTSUP);
1415 
1416 	/*
1417 	 * Enable client polling if and only if DLS bypass is
1418 	 * possible. Some traffic requires DLS processing in the Rx
1419 	 * data path. In such a case we can neither allow the client
1420 	 * (IP) to directly poll the soft ring (since DLS processing
1421 	 * hasn't been done) nor can we allow DLS bypass.
1422 	 */
1423 	if (!mac_rx_bypass_set(dsp->ds_mch, dsp->ds_rx, dsp->ds_rx_arg))
1424 		return (ENOTSUP);
1425 
1426 	/*
1427 	 * Register soft ring resources. This will come in handy later if
1428 	 * the user decides to modify CPU bindings to use more CPUs for the
1429 	 * device in which case we will switch to fanout using soft rings.
1430 	 */
1431 	mac_resource_set_common(dsp->ds_mch,
1432 	    (mac_resource_add_t)poll->poll_ring_add_cf,
1433 	    (mac_resource_remove_t)poll->poll_ring_remove_cf,
1434 	    (mac_resource_quiesce_t)poll->poll_ring_quiesce_cf,
1435 	    (mac_resource_restart_t)poll->poll_ring_restart_cf,
1436 	    (mac_resource_bind_t)poll->poll_ring_bind_cf,
1437 	    poll->poll_ring_ch);
1438 
1439 	mac_client_poll_enable(dsp->ds_mch);
1440 
1441 	dsp->ds_polling = B_TRUE;
1442 	return (0);
1443 }
1444 
1445 /* ARGSUSED */
1446 static int
1447 dld_capab_poll_disable(dld_str_t *dsp, dld_capab_poll_t *poll)
1448 {
1449 	if (!dsp->ds_polling)
1450 		return (EINVAL);
1451 
1452 	mac_client_poll_disable(dsp->ds_mch);
1453 	mac_resource_set(dsp->ds_mch, NULL, NULL);
1454 
1455 	dsp->ds_polling = B_FALSE;
1456 	return (0);
1457 }
1458 
1459 static int
1460 dld_capab_poll(dld_str_t *dsp, void *data, uint_t flags)
1461 {
1462 	dld_capab_poll_t	*poll = data;
1463 
1464 	ASSERT(MAC_PERIM_HELD(dsp->ds_mh));
1465 
1466 	switch (flags) {
1467 	case DLD_ENABLE:
1468 		return (dld_capab_poll_enable(dsp, poll));
1469 	case DLD_DISABLE:
1470 		return (dld_capab_poll_disable(dsp, poll));
1471 	}
1472 	return (ENOTSUP);
1473 }
1474 
1475 static int
1476 dld_capab_lso(dld_str_t *dsp, void *data, uint_t flags)
1477 {
1478 	dld_capab_lso_t		*lso = data;
1479 
1480 	ASSERT(MAC_PERIM_HELD(dsp->ds_mh));
1481 
1482 	switch (flags) {
1483 	case DLD_ENABLE: {
1484 		mac_capab_lso_t		mac_lso;
1485 
1486 		/*
1487 		 * Check if LSO is supported on this MAC & enable LSO
1488 		 * accordingly.
1489 		 */
1490 		if (mac_capab_get(dsp->ds_mh, MAC_CAPAB_LSO, &mac_lso)) {
1491 			lso->lso_max_tcpv4 = mac_lso.lso_basic_tcp_ipv4.lso_max;
1492 			lso->lso_max_tcpv6 = mac_lso.lso_basic_tcp_ipv6.lso_max;
1493 			lso->lso_flags = 0;
1494 			/* translate the flag for mac clients */
1495 			if ((mac_lso.lso_flags & LSO_TX_BASIC_TCP_IPV4) != 0)
1496 				lso->lso_flags |= DLD_LSO_BASIC_TCP_IPV4;
1497 			if ((mac_lso.lso_flags & LSO_TX_BASIC_TCP_IPV6) != 0)
1498 				lso->lso_flags |= DLD_LSO_BASIC_TCP_IPV6;
1499 			dsp->ds_lso = lso->lso_flags != 0;
1500 			/*
1501 			 * DLS uses this to try and make sure that a raw ioctl
1502 			 * doesn't send too much data, but doesn't currently
1503 			 * check the actual SAP that is sending this (or that
1504 			 * it's TCP). So for now, just use the max value here.
1505 			 */
1506 			dsp->ds_lso_max = MAX(lso->lso_max_tcpv4,
1507 			    lso->lso_max_tcpv6);
1508 		} else {
1509 			dsp->ds_lso = B_FALSE;
1510 			dsp->ds_lso_max = 0;
1511 			return (ENOTSUP);
1512 		}
1513 		return (0);
1514 	}
1515 	case DLD_DISABLE: {
1516 		dsp->ds_lso = B_FALSE;
1517 		dsp->ds_lso_max = 0;
1518 		return (0);
1519 	}
1520 	}
1521 	return (ENOTSUP);
1522 }
1523 
1524 static int
1525 dld_capab(dld_str_t *dsp, uint_t type, void *data, uint_t flags)
1526 {
1527 	int	err;
1528 
1529 	/*
1530 	 * Don't enable direct callback capabilities unless the caller is
1531 	 * the IP client. When a module is inserted in a stream (_I_INSERT)
1532 	 * the stack initiates capability disable, but due to races, the
1533 	 * module insertion may complete before the capability disable
1534 	 * completes. So we limit the check to DLD_ENABLE case.
1535 	 */
1536 	if ((flags == DLD_ENABLE && type != DLD_CAPAB_PERIM) &&
1537 	    (!(dsp->ds_sap == ETHERTYPE_IP || dsp->ds_sap == ETHERTYPE_IPV6) ||
1538 	    !check_mod_above(dsp->ds_rq, "ip"))) {
1539 		return (ENOTSUP);
1540 	}
1541 
1542 	switch (type) {
1543 	case DLD_CAPAB_DIRECT:
1544 		if (dsp->ds_sap == ETHERTYPE_IPV6) {
1545 			err = ENOTSUP;
1546 			break;
1547 		}
1548 		err = dld_capab_direct(dsp, data, flags);
1549 		break;
1550 
1551 	case DLD_CAPAB_POLL:
1552 		if (dsp->ds_sap == ETHERTYPE_IPV6) {
1553 			err = ENOTSUP;
1554 			break;
1555 		}
1556 		err =  dld_capab_poll(dsp, data, flags);
1557 		break;
1558 
1559 	case DLD_CAPAB_PERIM:
1560 		err = dld_capab_perim(dsp, data, flags);
1561 		break;
1562 
1563 	case DLD_CAPAB_LSO:
1564 		err = dld_capab_lso(dsp, data, flags);
1565 		break;
1566 
1567 	default:
1568 		err = ENOTSUP;
1569 		break;
1570 	}
1571 
1572 	return (err);
1573 }
1574 
1575 /*
1576  * DL_CAPABILITY_ACK/DL_ERROR_ACK
1577  */
1578 static void
1579 proto_capability_advertise(dld_str_t *dsp, mblk_t *mp)
1580 {
1581 	dl_capability_ack_t	*dlap;
1582 	dl_capability_sub_t	*dlsp;
1583 	size_t			subsize;
1584 	dl_capab_dld_t		dld;
1585 	dl_capab_hcksum_t	hcksum;
1586 	dl_capab_zerocopy_t	zcopy;
1587 	dl_capab_vrrp_t		vrrp;
1588 	mac_capab_vrrp_t	vrrp_capab;
1589 	uint8_t			*ptr;
1590 	queue_t			*q = dsp->ds_wq;
1591 	mblk_t			*mp1;
1592 	boolean_t		hcksum_capable = B_FALSE;
1593 	boolean_t		zcopy_capable = B_FALSE;
1594 	boolean_t		dld_capable = B_FALSE;
1595 	boolean_t		vrrp_capable = B_FALSE;
1596 
1597 	/*
1598 	 * Initially assume no capabilities.
1599 	 */
1600 	subsize = 0;
1601 
1602 	/*
1603 	 * Check if checksum offload is supported on this MAC.
1604 	 */
1605 	bzero(&hcksum, sizeof (dl_capab_hcksum_t));
1606 	if (mac_capab_get(dsp->ds_mh, MAC_CAPAB_HCKSUM,
1607 	    &hcksum.hcksum_txflags)) {
1608 		if (hcksum.hcksum_txflags != 0) {
1609 			hcksum_capable = B_TRUE;
1610 			subsize += sizeof (dl_capability_sub_t) +
1611 			    sizeof (dl_capab_hcksum_t);
1612 		}
1613 	}
1614 
1615 	/*
1616 	 * Check if zerocopy is supported on this interface.
1617 	 * If advertising DL_CAPAB_ZEROCOPY has not been explicitly disabled
1618 	 * then reserve space for that capability.
1619 	 */
1620 	if (!mac_capab_get(dsp->ds_mh, MAC_CAPAB_NO_ZCOPY, NULL) &&
1621 	    !(dld_opt & DLD_OPT_NO_ZEROCOPY)) {
1622 		zcopy_capable = B_TRUE;
1623 		subsize += sizeof (dl_capability_sub_t) +
1624 		    sizeof (dl_capab_zerocopy_t);
1625 	}
1626 
1627 	/*
1628 	 * Direct capability negotiation interface between IP and DLD
1629 	 */
1630 	if ((dsp->ds_sap == ETHERTYPE_IP || dsp->ds_sap == ETHERTYPE_IPV6) &&
1631 	    check_mod_above(dsp->ds_rq, "ip")) {
1632 		dld_capable = B_TRUE;
1633 		subsize += sizeof (dl_capability_sub_t) +
1634 		    sizeof (dl_capab_dld_t);
1635 	}
1636 
1637 	/*
1638 	 * Check if vrrp is supported on this interface. If so, reserve
1639 	 * space for that capability.
1640 	 */
1641 	if (mac_capab_get(dsp->ds_mh, MAC_CAPAB_VRRP, &vrrp_capab)) {
1642 		vrrp_capable = B_TRUE;
1643 		subsize += sizeof (dl_capability_sub_t) +
1644 		    sizeof (dl_capab_vrrp_t);
1645 	}
1646 
1647 	/*
1648 	 * If there are no capabilities to advertise or if we
1649 	 * can't allocate a response, send a DL_ERROR_ACK.
1650 	 */
1651 	if ((mp1 = reallocb(mp,
1652 	    sizeof (dl_capability_ack_t) + subsize, 0)) == NULL) {
1653 		dlerrorack(q, mp, DL_CAPABILITY_REQ, DL_NOTSUPPORTED, 0);
1654 		return;
1655 	}
1656 
1657 	mp = mp1;
1658 	DB_TYPE(mp) = M_PROTO;
1659 	mp->b_wptr = mp->b_rptr + sizeof (dl_capability_ack_t) + subsize;
1660 	bzero(mp->b_rptr, MBLKL(mp));
1661 	dlap = (dl_capability_ack_t *)mp->b_rptr;
1662 	dlap->dl_primitive = DL_CAPABILITY_ACK;
1663 	dlap->dl_sub_offset = sizeof (dl_capability_ack_t);
1664 	dlap->dl_sub_length = subsize;
1665 	ptr = (uint8_t *)&dlap[1];
1666 
1667 	/*
1668 	 * TCP/IP checksum offload.
1669 	 */
1670 	if (hcksum_capable) {
1671 		dlsp = (dl_capability_sub_t *)ptr;
1672 
1673 		dlsp->dl_cap = DL_CAPAB_HCKSUM;
1674 		dlsp->dl_length = sizeof (dl_capab_hcksum_t);
1675 		ptr += sizeof (dl_capability_sub_t);
1676 
1677 		hcksum.hcksum_version = HCKSUM_VERSION_1;
1678 		dlcapabsetqid(&(hcksum.hcksum_mid), dsp->ds_rq);
1679 		bcopy(&hcksum, ptr, sizeof (dl_capab_hcksum_t));
1680 		ptr += sizeof (dl_capab_hcksum_t);
1681 	}
1682 
1683 	/*
1684 	 * Zero copy
1685 	 */
1686 	if (zcopy_capable) {
1687 		dlsp = (dl_capability_sub_t *)ptr;
1688 
1689 		dlsp->dl_cap = DL_CAPAB_ZEROCOPY;
1690 		dlsp->dl_length = sizeof (dl_capab_zerocopy_t);
1691 		ptr += sizeof (dl_capability_sub_t);
1692 
1693 		bzero(&zcopy, sizeof (dl_capab_zerocopy_t));
1694 		zcopy.zerocopy_version = ZEROCOPY_VERSION_1;
1695 		zcopy.zerocopy_flags = DL_CAPAB_VMSAFE_MEM;
1696 
1697 		dlcapabsetqid(&(zcopy.zerocopy_mid), dsp->ds_rq);
1698 		bcopy(&zcopy, ptr, sizeof (dl_capab_zerocopy_t));
1699 		ptr += sizeof (dl_capab_zerocopy_t);
1700 	}
1701 
1702 	/*
1703 	 * VRRP capability negotiation
1704 	 */
1705 	if (vrrp_capable) {
1706 		dlsp = (dl_capability_sub_t *)ptr;
1707 		dlsp->dl_cap = DL_CAPAB_VRRP;
1708 		dlsp->dl_length = sizeof (dl_capab_vrrp_t);
1709 		ptr += sizeof (dl_capability_sub_t);
1710 
1711 		bzero(&vrrp, sizeof (dl_capab_vrrp_t));
1712 		vrrp.vrrp_af = vrrp_capab.mcv_af;
1713 		bcopy(&vrrp, ptr, sizeof (dl_capab_vrrp_t));
1714 		ptr += sizeof (dl_capab_vrrp_t);
1715 	}
1716 
1717 	/*
1718 	 * Direct capability negotiation interface between IP and DLD.
1719 	 * Refer to dld.h for details.
1720 	 */
1721 	if (dld_capable) {
1722 		dlsp = (dl_capability_sub_t *)ptr;
1723 		dlsp->dl_cap = DL_CAPAB_DLD;
1724 		dlsp->dl_length = sizeof (dl_capab_dld_t);
1725 		ptr += sizeof (dl_capability_sub_t);
1726 
1727 		bzero(&dld, sizeof (dl_capab_dld_t));
1728 		dld.dld_version = DLD_CURRENT_VERSION;
1729 		dld.dld_capab = (uintptr_t)dld_capab;
1730 		dld.dld_capab_handle = (uintptr_t)dsp;
1731 
1732 		dlcapabsetqid(&(dld.dld_mid), dsp->ds_rq);
1733 		bcopy(&dld, ptr, sizeof (dl_capab_dld_t));
1734 		ptr += sizeof (dl_capab_dld_t);
1735 	}
1736 
1737 	ASSERT(ptr == mp->b_rptr + sizeof (dl_capability_ack_t) + subsize);
1738 	qreply(q, mp);
1739 }
1740 
1741 /*
1742  * Disable any enabled capabilities.
1743  */
1744 void
1745 dld_capabilities_disable(dld_str_t *dsp)
1746 {
1747 	if (dsp->ds_polling)
1748 		(void) dld_capab_poll_disable(dsp, NULL);
1749 }
1750