xref: /illumos-gate/usr/src/uts/common/io/dld/dld_proto.c (revision fcdb3229a31dd4ff700c69238814e326aad49098)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright 2012, Nexenta Systems, Inc. All rights reserved.
24  * Copyright (c) 2018, Joyent, Inc.
25  * Copyright 2025 Oxide Computer Company
26  */
27 
28 /*
29  * Data-Link Driver
30  */
31 #include <sys/sysmacros.h>
32 #include <sys/strsubr.h>
33 #include <sys/strsun.h>
34 #include <sys/vlan.h>
35 #include <sys/dld_impl.h>
36 #include <sys/mac_client.h>
37 #include <sys/mac_client_impl.h>
38 #include <sys/mac_client_priv.h>
39 
40 typedef void proto_reqfunc_t(dld_str_t *, mblk_t *);
41 
42 static proto_reqfunc_t proto_info_req, proto_attach_req, proto_detach_req,
43     proto_bind_req, proto_unbind_req, proto_promiscon_req, proto_promiscoff_req,
44     proto_enabmulti_req, proto_disabmulti_req, proto_physaddr_req,
45     proto_setphysaddr_req, proto_udqos_req, proto_req, proto_capability_req,
46     proto_notify_req, proto_passive_req;
47 
48 static void proto_capability_advertise(dld_str_t *, mblk_t *);
49 static int dld_capab_poll_disable(dld_str_t *, dld_capab_poll_t *);
50 static boolean_t check_mod_above(queue_t *, const char *);
51 
/*
 * Non-zero when `state' is one of the transient DLPI states in which an
 * attach, detach, bind or unbind is still awaiting its acknowledgement.
 * Note that the argument is evaluated more than once.
 */
#define	DL_ACK_PENDING(state)			\
	((state) == DL_UNBIND_PENDING ||	\
	(state) == DL_BIND_PENDING ||		\
	(state) == DL_DETACH_PENDING ||		\
	(state) == DL_ATTACH_PENDING)
57 
58 /*
59  * Process a DLPI protocol message.
60  * The primitives DL_BIND_REQ, DL_ENABMULTI_REQ, DL_PROMISCON_REQ,
61  * DL_SET_PHYS_ADDR_REQ put the data link below our dld_str_t into an
62  * 'active' state. The primitive DL_PASSIVE_REQ marks our dld_str_t
63  * as 'passive' and forbids it from being subsequently made 'active'
64  * by the above primitives.
65  */
66 void
67 dld_proto(dld_str_t *dsp, mblk_t *mp)
68 {
69 	t_uscalar_t		prim;
70 
71 	if (MBLKL(mp) < sizeof (t_uscalar_t)) {
72 		freemsg(mp);
73 		return;
74 	}
75 	prim = ((union DL_primitives *)mp->b_rptr)->dl_primitive;
76 
77 	switch (prim) {
78 	case DL_INFO_REQ:
79 		proto_info_req(dsp, mp);
80 		break;
81 	case DL_BIND_REQ:
82 		proto_bind_req(dsp, mp);
83 		break;
84 	case DL_UNBIND_REQ:
85 		proto_unbind_req(dsp, mp);
86 		break;
87 	case DL_UNITDATA_REQ:
88 		proto_unitdata_req(dsp, mp);
89 		break;
90 	case DL_UDQOS_REQ:
91 		proto_udqos_req(dsp, mp);
92 		break;
93 	case DL_ATTACH_REQ:
94 		proto_attach_req(dsp, mp);
95 		break;
96 	case DL_DETACH_REQ:
97 		proto_detach_req(dsp, mp);
98 		break;
99 	case DL_ENABMULTI_REQ:
100 		proto_enabmulti_req(dsp, mp);
101 		break;
102 	case DL_DISABMULTI_REQ:
103 		proto_disabmulti_req(dsp, mp);
104 		break;
105 	case DL_PROMISCON_REQ:
106 		proto_promiscon_req(dsp, mp);
107 		break;
108 	case DL_PROMISCOFF_REQ:
109 		proto_promiscoff_req(dsp, mp);
110 		break;
111 	case DL_PHYS_ADDR_REQ:
112 		proto_physaddr_req(dsp, mp);
113 		break;
114 	case DL_SET_PHYS_ADDR_REQ:
115 		proto_setphysaddr_req(dsp, mp);
116 		break;
117 	case DL_NOTIFY_REQ:
118 		proto_notify_req(dsp, mp);
119 		break;
120 	case DL_CAPABILITY_REQ:
121 		proto_capability_req(dsp, mp);
122 		break;
123 	case DL_PASSIVE_REQ:
124 		proto_passive_req(dsp, mp);
125 		break;
126 	default:
127 		proto_req(dsp, mp);
128 		break;
129 	}
130 }
131 
/*
 * Arithmetic negation of `x'.  Both the argument and the whole expansion
 * are parenthesized so the macro can be used inside any larger expression.
 */
#define	NEG(x)	(-(x))
133 typedef struct dl_info_ack_wrapper {
134 	dl_info_ack_t		dl_info;
135 	uint8_t			dl_addr[MAXMACADDRLEN + sizeof (uint16_t)];
136 	uint8_t			dl_brdcst_addr[MAXMACADDRLEN];
137 	dl_qos_cl_range1_t	dl_qos_range1;
138 	dl_qos_cl_sel1_t	dl_qos_sel1;
139 } dl_info_ack_wrapper_t;
140 
141 /*
142  * DL_INFO_REQ
143  */
144 static void
145 proto_info_req(dld_str_t *dsp, mblk_t *mp)
146 {
147 	dl_info_ack_wrapper_t	*dlwp;
148 	dl_info_ack_t		*dlp;
149 	dl_qos_cl_sel1_t	*selp;
150 	dl_qos_cl_range1_t	*rangep;
151 	uint8_t			*addr;
152 	uint8_t			*brdcst_addr;
153 	uint_t			addr_length;
154 	uint_t			sap_length;
155 	mac_info_t		minfo;
156 	mac_info_t		*minfop;
157 	queue_t			*q = dsp->ds_wq;
158 
159 	/*
160 	 * Swap the request message for one large enough to contain the
161 	 * wrapper structure defined above.
162 	 */
163 	if ((mp = mexchange(q, mp, sizeof (dl_info_ack_wrapper_t),
164 	    M_PCPROTO, 0)) == NULL)
165 		return;
166 
167 	bzero(mp->b_rptr, sizeof (dl_info_ack_wrapper_t));
168 	dlwp = (dl_info_ack_wrapper_t *)mp->b_rptr;
169 
170 	dlp = &(dlwp->dl_info);
171 	ASSERT(dlp == (dl_info_ack_t *)mp->b_rptr);
172 
173 	dlp->dl_primitive = DL_INFO_ACK;
174 
175 	/*
176 	 * Set up the sub-structure pointers.
177 	 */
178 	addr = dlwp->dl_addr;
179 	brdcst_addr = dlwp->dl_brdcst_addr;
180 	rangep = &(dlwp->dl_qos_range1);
181 	selp = &(dlwp->dl_qos_sel1);
182 
183 	/*
184 	 * This driver supports only version 2 connectionless DLPI provider
185 	 * nodes.
186 	 */
187 	dlp->dl_service_mode = DL_CLDLS;
188 	dlp->dl_version = DL_VERSION_2;
189 
190 	/*
191 	 * Set the style of the provider
192 	 */
193 	dlp->dl_provider_style = dsp->ds_style;
194 	ASSERT(dlp->dl_provider_style == DL_STYLE1 ||
195 	    dlp->dl_provider_style == DL_STYLE2);
196 
197 	/*
198 	 * Set the current DLPI state.
199 	 */
200 	dlp->dl_current_state = dsp->ds_dlstate;
201 
202 	/*
203 	 * Gratuitously set the media type. This is to deal with modules
204 	 * that assume the media type is known prior to DL_ATTACH_REQ
205 	 * being completed.
206 	 */
207 	dlp->dl_mac_type = DL_ETHER;
208 
209 	/*
210 	 * If the stream is not at least attached we try to retrieve the
211 	 * mac_info using mac_info_get()
212 	 */
213 	if (dsp->ds_dlstate == DL_UNATTACHED ||
214 	    dsp->ds_dlstate == DL_ATTACH_PENDING ||
215 	    dsp->ds_dlstate == DL_DETACH_PENDING) {
216 		if (!mac_info_get(ddi_major_to_name(dsp->ds_major), &minfo)) {
217 			/*
218 			 * Cannot find mac_info. giving up.
219 			 */
220 			goto done;
221 		}
222 		minfop = &minfo;
223 	} else {
224 		minfop = (mac_info_t *)dsp->ds_mip;
225 		/* We can only get the sdu if we're attached. */
226 		mac_sdu_get(dsp->ds_mh, &dlp->dl_min_sdu, &dlp->dl_max_sdu);
227 	}
228 
229 	/*
230 	 * Set the media type (properly this time).
231 	 */
232 	if (dsp->ds_native)
233 		dlp->dl_mac_type = minfop->mi_nativemedia;
234 	else
235 		dlp->dl_mac_type = minfop->mi_media;
236 
237 	/*
238 	 * Set the DLSAP length. We only support 16 bit values and they
239 	 * appear after the MAC address portion of DLSAP addresses.
240 	 */
241 	sap_length = sizeof (uint16_t);
242 	dlp->dl_sap_length = NEG(sap_length);
243 
244 	addr_length = minfop->mi_addr_length;
245 
246 	/*
247 	 * Copy in the media broadcast address.
248 	 */
249 	if (minfop->mi_brdcst_addr != NULL) {
250 		dlp->dl_brdcst_addr_offset =
251 		    (uintptr_t)brdcst_addr - (uintptr_t)dlp;
252 		bcopy(minfop->mi_brdcst_addr, brdcst_addr, addr_length);
253 		dlp->dl_brdcst_addr_length = addr_length;
254 	}
255 
256 	/* Only VLAN links and links that have a normal tag mode support QOS. */
257 	if ((dsp->ds_mch != NULL &&
258 	    mac_client_vid(dsp->ds_mch) != VLAN_ID_NONE) ||
259 	    (dsp->ds_dlp != NULL &&
260 	    dsp->ds_dlp->dl_tagmode == LINK_TAGMODE_NORMAL)) {
261 		dlp->dl_qos_range_offset = (uintptr_t)rangep - (uintptr_t)dlp;
262 		dlp->dl_qos_range_length = sizeof (dl_qos_cl_range1_t);
263 
264 		rangep->dl_qos_type = DL_QOS_CL_RANGE1;
265 		rangep->dl_trans_delay.dl_target_value = DL_UNKNOWN;
266 		rangep->dl_trans_delay.dl_accept_value = DL_UNKNOWN;
267 		rangep->dl_protection.dl_min = DL_UNKNOWN;
268 		rangep->dl_protection.dl_max = DL_UNKNOWN;
269 		rangep->dl_residual_error = DL_UNKNOWN;
270 
271 		/*
272 		 * Specify the supported range of priorities.
273 		 */
274 		rangep->dl_priority.dl_min = 0;
275 		rangep->dl_priority.dl_max = (1 << VLAN_PRI_SIZE) - 1;
276 
277 		dlp->dl_qos_offset = (uintptr_t)selp - (uintptr_t)dlp;
278 		dlp->dl_qos_length = sizeof (dl_qos_cl_sel1_t);
279 
280 		selp->dl_qos_type = DL_QOS_CL_SEL1;
281 		selp->dl_trans_delay = DL_UNKNOWN;
282 		selp->dl_protection = DL_UNKNOWN;
283 		selp->dl_residual_error = DL_UNKNOWN;
284 
285 		/*
286 		 * Specify the current priority (which can be changed by
287 		 * the DL_UDQOS_REQ primitive).
288 		 */
289 		selp->dl_priority = dsp->ds_pri;
290 	}
291 
292 	dlp->dl_addr_length = addr_length + sizeof (uint16_t);
293 	if (dsp->ds_dlstate == DL_IDLE) {
294 		/*
295 		 * The stream is bound. Therefore we can formulate a valid
296 		 * DLSAP address.
297 		 */
298 		dlp->dl_addr_offset = (uintptr_t)addr - (uintptr_t)dlp;
299 		if (addr_length > 0)
300 			mac_unicast_primary_get(dsp->ds_mh, addr);
301 
302 		*(uint16_t *)(addr + addr_length) = dsp->ds_sap;
303 	}
304 
305 done:
306 	IMPLY(dlp->dl_qos_offset != 0, dlp->dl_qos_length != 0);
307 	IMPLY(dlp->dl_qos_range_offset != 0,
308 	    dlp->dl_qos_range_length != 0);
309 	IMPLY(dlp->dl_addr_offset != 0, dlp->dl_addr_length != 0);
310 	IMPLY(dlp->dl_brdcst_addr_offset != 0,
311 	    dlp->dl_brdcst_addr_length != 0);
312 
313 	qreply(q, mp);
314 }
315 
316 /*
317  * DL_ATTACH_REQ
318  */
319 static void
320 proto_attach_req(dld_str_t *dsp, mblk_t *mp)
321 {
322 	dl_attach_req_t	*dlp = (dl_attach_req_t *)mp->b_rptr;
323 	int		err = 0;
324 	t_uscalar_t	dl_err;
325 	queue_t		*q = dsp->ds_wq;
326 
327 	if (MBLKL(mp) < sizeof (dl_attach_req_t) ||
328 	    dsp->ds_style == DL_STYLE1) {
329 		dl_err = DL_BADPRIM;
330 		goto failed;
331 	}
332 
333 	if (dsp->ds_dlstate != DL_UNATTACHED) {
334 		dl_err = DL_OUTSTATE;
335 		goto failed;
336 	}
337 
338 	dsp->ds_dlstate = DL_ATTACH_PENDING;
339 
340 	err = dld_str_attach(dsp, dlp->dl_ppa);
341 	if (err != 0) {
342 		switch (err) {
343 		case ENOENT:
344 			dl_err = DL_BADPPA;
345 			err = 0;
346 			break;
347 		default:
348 			dl_err = DL_SYSERR;
349 			break;
350 		}
351 		dsp->ds_dlstate = DL_UNATTACHED;
352 		goto failed;
353 	}
354 	ASSERT(dsp->ds_dlstate == DL_UNBOUND);
355 	dlokack(q, mp, DL_ATTACH_REQ);
356 	return;
357 
358 failed:
359 	dlerrorack(q, mp, DL_ATTACH_REQ, dl_err, (t_uscalar_t)err);
360 }
361 
362 /*
363  * DL_DETACH_REQ
364  */
365 static void
366 proto_detach_req(dld_str_t *dsp, mblk_t *mp)
367 {
368 	queue_t		*q = dsp->ds_wq;
369 	t_uscalar_t	dl_err;
370 
371 	if (MBLKL(mp) < sizeof (dl_detach_req_t)) {
372 		dl_err = DL_BADPRIM;
373 		goto failed;
374 	}
375 
376 	if (dsp->ds_dlstate != DL_UNBOUND) {
377 		dl_err = DL_OUTSTATE;
378 		goto failed;
379 	}
380 
381 	if (dsp->ds_style == DL_STYLE1) {
382 		dl_err = DL_BADPRIM;
383 		goto failed;
384 	}
385 
386 	ASSERT(dsp->ds_datathr_cnt == 0);
387 	dsp->ds_dlstate = DL_DETACH_PENDING;
388 
389 	dld_str_detach(dsp);
390 	dlokack(dsp->ds_wq, mp, DL_DETACH_REQ);
391 	return;
392 
393 failed:
394 	dlerrorack(q, mp, DL_DETACH_REQ, dl_err, 0);
395 }
396 
397 /*
398  * DL_BIND_REQ
399  */
400 static void
401 proto_bind_req(dld_str_t *dsp, mblk_t *mp)
402 {
403 	dl_bind_req_t	*dlp = (dl_bind_req_t *)mp->b_rptr;
404 	int		err = 0;
405 	uint8_t		dlsap_addr[MAXMACADDRLEN + sizeof (uint16_t)];
406 	uint_t		dlsap_addr_length;
407 	t_uscalar_t	dl_err;
408 	t_scalar_t	sap;
409 	queue_t		*q = dsp->ds_wq;
410 	mac_perim_handle_t	mph;
411 	void		*mdip;
412 	int32_t		intr_cpu;
413 
414 	if (MBLKL(mp) < sizeof (dl_bind_req_t)) {
415 		dl_err = DL_BADPRIM;
416 		goto failed;
417 	}
418 
419 	if (dlp->dl_xidtest_flg != 0) {
420 		dl_err = DL_NOAUTO;
421 		goto failed;
422 	}
423 
424 	if (dlp->dl_service_mode != DL_CLDLS) {
425 		dl_err = DL_UNSUPPORTED;
426 		goto failed;
427 	}
428 
429 	if (dsp->ds_dlstate != DL_UNBOUND) {
430 		dl_err = DL_OUTSTATE;
431 		goto failed;
432 	}
433 
434 	mac_perim_enter_by_mh(dsp->ds_mh, &mph);
435 
436 	if ((err = dls_active_set(dsp)) != 0) {
437 		dl_err = DL_SYSERR;
438 		goto failed2;
439 	}
440 
441 	dsp->ds_dlstate = DL_BIND_PENDING;
442 	/*
443 	 * Set the receive callback.
444 	 */
445 	dls_rx_set(dsp, (dsp->ds_mode == DLD_RAW) ?
446 	    dld_str_rx_raw : dld_str_rx_unitdata, dsp);
447 
448 	/*
449 	 * Bind the channel such that it can receive packets.
450 	 */
451 	sap = dlp->dl_sap;
452 	dsp->ds_nonip = !check_mod_above(dsp->ds_rq, "ip") &&
453 	    !check_mod_above(dsp->ds_rq, "arp");
454 
455 	err = dls_bind(dsp, sap);
456 	if (err != 0) {
457 		switch (err) {
458 		case EINVAL:
459 			dl_err = DL_BADADDR;
460 			err = 0;
461 			break;
462 		default:
463 			dl_err = DL_SYSERR;
464 			break;
465 		}
466 
467 		dsp->ds_dlstate = DL_UNBOUND;
468 		dls_active_clear(dsp, B_FALSE);
469 		goto failed2;
470 	}
471 
472 	intr_cpu = mac_client_intr_cpu(dsp->ds_mch);
473 	mdip = mac_get_devinfo(dsp->ds_mh);
474 	mac_perim_exit(mph);
475 
476 	/*
477 	 * We do this after we get out of the perim to avoid deadlocks
478 	 * etc. since part of mac_client_retarget_intr is to walk the
479 	 * device tree in order to find and retarget the interrupts.
480 	 */
481 	if (intr_cpu != -1)
482 		mac_client_set_intr_cpu(mdip, dsp->ds_mch, intr_cpu);
483 
484 	/*
485 	 * Copy in MAC address.
486 	 */
487 	dlsap_addr_length = dsp->ds_mip->mi_addr_length;
488 	mac_unicast_primary_get(dsp->ds_mh, dlsap_addr);
489 
490 	/*
491 	 * Copy in the SAP.
492 	 */
493 	*(uint16_t *)(dlsap_addr + dlsap_addr_length) = sap;
494 	dlsap_addr_length += sizeof (uint16_t);
495 
496 	dsp->ds_dlstate = DL_IDLE;
497 	dlbindack(q, mp, sap, dlsap_addr, dlsap_addr_length, 0, 0);
498 	return;
499 
500 failed2:
501 	mac_perim_exit(mph);
502 failed:
503 	dlerrorack(q, mp, DL_BIND_REQ, dl_err, (t_uscalar_t)err);
504 }
505 
506 /*
507  * DL_UNBIND_REQ
508  */
509 static void
510 proto_unbind_req(dld_str_t *dsp, mblk_t *mp)
511 {
512 	queue_t		*q = dsp->ds_wq;
513 	t_uscalar_t	dl_err;
514 	mac_perim_handle_t	mph;
515 
516 	if (MBLKL(mp) < sizeof (dl_unbind_req_t)) {
517 		dl_err = DL_BADPRIM;
518 		goto failed;
519 	}
520 
521 	if (dsp->ds_dlstate != DL_IDLE) {
522 		dl_err = DL_OUTSTATE;
523 		goto failed;
524 	}
525 
526 	mutex_enter(&dsp->ds_lock);
527 	while (dsp->ds_datathr_cnt != 0)
528 		cv_wait(&dsp->ds_datathr_cv, &dsp->ds_lock);
529 
530 	dsp->ds_dlstate = DL_UNBIND_PENDING;
531 	mutex_exit(&dsp->ds_lock);
532 
533 	mac_perim_enter_by_mh(dsp->ds_mh, &mph);
534 	/*
535 	 * Unbind the channel to stop packets being received.
536 	 */
537 	dls_unbind(dsp);
538 
539 	/*
540 	 * Disable polling mode, if it is enabled.
541 	 */
542 	(void) dld_capab_poll_disable(dsp, NULL);
543 
544 	/*
545 	 * Clear LSO flags.
546 	 */
547 	dsp->ds_lso = B_FALSE;
548 	dsp->ds_lso_max = 0;
549 
550 	/*
551 	 * Clear the receive callback.
552 	 */
553 	dls_rx_set(dsp, NULL, NULL);
554 	dsp->ds_direct = B_FALSE;
555 
556 	/*
557 	 * Set the mode back to the default (unitdata).
558 	 */
559 	dsp->ds_mode = DLD_UNITDATA;
560 	dsp->ds_dlstate = DL_UNBOUND;
561 
562 	dls_active_clear(dsp, B_FALSE);
563 	mac_perim_exit(mph);
564 	dlokack(dsp->ds_wq, mp, DL_UNBIND_REQ);
565 	return;
566 failed:
567 	dlerrorack(q, mp, DL_UNBIND_REQ, dl_err, 0);
568 }
569 
570 /*
571  * DL_PROMISCON_REQ
572  */
573 static void
574 proto_promiscon_req(dld_str_t *dsp, mblk_t *mp)
575 {
576 	dl_promiscon_req_t *dlp = (dl_promiscon_req_t *)mp->b_rptr;
577 	int		err = 0;
578 	t_uscalar_t	dl_err;
579 	uint32_t	new_flags, promisc_saved;
580 	queue_t		*q = dsp->ds_wq;
581 	mac_perim_handle_t	mph;
582 
583 	if (MBLKL(mp) < sizeof (dl_promiscon_req_t)) {
584 		dl_err = DL_BADPRIM;
585 		goto failed;
586 	}
587 
588 	if (dsp->ds_dlstate == DL_UNATTACHED ||
589 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
590 		dl_err = DL_OUTSTATE;
591 		goto failed;
592 	}
593 
594 	mac_perim_enter_by_mh(dsp->ds_mh, &mph);
595 
596 	new_flags = promisc_saved = dsp->ds_promisc;
597 	switch (dlp->dl_level) {
598 	case DL_PROMISC_SAP:
599 		new_flags |= DLS_PROMISC_SAP;
600 		break;
601 
602 	case DL_PROMISC_MULTI:
603 		new_flags |= DLS_PROMISC_MULTI;
604 		break;
605 
606 	case DL_PROMISC_PHYS:
607 		new_flags |= DLS_PROMISC_PHYS;
608 		break;
609 
610 	case DL_PROMISC_RX_ONLY:
611 		new_flags |= DLS_PROMISC_RX_ONLY;
612 		break;
613 
614 	default:
615 		dl_err = DL_NOTSUPPORTED;
616 		goto failed2;
617 	}
618 
619 	if ((promisc_saved == 0) && (err = dls_active_set(dsp)) != 0) {
620 		ASSERT(dsp->ds_promisc == promisc_saved);
621 		dl_err = DL_SYSERR;
622 		goto failed2;
623 	}
624 
625 	/*
626 	 * Adjust channel promiscuity.
627 	 */
628 	err = dls_promisc(dsp, new_flags);
629 
630 	if (err != 0) {
631 		dl_err = DL_SYSERR;
632 		dsp->ds_promisc = promisc_saved;
633 		if (promisc_saved == 0)
634 			dls_active_clear(dsp, B_FALSE);
635 		goto failed2;
636 	}
637 
638 	mac_perim_exit(mph);
639 
640 	dlokack(q, mp, DL_PROMISCON_REQ);
641 	return;
642 
643 failed2:
644 	mac_perim_exit(mph);
645 failed:
646 	dlerrorack(q, mp, DL_PROMISCON_REQ, dl_err, (t_uscalar_t)err);
647 }
648 
649 /*
650  * DL_PROMISCOFF_REQ
651  */
652 static void
653 proto_promiscoff_req(dld_str_t *dsp, mblk_t *mp)
654 {
655 	dl_promiscoff_req_t *dlp = (dl_promiscoff_req_t *)mp->b_rptr;
656 	int		err = 0;
657 	t_uscalar_t	dl_err;
658 	uint32_t	new_flags;
659 	queue_t		*q = dsp->ds_wq;
660 	mac_perim_handle_t	mph;
661 
662 	if (MBLKL(mp) < sizeof (dl_promiscoff_req_t)) {
663 		dl_err = DL_BADPRIM;
664 		goto failed;
665 	}
666 
667 	if (dsp->ds_dlstate == DL_UNATTACHED ||
668 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
669 		dl_err = DL_OUTSTATE;
670 		goto failed;
671 	}
672 
673 	mac_perim_enter_by_mh(dsp->ds_mh, &mph);
674 
675 	new_flags = dsp->ds_promisc;
676 	switch (dlp->dl_level) {
677 	case DL_PROMISC_SAP:
678 		if (!(dsp->ds_promisc & DLS_PROMISC_SAP)) {
679 			dl_err = DL_NOTENAB;
680 			goto failed2;
681 		}
682 		new_flags &= ~DLS_PROMISC_SAP;
683 		break;
684 
685 	case DL_PROMISC_MULTI:
686 		if (!(dsp->ds_promisc & DLS_PROMISC_MULTI)) {
687 			dl_err = DL_NOTENAB;
688 			goto failed2;
689 		}
690 		new_flags &= ~DLS_PROMISC_MULTI;
691 		break;
692 
693 	case DL_PROMISC_PHYS:
694 		if (!(dsp->ds_promisc & DLS_PROMISC_PHYS)) {
695 			dl_err = DL_NOTENAB;
696 			goto failed2;
697 		}
698 		new_flags &= ~DLS_PROMISC_PHYS;
699 		break;
700 
701 	case DL_PROMISC_RX_ONLY:
702 		if (!(dsp->ds_promisc & DLS_PROMISC_RX_ONLY)) {
703 			dl_err = DL_NOTENAB;
704 			goto failed2;
705 		}
706 		new_flags &= ~DLS_PROMISC_RX_ONLY;
707 		break;
708 
709 	default:
710 		dl_err = DL_NOTSUPPORTED;
711 		goto failed2;
712 	}
713 
714 	/*
715 	 * Adjust channel promiscuity.
716 	 */
717 	err = dls_promisc(dsp, new_flags);
718 
719 	if (err != 0) {
720 		dl_err = DL_SYSERR;
721 		goto failed2;
722 	}
723 
724 	ASSERT(dsp->ds_promisc == new_flags);
725 	if (dsp->ds_promisc == 0)
726 		dls_active_clear(dsp, B_FALSE);
727 
728 	mac_perim_exit(mph);
729 
730 	dlokack(q, mp, DL_PROMISCOFF_REQ);
731 	return;
732 failed2:
733 	mac_perim_exit(mph);
734 failed:
735 	dlerrorack(q, mp, DL_PROMISCOFF_REQ, dl_err, (t_uscalar_t)err);
736 }
737 
738 /*
739  * DL_ENABMULTI_REQ
740  */
741 static void
742 proto_enabmulti_req(dld_str_t *dsp, mblk_t *mp)
743 {
744 	dl_enabmulti_req_t *dlp = (dl_enabmulti_req_t *)mp->b_rptr;
745 	int		err = 0;
746 	t_uscalar_t	dl_err;
747 	queue_t		*q = dsp->ds_wq;
748 	mac_perim_handle_t	mph;
749 
750 	if (dsp->ds_dlstate == DL_UNATTACHED ||
751 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
752 		dl_err = DL_OUTSTATE;
753 		goto failed;
754 	}
755 
756 	if (MBLKL(mp) < sizeof (dl_enabmulti_req_t) ||
757 	    !MBLKIN(mp, dlp->dl_addr_offset, dlp->dl_addr_length) ||
758 	    dlp->dl_addr_length != dsp->ds_mip->mi_addr_length) {
759 		dl_err = DL_BADPRIM;
760 		goto failed;
761 	}
762 
763 	mac_perim_enter_by_mh(dsp->ds_mh, &mph);
764 
765 	if ((dsp->ds_dmap == NULL) && (err = dls_active_set(dsp)) != 0) {
766 		dl_err = DL_SYSERR;
767 		goto failed2;
768 	}
769 
770 	err = dls_multicst_add(dsp, mp->b_rptr + dlp->dl_addr_offset);
771 	if (err != 0) {
772 		switch (err) {
773 		case EINVAL:
774 			dl_err = DL_BADADDR;
775 			err = 0;
776 			break;
777 		case ENOSPC:
778 			dl_err = DL_TOOMANY;
779 			err = 0;
780 			break;
781 		default:
782 			dl_err = DL_SYSERR;
783 			break;
784 		}
785 		if (dsp->ds_dmap == NULL)
786 			dls_active_clear(dsp, B_FALSE);
787 		goto failed2;
788 	}
789 
790 	mac_perim_exit(mph);
791 
792 	dlokack(q, mp, DL_ENABMULTI_REQ);
793 	return;
794 
795 failed2:
796 	mac_perim_exit(mph);
797 failed:
798 	dlerrorack(q, mp, DL_ENABMULTI_REQ, dl_err, (t_uscalar_t)err);
799 }
800 
801 /*
802  * DL_DISABMULTI_REQ
803  */
804 static void
805 proto_disabmulti_req(dld_str_t *dsp, mblk_t *mp)
806 {
807 	dl_disabmulti_req_t *dlp = (dl_disabmulti_req_t *)mp->b_rptr;
808 	int		err = 0;
809 	t_uscalar_t	dl_err;
810 	queue_t		*q = dsp->ds_wq;
811 	mac_perim_handle_t	mph;
812 
813 	if (dsp->ds_dlstate == DL_UNATTACHED ||
814 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
815 		dl_err = DL_OUTSTATE;
816 		goto failed;
817 	}
818 
819 	if (MBLKL(mp) < sizeof (dl_disabmulti_req_t) ||
820 	    !MBLKIN(mp, dlp->dl_addr_offset, dlp->dl_addr_length) ||
821 	    dlp->dl_addr_length != dsp->ds_mip->mi_addr_length) {
822 		dl_err = DL_BADPRIM;
823 		goto failed;
824 	}
825 
826 	mac_perim_enter_by_mh(dsp->ds_mh, &mph);
827 	err = dls_multicst_remove(dsp, mp->b_rptr + dlp->dl_addr_offset);
828 	if ((err == 0) && (dsp->ds_dmap == NULL))
829 		dls_active_clear(dsp, B_FALSE);
830 	mac_perim_exit(mph);
831 
832 	if (err != 0) {
833 		switch (err) {
834 		case EINVAL:
835 			dl_err = DL_BADADDR;
836 			err = 0;
837 			break;
838 
839 		case ENOENT:
840 			dl_err = DL_NOTENAB;
841 			err = 0;
842 			break;
843 
844 		default:
845 			dl_err = DL_SYSERR;
846 			break;
847 		}
848 		goto failed;
849 	}
850 	dlokack(q, mp, DL_DISABMULTI_REQ);
851 	return;
852 failed:
853 	dlerrorack(q, mp, DL_DISABMULTI_REQ, dl_err, (t_uscalar_t)err);
854 }
855 
856 /*
857  * DL_PHYS_ADDR_REQ
858  */
859 static void
860 proto_physaddr_req(dld_str_t *dsp, mblk_t *mp)
861 {
862 	dl_phys_addr_req_t *dlp = (dl_phys_addr_req_t *)mp->b_rptr;
863 	queue_t		*q = dsp->ds_wq;
864 	t_uscalar_t	dl_err = 0;
865 	char		*addr = NULL;
866 	uint_t		addr_length;
867 
868 	if (MBLKL(mp) < sizeof (dl_phys_addr_req_t)) {
869 		dl_err = DL_BADPRIM;
870 		goto done;
871 	}
872 
873 	if (dsp->ds_dlstate == DL_UNATTACHED ||
874 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
875 		dl_err = DL_OUTSTATE;
876 		goto done;
877 	}
878 
879 	addr_length = dsp->ds_mip->mi_addr_length;
880 	if (addr_length > 0) {
881 		addr = kmem_alloc(addr_length, KM_SLEEP);
882 		switch (dlp->dl_addr_type) {
883 		case DL_CURR_PHYS_ADDR:
884 			mac_unicast_primary_get(dsp->ds_mh, (uint8_t *)addr);
885 			break;
886 		case DL_FACT_PHYS_ADDR:
887 			bcopy(dsp->ds_mip->mi_unicst_addr, addr, addr_length);
888 			break;
889 		case DL_CURR_DEST_ADDR:
890 			if (!mac_dst_get(dsp->ds_mh, (uint8_t *)addr))
891 				dl_err = DL_NOTSUPPORTED;
892 			break;
893 		default:
894 			dl_err = DL_UNSUPPORTED;
895 		}
896 	}
897 done:
898 	if (dl_err == 0)
899 		dlphysaddrack(q, mp, addr, (t_uscalar_t)addr_length);
900 	else
901 		dlerrorack(q, mp, DL_PHYS_ADDR_REQ, dl_err, 0);
902 	if (addr != NULL)
903 		kmem_free(addr, addr_length);
904 }
905 
906 /*
907  * DL_SET_PHYS_ADDR_REQ
908  */
909 static void
910 proto_setphysaddr_req(dld_str_t *dsp, mblk_t *mp)
911 {
912 	dl_set_phys_addr_req_t *dlp = (dl_set_phys_addr_req_t *)mp->b_rptr;
913 	int		err = 0;
914 	t_uscalar_t	dl_err;
915 	queue_t		*q = dsp->ds_wq;
916 	mac_perim_handle_t	mph;
917 
918 	if (dsp->ds_dlstate == DL_UNATTACHED ||
919 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
920 		dl_err = DL_OUTSTATE;
921 		goto failed;
922 	}
923 
924 	if (MBLKL(mp) < sizeof (dl_set_phys_addr_req_t) ||
925 	    !MBLKIN(mp, dlp->dl_addr_offset, dlp->dl_addr_length) ||
926 	    dlp->dl_addr_length != dsp->ds_mip->mi_addr_length) {
927 		dl_err = DL_BADPRIM;
928 		goto failed;
929 	}
930 
931 	mac_perim_enter_by_mh(dsp->ds_mh, &mph);
932 
933 	if ((err = dls_active_set(dsp)) != 0) {
934 		dl_err = DL_SYSERR;
935 		goto failed2;
936 	}
937 
938 	/*
939 	 * If mac-nospoof is enabled and the link is owned by a
940 	 * non-global zone, changing the mac address is not allowed.
941 	 */
942 	if (dsp->ds_dlp->dl_zid != GLOBAL_ZONEID &&
943 	    mac_protect_enabled(dsp->ds_mch, MPT_MACNOSPOOF)) {
944 		dls_active_clear(dsp, B_FALSE);
945 		err = EACCES;
946 		goto failed2;
947 	}
948 
949 	err = mac_unicast_primary_set(dsp->ds_mh,
950 	    mp->b_rptr + dlp->dl_addr_offset);
951 	if (err != 0) {
952 		switch (err) {
953 		case EINVAL:
954 			dl_err = DL_BADADDR;
955 			err = 0;
956 			break;
957 
958 		default:
959 			dl_err = DL_SYSERR;
960 			break;
961 		}
962 		dls_active_clear(dsp, B_FALSE);
963 		goto failed2;
964 
965 	}
966 
967 	mac_perim_exit(mph);
968 
969 	dlokack(q, mp, DL_SET_PHYS_ADDR_REQ);
970 	return;
971 
972 failed2:
973 	mac_perim_exit(mph);
974 failed:
975 	dlerrorack(q, mp, DL_SET_PHYS_ADDR_REQ, dl_err, (t_uscalar_t)err);
976 }
977 
978 /*
979  * DL_UDQOS_REQ
980  */
981 static void
982 proto_udqos_req(dld_str_t *dsp, mblk_t *mp)
983 {
984 	dl_udqos_req_t *dlp = (dl_udqos_req_t *)mp->b_rptr;
985 	dl_qos_cl_sel1_t *selp;
986 	int		off, len;
987 	t_uscalar_t	dl_err;
988 	queue_t		*q = dsp->ds_wq;
989 
990 	off = dlp->dl_qos_offset;
991 	len = dlp->dl_qos_length;
992 
993 	if (MBLKL(mp) < sizeof (dl_udqos_req_t) || !MBLKIN(mp, off, len)) {
994 		dl_err = DL_BADPRIM;
995 		goto failed;
996 	}
997 
998 	selp = (dl_qos_cl_sel1_t *)(mp->b_rptr + off);
999 	if (selp->dl_qos_type != DL_QOS_CL_SEL1) {
1000 		dl_err = DL_BADQOSTYPE;
1001 		goto failed;
1002 	}
1003 
1004 	if (selp->dl_priority > (1 << VLAN_PRI_SIZE) - 1 ||
1005 	    selp->dl_priority < 0) {
1006 		dl_err = DL_BADQOSPARAM;
1007 		goto failed;
1008 	}
1009 
1010 	dsp->ds_pri = selp->dl_priority;
1011 	dlokack(q, mp, DL_UDQOS_REQ);
1012 	return;
1013 failed:
1014 	dlerrorack(q, mp, DL_UDQOS_REQ, dl_err, 0);
1015 }
1016 
1017 static boolean_t
1018 check_mod_above(queue_t *q, const char *mod)
1019 {
1020 	queue_t		*next_q;
1021 	boolean_t	ret = B_TRUE;
1022 
1023 	claimstr(q);
1024 	next_q = q->q_next;
1025 	if (strcmp(next_q->q_qinfo->qi_minfo->mi_idname, mod) != 0)
1026 		ret = B_FALSE;
1027 	releasestr(q);
1028 	return (ret);
1029 }
1030 
1031 /*
1032  * DL_CAPABILITY_REQ
1033  */
1034 static void
1035 proto_capability_req(dld_str_t *dsp, mblk_t *mp)
1036 {
1037 	dl_capability_req_t *dlp = (dl_capability_req_t *)mp->b_rptr;
1038 	dl_capability_sub_t *sp;
1039 	size_t		size, len;
1040 	offset_t	off, end;
1041 	t_uscalar_t	dl_err;
1042 	queue_t		*q = dsp->ds_wq;
1043 
1044 	if (MBLKL(mp) < sizeof (dl_capability_req_t)) {
1045 		dl_err = DL_BADPRIM;
1046 		goto failed;
1047 	}
1048 
1049 	if (dsp->ds_dlstate == DL_UNATTACHED ||
1050 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
1051 		dl_err = DL_OUTSTATE;
1052 		goto failed;
1053 	}
1054 
1055 	/*
1056 	 * This request is overloaded. If there are no requested capabilities
1057 	 * then we just want to acknowledge with all the capabilities we
1058 	 * support. Otherwise we enable the set of capabilities requested.
1059 	 */
1060 	if (dlp->dl_sub_length == 0) {
1061 		proto_capability_advertise(dsp, mp);
1062 		return;
1063 	}
1064 
1065 	if (!MBLKIN(mp, dlp->dl_sub_offset, dlp->dl_sub_length)) {
1066 		dl_err = DL_BADPRIM;
1067 		goto failed;
1068 	}
1069 
1070 	dlp->dl_primitive = DL_CAPABILITY_ACK;
1071 
1072 	off = dlp->dl_sub_offset;
1073 	len = dlp->dl_sub_length;
1074 
1075 	/*
1076 	 * Walk the list of capabilities to be enabled.
1077 	 */
1078 	for (end = off + len; off < end; ) {
1079 		sp = (dl_capability_sub_t *)(mp->b_rptr + off);
1080 		size = sizeof (dl_capability_sub_t) + sp->dl_length;
1081 
1082 		if (off + size > end ||
1083 		    !IS_P2ALIGNED(off, sizeof (uint32_t))) {
1084 			dl_err = DL_BADPRIM;
1085 			goto failed;
1086 		}
1087 
1088 		switch (sp->dl_cap) {
1089 		/*
1090 		 * TCP/IP checksum offload to hardware.
1091 		 */
1092 		case DL_CAPAB_HCKSUM: {
1093 			dl_capab_hcksum_t *hcksump;
1094 			dl_capab_hcksum_t hcksum;
1095 
1096 			hcksump = (dl_capab_hcksum_t *)&sp[1];
1097 			/*
1098 			 * Copy for alignment.
1099 			 */
1100 			bcopy(hcksump, &hcksum, sizeof (dl_capab_hcksum_t));
1101 			dlcapabsetqid(&(hcksum.hcksum_mid), dsp->ds_rq);
1102 			bcopy(&hcksum, hcksump, sizeof (dl_capab_hcksum_t));
1103 			break;
1104 		}
1105 
1106 		case DL_CAPAB_DLD: {
1107 			dl_capab_dld_t	*dldp;
1108 			dl_capab_dld_t	dld;
1109 
1110 			dldp = (dl_capab_dld_t *)&sp[1];
1111 			/*
1112 			 * Copy for alignment.
1113 			 */
1114 			bcopy(dldp, &dld, sizeof (dl_capab_dld_t));
1115 			dlcapabsetqid(&(dld.dld_mid), dsp->ds_rq);
1116 			bcopy(&dld, dldp, sizeof (dl_capab_dld_t));
1117 			break;
1118 		}
1119 		default:
1120 			break;
1121 		}
1122 		off += size;
1123 	}
1124 	qreply(q, mp);
1125 	return;
1126 failed:
1127 	dlerrorack(q, mp, DL_CAPABILITY_REQ, dl_err, 0);
1128 }
1129 
1130 /*
1131  * DL_NOTIFY_REQ
1132  */
1133 static void
1134 proto_notify_req(dld_str_t *dsp, mblk_t *mp)
1135 {
1136 	dl_notify_req_t	*dlp = (dl_notify_req_t *)mp->b_rptr;
1137 	t_uscalar_t	dl_err;
1138 	queue_t		*q = dsp->ds_wq;
1139 	uint_t		note =
1140 	    DL_NOTE_PROMISC_ON_PHYS |
1141 	    DL_NOTE_PROMISC_OFF_PHYS |
1142 	    DL_NOTE_PHYS_ADDR |
1143 	    DL_NOTE_LINK_UP |
1144 	    DL_NOTE_LINK_DOWN |
1145 	    DL_NOTE_CAPAB_RENEG |
1146 	    DL_NOTE_FASTPATH_FLUSH |
1147 	    DL_NOTE_SPEED |
1148 	    DL_NOTE_SDU_SIZE|
1149 	    DL_NOTE_SDU_SIZE2|
1150 	    DL_NOTE_ALLOWED_IPS;
1151 
1152 	if (MBLKL(mp) < sizeof (dl_notify_req_t)) {
1153 		dl_err = DL_BADPRIM;
1154 		goto failed;
1155 	}
1156 
1157 	if (dsp->ds_dlstate == DL_UNATTACHED ||
1158 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
1159 		dl_err = DL_OUTSTATE;
1160 		goto failed;
1161 	}
1162 
1163 	note &= ~(mac_no_notification(dsp->ds_mh));
1164 
1165 	/*
1166 	 * Cache the notifications that are being enabled.
1167 	 */
1168 	dsp->ds_notifications = dlp->dl_notifications & note;
1169 	/*
1170 	 * The ACK carries all notifications regardless of which set is
1171 	 * being enabled.
1172 	 */
1173 	dlnotifyack(q, mp, note);
1174 
1175 	/*
1176 	 * Generate DL_NOTIFY_IND messages for each enabled notification.
1177 	 */
1178 	if (dsp->ds_notifications != 0) {
1179 		dld_str_notify_ind(dsp);
1180 	}
1181 	return;
1182 failed:
1183 	dlerrorack(q, mp, DL_NOTIFY_REQ, dl_err, 0);
1184 }
1185 
1186 /*
1187  * DL_UINTDATA_REQ
1188  */
1189 void
1190 proto_unitdata_req(dld_str_t *dsp, mblk_t *mp)
1191 {
1192 	queue_t			*q = dsp->ds_wq;
1193 	dl_unitdata_req_t	*dlp = (dl_unitdata_req_t *)mp->b_rptr;
1194 	off_t			off;
1195 	size_t			len, size;
1196 	const uint8_t		*addr;
1197 	uint16_t		sap;
1198 	uint_t			addr_length;
1199 	mblk_t			*bp, *payload;
1200 	t_uscalar_t		dl_err;
1201 	uint_t			max_sdu;
1202 
1203 	if (MBLKL(mp) < sizeof (dl_unitdata_req_t) || mp->b_cont == NULL) {
1204 		dlerrorack(q, mp, DL_UNITDATA_REQ, DL_BADPRIM, 0);
1205 		return;
1206 	}
1207 
1208 	mutex_enter(&dsp->ds_lock);
1209 	if (dsp->ds_dlstate != DL_IDLE) {
1210 		mutex_exit(&dsp->ds_lock);
1211 		dlerrorack(q, mp, DL_UNITDATA_REQ, DL_OUTSTATE, 0);
1212 		return;
1213 	}
1214 	DLD_DATATHR_INC(dsp);
1215 	mutex_exit(&dsp->ds_lock);
1216 
1217 	addr_length = dsp->ds_mip->mi_addr_length;
1218 
1219 	off = dlp->dl_dest_addr_offset;
1220 	len = dlp->dl_dest_addr_length;
1221 
1222 	if (!MBLKIN(mp, off, len) || !IS_P2ALIGNED(off, sizeof (uint16_t))) {
1223 		dl_err = DL_BADPRIM;
1224 		goto failed;
1225 	}
1226 
1227 	if (len != addr_length + sizeof (uint16_t)) {
1228 		dl_err = DL_BADADDR;
1229 		goto failed;
1230 	}
1231 
1232 	addr = mp->b_rptr + off;
1233 	sap = *(uint16_t *)(mp->b_rptr + off + addr_length);
1234 
1235 	/*
1236 	 * Check the length of the packet and the block types.
1237 	 */
1238 	size = 0;
1239 	payload = mp->b_cont;
1240 	for (bp = payload; bp != NULL; bp = bp->b_cont) {
1241 		if (DB_TYPE(bp) != M_DATA)
1242 			goto baddata;
1243 
1244 		size += MBLKL(bp);
1245 	}
1246 
1247 	mac_sdu_get(dsp->ds_mh, NULL, &max_sdu);
1248 	if (size > max_sdu)
1249 		goto baddata;
1250 
1251 	/*
1252 	 * Build a packet header.
1253 	 */
1254 	if ((bp = dls_header(dsp, addr, sap, dlp->dl_priority.dl_max,
1255 	    &payload)) == NULL) {
1256 		dl_err = DL_BADADDR;
1257 		goto failed;
1258 	}
1259 
1260 	/*
1261 	 * We no longer need the M_PROTO header, so free it.
1262 	 */
1263 	freeb(mp);
1264 
1265 	/*
1266 	 * Transfer the checksum offload information if it is present.
1267 	 */
1268 	mac_hcksum_clone(payload, bp);
1269 
1270 	/*
1271 	 * Link the payload onto the new header.
1272 	 */
1273 	ASSERT(bp->b_cont == NULL);
1274 	bp->b_cont = payload;
1275 
1276 	/*
1277 	 * No lock can be held across modules and putnext()'s,
1278 	 * which can happen here with the call from DLD_TX().
1279 	 */
1280 	if (DLD_TX(dsp, bp, 0, 0) != 0) {
1281 		/* flow-controlled */
1282 		DLD_SETQFULL(dsp);
1283 	}
1284 	DLD_DATATHR_DCR(dsp);
1285 	return;
1286 
1287 failed:
1288 	dlerrorack(q, mp, DL_UNITDATA_REQ, dl_err, 0);
1289 	DLD_DATATHR_DCR(dsp);
1290 	return;
1291 
1292 baddata:
1293 	dluderrorind(q, mp, (void *)addr, len, DL_BADDATA, 0);
1294 	DLD_DATATHR_DCR(dsp);
1295 }
1296 
1297 /*
1298  * DL_PASSIVE_REQ
1299  */
1300 static void
1301 proto_passive_req(dld_str_t *dsp, mblk_t *mp)
1302 {
1303 	t_uscalar_t dl_err;
1304 
1305 	/*
1306 	 * If we've already become active by issuing an active primitive,
1307 	 * then it's too late to try to become passive.
1308 	 */
1309 	if (dsp->ds_passivestate == DLD_ACTIVE) {
1310 		dl_err = DL_OUTSTATE;
1311 		goto failed;
1312 	}
1313 
1314 	if (MBLKL(mp) < sizeof (dl_passive_req_t)) {
1315 		dl_err = DL_BADPRIM;
1316 		goto failed;
1317 	}
1318 
1319 	dsp->ds_passivestate = DLD_PASSIVE;
1320 	dlokack(dsp->ds_wq, mp, DL_PASSIVE_REQ);
1321 	return;
1322 failed:
1323 	dlerrorack(dsp->ds_wq, mp, DL_PASSIVE_REQ, dl_err, 0);
1324 }
1325 
1326 
1327 /*
1328  * Catch-all handler.
1329  */
1330 static void
1331 proto_req(dld_str_t *dsp, mblk_t *mp)
1332 {
1333 	union DL_primitives	*dlp = (union DL_primitives *)mp->b_rptr;
1334 
1335 	dlerrorack(dsp->ds_wq, mp, dlp->dl_primitive, DL_UNSUPPORTED, 0);
1336 }
1337 
1338 static int
1339 dld_capab_perim(dld_str_t *dsp, void *data, uint_t flags)
1340 {
1341 	switch (flags) {
1342 	case DLD_ENABLE:
1343 		mac_perim_enter_by_mh(dsp->ds_mh, (mac_perim_handle_t *)data);
1344 		return (0);
1345 
1346 	case DLD_DISABLE:
1347 		mac_perim_exit((mac_perim_handle_t)data);
1348 		return (0);
1349 
1350 	case DLD_QUERY:
1351 		return (mac_perim_held(dsp->ds_mh));
1352 	}
1353 	return (0);
1354 }
1355 
1356 static int
1357 dld_capab_direct(dld_str_t *dsp, void *data, uint_t flags)
1358 {
1359 	dld_capab_direct_t	*direct = data;
1360 
1361 	ASSERT(MAC_PERIM_HELD(dsp->ds_mh));
1362 
1363 	switch (flags) {
1364 	case DLD_ENABLE:
1365 		dls_rx_set(dsp, (dls_rx_t)direct->di_rx_cf,
1366 		    direct->di_rx_ch);
1367 
1368 		direct->di_tx_df = (uintptr_t)str_mdata_fastpath_put;
1369 		direct->di_tx_dh = dsp;
1370 		direct->di_tx_cb_df = (uintptr_t)mac_client_tx_notify;
1371 		direct->di_tx_cb_dh = dsp->ds_mch;
1372 		direct->di_tx_fctl_df = (uintptr_t)mac_tx_is_flow_blocked;
1373 		direct->di_tx_fctl_dh = dsp->ds_mch;
1374 
1375 		dsp->ds_direct = B_TRUE;
1376 
1377 		return (0);
1378 
1379 	case DLD_DISABLE:
1380 		dls_rx_set(dsp, (dsp->ds_mode == DLD_FASTPATH) ?
1381 		    dld_str_rx_fastpath : dld_str_rx_unitdata, (void *)dsp);
1382 		dsp->ds_direct = B_FALSE;
1383 
1384 		return (0);
1385 	}
1386 	return (ENOTSUP);
1387 }
1388 
1389 /*
1390  * This function is misnamed. All polling and fanouts are run out of
1391  * the lower MAC for VNICs and out of the MAC for NICs. The
1392  * availability of Rx rings and promiscous mode is taken care of
1393  * between the soft ring set (mac_srs), the Rx ring, and the SW
1394  * classifier. Fanout, if necessary, is done by the soft rings that
1395  * are part of the SRS. By default the SRS divvies up the packets
1396  * based on protocol: TCP, UDP, or Other (OTH).
1397  *
1398  * The SRS (or its associated soft rings) always store the ill_rx_ring
1399  * (the cookie returned when they registered with IP during plumb) as their
1400  * 2nd argument which is passed up as mac_resource_handle_t. The upcall
1401  * function and 1st argument is what the caller registered when they
1402  * called mac_rx_classify_flow_add() to register the flow. For VNIC,
1403  * the function is vnic_rx and argument is vnic_t. For regular NIC
1404  * case, it mac_rx_default and mac_handle_t. As explained above, the
1405  * SRS (or its soft ring) will add the ill_rx_ring (mac_resource_handle_t)
1406  * from its stored 2nd argument.
1407  */
1408 static int
1409 dld_capab_poll_enable(dld_str_t *dsp, dld_capab_poll_t *poll)
1410 {
1411 	if (dsp->ds_polling)
1412 		return (EINVAL);
1413 
1414 	if ((dld_opt & DLD_OPT_NO_POLL) != 0 || dsp->ds_mode == DLD_RAW)
1415 		return (ENOTSUP);
1416 
1417 	/*
1418 	 * Enable client polling if and only if DLS bypass is
1419 	 * possible. Some traffic requires DLS processing in the Rx
1420 	 * data path. In such a case we can neither allow the client
1421 	 * (IP) to directly poll the soft ring (since DLS processing
1422 	 * hasn't been done) nor can we allow DLS bypass.
1423 	 */
1424 	if (!mac_rx_bypass_set(dsp->ds_mch, dsp->ds_rx, dsp->ds_rx_arg,
1425 	    dsp->ds_sap == ETHERTYPE_IPV6))
1426 		return (ENOTSUP);
1427 
1428 	/*
1429 	 * Register soft ring resources. This will come in handy later if
1430 	 * the user decides to modify CPU bindings to use more CPUs for the
1431 	 * device in which case we will switch to fanout using soft rings.
1432 	 */
1433 	mac_resource_set_common(dsp->ds_mch,
1434 	    (mac_resource_add_t)poll->poll_ring_add_cf,
1435 	    (mac_resource_remove_t)poll->poll_ring_remove_cf,
1436 	    (mac_resource_quiesce_t)poll->poll_ring_quiesce_cf,
1437 	    (mac_resource_restart_t)poll->poll_ring_restart_cf,
1438 	    (mac_resource_bind_t)poll->poll_ring_bind_cf,
1439 	    poll->poll_ring_ch);
1440 
1441 	mac_client_poll_enable(dsp->ds_mch);
1442 
1443 	dsp->ds_polling = B_TRUE;
1444 	return (0);
1445 }
1446 
1447 /* ARGSUSED */
1448 static int
1449 dld_capab_poll_disable(dld_str_t *dsp, dld_capab_poll_t *poll)
1450 {
1451 	if (!dsp->ds_polling)
1452 		return (EINVAL);
1453 
1454 	mac_client_poll_disable(dsp->ds_mch);
1455 	mac_resource_set(dsp->ds_mch, NULL, NULL);
1456 
1457 	dsp->ds_polling = B_FALSE;
1458 	return (0);
1459 }
1460 
1461 static int
1462 dld_capab_poll(dld_str_t *dsp, void *data, uint_t flags)
1463 {
1464 	dld_capab_poll_t	*poll = data;
1465 
1466 	ASSERT(MAC_PERIM_HELD(dsp->ds_mh));
1467 
1468 	switch (flags) {
1469 	case DLD_ENABLE:
1470 		return (dld_capab_poll_enable(dsp, poll));
1471 	case DLD_DISABLE:
1472 		return (dld_capab_poll_disable(dsp, poll));
1473 	}
1474 	return (ENOTSUP);
1475 }
1476 
1477 static int
1478 dld_capab_lso(dld_str_t *dsp, void *data, uint_t flags)
1479 {
1480 	dld_capab_lso_t		*lso = data;
1481 
1482 	ASSERT(MAC_PERIM_HELD(dsp->ds_mh));
1483 
1484 	switch (flags) {
1485 	case DLD_ENABLE: {
1486 		mac_capab_lso_t		mac_lso;
1487 
1488 		/*
1489 		 * Check if LSO is supported on this MAC & enable LSO
1490 		 * accordingly.
1491 		 */
1492 		if (mac_capab_get(dsp->ds_mh, MAC_CAPAB_LSO, &mac_lso)) {
1493 			lso->lso_max_tcpv4 = mac_lso.lso_basic_tcp_ipv4.lso_max;
1494 			lso->lso_max_tcpv6 = mac_lso.lso_basic_tcp_ipv6.lso_max;
1495 			lso->lso_flags = 0;
1496 			/* translate the flag for mac clients */
1497 			if ((mac_lso.lso_flags & LSO_TX_BASIC_TCP_IPV4) != 0)
1498 				lso->lso_flags |= DLD_LSO_BASIC_TCP_IPV4;
1499 			if ((mac_lso.lso_flags & LSO_TX_BASIC_TCP_IPV6) != 0)
1500 				lso->lso_flags |= DLD_LSO_BASIC_TCP_IPV6;
1501 			dsp->ds_lso = lso->lso_flags != 0;
1502 			/*
1503 			 * DLS uses this to try and make sure that a raw ioctl
1504 			 * doesn't send too much data, but doesn't currently
1505 			 * check the actual SAP that is sending this (or that
1506 			 * it's TCP). So for now, just use the max value here.
1507 			 */
1508 			dsp->ds_lso_max = MAX(lso->lso_max_tcpv4,
1509 			    lso->lso_max_tcpv6);
1510 		} else {
1511 			dsp->ds_lso = B_FALSE;
1512 			dsp->ds_lso_max = 0;
1513 			return (ENOTSUP);
1514 		}
1515 		return (0);
1516 	}
1517 	case DLD_DISABLE: {
1518 		dsp->ds_lso = B_FALSE;
1519 		dsp->ds_lso_max = 0;
1520 		return (0);
1521 	}
1522 	}
1523 	return (ENOTSUP);
1524 }
1525 
1526 static int
1527 dld_capab(dld_str_t *dsp, uint_t type, void *data, uint_t flags)
1528 {
1529 	int	err;
1530 
1531 	/*
1532 	 * Don't enable direct callback capabilities unless the caller is
1533 	 * the IP client. When a module is inserted in a stream (_I_INSERT)
1534 	 * the stack initiates capability disable, but due to races, the
1535 	 * module insertion may complete before the capability disable
1536 	 * completes. So we limit the check to DLD_ENABLE case.
1537 	 */
1538 	if ((flags == DLD_ENABLE && type != DLD_CAPAB_PERIM) &&
1539 	    (!(dsp->ds_sap == ETHERTYPE_IP || dsp->ds_sap == ETHERTYPE_IPV6) ||
1540 	    !check_mod_above(dsp->ds_rq, "ip"))) {
1541 		return (ENOTSUP);
1542 	}
1543 
1544 	switch (type) {
1545 	case DLD_CAPAB_DIRECT:
1546 		err = dld_capab_direct(dsp, data, flags);
1547 		break;
1548 
1549 	case DLD_CAPAB_POLL:
1550 		err =  dld_capab_poll(dsp, data, flags);
1551 		break;
1552 
1553 	case DLD_CAPAB_PERIM:
1554 		err = dld_capab_perim(dsp, data, flags);
1555 		break;
1556 
1557 	case DLD_CAPAB_LSO:
1558 		err = dld_capab_lso(dsp, data, flags);
1559 		break;
1560 
1561 	default:
1562 		err = ENOTSUP;
1563 		break;
1564 	}
1565 
1566 	return (err);
1567 }
1568 
1569 /*
1570  * DL_CAPABILITY_ACK/DL_ERROR_ACK
1571  */
1572 static void
1573 proto_capability_advertise(dld_str_t *dsp, mblk_t *mp)
1574 {
1575 	dl_capability_ack_t	*dlap;
1576 	dl_capability_sub_t	*dlsp;
1577 	size_t			subsize;
1578 	dl_capab_dld_t		dld;
1579 	dl_capab_hcksum_t	hcksum;
1580 	dl_capab_zerocopy_t	zcopy;
1581 	dl_capab_vrrp_t		vrrp;
1582 	mac_capab_vrrp_t	vrrp_capab;
1583 	uint8_t			*ptr;
1584 	queue_t			*q = dsp->ds_wq;
1585 	mblk_t			*mp1;
1586 	boolean_t		hcksum_capable = B_FALSE;
1587 	boolean_t		zcopy_capable = B_FALSE;
1588 	boolean_t		dld_capable = B_FALSE;
1589 	boolean_t		vrrp_capable = B_FALSE;
1590 
1591 	/*
1592 	 * Initially assume no capabilities.
1593 	 */
1594 	subsize = 0;
1595 
1596 	/*
1597 	 * Check if checksum offload is supported on this MAC.
1598 	 */
1599 	bzero(&hcksum, sizeof (dl_capab_hcksum_t));
1600 	if (mac_capab_get(dsp->ds_mh, MAC_CAPAB_HCKSUM,
1601 	    &hcksum.hcksum_txflags)) {
1602 		if (hcksum.hcksum_txflags != 0) {
1603 			hcksum_capable = B_TRUE;
1604 			subsize += sizeof (dl_capability_sub_t) +
1605 			    sizeof (dl_capab_hcksum_t);
1606 		}
1607 	}
1608 
1609 	/*
1610 	 * Check if zerocopy is supported on this interface.
1611 	 * If advertising DL_CAPAB_ZEROCOPY has not been explicitly disabled
1612 	 * then reserve space for that capability.
1613 	 */
1614 	if (!mac_capab_get(dsp->ds_mh, MAC_CAPAB_NO_ZCOPY, NULL) &&
1615 	    !(dld_opt & DLD_OPT_NO_ZEROCOPY)) {
1616 		zcopy_capable = B_TRUE;
1617 		subsize += sizeof (dl_capability_sub_t) +
1618 		    sizeof (dl_capab_zerocopy_t);
1619 	}
1620 
1621 	/*
1622 	 * Direct capability negotiation interface between IP and DLD
1623 	 */
1624 	if ((dsp->ds_sap == ETHERTYPE_IP || dsp->ds_sap == ETHERTYPE_IPV6) &&
1625 	    check_mod_above(dsp->ds_rq, "ip")) {
1626 		dld_capable = B_TRUE;
1627 		subsize += sizeof (dl_capability_sub_t) +
1628 		    sizeof (dl_capab_dld_t);
1629 	}
1630 
1631 	/*
1632 	 * Check if vrrp is supported on this interface. If so, reserve
1633 	 * space for that capability.
1634 	 */
1635 	if (mac_capab_get(dsp->ds_mh, MAC_CAPAB_VRRP, &vrrp_capab)) {
1636 		vrrp_capable = B_TRUE;
1637 		subsize += sizeof (dl_capability_sub_t) +
1638 		    sizeof (dl_capab_vrrp_t);
1639 	}
1640 
1641 	/*
1642 	 * If there are no capabilities to advertise or if we
1643 	 * can't allocate a response, send a DL_ERROR_ACK.
1644 	 */
1645 	if ((mp1 = reallocb(mp,
1646 	    sizeof (dl_capability_ack_t) + subsize, 0)) == NULL) {
1647 		dlerrorack(q, mp, DL_CAPABILITY_REQ, DL_NOTSUPPORTED, 0);
1648 		return;
1649 	}
1650 
1651 	mp = mp1;
1652 	DB_TYPE(mp) = M_PROTO;
1653 	mp->b_wptr = mp->b_rptr + sizeof (dl_capability_ack_t) + subsize;
1654 	bzero(mp->b_rptr, MBLKL(mp));
1655 	dlap = (dl_capability_ack_t *)mp->b_rptr;
1656 	dlap->dl_primitive = DL_CAPABILITY_ACK;
1657 	dlap->dl_sub_offset = sizeof (dl_capability_ack_t);
1658 	dlap->dl_sub_length = subsize;
1659 	ptr = (uint8_t *)&dlap[1];
1660 
1661 	/*
1662 	 * TCP/IP checksum offload.
1663 	 */
1664 	if (hcksum_capable) {
1665 		dlsp = (dl_capability_sub_t *)ptr;
1666 
1667 		dlsp->dl_cap = DL_CAPAB_HCKSUM;
1668 		dlsp->dl_length = sizeof (dl_capab_hcksum_t);
1669 		ptr += sizeof (dl_capability_sub_t);
1670 
1671 		hcksum.hcksum_version = HCKSUM_VERSION_1;
1672 		dlcapabsetqid(&(hcksum.hcksum_mid), dsp->ds_rq);
1673 		bcopy(&hcksum, ptr, sizeof (dl_capab_hcksum_t));
1674 		ptr += sizeof (dl_capab_hcksum_t);
1675 	}
1676 
1677 	/*
1678 	 * Zero copy
1679 	 */
1680 	if (zcopy_capable) {
1681 		dlsp = (dl_capability_sub_t *)ptr;
1682 
1683 		dlsp->dl_cap = DL_CAPAB_ZEROCOPY;
1684 		dlsp->dl_length = sizeof (dl_capab_zerocopy_t);
1685 		ptr += sizeof (dl_capability_sub_t);
1686 
1687 		bzero(&zcopy, sizeof (dl_capab_zerocopy_t));
1688 		zcopy.zerocopy_version = ZEROCOPY_VERSION_1;
1689 		zcopy.zerocopy_flags = DL_CAPAB_VMSAFE_MEM;
1690 
1691 		dlcapabsetqid(&(zcopy.zerocopy_mid), dsp->ds_rq);
1692 		bcopy(&zcopy, ptr, sizeof (dl_capab_zerocopy_t));
1693 		ptr += sizeof (dl_capab_zerocopy_t);
1694 	}
1695 
1696 	/*
1697 	 * VRRP capability negotiation
1698 	 */
1699 	if (vrrp_capable) {
1700 		dlsp = (dl_capability_sub_t *)ptr;
1701 		dlsp->dl_cap = DL_CAPAB_VRRP;
1702 		dlsp->dl_length = sizeof (dl_capab_vrrp_t);
1703 		ptr += sizeof (dl_capability_sub_t);
1704 
1705 		bzero(&vrrp, sizeof (dl_capab_vrrp_t));
1706 		vrrp.vrrp_af = vrrp_capab.mcv_af;
1707 		bcopy(&vrrp, ptr, sizeof (dl_capab_vrrp_t));
1708 		ptr += sizeof (dl_capab_vrrp_t);
1709 	}
1710 
1711 	/*
1712 	 * Direct capability negotiation interface between IP and DLD.
1713 	 * Refer to dld.h for details.
1714 	 */
1715 	if (dld_capable) {
1716 		dlsp = (dl_capability_sub_t *)ptr;
1717 		dlsp->dl_cap = DL_CAPAB_DLD;
1718 		dlsp->dl_length = sizeof (dl_capab_dld_t);
1719 		ptr += sizeof (dl_capability_sub_t);
1720 
1721 		bzero(&dld, sizeof (dl_capab_dld_t));
1722 		dld.dld_version = DLD_CURRENT_VERSION;
1723 		dld.dld_capab = (uintptr_t)dld_capab;
1724 		dld.dld_capab_handle = (uintptr_t)dsp;
1725 
1726 		dlcapabsetqid(&(dld.dld_mid), dsp->ds_rq);
1727 		bcopy(&dld, ptr, sizeof (dl_capab_dld_t));
1728 		ptr += sizeof (dl_capab_dld_t);
1729 	}
1730 
1731 	ASSERT(ptr == mp->b_rptr + sizeof (dl_capability_ack_t) + subsize);
1732 	qreply(q, mp);
1733 }
1734 
1735 /*
1736  * Disable any enabled capabilities.
1737  */
1738 void
1739 dld_capabilities_disable(dld_str_t *dsp)
1740 {
1741 	if (dsp->ds_polling)
1742 		(void) dld_capab_poll_disable(dsp, NULL);
1743 }
1744