xref: /illumos-gate/usr/src/uts/common/io/dld/dld_proto.c (revision 69a119caa6570c7077699161b7c28b6ee9f8b0f4)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright 2012, Nexenta Systems, Inc. All rights reserved.
24  */
25 
26 /*
27  * Data-Link Driver
28  */
29 #include <sys/sysmacros.h>
30 #include <sys/strsubr.h>
31 #include <sys/strsun.h>
32 #include <sys/vlan.h>
33 #include <sys/dld_impl.h>
34 #include <sys/mac_client.h>
35 #include <sys/mac_client_impl.h>
36 #include <sys/mac_client_priv.h>
37 
38 typedef void proto_reqfunc_t(dld_str_t *, mblk_t *);
39 
40 static proto_reqfunc_t proto_info_req, proto_attach_req, proto_detach_req,
41     proto_bind_req, proto_unbind_req, proto_promiscon_req, proto_promiscoff_req,
42     proto_enabmulti_req, proto_disabmulti_req, proto_physaddr_req,
43     proto_setphysaddr_req, proto_udqos_req, proto_req, proto_capability_req,
44     proto_notify_req, proto_passive_req;
45 
46 static void proto_capability_advertise(dld_str_t *, mblk_t *);
47 static int dld_capab_poll_disable(dld_str_t *, dld_capab_poll_t *);
48 static boolean_t check_mod_above(queue_t *, const char *);
49 
/*
 * True when the given DLPI state is one of the four transitional
 * "*_PENDING" states handled in this file (attach, detach, bind, unbind).
 * Note that the argument may be evaluated up to four times.
 */
#define	DL_ACK_PENDING(st)			\
	((st) == DL_ATTACH_PENDING ||		\
	(st) == DL_DETACH_PENDING ||		\
	(st) == DL_BIND_PENDING ||		\
	(st) == DL_UNBIND_PENDING)
55 
56 /*
57  * Process a DLPI protocol message.
58  * The primitives DL_BIND_REQ, DL_ENABMULTI_REQ, DL_PROMISCON_REQ,
59  * DL_SET_PHYS_ADDR_REQ put the data link below our dld_str_t into an
60  * 'active' state. The primitive DL_PASSIVE_REQ marks our dld_str_t
61  * as 'passive' and forbids it from being subsequently made 'active'
62  * by the above primitives.
63  */
64 void
65 dld_proto(dld_str_t *dsp, mblk_t *mp)
66 {
67 	t_uscalar_t		prim;
68 
69 	if (MBLKL(mp) < sizeof (t_uscalar_t)) {
70 		freemsg(mp);
71 		return;
72 	}
73 	prim = ((union DL_primitives *)mp->b_rptr)->dl_primitive;
74 
75 	switch (prim) {
76 	case DL_INFO_REQ:
77 		proto_info_req(dsp, mp);
78 		break;
79 	case DL_BIND_REQ:
80 		proto_bind_req(dsp, mp);
81 		break;
82 	case DL_UNBIND_REQ:
83 		proto_unbind_req(dsp, mp);
84 		break;
85 	case DL_UNITDATA_REQ:
86 		proto_unitdata_req(dsp, mp);
87 		break;
88 	case DL_UDQOS_REQ:
89 		proto_udqos_req(dsp, mp);
90 		break;
91 	case DL_ATTACH_REQ:
92 		proto_attach_req(dsp, mp);
93 		break;
94 	case DL_DETACH_REQ:
95 		proto_detach_req(dsp, mp);
96 		break;
97 	case DL_ENABMULTI_REQ:
98 		proto_enabmulti_req(dsp, mp);
99 		break;
100 	case DL_DISABMULTI_REQ:
101 		proto_disabmulti_req(dsp, mp);
102 		break;
103 	case DL_PROMISCON_REQ:
104 		proto_promiscon_req(dsp, mp);
105 		break;
106 	case DL_PROMISCOFF_REQ:
107 		proto_promiscoff_req(dsp, mp);
108 		break;
109 	case DL_PHYS_ADDR_REQ:
110 		proto_physaddr_req(dsp, mp);
111 		break;
112 	case DL_SET_PHYS_ADDR_REQ:
113 		proto_setphysaddr_req(dsp, mp);
114 		break;
115 	case DL_NOTIFY_REQ:
116 		proto_notify_req(dsp, mp);
117 		break;
118 	case DL_CAPABILITY_REQ:
119 		proto_capability_req(dsp, mp);
120 		break;
121 	case DL_PASSIVE_REQ:
122 		proto_passive_req(dsp, mp);
123 		break;
124 	default:
125 		proto_req(dsp, mp);
126 		break;
127 	}
128 }
129 
/*
 * Arithmetic negation of x.  The whole expansion is parenthesized so the
 * result stays a single operand wherever the macro is used.
 */
#define	NEG(x)	(-(x))
131 typedef struct dl_info_ack_wrapper {
132 	dl_info_ack_t		dl_info;
133 	uint8_t			dl_addr[MAXMACADDRLEN + sizeof (uint16_t)];
134 	uint8_t			dl_brdcst_addr[MAXMACADDRLEN];
135 	dl_qos_cl_range1_t	dl_qos_range1;
136 	dl_qos_cl_sel1_t	dl_qos_sel1;
137 } dl_info_ack_wrapper_t;
138 
139 /*
140  * DL_INFO_REQ
141  */
142 static void
143 proto_info_req(dld_str_t *dsp, mblk_t *mp)
144 {
145 	dl_info_ack_wrapper_t	*dlwp;
146 	dl_info_ack_t		*dlp;
147 	dl_qos_cl_sel1_t	*selp;
148 	dl_qos_cl_range1_t	*rangep;
149 	uint8_t			*addr;
150 	uint8_t			*brdcst_addr;
151 	uint_t			addr_length;
152 	uint_t			sap_length;
153 	mac_info_t		minfo;
154 	mac_info_t		*minfop;
155 	queue_t			*q = dsp->ds_wq;
156 
157 	/*
158 	 * Swap the request message for one large enough to contain the
159 	 * wrapper structure defined above.
160 	 */
161 	if ((mp = mexchange(q, mp, sizeof (dl_info_ack_wrapper_t),
162 	    M_PCPROTO, 0)) == NULL)
163 		return;
164 
165 	bzero(mp->b_rptr, sizeof (dl_info_ack_wrapper_t));
166 	dlwp = (dl_info_ack_wrapper_t *)mp->b_rptr;
167 
168 	dlp = &(dlwp->dl_info);
169 	ASSERT(dlp == (dl_info_ack_t *)mp->b_rptr);
170 
171 	dlp->dl_primitive = DL_INFO_ACK;
172 
173 	/*
174 	 * Set up the sub-structure pointers.
175 	 */
176 	addr = dlwp->dl_addr;
177 	brdcst_addr = dlwp->dl_brdcst_addr;
178 	rangep = &(dlwp->dl_qos_range1);
179 	selp = &(dlwp->dl_qos_sel1);
180 
181 	/*
182 	 * This driver supports only version 2 connectionless DLPI provider
183 	 * nodes.
184 	 */
185 	dlp->dl_service_mode = DL_CLDLS;
186 	dlp->dl_version = DL_VERSION_2;
187 
188 	/*
189 	 * Set the style of the provider
190 	 */
191 	dlp->dl_provider_style = dsp->ds_style;
192 	ASSERT(dlp->dl_provider_style == DL_STYLE1 ||
193 	    dlp->dl_provider_style == DL_STYLE2);
194 
195 	/*
196 	 * Set the current DLPI state.
197 	 */
198 	dlp->dl_current_state = dsp->ds_dlstate;
199 
200 	/*
201 	 * Gratuitously set the media type. This is to deal with modules
202 	 * that assume the media type is known prior to DL_ATTACH_REQ
203 	 * being completed.
204 	 */
205 	dlp->dl_mac_type = DL_ETHER;
206 
207 	/*
208 	 * If the stream is not at least attached we try to retrieve the
209 	 * mac_info using mac_info_get()
210 	 */
211 	if (dsp->ds_dlstate == DL_UNATTACHED ||
212 	    dsp->ds_dlstate == DL_ATTACH_PENDING ||
213 	    dsp->ds_dlstate == DL_DETACH_PENDING) {
214 		if (!mac_info_get(ddi_major_to_name(dsp->ds_major), &minfo)) {
215 			/*
216 			 * Cannot find mac_info. giving up.
217 			 */
218 			goto done;
219 		}
220 		minfop = &minfo;
221 	} else {
222 		minfop = (mac_info_t *)dsp->ds_mip;
223 		/* We can only get the sdu if we're attached. */
224 		mac_sdu_get(dsp->ds_mh, &dlp->dl_min_sdu, &dlp->dl_max_sdu);
225 	}
226 
227 	/*
228 	 * Set the media type (properly this time).
229 	 */
230 	if (dsp->ds_native)
231 		dlp->dl_mac_type = minfop->mi_nativemedia;
232 	else
233 		dlp->dl_mac_type = minfop->mi_media;
234 
235 	/*
236 	 * Set the DLSAP length. We only support 16 bit values and they
237 	 * appear after the MAC address portion of DLSAP addresses.
238 	 */
239 	sap_length = sizeof (uint16_t);
240 	dlp->dl_sap_length = NEG(sap_length);
241 
242 	addr_length = minfop->mi_addr_length;
243 
244 	/*
245 	 * Copy in the media broadcast address.
246 	 */
247 	if (minfop->mi_brdcst_addr != NULL) {
248 		dlp->dl_brdcst_addr_offset =
249 		    (uintptr_t)brdcst_addr - (uintptr_t)dlp;
250 		bcopy(minfop->mi_brdcst_addr, brdcst_addr, addr_length);
251 		dlp->dl_brdcst_addr_length = addr_length;
252 	}
253 
254 	/* Only VLAN links and links that have a normal tag mode support QOS. */
255 	if ((dsp->ds_mch != NULL &&
256 	    mac_client_vid(dsp->ds_mch) != VLAN_ID_NONE) ||
257 	    (dsp->ds_dlp != NULL &&
258 	    dsp->ds_dlp->dl_tagmode == LINK_TAGMODE_NORMAL)) {
259 		dlp->dl_qos_range_offset = (uintptr_t)rangep - (uintptr_t)dlp;
260 		dlp->dl_qos_range_length = sizeof (dl_qos_cl_range1_t);
261 
262 		rangep->dl_qos_type = DL_QOS_CL_RANGE1;
263 		rangep->dl_trans_delay.dl_target_value = DL_UNKNOWN;
264 		rangep->dl_trans_delay.dl_accept_value = DL_UNKNOWN;
265 		rangep->dl_protection.dl_min = DL_UNKNOWN;
266 		rangep->dl_protection.dl_max = DL_UNKNOWN;
267 		rangep->dl_residual_error = DL_UNKNOWN;
268 
269 		/*
270 		 * Specify the supported range of priorities.
271 		 */
272 		rangep->dl_priority.dl_min = 0;
273 		rangep->dl_priority.dl_max = (1 << VLAN_PRI_SIZE) - 1;
274 
275 		dlp->dl_qos_offset = (uintptr_t)selp - (uintptr_t)dlp;
276 		dlp->dl_qos_length = sizeof (dl_qos_cl_sel1_t);
277 
278 		selp->dl_qos_type = DL_QOS_CL_SEL1;
279 		selp->dl_trans_delay = DL_UNKNOWN;
280 		selp->dl_protection = DL_UNKNOWN;
281 		selp->dl_residual_error = DL_UNKNOWN;
282 
283 		/*
284 		 * Specify the current priority (which can be changed by
285 		 * the DL_UDQOS_REQ primitive).
286 		 */
287 		selp->dl_priority = dsp->ds_pri;
288 	}
289 
290 	dlp->dl_addr_length = addr_length + sizeof (uint16_t);
291 	if (dsp->ds_dlstate == DL_IDLE) {
292 		/*
293 		 * The stream is bound. Therefore we can formulate a valid
294 		 * DLSAP address.
295 		 */
296 		dlp->dl_addr_offset = (uintptr_t)addr - (uintptr_t)dlp;
297 		if (addr_length > 0)
298 			mac_unicast_primary_get(dsp->ds_mh, addr);
299 
300 		*(uint16_t *)(addr + addr_length) = dsp->ds_sap;
301 	}
302 
303 done:
304 	IMPLY(dlp->dl_qos_offset != 0, dlp->dl_qos_length != 0);
305 	IMPLY(dlp->dl_qos_range_offset != 0,
306 	    dlp->dl_qos_range_length != 0);
307 	IMPLY(dlp->dl_addr_offset != 0, dlp->dl_addr_length != 0);
308 	IMPLY(dlp->dl_brdcst_addr_offset != 0,
309 	    dlp->dl_brdcst_addr_length != 0);
310 
311 	qreply(q, mp);
312 }
313 
314 /*
315  * DL_ATTACH_REQ
316  */
317 static void
318 proto_attach_req(dld_str_t *dsp, mblk_t *mp)
319 {
320 	dl_attach_req_t	*dlp = (dl_attach_req_t *)mp->b_rptr;
321 	int		err = 0;
322 	t_uscalar_t	dl_err;
323 	queue_t		*q = dsp->ds_wq;
324 
325 	if (MBLKL(mp) < sizeof (dl_attach_req_t) ||
326 	    dlp->dl_ppa < 0 || dsp->ds_style == DL_STYLE1) {
327 		dl_err = DL_BADPRIM;
328 		goto failed;
329 	}
330 
331 	if (dsp->ds_dlstate != DL_UNATTACHED) {
332 		dl_err = DL_OUTSTATE;
333 		goto failed;
334 	}
335 
336 	dsp->ds_dlstate = DL_ATTACH_PENDING;
337 
338 	err = dld_str_attach(dsp, dlp->dl_ppa);
339 	if (err != 0) {
340 		switch (err) {
341 		case ENOENT:
342 			dl_err = DL_BADPPA;
343 			err = 0;
344 			break;
345 		default:
346 			dl_err = DL_SYSERR;
347 			break;
348 		}
349 		dsp->ds_dlstate = DL_UNATTACHED;
350 		goto failed;
351 	}
352 	ASSERT(dsp->ds_dlstate == DL_UNBOUND);
353 	dlokack(q, mp, DL_ATTACH_REQ);
354 	return;
355 
356 failed:
357 	dlerrorack(q, mp, DL_ATTACH_REQ, dl_err, (t_uscalar_t)err);
358 }
359 
360 /*
361  * DL_DETACH_REQ
362  */
363 static void
364 proto_detach_req(dld_str_t *dsp, mblk_t *mp)
365 {
366 	queue_t		*q = dsp->ds_wq;
367 	t_uscalar_t	dl_err;
368 
369 	if (MBLKL(mp) < sizeof (dl_detach_req_t)) {
370 		dl_err = DL_BADPRIM;
371 		goto failed;
372 	}
373 
374 	if (dsp->ds_dlstate != DL_UNBOUND) {
375 		dl_err = DL_OUTSTATE;
376 		goto failed;
377 	}
378 
379 	if (dsp->ds_style == DL_STYLE1) {
380 		dl_err = DL_BADPRIM;
381 		goto failed;
382 	}
383 
384 	ASSERT(dsp->ds_datathr_cnt == 0);
385 	dsp->ds_dlstate = DL_DETACH_PENDING;
386 
387 	dld_str_detach(dsp);
388 	dlokack(dsp->ds_wq, mp, DL_DETACH_REQ);
389 	return;
390 
391 failed:
392 	dlerrorack(q, mp, DL_DETACH_REQ, dl_err, 0);
393 }
394 
395 /*
396  * DL_BIND_REQ
397  */
398 static void
399 proto_bind_req(dld_str_t *dsp, mblk_t *mp)
400 {
401 	dl_bind_req_t	*dlp = (dl_bind_req_t *)mp->b_rptr;
402 	int		err = 0;
403 	uint8_t		dlsap_addr[MAXMACADDRLEN + sizeof (uint16_t)];
404 	uint_t		dlsap_addr_length;
405 	t_uscalar_t	dl_err;
406 	t_scalar_t	sap;
407 	queue_t		*q = dsp->ds_wq;
408 	mac_perim_handle_t	mph;
409 	void		*mdip;
410 	int32_t		intr_cpu;
411 
412 	if (MBLKL(mp) < sizeof (dl_bind_req_t)) {
413 		dl_err = DL_BADPRIM;
414 		goto failed;
415 	}
416 
417 	if (dlp->dl_xidtest_flg != 0) {
418 		dl_err = DL_NOAUTO;
419 		goto failed;
420 	}
421 
422 	if (dlp->dl_service_mode != DL_CLDLS) {
423 		dl_err = DL_UNSUPPORTED;
424 		goto failed;
425 	}
426 
427 	if (dsp->ds_dlstate != DL_UNBOUND) {
428 		dl_err = DL_OUTSTATE;
429 		goto failed;
430 	}
431 
432 	mac_perim_enter_by_mh(dsp->ds_mh, &mph);
433 
434 	if ((err = dls_active_set(dsp)) != 0) {
435 		dl_err = DL_SYSERR;
436 		goto failed2;
437 	}
438 
439 	dsp->ds_dlstate = DL_BIND_PENDING;
440 	/*
441 	 * Set the receive callback.
442 	 */
443 	dls_rx_set(dsp, (dsp->ds_mode == DLD_RAW) ?
444 	    dld_str_rx_raw : dld_str_rx_unitdata, dsp);
445 
446 	/*
447 	 * Bind the channel such that it can receive packets.
448 	 */
449 	sap = dlp->dl_sap;
450 	dsp->ds_nonip = !check_mod_above(dsp->ds_rq, "ip") &&
451 	    !check_mod_above(dsp->ds_rq, "arp");
452 
453 	err = dls_bind(dsp, sap);
454 	if (err != 0) {
455 		switch (err) {
456 		case EINVAL:
457 			dl_err = DL_BADADDR;
458 			err = 0;
459 			break;
460 		default:
461 			dl_err = DL_SYSERR;
462 			break;
463 		}
464 
465 		dsp->ds_dlstate = DL_UNBOUND;
466 		dls_active_clear(dsp, B_FALSE);
467 		goto failed2;
468 	}
469 
470 	intr_cpu = mac_client_intr_cpu(dsp->ds_mch);
471 	mdip = mac_get_devinfo(dsp->ds_mh);
472 	mac_perim_exit(mph);
473 
474 	/*
475 	 * We do this after we get out of the perim to avoid deadlocks
476 	 * etc. since part of mac_client_retarget_intr is to walk the
477 	 * device tree in order to find and retarget the interrupts.
478 	 */
479 	if (intr_cpu != -1)
480 		mac_client_set_intr_cpu(mdip, dsp->ds_mch, intr_cpu);
481 
482 	/*
483 	 * Copy in MAC address.
484 	 */
485 	dlsap_addr_length = dsp->ds_mip->mi_addr_length;
486 	mac_unicast_primary_get(dsp->ds_mh, dlsap_addr);
487 
488 	/*
489 	 * Copy in the SAP.
490 	 */
491 	*(uint16_t *)(dlsap_addr + dlsap_addr_length) = sap;
492 	dlsap_addr_length += sizeof (uint16_t);
493 
494 	dsp->ds_dlstate = DL_IDLE;
495 	dlbindack(q, mp, sap, dlsap_addr, dlsap_addr_length, 0, 0);
496 	return;
497 
498 failed2:
499 	mac_perim_exit(mph);
500 failed:
501 	dlerrorack(q, mp, DL_BIND_REQ, dl_err, (t_uscalar_t)err);
502 }
503 
504 /*
505  * DL_UNBIND_REQ
506  */
507 static void
508 proto_unbind_req(dld_str_t *dsp, mblk_t *mp)
509 {
510 	queue_t		*q = dsp->ds_wq;
511 	t_uscalar_t	dl_err;
512 	mac_perim_handle_t	mph;
513 
514 	if (MBLKL(mp) < sizeof (dl_unbind_req_t)) {
515 		dl_err = DL_BADPRIM;
516 		goto failed;
517 	}
518 
519 	if (dsp->ds_dlstate != DL_IDLE) {
520 		dl_err = DL_OUTSTATE;
521 		goto failed;
522 	}
523 
524 	mutex_enter(&dsp->ds_lock);
525 	while (dsp->ds_datathr_cnt != 0)
526 		cv_wait(&dsp->ds_datathr_cv, &dsp->ds_lock);
527 
528 	dsp->ds_dlstate = DL_UNBIND_PENDING;
529 	mutex_exit(&dsp->ds_lock);
530 
531 	mac_perim_enter_by_mh(dsp->ds_mh, &mph);
532 	/*
533 	 * Unbind the channel to stop packets being received.
534 	 */
535 	dls_unbind(dsp);
536 
537 	/*
538 	 * Disable polling mode, if it is enabled.
539 	 */
540 	(void) dld_capab_poll_disable(dsp, NULL);
541 
542 	/*
543 	 * Clear LSO flags.
544 	 */
545 	dsp->ds_lso = B_FALSE;
546 	dsp->ds_lso_max = 0;
547 
548 	/*
549 	 * Clear the receive callback.
550 	 */
551 	dls_rx_set(dsp, NULL, NULL);
552 	dsp->ds_direct = B_FALSE;
553 
554 	/*
555 	 * Set the mode back to the default (unitdata).
556 	 */
557 	dsp->ds_mode = DLD_UNITDATA;
558 	dsp->ds_dlstate = DL_UNBOUND;
559 
560 	dls_active_clear(dsp, B_FALSE);
561 	mac_perim_exit(mph);
562 	dlokack(dsp->ds_wq, mp, DL_UNBIND_REQ);
563 	return;
564 failed:
565 	dlerrorack(q, mp, DL_UNBIND_REQ, dl_err, 0);
566 }
567 
568 /*
569  * DL_PROMISCON_REQ
570  */
571 static void
572 proto_promiscon_req(dld_str_t *dsp, mblk_t *mp)
573 {
574 	dl_promiscon_req_t *dlp = (dl_promiscon_req_t *)mp->b_rptr;
575 	int		err = 0;
576 	t_uscalar_t	dl_err;
577 	uint32_t	new_flags, promisc_saved;
578 	queue_t		*q = dsp->ds_wq;
579 	mac_perim_handle_t	mph;
580 
581 	if (MBLKL(mp) < sizeof (dl_promiscon_req_t)) {
582 		dl_err = DL_BADPRIM;
583 		goto failed;
584 	}
585 
586 	if (dsp->ds_dlstate == DL_UNATTACHED ||
587 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
588 		dl_err = DL_OUTSTATE;
589 		goto failed;
590 	}
591 
592 	mac_perim_enter_by_mh(dsp->ds_mh, &mph);
593 
594 	new_flags = promisc_saved = dsp->ds_promisc;
595 	switch (dlp->dl_level) {
596 	case DL_PROMISC_SAP:
597 		new_flags |= DLS_PROMISC_SAP;
598 		break;
599 
600 	case DL_PROMISC_MULTI:
601 		new_flags |= DLS_PROMISC_MULTI;
602 		break;
603 
604 	case DL_PROMISC_PHYS:
605 		new_flags |= DLS_PROMISC_PHYS;
606 		break;
607 
608 	default:
609 		dl_err = DL_NOTSUPPORTED;
610 		goto failed;
611 	}
612 
613 	if ((promisc_saved == 0) && (err = dls_active_set(dsp)) != 0) {
614 		ASSERT(dsp->ds_promisc == promisc_saved);
615 		dl_err = DL_SYSERR;
616 		goto failed2;
617 	}
618 
619 	/*
620 	 * Adjust channel promiscuity.
621 	 */
622 	err = dls_promisc(dsp, new_flags);
623 
624 	if (err != 0) {
625 		dl_err = DL_SYSERR;
626 		dsp->ds_promisc = promisc_saved;
627 		if (promisc_saved == 0)
628 			dls_active_clear(dsp, B_FALSE);
629 		goto failed2;
630 	}
631 
632 	mac_perim_exit(mph);
633 
634 	dlokack(q, mp, DL_PROMISCON_REQ);
635 	return;
636 
637 failed2:
638 	mac_perim_exit(mph);
639 failed:
640 	dlerrorack(q, mp, DL_PROMISCON_REQ, dl_err, (t_uscalar_t)err);
641 }
642 
643 /*
644  * DL_PROMISCOFF_REQ
645  */
646 static void
647 proto_promiscoff_req(dld_str_t *dsp, mblk_t *mp)
648 {
649 	dl_promiscoff_req_t *dlp = (dl_promiscoff_req_t *)mp->b_rptr;
650 	int		err = 0;
651 	t_uscalar_t	dl_err;
652 	uint32_t	new_flags;
653 	queue_t		*q = dsp->ds_wq;
654 	mac_perim_handle_t	mph;
655 
656 	if (MBLKL(mp) < sizeof (dl_promiscoff_req_t)) {
657 		dl_err = DL_BADPRIM;
658 		goto failed;
659 	}
660 
661 	if (dsp->ds_dlstate == DL_UNATTACHED ||
662 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
663 		dl_err = DL_OUTSTATE;
664 		goto failed;
665 	}
666 
667 	mac_perim_enter_by_mh(dsp->ds_mh, &mph);
668 
669 	new_flags = dsp->ds_promisc;
670 	switch (dlp->dl_level) {
671 	case DL_PROMISC_SAP:
672 		if (!(dsp->ds_promisc & DLS_PROMISC_SAP)) {
673 			dl_err = DL_NOTENAB;
674 			goto failed;
675 		}
676 		new_flags &= ~DLS_PROMISC_SAP;
677 		break;
678 
679 	case DL_PROMISC_MULTI:
680 		if (!(dsp->ds_promisc & DLS_PROMISC_MULTI)) {
681 			dl_err = DL_NOTENAB;
682 			goto failed;
683 		}
684 		new_flags &= ~DLS_PROMISC_MULTI;
685 		break;
686 
687 	case DL_PROMISC_PHYS:
688 		if (!(dsp->ds_promisc & DLS_PROMISC_PHYS)) {
689 			dl_err = DL_NOTENAB;
690 			goto failed;
691 		}
692 		new_flags &= ~DLS_PROMISC_PHYS;
693 		break;
694 
695 	default:
696 		dl_err = DL_NOTSUPPORTED;
697 		goto failed;
698 	}
699 
700 	/*
701 	 * Adjust channel promiscuity.
702 	 */
703 	err = dls_promisc(dsp, new_flags);
704 
705 	if (err != 0) {
706 		mac_perim_exit(mph);
707 		dl_err = DL_SYSERR;
708 		goto failed;
709 	}
710 
711 	ASSERT(dsp->ds_promisc == new_flags);
712 	if (dsp->ds_promisc == 0)
713 		dls_active_clear(dsp, B_FALSE);
714 
715 	mac_perim_exit(mph);
716 
717 	dlokack(q, mp, DL_PROMISCOFF_REQ);
718 	return;
719 failed:
720 	dlerrorack(q, mp, DL_PROMISCOFF_REQ, dl_err, (t_uscalar_t)err);
721 }
722 
723 /*
724  * DL_ENABMULTI_REQ
725  */
726 static void
727 proto_enabmulti_req(dld_str_t *dsp, mblk_t *mp)
728 {
729 	dl_enabmulti_req_t *dlp = (dl_enabmulti_req_t *)mp->b_rptr;
730 	int		err = 0;
731 	t_uscalar_t	dl_err;
732 	queue_t		*q = dsp->ds_wq;
733 	mac_perim_handle_t	mph;
734 
735 	if (dsp->ds_dlstate == DL_UNATTACHED ||
736 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
737 		dl_err = DL_OUTSTATE;
738 		goto failed;
739 	}
740 
741 	if (MBLKL(mp) < sizeof (dl_enabmulti_req_t) ||
742 	    !MBLKIN(mp, dlp->dl_addr_offset, dlp->dl_addr_length) ||
743 	    dlp->dl_addr_length != dsp->ds_mip->mi_addr_length) {
744 		dl_err = DL_BADPRIM;
745 		goto failed;
746 	}
747 
748 	mac_perim_enter_by_mh(dsp->ds_mh, &mph);
749 
750 	if ((dsp->ds_dmap == NULL) && (err = dls_active_set(dsp)) != 0) {
751 		dl_err = DL_SYSERR;
752 		goto failed2;
753 	}
754 
755 	err = dls_multicst_add(dsp, mp->b_rptr + dlp->dl_addr_offset);
756 	if (err != 0) {
757 		switch (err) {
758 		case EINVAL:
759 			dl_err = DL_BADADDR;
760 			err = 0;
761 			break;
762 		case ENOSPC:
763 			dl_err = DL_TOOMANY;
764 			err = 0;
765 			break;
766 		default:
767 			dl_err = DL_SYSERR;
768 			break;
769 		}
770 		if (dsp->ds_dmap == NULL)
771 			dls_active_clear(dsp, B_FALSE);
772 		goto failed2;
773 	}
774 
775 	mac_perim_exit(mph);
776 
777 	dlokack(q, mp, DL_ENABMULTI_REQ);
778 	return;
779 
780 failed2:
781 	mac_perim_exit(mph);
782 failed:
783 	dlerrorack(q, mp, DL_ENABMULTI_REQ, dl_err, (t_uscalar_t)err);
784 }
785 
786 /*
787  * DL_DISABMULTI_REQ
788  */
789 static void
790 proto_disabmulti_req(dld_str_t *dsp, mblk_t *mp)
791 {
792 	dl_disabmulti_req_t *dlp = (dl_disabmulti_req_t *)mp->b_rptr;
793 	int		err = 0;
794 	t_uscalar_t	dl_err;
795 	queue_t		*q = dsp->ds_wq;
796 	mac_perim_handle_t	mph;
797 
798 	if (dsp->ds_dlstate == DL_UNATTACHED ||
799 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
800 		dl_err = DL_OUTSTATE;
801 		goto failed;
802 	}
803 
804 	if (MBLKL(mp) < sizeof (dl_disabmulti_req_t) ||
805 	    !MBLKIN(mp, dlp->dl_addr_offset, dlp->dl_addr_length) ||
806 	    dlp->dl_addr_length != dsp->ds_mip->mi_addr_length) {
807 		dl_err = DL_BADPRIM;
808 		goto failed;
809 	}
810 
811 	mac_perim_enter_by_mh(dsp->ds_mh, &mph);
812 	err = dls_multicst_remove(dsp, mp->b_rptr + dlp->dl_addr_offset);
813 	if ((err == 0) && (dsp->ds_dmap == NULL))
814 		dls_active_clear(dsp, B_FALSE);
815 	mac_perim_exit(mph);
816 
817 	if (err != 0) {
818 	switch (err) {
819 		case EINVAL:
820 			dl_err = DL_BADADDR;
821 			err = 0;
822 			break;
823 
824 		case ENOENT:
825 			dl_err = DL_NOTENAB;
826 			err = 0;
827 			break;
828 
829 		default:
830 			dl_err = DL_SYSERR;
831 			break;
832 		}
833 		goto failed;
834 	}
835 	dlokack(q, mp, DL_DISABMULTI_REQ);
836 	return;
837 failed:
838 	dlerrorack(q, mp, DL_DISABMULTI_REQ, dl_err, (t_uscalar_t)err);
839 }
840 
841 /*
842  * DL_PHYS_ADDR_REQ
843  */
844 static void
845 proto_physaddr_req(dld_str_t *dsp, mblk_t *mp)
846 {
847 	dl_phys_addr_req_t *dlp = (dl_phys_addr_req_t *)mp->b_rptr;
848 	queue_t		*q = dsp->ds_wq;
849 	t_uscalar_t	dl_err = 0;
850 	char		*addr = NULL;
851 	uint_t		addr_length;
852 
853 	if (MBLKL(mp) < sizeof (dl_phys_addr_req_t)) {
854 		dl_err = DL_BADPRIM;
855 		goto done;
856 	}
857 
858 	if (dsp->ds_dlstate == DL_UNATTACHED ||
859 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
860 		dl_err = DL_OUTSTATE;
861 		goto done;
862 	}
863 
864 	addr_length = dsp->ds_mip->mi_addr_length;
865 	if (addr_length > 0) {
866 		addr = kmem_alloc(addr_length, KM_SLEEP);
867 		switch (dlp->dl_addr_type) {
868 		case DL_CURR_PHYS_ADDR:
869 			mac_unicast_primary_get(dsp->ds_mh, (uint8_t *)addr);
870 			break;
871 		case DL_FACT_PHYS_ADDR:
872 			bcopy(dsp->ds_mip->mi_unicst_addr, addr, addr_length);
873 			break;
874 		case DL_CURR_DEST_ADDR:
875 			if (!mac_dst_get(dsp->ds_mh, (uint8_t *)addr))
876 				dl_err = DL_NOTSUPPORTED;
877 			break;
878 		default:
879 			dl_err = DL_UNSUPPORTED;
880 		}
881 	}
882 done:
883 	if (dl_err == 0)
884 		dlphysaddrack(q, mp, addr, (t_uscalar_t)addr_length);
885 	else
886 		dlerrorack(q, mp, DL_PHYS_ADDR_REQ, dl_err, 0);
887 	if (addr != NULL)
888 		kmem_free(addr, addr_length);
889 }
890 
891 /*
892  * DL_SET_PHYS_ADDR_REQ
893  */
894 static void
895 proto_setphysaddr_req(dld_str_t *dsp, mblk_t *mp)
896 {
897 	dl_set_phys_addr_req_t *dlp = (dl_set_phys_addr_req_t *)mp->b_rptr;
898 	int		err = 0;
899 	t_uscalar_t	dl_err;
900 	queue_t		*q = dsp->ds_wq;
901 	mac_perim_handle_t	mph;
902 
903 	if (dsp->ds_dlstate == DL_UNATTACHED ||
904 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
905 		dl_err = DL_OUTSTATE;
906 		goto failed;
907 	}
908 
909 	if (MBLKL(mp) < sizeof (dl_set_phys_addr_req_t) ||
910 	    !MBLKIN(mp, dlp->dl_addr_offset, dlp->dl_addr_length) ||
911 	    dlp->dl_addr_length != dsp->ds_mip->mi_addr_length) {
912 		dl_err = DL_BADPRIM;
913 		goto failed;
914 	}
915 
916 	mac_perim_enter_by_mh(dsp->ds_mh, &mph);
917 
918 	if ((err = dls_active_set(dsp)) != 0) {
919 		dl_err = DL_SYSERR;
920 		goto failed2;
921 	}
922 
923 	/*
924 	 * If mac-nospoof is enabled and the link is owned by a
925 	 * non-global zone, changing the mac address is not allowed.
926 	 */
927 	if (dsp->ds_dlp->dl_zid != GLOBAL_ZONEID &&
928 	    mac_protect_enabled(dsp->ds_mch, MPT_MACNOSPOOF)) {
929 		dls_active_clear(dsp, B_FALSE);
930 		err = EACCES;
931 		goto failed2;
932 	}
933 
934 	err = mac_unicast_primary_set(dsp->ds_mh,
935 	    mp->b_rptr + dlp->dl_addr_offset);
936 	if (err != 0) {
937 		switch (err) {
938 		case EINVAL:
939 			dl_err = DL_BADADDR;
940 			err = 0;
941 			break;
942 
943 		default:
944 			dl_err = DL_SYSERR;
945 			break;
946 		}
947 		dls_active_clear(dsp, B_FALSE);
948 		goto failed2;
949 
950 	}
951 
952 	mac_perim_exit(mph);
953 
954 	dlokack(q, mp, DL_SET_PHYS_ADDR_REQ);
955 	return;
956 
957 failed2:
958 	mac_perim_exit(mph);
959 failed:
960 	dlerrorack(q, mp, DL_SET_PHYS_ADDR_REQ, dl_err, (t_uscalar_t)err);
961 }
962 
963 /*
964  * DL_UDQOS_REQ
965  */
966 static void
967 proto_udqos_req(dld_str_t *dsp, mblk_t *mp)
968 {
969 	dl_udqos_req_t *dlp = (dl_udqos_req_t *)mp->b_rptr;
970 	dl_qos_cl_sel1_t *selp;
971 	int		off, len;
972 	t_uscalar_t	dl_err;
973 	queue_t		*q = dsp->ds_wq;
974 
975 	off = dlp->dl_qos_offset;
976 	len = dlp->dl_qos_length;
977 
978 	if (MBLKL(mp) < sizeof (dl_udqos_req_t) || !MBLKIN(mp, off, len)) {
979 		dl_err = DL_BADPRIM;
980 		goto failed;
981 	}
982 
983 	selp = (dl_qos_cl_sel1_t *)(mp->b_rptr + off);
984 	if (selp->dl_qos_type != DL_QOS_CL_SEL1) {
985 		dl_err = DL_BADQOSTYPE;
986 		goto failed;
987 	}
988 
989 	if (selp->dl_priority > (1 << VLAN_PRI_SIZE) - 1 ||
990 	    selp->dl_priority < 0) {
991 		dl_err = DL_BADQOSPARAM;
992 		goto failed;
993 	}
994 
995 	dsp->ds_pri = selp->dl_priority;
996 	dlokack(q, mp, DL_UDQOS_REQ);
997 	return;
998 failed:
999 	dlerrorack(q, mp, DL_UDQOS_REQ, dl_err, 0);
1000 }
1001 
1002 static boolean_t
1003 check_mod_above(queue_t *q, const char *mod)
1004 {
1005 	queue_t		*next_q;
1006 	boolean_t	ret = B_TRUE;
1007 
1008 	claimstr(q);
1009 	next_q = q->q_next;
1010 	if (strcmp(next_q->q_qinfo->qi_minfo->mi_idname, mod) != 0)
1011 		ret = B_FALSE;
1012 	releasestr(q);
1013 	return (ret);
1014 }
1015 
1016 /*
1017  * DL_CAPABILITY_REQ
1018  */
1019 static void
1020 proto_capability_req(dld_str_t *dsp, mblk_t *mp)
1021 {
1022 	dl_capability_req_t *dlp = (dl_capability_req_t *)mp->b_rptr;
1023 	dl_capability_sub_t *sp;
1024 	size_t		size, len;
1025 	offset_t	off, end;
1026 	t_uscalar_t	dl_err;
1027 	queue_t		*q = dsp->ds_wq;
1028 
1029 	if (MBLKL(mp) < sizeof (dl_capability_req_t)) {
1030 		dl_err = DL_BADPRIM;
1031 		goto failed;
1032 	}
1033 
1034 	if (dsp->ds_dlstate == DL_UNATTACHED ||
1035 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
1036 		dl_err = DL_OUTSTATE;
1037 		goto failed;
1038 	}
1039 
1040 	/*
1041 	 * This request is overloaded. If there are no requested capabilities
1042 	 * then we just want to acknowledge with all the capabilities we
1043 	 * support. Otherwise we enable the set of capabilities requested.
1044 	 */
1045 	if (dlp->dl_sub_length == 0) {
1046 		proto_capability_advertise(dsp, mp);
1047 		return;
1048 	}
1049 
1050 	if (!MBLKIN(mp, dlp->dl_sub_offset, dlp->dl_sub_length)) {
1051 		dl_err = DL_BADPRIM;
1052 		goto failed;
1053 	}
1054 
1055 	dlp->dl_primitive = DL_CAPABILITY_ACK;
1056 
1057 	off = dlp->dl_sub_offset;
1058 	len = dlp->dl_sub_length;
1059 
1060 	/*
1061 	 * Walk the list of capabilities to be enabled.
1062 	 */
1063 	for (end = off + len; off < end; ) {
1064 		sp = (dl_capability_sub_t *)(mp->b_rptr + off);
1065 		size = sizeof (dl_capability_sub_t) + sp->dl_length;
1066 
1067 		if (off + size > end ||
1068 		    !IS_P2ALIGNED(off, sizeof (uint32_t))) {
1069 			dl_err = DL_BADPRIM;
1070 			goto failed;
1071 		}
1072 
1073 		switch (sp->dl_cap) {
1074 		/*
1075 		 * TCP/IP checksum offload to hardware.
1076 		 */
1077 		case DL_CAPAB_HCKSUM: {
1078 			dl_capab_hcksum_t *hcksump;
1079 			dl_capab_hcksum_t hcksum;
1080 
1081 			hcksump = (dl_capab_hcksum_t *)&sp[1];
1082 			/*
1083 			 * Copy for alignment.
1084 			 */
1085 			bcopy(hcksump, &hcksum, sizeof (dl_capab_hcksum_t));
1086 			dlcapabsetqid(&(hcksum.hcksum_mid), dsp->ds_rq);
1087 			bcopy(&hcksum, hcksump, sizeof (dl_capab_hcksum_t));
1088 			break;
1089 		}
1090 
1091 		case DL_CAPAB_DLD: {
1092 			dl_capab_dld_t	*dldp;
1093 			dl_capab_dld_t	dld;
1094 
1095 			dldp = (dl_capab_dld_t *)&sp[1];
1096 			/*
1097 			 * Copy for alignment.
1098 			 */
1099 			bcopy(dldp, &dld, sizeof (dl_capab_dld_t));
1100 			dlcapabsetqid(&(dld.dld_mid), dsp->ds_rq);
1101 			bcopy(&dld, dldp, sizeof (dl_capab_dld_t));
1102 			break;
1103 		}
1104 		default:
1105 			break;
1106 		}
1107 		off += size;
1108 	}
1109 	qreply(q, mp);
1110 	return;
1111 failed:
1112 	dlerrorack(q, mp, DL_CAPABILITY_REQ, dl_err, 0);
1113 }
1114 
1115 /*
1116  * DL_NOTIFY_REQ
1117  */
1118 static void
1119 proto_notify_req(dld_str_t *dsp, mblk_t *mp)
1120 {
1121 	dl_notify_req_t	*dlp = (dl_notify_req_t *)mp->b_rptr;
1122 	t_uscalar_t	dl_err;
1123 	queue_t		*q = dsp->ds_wq;
1124 	uint_t		note =
1125 	    DL_NOTE_PROMISC_ON_PHYS |
1126 	    DL_NOTE_PROMISC_OFF_PHYS |
1127 	    DL_NOTE_PHYS_ADDR |
1128 	    DL_NOTE_LINK_UP |
1129 	    DL_NOTE_LINK_DOWN |
1130 	    DL_NOTE_CAPAB_RENEG |
1131 	    DL_NOTE_FASTPATH_FLUSH |
1132 	    DL_NOTE_SPEED |
1133 	    DL_NOTE_SDU_SIZE|
1134 	    DL_NOTE_SDU_SIZE2|
1135 	    DL_NOTE_ALLOWED_IPS;
1136 
1137 	if (MBLKL(mp) < sizeof (dl_notify_req_t)) {
1138 		dl_err = DL_BADPRIM;
1139 		goto failed;
1140 	}
1141 
1142 	if (dsp->ds_dlstate == DL_UNATTACHED ||
1143 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
1144 		dl_err = DL_OUTSTATE;
1145 		goto failed;
1146 	}
1147 
1148 	note &= ~(mac_no_notification(dsp->ds_mh));
1149 
1150 	/*
1151 	 * Cache the notifications that are being enabled.
1152 	 */
1153 	dsp->ds_notifications = dlp->dl_notifications & note;
1154 	/*
1155 	 * The ACK carries all notifications regardless of which set is
1156 	 * being enabled.
1157 	 */
1158 	dlnotifyack(q, mp, note);
1159 
1160 	/*
1161 	 * Generate DL_NOTIFY_IND messages for each enabled notification.
1162 	 */
1163 	if (dsp->ds_notifications != 0) {
1164 		dld_str_notify_ind(dsp);
1165 	}
1166 	return;
1167 failed:
1168 	dlerrorack(q, mp, DL_NOTIFY_REQ, dl_err, 0);
1169 }
1170 
1171 /*
1172  * DL_UINTDATA_REQ
1173  */
1174 void
1175 proto_unitdata_req(dld_str_t *dsp, mblk_t *mp)
1176 {
1177 	queue_t			*q = dsp->ds_wq;
1178 	dl_unitdata_req_t	*dlp = (dl_unitdata_req_t *)mp->b_rptr;
1179 	off_t			off;
1180 	size_t			len, size;
1181 	const uint8_t		*addr;
1182 	uint16_t		sap;
1183 	uint_t			addr_length;
1184 	mblk_t			*bp, *payload;
1185 	uint32_t		start, stuff, end, value, flags;
1186 	t_uscalar_t		dl_err;
1187 	uint_t			max_sdu;
1188 
1189 	if (MBLKL(mp) < sizeof (dl_unitdata_req_t) || mp->b_cont == NULL) {
1190 		dlerrorack(q, mp, DL_UNITDATA_REQ, DL_BADPRIM, 0);
1191 		return;
1192 	}
1193 
1194 	mutex_enter(&dsp->ds_lock);
1195 	if (dsp->ds_dlstate != DL_IDLE) {
1196 		mutex_exit(&dsp->ds_lock);
1197 		dlerrorack(q, mp, DL_UNITDATA_REQ, DL_OUTSTATE, 0);
1198 		return;
1199 	}
1200 	DLD_DATATHR_INC(dsp);
1201 	mutex_exit(&dsp->ds_lock);
1202 
1203 	addr_length = dsp->ds_mip->mi_addr_length;
1204 
1205 	off = dlp->dl_dest_addr_offset;
1206 	len = dlp->dl_dest_addr_length;
1207 
1208 	if (!MBLKIN(mp, off, len) || !IS_P2ALIGNED(off, sizeof (uint16_t))) {
1209 		dl_err = DL_BADPRIM;
1210 		goto failed;
1211 	}
1212 
1213 	if (len != addr_length + sizeof (uint16_t)) {
1214 		dl_err = DL_BADADDR;
1215 		goto failed;
1216 	}
1217 
1218 	addr = mp->b_rptr + off;
1219 	sap = *(uint16_t *)(mp->b_rptr + off + addr_length);
1220 
1221 	/*
1222 	 * Check the length of the packet and the block types.
1223 	 */
1224 	size = 0;
1225 	payload = mp->b_cont;
1226 	for (bp = payload; bp != NULL; bp = bp->b_cont) {
1227 		if (DB_TYPE(bp) != M_DATA)
1228 			goto baddata;
1229 
1230 		size += MBLKL(bp);
1231 	}
1232 
1233 	mac_sdu_get(dsp->ds_mh, NULL, &max_sdu);
1234 	if (size > max_sdu)
1235 		goto baddata;
1236 
1237 	/*
1238 	 * Build a packet header.
1239 	 */
1240 	if ((bp = dls_header(dsp, addr, sap, dlp->dl_priority.dl_max,
1241 	    &payload)) == NULL) {
1242 		dl_err = DL_BADADDR;
1243 		goto failed;
1244 	}
1245 
1246 	/*
1247 	 * We no longer need the M_PROTO header, so free it.
1248 	 */
1249 	freeb(mp);
1250 
1251 	/*
1252 	 * Transfer the checksum offload information if it is present.
1253 	 */
1254 	hcksum_retrieve(payload, NULL, NULL, &start, &stuff, &end, &value,
1255 	    &flags);
1256 	(void) hcksum_assoc(bp, NULL, NULL, start, stuff, end, value, flags, 0);
1257 
1258 	/*
1259 	 * Link the payload onto the new header.
1260 	 */
1261 	ASSERT(bp->b_cont == NULL);
1262 	bp->b_cont = payload;
1263 
1264 	/*
1265 	 * No lock can be held across modules and putnext()'s,
1266 	 * which can happen here with the call from DLD_TX().
1267 	 */
1268 	if (DLD_TX(dsp, bp, 0, 0) != NULL) {
1269 		/* flow-controlled */
1270 		DLD_SETQFULL(dsp);
1271 	}
1272 	DLD_DATATHR_DCR(dsp);
1273 	return;
1274 
1275 failed:
1276 	dlerrorack(q, mp, DL_UNITDATA_REQ, dl_err, 0);
1277 	DLD_DATATHR_DCR(dsp);
1278 	return;
1279 
1280 baddata:
1281 	dluderrorind(q, mp, (void *)addr, len, DL_BADDATA, 0);
1282 	DLD_DATATHR_DCR(dsp);
1283 }
1284 
1285 /*
1286  * DL_PASSIVE_REQ
1287  */
1288 static void
1289 proto_passive_req(dld_str_t *dsp, mblk_t *mp)
1290 {
1291 	t_uscalar_t dl_err;
1292 
1293 	/*
1294 	 * If we've already become active by issuing an active primitive,
1295 	 * then it's too late to try to become passive.
1296 	 */
1297 	if (dsp->ds_passivestate == DLD_ACTIVE) {
1298 		dl_err = DL_OUTSTATE;
1299 		goto failed;
1300 	}
1301 
1302 	if (MBLKL(mp) < sizeof (dl_passive_req_t)) {
1303 		dl_err = DL_BADPRIM;
1304 		goto failed;
1305 	}
1306 
1307 	dsp->ds_passivestate = DLD_PASSIVE;
1308 	dlokack(dsp->ds_wq, mp, DL_PASSIVE_REQ);
1309 	return;
1310 failed:
1311 	dlerrorack(dsp->ds_wq, mp, DL_PASSIVE_REQ, dl_err, 0);
1312 }
1313 
1314 
1315 /*
1316  * Catch-all handler.
1317  */
1318 static void
1319 proto_req(dld_str_t *dsp, mblk_t *mp)
1320 {
1321 	union DL_primitives	*dlp = (union DL_primitives *)mp->b_rptr;
1322 
1323 	dlerrorack(dsp->ds_wq, mp, dlp->dl_primitive, DL_UNSUPPORTED, 0);
1324 }
1325 
1326 static int
1327 dld_capab_perim(dld_str_t *dsp, void *data, uint_t flags)
1328 {
1329 	switch (flags) {
1330 	case DLD_ENABLE:
1331 		mac_perim_enter_by_mh(dsp->ds_mh, (mac_perim_handle_t *)data);
1332 		return (0);
1333 
1334 	case DLD_DISABLE:
1335 		mac_perim_exit((mac_perim_handle_t)data);
1336 		return (0);
1337 
1338 	case DLD_QUERY:
1339 		return (mac_perim_held(dsp->ds_mh));
1340 	}
1341 	return (0);
1342 }
1343 
1344 static int
1345 dld_capab_direct(dld_str_t *dsp, void *data, uint_t flags)
1346 {
1347 	dld_capab_direct_t	*direct = data;
1348 
1349 	ASSERT(MAC_PERIM_HELD(dsp->ds_mh));
1350 
1351 	switch (flags) {
1352 	case DLD_ENABLE:
1353 		dls_rx_set(dsp, (dls_rx_t)direct->di_rx_cf,
1354 		    direct->di_rx_ch);
1355 
1356 		direct->di_tx_df = (uintptr_t)str_mdata_fastpath_put;
1357 		direct->di_tx_dh = dsp;
1358 		direct->di_tx_cb_df = (uintptr_t)mac_client_tx_notify;
1359 		direct->di_tx_cb_dh = dsp->ds_mch;
1360 		direct->di_tx_fctl_df = (uintptr_t)mac_tx_is_flow_blocked;
1361 		direct->di_tx_fctl_dh = dsp->ds_mch;
1362 
1363 		dsp->ds_direct = B_TRUE;
1364 
1365 		return (0);
1366 
1367 	case DLD_DISABLE:
1368 		dls_rx_set(dsp, (dsp->ds_mode == DLD_FASTPATH) ?
1369 		    dld_str_rx_fastpath : dld_str_rx_unitdata, (void *)dsp);
1370 		dsp->ds_direct = B_FALSE;
1371 
1372 		return (0);
1373 	}
1374 	return (ENOTSUP);
1375 }
1376 
1377 /*
1378  * dld_capab_poll_enable()
1379  *
1380  * This function is misnamed. All polling  and fanouts are run out of the
1381  * lower mac (in case of VNIC and the only mac in case of NICs). The
1382  * availability of Rx ring and promiscous mode is all taken care between
1383  * the soft ring set (mac_srs), the Rx ring, and S/W classifier. Any
1384  * fanout necessary is done by the soft rings that are part of the
1385  * mac_srs (by default mac_srs sends the packets up via a TCP and
1386  * non TCP soft ring).
1387  *
1388  * The mac_srs (or its associated soft rings) always store the ill_rx_ring
1389  * (the cookie returned when they registered with IP during plumb) as their
1390  * 2nd argument which is passed up as mac_resource_handle_t. The upcall
1391  * function and 1st argument is what the caller registered when they
1392  * called mac_rx_classify_flow_add() to register the flow. For VNIC,
1393  * the function is vnic_rx and argument is vnic_t. For regular NIC
1394  * case, it mac_rx_default and mac_handle_t. As explained above, the
1395  * mac_srs (or its soft ring) will add the ill_rx_ring (mac_resource_handle_t)
1396  * from its stored 2nd argument.
1397  */
1398 static int
1399 dld_capab_poll_enable(dld_str_t *dsp, dld_capab_poll_t *poll)
1400 {
1401 	if (dsp->ds_polling)
1402 		return (EINVAL);
1403 
1404 	if ((dld_opt & DLD_OPT_NO_POLL) != 0 || dsp->ds_mode == DLD_RAW)
1405 		return (ENOTSUP);
1406 
1407 	/*
1408 	 * Enable client polling if and only if DLS bypass is possible.
1409 	 * Special cases like VLANs need DLS processing in the Rx data path.
1410 	 * In such a case we can neither allow the client (IP) to directly
1411 	 * poll the softring (since DLS processing hasn't been done) nor can
1412 	 * we allow DLS bypass.
1413 	 */
1414 	if (!mac_rx_bypass_set(dsp->ds_mch, dsp->ds_rx, dsp->ds_rx_arg))
1415 		return (ENOTSUP);
1416 
1417 	/*
1418 	 * Register soft ring resources. This will come in handy later if
1419 	 * the user decides to modify CPU bindings to use more CPUs for the
1420 	 * device in which case we will switch to fanout using soft rings.
1421 	 */
1422 	mac_resource_set_common(dsp->ds_mch,
1423 	    (mac_resource_add_t)poll->poll_ring_add_cf,
1424 	    (mac_resource_remove_t)poll->poll_ring_remove_cf,
1425 	    (mac_resource_quiesce_t)poll->poll_ring_quiesce_cf,
1426 	    (mac_resource_restart_t)poll->poll_ring_restart_cf,
1427 	    (mac_resource_bind_t)poll->poll_ring_bind_cf,
1428 	    poll->poll_ring_ch);
1429 
1430 	mac_client_poll_enable(dsp->ds_mch);
1431 
1432 	dsp->ds_polling = B_TRUE;
1433 	return (0);
1434 }
1435 
1436 /* ARGSUSED */
1437 static int
1438 dld_capab_poll_disable(dld_str_t *dsp, dld_capab_poll_t *poll)
1439 {
1440 	if (!dsp->ds_polling)
1441 		return (EINVAL);
1442 
1443 	mac_client_poll_disable(dsp->ds_mch);
1444 	mac_resource_set(dsp->ds_mch, NULL, NULL);
1445 
1446 	dsp->ds_polling = B_FALSE;
1447 	return (0);
1448 }
1449 
1450 static int
1451 dld_capab_poll(dld_str_t *dsp, void *data, uint_t flags)
1452 {
1453 	dld_capab_poll_t	*poll = data;
1454 
1455 	ASSERT(MAC_PERIM_HELD(dsp->ds_mh));
1456 
1457 	switch (flags) {
1458 	case DLD_ENABLE:
1459 		return (dld_capab_poll_enable(dsp, poll));
1460 	case DLD_DISABLE:
1461 		return (dld_capab_poll_disable(dsp, poll));
1462 	}
1463 	return (ENOTSUP);
1464 }
1465 
1466 static int
1467 dld_capab_lso(dld_str_t *dsp, void *data, uint_t flags)
1468 {
1469 	dld_capab_lso_t		*lso = data;
1470 
1471 	ASSERT(MAC_PERIM_HELD(dsp->ds_mh));
1472 
1473 	switch (flags) {
1474 	case DLD_ENABLE: {
1475 		mac_capab_lso_t		mac_lso;
1476 
1477 		/*
1478 		 * Check if LSO is supported on this MAC & enable LSO
1479 		 * accordingly.
1480 		 */
1481 		if (mac_capab_get(dsp->ds_mh, MAC_CAPAB_LSO, &mac_lso)) {
1482 			lso->lso_max = mac_lso.lso_basic_tcp_ipv4.lso_max;
1483 			lso->lso_flags = 0;
1484 			/* translate the flag for mac clients */
1485 			if ((mac_lso.lso_flags & LSO_TX_BASIC_TCP_IPV4) != 0)
1486 				lso->lso_flags |= DLD_LSO_BASIC_TCP_IPV4;
1487 			dsp->ds_lso = B_TRUE;
1488 			dsp->ds_lso_max = lso->lso_max;
1489 		} else {
1490 			dsp->ds_lso = B_FALSE;
1491 			dsp->ds_lso_max = 0;
1492 			return (ENOTSUP);
1493 		}
1494 		return (0);
1495 	}
1496 	case DLD_DISABLE: {
1497 		dsp->ds_lso = B_FALSE;
1498 		dsp->ds_lso_max = 0;
1499 		return (0);
1500 	}
1501 	}
1502 	return (ENOTSUP);
1503 }
1504 
1505 static int
1506 dld_capab(dld_str_t *dsp, uint_t type, void *data, uint_t flags)
1507 {
1508 	int	err;
1509 
1510 	/*
1511 	 * Don't enable direct callback capabilities unless the caller is
1512 	 * the IP client. When a module is inserted in a stream (_I_INSERT)
1513 	 * the stack initiates capability disable, but due to races, the
1514 	 * module insertion may complete before the capability disable
1515 	 * completes. So we limit the check to DLD_ENABLE case.
1516 	 */
1517 	if ((flags == DLD_ENABLE && type != DLD_CAPAB_PERIM) &&
1518 	    (dsp->ds_sap != ETHERTYPE_IP ||
1519 	    !check_mod_above(dsp->ds_rq, "ip"))) {
1520 		return (ENOTSUP);
1521 	}
1522 
1523 	switch (type) {
1524 	case DLD_CAPAB_DIRECT:
1525 		err = dld_capab_direct(dsp, data, flags);
1526 		break;
1527 
1528 	case DLD_CAPAB_POLL:
1529 		err =  dld_capab_poll(dsp, data, flags);
1530 		break;
1531 
1532 	case DLD_CAPAB_PERIM:
1533 		err = dld_capab_perim(dsp, data, flags);
1534 		break;
1535 
1536 	case DLD_CAPAB_LSO:
1537 		err = dld_capab_lso(dsp, data, flags);
1538 		break;
1539 
1540 	default:
1541 		err = ENOTSUP;
1542 		break;
1543 	}
1544 
1545 	return (err);
1546 }
1547 
1548 /*
1549  * DL_CAPABILITY_ACK/DL_ERROR_ACK
1550  */
1551 static void
1552 proto_capability_advertise(dld_str_t *dsp, mblk_t *mp)
1553 {
1554 	dl_capability_ack_t	*dlap;
1555 	dl_capability_sub_t	*dlsp;
1556 	size_t			subsize;
1557 	dl_capab_dld_t		dld;
1558 	dl_capab_hcksum_t	hcksum;
1559 	dl_capab_zerocopy_t	zcopy;
1560 	dl_capab_vrrp_t		vrrp;
1561 	mac_capab_vrrp_t	vrrp_capab;
1562 	uint8_t			*ptr;
1563 	queue_t			*q = dsp->ds_wq;
1564 	mblk_t			*mp1;
1565 	boolean_t		hcksum_capable = B_FALSE;
1566 	boolean_t		zcopy_capable = B_FALSE;
1567 	boolean_t		dld_capable = B_FALSE;
1568 	boolean_t		vrrp_capable = B_FALSE;
1569 
1570 	/*
1571 	 * Initially assume no capabilities.
1572 	 */
1573 	subsize = 0;
1574 
1575 	/*
1576 	 * Check if checksum offload is supported on this MAC.
1577 	 */
1578 	bzero(&hcksum, sizeof (dl_capab_hcksum_t));
1579 	if (mac_capab_get(dsp->ds_mh, MAC_CAPAB_HCKSUM,
1580 	    &hcksum.hcksum_txflags)) {
1581 		if (hcksum.hcksum_txflags != 0) {
1582 			hcksum_capable = B_TRUE;
1583 			subsize += sizeof (dl_capability_sub_t) +
1584 			    sizeof (dl_capab_hcksum_t);
1585 		}
1586 	}
1587 
1588 	/*
1589 	 * Check if zerocopy is supported on this interface.
1590 	 * If advertising DL_CAPAB_ZEROCOPY has not been explicitly disabled
1591 	 * then reserve space for that capability.
1592 	 */
1593 	if (!mac_capab_get(dsp->ds_mh, MAC_CAPAB_NO_ZCOPY, NULL) &&
1594 	    !(dld_opt & DLD_OPT_NO_ZEROCOPY)) {
1595 		zcopy_capable = B_TRUE;
1596 		subsize += sizeof (dl_capability_sub_t) +
1597 		    sizeof (dl_capab_zerocopy_t);
1598 	}
1599 
1600 	/*
1601 	 * Direct capability negotiation interface between IP and DLD
1602 	 */
1603 	if (dsp->ds_sap == ETHERTYPE_IP && check_mod_above(dsp->ds_rq, "ip")) {
1604 		dld_capable = B_TRUE;
1605 		subsize += sizeof (dl_capability_sub_t) +
1606 		    sizeof (dl_capab_dld_t);
1607 	}
1608 
1609 	/*
1610 	 * Check if vrrp is supported on this interface. If so, reserve
1611 	 * space for that capability.
1612 	 */
1613 	if (mac_capab_get(dsp->ds_mh, MAC_CAPAB_VRRP, &vrrp_capab)) {
1614 		vrrp_capable = B_TRUE;
1615 		subsize += sizeof (dl_capability_sub_t) +
1616 		    sizeof (dl_capab_vrrp_t);
1617 	}
1618 
1619 	/*
1620 	 * If there are no capabilities to advertise or if we
1621 	 * can't allocate a response, send a DL_ERROR_ACK.
1622 	 */
1623 	if ((mp1 = reallocb(mp,
1624 	    sizeof (dl_capability_ack_t) + subsize, 0)) == NULL) {
1625 		dlerrorack(q, mp, DL_CAPABILITY_REQ, DL_NOTSUPPORTED, 0);
1626 		return;
1627 	}
1628 
1629 	mp = mp1;
1630 	DB_TYPE(mp) = M_PROTO;
1631 	mp->b_wptr = mp->b_rptr + sizeof (dl_capability_ack_t) + subsize;
1632 	bzero(mp->b_rptr, MBLKL(mp));
1633 	dlap = (dl_capability_ack_t *)mp->b_rptr;
1634 	dlap->dl_primitive = DL_CAPABILITY_ACK;
1635 	dlap->dl_sub_offset = sizeof (dl_capability_ack_t);
1636 	dlap->dl_sub_length = subsize;
1637 	ptr = (uint8_t *)&dlap[1];
1638 
1639 	/*
1640 	 * TCP/IP checksum offload.
1641 	 */
1642 	if (hcksum_capable) {
1643 		dlsp = (dl_capability_sub_t *)ptr;
1644 
1645 		dlsp->dl_cap = DL_CAPAB_HCKSUM;
1646 		dlsp->dl_length = sizeof (dl_capab_hcksum_t);
1647 		ptr += sizeof (dl_capability_sub_t);
1648 
1649 		hcksum.hcksum_version = HCKSUM_VERSION_1;
1650 		dlcapabsetqid(&(hcksum.hcksum_mid), dsp->ds_rq);
1651 		bcopy(&hcksum, ptr, sizeof (dl_capab_hcksum_t));
1652 		ptr += sizeof (dl_capab_hcksum_t);
1653 	}
1654 
1655 	/*
1656 	 * Zero copy
1657 	 */
1658 	if (zcopy_capable) {
1659 		dlsp = (dl_capability_sub_t *)ptr;
1660 
1661 		dlsp->dl_cap = DL_CAPAB_ZEROCOPY;
1662 		dlsp->dl_length = sizeof (dl_capab_zerocopy_t);
1663 		ptr += sizeof (dl_capability_sub_t);
1664 
1665 		bzero(&zcopy, sizeof (dl_capab_zerocopy_t));
1666 		zcopy.zerocopy_version = ZEROCOPY_VERSION_1;
1667 		zcopy.zerocopy_flags = DL_CAPAB_VMSAFE_MEM;
1668 
1669 		dlcapabsetqid(&(zcopy.zerocopy_mid), dsp->ds_rq);
1670 		bcopy(&zcopy, ptr, sizeof (dl_capab_zerocopy_t));
1671 		ptr += sizeof (dl_capab_zerocopy_t);
1672 	}
1673 
1674 	/*
1675 	 * VRRP capability negotiation
1676 	 */
1677 	if (vrrp_capable) {
1678 		dlsp = (dl_capability_sub_t *)ptr;
1679 		dlsp->dl_cap = DL_CAPAB_VRRP;
1680 		dlsp->dl_length = sizeof (dl_capab_vrrp_t);
1681 		ptr += sizeof (dl_capability_sub_t);
1682 
1683 		bzero(&vrrp, sizeof (dl_capab_vrrp_t));
1684 		vrrp.vrrp_af = vrrp_capab.mcv_af;
1685 		bcopy(&vrrp, ptr, sizeof (dl_capab_vrrp_t));
1686 		ptr += sizeof (dl_capab_vrrp_t);
1687 	}
1688 
1689 	/*
1690 	 * Direct capability negotiation interface between IP and DLD.
1691 	 * Refer to dld.h for details.
1692 	 */
1693 	if (dld_capable) {
1694 		dlsp = (dl_capability_sub_t *)ptr;
1695 		dlsp->dl_cap = DL_CAPAB_DLD;
1696 		dlsp->dl_length = sizeof (dl_capab_dld_t);
1697 		ptr += sizeof (dl_capability_sub_t);
1698 
1699 		bzero(&dld, sizeof (dl_capab_dld_t));
1700 		dld.dld_version = DLD_CURRENT_VERSION;
1701 		dld.dld_capab = (uintptr_t)dld_capab;
1702 		dld.dld_capab_handle = (uintptr_t)dsp;
1703 
1704 		dlcapabsetqid(&(dld.dld_mid), dsp->ds_rq);
1705 		bcopy(&dld, ptr, sizeof (dl_capab_dld_t));
1706 		ptr += sizeof (dl_capab_dld_t);
1707 	}
1708 
1709 	ASSERT(ptr == mp->b_rptr + sizeof (dl_capability_ack_t) + subsize);
1710 	qreply(q, mp);
1711 }
1712 
1713 /*
1714  * Disable any enabled capabilities.
1715  */
1716 void
1717 dld_capabilities_disable(dld_str_t *dsp)
1718 {
1719 	if (dsp->ds_polling)
1720 		(void) dld_capab_poll_disable(dsp, NULL);
1721 }
1722