xref: /titanic_51/usr/src/uts/common/io/dld/dld_proto.c (revision 7f0b8309074a5d8e9f9d8ffe7aad7bb0b1ee6b1f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * Data-Link Driver
28  */
29 #include <sys/sysmacros.h>
30 #include <sys/strsubr.h>
31 #include <sys/strsun.h>
32 #include <sys/vlan.h>
33 #include <sys/dld_impl.h>
34 #include <sys/mac_client.h>
35 #include <sys/mac_client_impl.h>
36 #include <sys/mac_client_priv.h>
37 
38 typedef void proto_reqfunc_t(dld_str_t *, mblk_t *);
39 
40 static proto_reqfunc_t proto_info_req, proto_attach_req, proto_detach_req,
41     proto_bind_req, proto_unbind_req, proto_promiscon_req, proto_promiscoff_req,
42     proto_enabmulti_req, proto_disabmulti_req, proto_physaddr_req,
43     proto_setphysaddr_req, proto_udqos_req, proto_req, proto_capability_req,
44     proto_notify_req, proto_passive_req;
45 
46 static void proto_capability_advertise(dld_str_t *, mblk_t *);
47 static int dld_capab_poll_disable(dld_str_t *, dld_capab_poll_t *);
48 
49 #define	DL_ACK_PENDING(state) \
50 	((state) == DL_ATTACH_PENDING || \
51 	(state) == DL_DETACH_PENDING || \
52 	(state) == DL_BIND_PENDING || \
53 	(state) == DL_UNBIND_PENDING)
54 
55 /*
56  * Process a DLPI protocol message.
57  * The primitives DL_BIND_REQ, DL_ENABMULTI_REQ, DL_PROMISCON_REQ,
58  * DL_SET_PHYS_ADDR_REQ put the data link below our dld_str_t into an
59  * 'active' state. The primitive DL_PASSIVE_REQ marks our dld_str_t
60  * as 'passive' and forbids it from being subsequently made 'active'
61  * by the above primitives.
62  */
63 void
64 dld_proto(dld_str_t *dsp, mblk_t *mp)
65 {
66 	t_uscalar_t		prim;
67 
68 	if (MBLKL(mp) < sizeof (t_uscalar_t)) {
69 		freemsg(mp);
70 		return;
71 	}
72 	prim = ((union DL_primitives *)mp->b_rptr)->dl_primitive;
73 
74 	switch (prim) {
75 	case DL_INFO_REQ:
76 		proto_info_req(dsp, mp);
77 		break;
78 	case DL_BIND_REQ:
79 		proto_bind_req(dsp, mp);
80 		break;
81 	case DL_UNBIND_REQ:
82 		proto_unbind_req(dsp, mp);
83 		break;
84 	case DL_UNITDATA_REQ:
85 		proto_unitdata_req(dsp, mp);
86 		break;
87 	case DL_UDQOS_REQ:
88 		proto_udqos_req(dsp, mp);
89 		break;
90 	case DL_ATTACH_REQ:
91 		proto_attach_req(dsp, mp);
92 		break;
93 	case DL_DETACH_REQ:
94 		proto_detach_req(dsp, mp);
95 		break;
96 	case DL_ENABMULTI_REQ:
97 		proto_enabmulti_req(dsp, mp);
98 		break;
99 	case DL_DISABMULTI_REQ:
100 		proto_disabmulti_req(dsp, mp);
101 		break;
102 	case DL_PROMISCON_REQ:
103 		proto_promiscon_req(dsp, mp);
104 		break;
105 	case DL_PROMISCOFF_REQ:
106 		proto_promiscoff_req(dsp, mp);
107 		break;
108 	case DL_PHYS_ADDR_REQ:
109 		proto_physaddr_req(dsp, mp);
110 		break;
111 	case DL_SET_PHYS_ADDR_REQ:
112 		proto_setphysaddr_req(dsp, mp);
113 		break;
114 	case DL_NOTIFY_REQ:
115 		proto_notify_req(dsp, mp);
116 		break;
117 	case DL_CAPABILITY_REQ:
118 		proto_capability_req(dsp, mp);
119 		break;
120 	case DL_PASSIVE_REQ:
121 		proto_passive_req(dsp, mp);
122 		break;
123 	default:
124 		proto_req(dsp, mp);
125 		break;
126 	}
127 }
128 
/*
 * Arithmetic negation of x.  The whole expansion is parenthesized so that it
 * stays a single operand no matter what expression it is pasted into.
 */
#define	NEG(x)	(-(x))
130 typedef struct dl_info_ack_wrapper {
131 	dl_info_ack_t		dl_info;
132 	uint8_t			dl_addr[MAXMACADDRLEN + sizeof (uint16_t)];
133 	uint8_t			dl_brdcst_addr[MAXMACADDRLEN];
134 	dl_qos_cl_range1_t	dl_qos_range1;
135 	dl_qos_cl_sel1_t	dl_qos_sel1;
136 } dl_info_ack_wrapper_t;
137 
138 /*
139  * DL_INFO_REQ
140  */
141 static void
142 proto_info_req(dld_str_t *dsp, mblk_t *mp)
143 {
144 	dl_info_ack_wrapper_t	*dlwp;
145 	dl_info_ack_t		*dlp;
146 	dl_qos_cl_sel1_t	*selp;
147 	dl_qos_cl_range1_t	*rangep;
148 	uint8_t			*addr;
149 	uint8_t			*brdcst_addr;
150 	uint_t			addr_length;
151 	uint_t			sap_length;
152 	mac_info_t		minfo;
153 	mac_info_t		*minfop;
154 	queue_t			*q = dsp->ds_wq;
155 
156 	/*
157 	 * Swap the request message for one large enough to contain the
158 	 * wrapper structure defined above.
159 	 */
160 	if ((mp = mexchange(q, mp, sizeof (dl_info_ack_wrapper_t),
161 	    M_PCPROTO, 0)) == NULL)
162 		return;
163 
164 	bzero(mp->b_rptr, sizeof (dl_info_ack_wrapper_t));
165 	dlwp = (dl_info_ack_wrapper_t *)mp->b_rptr;
166 
167 	dlp = &(dlwp->dl_info);
168 	ASSERT(dlp == (dl_info_ack_t *)mp->b_rptr);
169 
170 	dlp->dl_primitive = DL_INFO_ACK;
171 
172 	/*
173 	 * Set up the sub-structure pointers.
174 	 */
175 	addr = dlwp->dl_addr;
176 	brdcst_addr = dlwp->dl_brdcst_addr;
177 	rangep = &(dlwp->dl_qos_range1);
178 	selp = &(dlwp->dl_qos_sel1);
179 
180 	/*
181 	 * This driver supports only version 2 connectionless DLPI provider
182 	 * nodes.
183 	 */
184 	dlp->dl_service_mode = DL_CLDLS;
185 	dlp->dl_version = DL_VERSION_2;
186 
187 	/*
188 	 * Set the style of the provider
189 	 */
190 	dlp->dl_provider_style = dsp->ds_style;
191 	ASSERT(dlp->dl_provider_style == DL_STYLE1 ||
192 	    dlp->dl_provider_style == DL_STYLE2);
193 
194 	/*
195 	 * Set the current DLPI state.
196 	 */
197 	dlp->dl_current_state = dsp->ds_dlstate;
198 
199 	/*
200 	 * Gratuitously set the media type. This is to deal with modules
201 	 * that assume the media type is known prior to DL_ATTACH_REQ
202 	 * being completed.
203 	 */
204 	dlp->dl_mac_type = DL_ETHER;
205 
206 	/*
207 	 * If the stream is not at least attached we try to retrieve the
208 	 * mac_info using mac_info_get()
209 	 */
210 	if (dsp->ds_dlstate == DL_UNATTACHED ||
211 	    dsp->ds_dlstate == DL_ATTACH_PENDING ||
212 	    dsp->ds_dlstate == DL_DETACH_PENDING) {
213 		if (!mac_info_get(ddi_major_to_name(dsp->ds_major), &minfo)) {
214 			/*
215 			 * Cannot find mac_info. giving up.
216 			 */
217 			goto done;
218 		}
219 		minfop = &minfo;
220 	} else {
221 		minfop = (mac_info_t *)dsp->ds_mip;
222 		/* We can only get the sdu if we're attached. */
223 		mac_sdu_get(dsp->ds_mh, &dlp->dl_min_sdu, &dlp->dl_max_sdu);
224 	}
225 
226 	/*
227 	 * Set the media type (properly this time).
228 	 */
229 	if (dsp->ds_native)
230 		dlp->dl_mac_type = minfop->mi_nativemedia;
231 	else
232 		dlp->dl_mac_type = minfop->mi_media;
233 
234 	/*
235 	 * Set the DLSAP length. We only support 16 bit values and they
236 	 * appear after the MAC address portion of DLSAP addresses.
237 	 */
238 	sap_length = sizeof (uint16_t);
239 	dlp->dl_sap_length = NEG(sap_length);
240 
241 	addr_length = minfop->mi_addr_length;
242 
243 	/*
244 	 * Copy in the media broadcast address.
245 	 */
246 	if (minfop->mi_brdcst_addr != NULL) {
247 		dlp->dl_brdcst_addr_offset =
248 		    (uintptr_t)brdcst_addr - (uintptr_t)dlp;
249 		bcopy(minfop->mi_brdcst_addr, brdcst_addr, addr_length);
250 		dlp->dl_brdcst_addr_length = addr_length;
251 	}
252 
253 	dlp->dl_qos_range_offset = (uintptr_t)rangep - (uintptr_t)dlp;
254 	dlp->dl_qos_range_length = sizeof (dl_qos_cl_range1_t);
255 
256 	rangep->dl_qos_type = DL_QOS_CL_RANGE1;
257 	rangep->dl_trans_delay.dl_target_value = DL_UNKNOWN;
258 	rangep->dl_trans_delay.dl_accept_value = DL_UNKNOWN;
259 	rangep->dl_protection.dl_min = DL_UNKNOWN;
260 	rangep->dl_protection.dl_max = DL_UNKNOWN;
261 	rangep->dl_residual_error = DL_UNKNOWN;
262 
263 	/*
264 	 * Specify the supported range of priorities.
265 	 */
266 	rangep->dl_priority.dl_min = 0;
267 	rangep->dl_priority.dl_max = (1 << VLAN_PRI_SIZE) - 1;
268 
269 	dlp->dl_qos_offset = (uintptr_t)selp - (uintptr_t)dlp;
270 	dlp->dl_qos_length = sizeof (dl_qos_cl_sel1_t);
271 
272 	selp->dl_qos_type = DL_QOS_CL_SEL1;
273 	selp->dl_trans_delay = DL_UNKNOWN;
274 	selp->dl_protection = DL_UNKNOWN;
275 	selp->dl_residual_error = DL_UNKNOWN;
276 
277 	/*
278 	 * Specify the current priority (which can be changed by
279 	 * the DL_UDQOS_REQ primitive).
280 	 */
281 	selp->dl_priority = dsp->ds_pri;
282 
283 	dlp->dl_addr_length = addr_length + sizeof (uint16_t);
284 	if (dsp->ds_dlstate == DL_IDLE) {
285 		/*
286 		 * The stream is bound. Therefore we can formulate a valid
287 		 * DLSAP address.
288 		 */
289 		dlp->dl_addr_offset = (uintptr_t)addr - (uintptr_t)dlp;
290 		if (addr_length > 0)
291 			mac_unicast_primary_get(dsp->ds_mh, addr);
292 
293 		*(uint16_t *)(addr + addr_length) = dsp->ds_sap;
294 	}
295 
296 done:
297 	ASSERT(IMPLY(dlp->dl_qos_offset != 0, dlp->dl_qos_length != 0));
298 	ASSERT(IMPLY(dlp->dl_qos_range_offset != 0,
299 	    dlp->dl_qos_range_length != 0));
300 	ASSERT(IMPLY(dlp->dl_addr_offset != 0, dlp->dl_addr_length != 0));
301 	ASSERT(IMPLY(dlp->dl_brdcst_addr_offset != 0,
302 	    dlp->dl_brdcst_addr_length != 0));
303 
304 	qreply(q, mp);
305 }
306 
307 /*
308  * DL_ATTACH_REQ
309  */
310 static void
311 proto_attach_req(dld_str_t *dsp, mblk_t *mp)
312 {
313 	dl_attach_req_t	*dlp = (dl_attach_req_t *)mp->b_rptr;
314 	int		err = 0;
315 	t_uscalar_t	dl_err;
316 	queue_t		*q = dsp->ds_wq;
317 
318 	if (MBLKL(mp) < sizeof (dl_attach_req_t) ||
319 	    dlp->dl_ppa < 0 || dsp->ds_style == DL_STYLE1) {
320 		dl_err = DL_BADPRIM;
321 		goto failed;
322 	}
323 
324 	if (dsp->ds_dlstate != DL_UNATTACHED) {
325 		dl_err = DL_OUTSTATE;
326 		goto failed;
327 	}
328 
329 	dsp->ds_dlstate = DL_ATTACH_PENDING;
330 
331 	err = dld_str_attach(dsp, dlp->dl_ppa);
332 	if (err != 0) {
333 		switch (err) {
334 		case ENOENT:
335 			dl_err = DL_BADPPA;
336 			err = 0;
337 			break;
338 		default:
339 			dl_err = DL_SYSERR;
340 			break;
341 		}
342 		dsp->ds_dlstate = DL_UNATTACHED;
343 		goto failed;
344 	}
345 	ASSERT(dsp->ds_dlstate == DL_UNBOUND);
346 	dlokack(q, mp, DL_ATTACH_REQ);
347 	return;
348 
349 failed:
350 	dlerrorack(q, mp, DL_ATTACH_REQ, dl_err, (t_uscalar_t)err);
351 }
352 
353 /*
354  * DL_DETACH_REQ
355  */
356 static void
357 proto_detach_req(dld_str_t *dsp, mblk_t *mp)
358 {
359 	queue_t		*q = dsp->ds_wq;
360 	t_uscalar_t	dl_err;
361 
362 	if (MBLKL(mp) < sizeof (dl_detach_req_t)) {
363 		dl_err = DL_BADPRIM;
364 		goto failed;
365 	}
366 
367 	if (dsp->ds_dlstate != DL_UNBOUND) {
368 		dl_err = DL_OUTSTATE;
369 		goto failed;
370 	}
371 
372 	if (dsp->ds_style == DL_STYLE1) {
373 		dl_err = DL_BADPRIM;
374 		goto failed;
375 	}
376 
377 	ASSERT(dsp->ds_datathr_cnt == 0);
378 	dsp->ds_dlstate = DL_DETACH_PENDING;
379 
380 	dld_str_detach(dsp);
381 	dlokack(dsp->ds_wq, mp, DL_DETACH_REQ);
382 	return;
383 
384 failed:
385 	dlerrorack(q, mp, DL_DETACH_REQ, dl_err, 0);
386 }
387 
388 /*
389  * DL_BIND_REQ
390  */
391 static void
392 proto_bind_req(dld_str_t *dsp, mblk_t *mp)
393 {
394 	dl_bind_req_t	*dlp = (dl_bind_req_t *)mp->b_rptr;
395 	int		err = 0;
396 	uint8_t		dlsap_addr[MAXMACADDRLEN + sizeof (uint16_t)];
397 	uint_t		dlsap_addr_length;
398 	t_uscalar_t	dl_err;
399 	t_scalar_t	sap;
400 	queue_t		*q = dsp->ds_wq;
401 	mac_perim_handle_t	mph;
402 	void		*mdip;
403 	int32_t		intr_cpu;
404 
405 	if (MBLKL(mp) < sizeof (dl_bind_req_t)) {
406 		dl_err = DL_BADPRIM;
407 		goto failed;
408 	}
409 
410 	if (dlp->dl_xidtest_flg != 0) {
411 		dl_err = DL_NOAUTO;
412 		goto failed;
413 	}
414 
415 	if (dlp->dl_service_mode != DL_CLDLS) {
416 		dl_err = DL_UNSUPPORTED;
417 		goto failed;
418 	}
419 
420 	if (dsp->ds_dlstate != DL_UNBOUND) {
421 		dl_err = DL_OUTSTATE;
422 		goto failed;
423 	}
424 
425 	mac_perim_enter_by_mh(dsp->ds_mh, &mph);
426 
427 	if (dsp->ds_passivestate == DLD_UNINITIALIZED &&
428 	    ((err = dls_active_set(dsp)) != 0)) {
429 		dl_err = DL_SYSERR;
430 		goto failed2;
431 	}
432 
433 	dsp->ds_dlstate = DL_BIND_PENDING;
434 	/*
435 	 * Set the receive callback.
436 	 */
437 	dls_rx_set(dsp, (dsp->ds_mode == DLD_RAW) ?
438 	    dld_str_rx_raw : dld_str_rx_unitdata, dsp);
439 
440 	/*
441 	 * Bind the channel such that it can receive packets.
442 	 */
443 	sap = dlp->dl_sap;
444 	err = dls_bind(dsp, sap);
445 	if (err != 0) {
446 		switch (err) {
447 		case EINVAL:
448 			dl_err = DL_BADADDR;
449 			err = 0;
450 			break;
451 		default:
452 			dl_err = DL_SYSERR;
453 			break;
454 		}
455 
456 		dsp->ds_dlstate = DL_UNBOUND;
457 		if (dsp->ds_passivestate == DLD_UNINITIALIZED)
458 			dls_active_clear(dsp);
459 		goto failed2;
460 	}
461 
462 	intr_cpu = mac_client_intr_cpu(dsp->ds_mch);
463 	mdip = mac_get_devinfo(dsp->ds_mh);
464 	mac_perim_exit(mph);
465 
466 	/*
467 	 * We do this after we get out of the perim to avoid deadlocks
468 	 * etc. since part of mac_client_retarget_intr is to walk the
469 	 * device tree in order to find and retarget the interrupts.
470 	 */
471 	mac_client_set_intr_cpu(mdip, dsp->ds_mch, intr_cpu);
472 
473 	/*
474 	 * Copy in MAC address.
475 	 */
476 	dlsap_addr_length = dsp->ds_mip->mi_addr_length;
477 	mac_unicast_primary_get(dsp->ds_mh, dlsap_addr);
478 
479 	/*
480 	 * Copy in the SAP.
481 	 */
482 	*(uint16_t *)(dlsap_addr + dlsap_addr_length) = sap;
483 	dlsap_addr_length += sizeof (uint16_t);
484 
485 	dsp->ds_dlstate = DL_IDLE;
486 	if (dsp->ds_passivestate == DLD_UNINITIALIZED)
487 		dsp->ds_passivestate = DLD_ACTIVE;
488 
489 	dlbindack(q, mp, sap, dlsap_addr, dlsap_addr_length, 0, 0);
490 	return;
491 
492 failed2:
493 	mac_perim_exit(mph);
494 failed:
495 	dlerrorack(q, mp, DL_BIND_REQ, dl_err, (t_uscalar_t)err);
496 }
497 
498 /*
499  * DL_UNBIND_REQ
500  */
501 static void
502 proto_unbind_req(dld_str_t *dsp, mblk_t *mp)
503 {
504 	queue_t		*q = dsp->ds_wq;
505 	t_uscalar_t	dl_err;
506 	mac_perim_handle_t	mph;
507 
508 	if (MBLKL(mp) < sizeof (dl_unbind_req_t)) {
509 		dl_err = DL_BADPRIM;
510 		goto failed;
511 	}
512 
513 	if (dsp->ds_dlstate != DL_IDLE) {
514 		dl_err = DL_OUTSTATE;
515 		goto failed;
516 	}
517 
518 	mutex_enter(&dsp->ds_lock);
519 	while (dsp->ds_datathr_cnt != 0)
520 		cv_wait(&dsp->ds_datathr_cv, &dsp->ds_lock);
521 
522 	dsp->ds_dlstate = DL_UNBIND_PENDING;
523 	mutex_exit(&dsp->ds_lock);
524 
525 	mac_perim_enter_by_mh(dsp->ds_mh, &mph);
526 	/*
527 	 * Unbind the channel to stop packets being received.
528 	 */
529 	if (dls_unbind(dsp) != 0) {
530 		dl_err = DL_OUTSTATE;
531 		mac_perim_exit(mph);
532 		goto failed;
533 	}
534 
535 	/*
536 	 * Disable polling mode, if it is enabled.
537 	 */
538 	(void) dld_capab_poll_disable(dsp, NULL);
539 
540 	/*
541 	 * Clear LSO flags.
542 	 */
543 	dsp->ds_lso = B_FALSE;
544 	dsp->ds_lso_max = 0;
545 
546 	/*
547 	 * Clear the receive callback.
548 	 */
549 	dls_rx_set(dsp, NULL, NULL);
550 	dsp->ds_direct = B_FALSE;
551 
552 	/*
553 	 * Set the mode back to the default (unitdata).
554 	 */
555 	dsp->ds_mode = DLD_UNITDATA;
556 	dsp->ds_dlstate = DL_UNBOUND;
557 
558 	mac_perim_exit(mph);
559 	dlokack(dsp->ds_wq, mp, DL_UNBIND_REQ);
560 	return;
561 failed:
562 	dlerrorack(q, mp, DL_UNBIND_REQ, dl_err, 0);
563 }
564 
565 /*
566  * DL_PROMISCON_REQ
567  */
568 static void
569 proto_promiscon_req(dld_str_t *dsp, mblk_t *mp)
570 {
571 	dl_promiscon_req_t *dlp = (dl_promiscon_req_t *)mp->b_rptr;
572 	int		err = 0;
573 	t_uscalar_t	dl_err;
574 	uint32_t	promisc_saved;
575 	queue_t		*q = dsp->ds_wq;
576 	mac_perim_handle_t	mph;
577 
578 	if (MBLKL(mp) < sizeof (dl_promiscon_req_t)) {
579 		dl_err = DL_BADPRIM;
580 		goto failed;
581 	}
582 
583 	if (dsp->ds_dlstate == DL_UNATTACHED ||
584 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
585 		dl_err = DL_OUTSTATE;
586 		goto failed;
587 	}
588 
589 	promisc_saved = dsp->ds_promisc;
590 	switch (dlp->dl_level) {
591 	case DL_PROMISC_SAP:
592 		dsp->ds_promisc |= DLS_PROMISC_SAP;
593 		break;
594 
595 	case DL_PROMISC_MULTI:
596 		dsp->ds_promisc |= DLS_PROMISC_MULTI;
597 		break;
598 
599 	case DL_PROMISC_PHYS:
600 		dsp->ds_promisc |= DLS_PROMISC_PHYS;
601 		break;
602 
603 	default:
604 		dl_err = DL_NOTSUPPORTED;
605 		goto failed;
606 	}
607 
608 	mac_perim_enter_by_mh(dsp->ds_mh, &mph);
609 
610 	if (dsp->ds_passivestate == DLD_UNINITIALIZED &&
611 	    ((err = dls_active_set(dsp)) != 0)) {
612 		dsp->ds_promisc = promisc_saved;
613 		dl_err = DL_SYSERR;
614 		goto failed2;
615 	}
616 
617 	/*
618 	 * Adjust channel promiscuity.
619 	 */
620 	err = dls_promisc(dsp, promisc_saved);
621 
622 	if (err != 0) {
623 		dl_err = DL_SYSERR;
624 		dsp->ds_promisc = promisc_saved;
625 		if (dsp->ds_passivestate == DLD_UNINITIALIZED)
626 			dls_active_clear(dsp);
627 		goto failed2;
628 	}
629 
630 	mac_perim_exit(mph);
631 
632 	if (dsp->ds_passivestate == DLD_UNINITIALIZED)
633 		dsp->ds_passivestate = DLD_ACTIVE;
634 	dlokack(q, mp, DL_PROMISCON_REQ);
635 	return;
636 
637 failed2:
638 	mac_perim_exit(mph);
639 failed:
640 	dlerrorack(q, mp, DL_PROMISCON_REQ, dl_err, (t_uscalar_t)err);
641 }
642 
643 /*
644  * DL_PROMISCOFF_REQ
645  */
646 static void
647 proto_promiscoff_req(dld_str_t *dsp, mblk_t *mp)
648 {
649 	dl_promiscoff_req_t *dlp = (dl_promiscoff_req_t *)mp->b_rptr;
650 	int		err = 0;
651 	t_uscalar_t	dl_err;
652 	uint32_t	promisc_saved;
653 	queue_t		*q = dsp->ds_wq;
654 	mac_perim_handle_t	mph;
655 
656 	if (MBLKL(mp) < sizeof (dl_promiscoff_req_t)) {
657 		dl_err = DL_BADPRIM;
658 		goto failed;
659 	}
660 
661 	if (dsp->ds_dlstate == DL_UNATTACHED ||
662 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
663 		dl_err = DL_OUTSTATE;
664 		goto failed;
665 	}
666 
667 	promisc_saved = dsp->ds_promisc;
668 	switch (dlp->dl_level) {
669 	case DL_PROMISC_SAP:
670 		if (!(dsp->ds_promisc & DLS_PROMISC_SAP)) {
671 			dl_err = DL_NOTENAB;
672 			goto failed;
673 		}
674 		dsp->ds_promisc &= ~DLS_PROMISC_SAP;
675 		break;
676 
677 	case DL_PROMISC_MULTI:
678 		if (!(dsp->ds_promisc & DLS_PROMISC_MULTI)) {
679 			dl_err = DL_NOTENAB;
680 			goto failed;
681 		}
682 		dsp->ds_promisc &= ~DLS_PROMISC_MULTI;
683 		break;
684 
685 	case DL_PROMISC_PHYS:
686 		if (!(dsp->ds_promisc & DLS_PROMISC_PHYS)) {
687 			dl_err = DL_NOTENAB;
688 			goto failed;
689 		}
690 		dsp->ds_promisc &= ~DLS_PROMISC_PHYS;
691 		break;
692 
693 	default:
694 		dl_err = DL_NOTSUPPORTED;
695 		goto failed;
696 	}
697 
698 	mac_perim_enter_by_mh(dsp->ds_mh, &mph);
699 	/*
700 	 * Adjust channel promiscuity.
701 	 */
702 	err = dls_promisc(dsp, promisc_saved);
703 	mac_perim_exit(mph);
704 
705 	if (err != 0) {
706 		dl_err = DL_SYSERR;
707 		goto failed;
708 	}
709 	dlokack(q, mp, DL_PROMISCOFF_REQ);
710 	return;
711 failed:
712 	dlerrorack(q, mp, DL_PROMISCOFF_REQ, dl_err, (t_uscalar_t)err);
713 }
714 
715 /*
716  * DL_ENABMULTI_REQ
717  */
718 static void
719 proto_enabmulti_req(dld_str_t *dsp, mblk_t *mp)
720 {
721 	dl_enabmulti_req_t *dlp = (dl_enabmulti_req_t *)mp->b_rptr;
722 	int		err = 0;
723 	t_uscalar_t	dl_err;
724 	queue_t		*q = dsp->ds_wq;
725 	mac_perim_handle_t	mph;
726 
727 	if (dsp->ds_dlstate == DL_UNATTACHED ||
728 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
729 		dl_err = DL_OUTSTATE;
730 		goto failed;
731 	}
732 
733 	if (MBLKL(mp) < sizeof (dl_enabmulti_req_t) ||
734 	    !MBLKIN(mp, dlp->dl_addr_offset, dlp->dl_addr_length) ||
735 	    dlp->dl_addr_length != dsp->ds_mip->mi_addr_length) {
736 		dl_err = DL_BADPRIM;
737 		goto failed;
738 	}
739 
740 	mac_perim_enter_by_mh(dsp->ds_mh, &mph);
741 
742 	if (dsp->ds_passivestate == DLD_UNINITIALIZED &&
743 	    ((err = dls_active_set(dsp)) != 0)) {
744 		dl_err = DL_SYSERR;
745 		goto failed2;
746 	}
747 
748 	err = dls_multicst_add(dsp, mp->b_rptr + dlp->dl_addr_offset);
749 
750 	if (err != 0) {
751 		switch (err) {
752 		case EINVAL:
753 			dl_err = DL_BADADDR;
754 			err = 0;
755 			break;
756 		case ENOSPC:
757 			dl_err = DL_TOOMANY;
758 			err = 0;
759 			break;
760 		default:
761 			dl_err = DL_SYSERR;
762 			break;
763 		}
764 		if (dsp->ds_passivestate == DLD_UNINITIALIZED)
765 			dls_active_clear(dsp);
766 
767 		goto failed2;
768 	}
769 
770 	mac_perim_exit(mph);
771 
772 	if (dsp->ds_passivestate == DLD_UNINITIALIZED)
773 		dsp->ds_passivestate = DLD_ACTIVE;
774 	dlokack(q, mp, DL_ENABMULTI_REQ);
775 	return;
776 
777 failed2:
778 	mac_perim_exit(mph);
779 failed:
780 	dlerrorack(q, mp, DL_ENABMULTI_REQ, dl_err, (t_uscalar_t)err);
781 }
782 
783 /*
784  * DL_DISABMULTI_REQ
785  */
786 static void
787 proto_disabmulti_req(dld_str_t *dsp, mblk_t *mp)
788 {
789 	dl_disabmulti_req_t *dlp = (dl_disabmulti_req_t *)mp->b_rptr;
790 	int		err = 0;
791 	t_uscalar_t	dl_err;
792 	queue_t		*q = dsp->ds_wq;
793 	mac_perim_handle_t	mph;
794 
795 	if (dsp->ds_dlstate == DL_UNATTACHED ||
796 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
797 		dl_err = DL_OUTSTATE;
798 		goto failed;
799 	}
800 
801 	if (MBLKL(mp) < sizeof (dl_disabmulti_req_t) ||
802 	    !MBLKIN(mp, dlp->dl_addr_offset, dlp->dl_addr_length) ||
803 	    dlp->dl_addr_length != dsp->ds_mip->mi_addr_length) {
804 		dl_err = DL_BADPRIM;
805 		goto failed;
806 	}
807 
808 	mac_perim_enter_by_mh(dsp->ds_mh, &mph);
809 	err = dls_multicst_remove(dsp, mp->b_rptr + dlp->dl_addr_offset);
810 	mac_perim_exit(mph);
811 
812 	if (err != 0) {
813 	switch (err) {
814 		case EINVAL:
815 			dl_err = DL_BADADDR;
816 			err = 0;
817 			break;
818 
819 		case ENOENT:
820 			dl_err = DL_NOTENAB;
821 			err = 0;
822 			break;
823 
824 		default:
825 			dl_err = DL_SYSERR;
826 			break;
827 		}
828 		goto failed;
829 	}
830 	dlokack(q, mp, DL_DISABMULTI_REQ);
831 	return;
832 failed:
833 	dlerrorack(q, mp, DL_DISABMULTI_REQ, dl_err, (t_uscalar_t)err);
834 }
835 
836 /*
837  * DL_PHYS_ADDR_REQ
838  */
839 static void
840 proto_physaddr_req(dld_str_t *dsp, mblk_t *mp)
841 {
842 	dl_phys_addr_req_t *dlp = (dl_phys_addr_req_t *)mp->b_rptr;
843 	queue_t		*q = dsp->ds_wq;
844 	t_uscalar_t	dl_err;
845 	char		*addr;
846 	uint_t		addr_length;
847 
848 	if (MBLKL(mp) < sizeof (dl_phys_addr_req_t)) {
849 		dl_err = DL_BADPRIM;
850 		goto failed;
851 	}
852 
853 	if (dsp->ds_dlstate == DL_UNATTACHED ||
854 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
855 		dl_err = DL_OUTSTATE;
856 		goto failed;
857 	}
858 
859 	if (dlp->dl_addr_type != DL_CURR_PHYS_ADDR &&
860 	    dlp->dl_addr_type != DL_FACT_PHYS_ADDR) {
861 		dl_err = DL_UNSUPPORTED;
862 		goto failed;
863 	}
864 
865 	addr_length = dsp->ds_mip->mi_addr_length;
866 	if (addr_length > 0) {
867 		addr = kmem_alloc(addr_length, KM_SLEEP);
868 		if (dlp->dl_addr_type == DL_CURR_PHYS_ADDR)
869 			mac_unicast_primary_get(dsp->ds_mh, (uint8_t *)addr);
870 		else
871 			bcopy(dsp->ds_mip->mi_unicst_addr, addr, addr_length);
872 
873 		dlphysaddrack(q, mp, addr, (t_uscalar_t)addr_length);
874 		kmem_free(addr, addr_length);
875 	} else {
876 		dlphysaddrack(q, mp, NULL, 0);
877 	}
878 	return;
879 failed:
880 	dlerrorack(q, mp, DL_PHYS_ADDR_REQ, dl_err, 0);
881 }
882 
883 /*
884  * DL_SET_PHYS_ADDR_REQ
885  */
886 static void
887 proto_setphysaddr_req(dld_str_t *dsp, mblk_t *mp)
888 {
889 	dl_set_phys_addr_req_t *dlp = (dl_set_phys_addr_req_t *)mp->b_rptr;
890 	int		err = 0;
891 	t_uscalar_t	dl_err;
892 	queue_t		*q = dsp->ds_wq;
893 	mac_perim_handle_t	mph;
894 
895 	if (dsp->ds_dlstate == DL_UNATTACHED ||
896 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
897 		dl_err = DL_OUTSTATE;
898 		goto failed;
899 	}
900 
901 	if (MBLKL(mp) < sizeof (dl_set_phys_addr_req_t) ||
902 	    !MBLKIN(mp, dlp->dl_addr_offset, dlp->dl_addr_length) ||
903 	    dlp->dl_addr_length != dsp->ds_mip->mi_addr_length) {
904 		dl_err = DL_BADPRIM;
905 		goto failed;
906 	}
907 
908 	mac_perim_enter_by_mh(dsp->ds_mh, &mph);
909 
910 	if (dsp->ds_passivestate == DLD_UNINITIALIZED &&
911 	    ((err = dls_active_set(dsp)) != 0)) {
912 		dl_err = DL_SYSERR;
913 		goto failed2;
914 	}
915 
916 	err = mac_unicast_primary_set(dsp->ds_mh,
917 	    mp->b_rptr + dlp->dl_addr_offset);
918 	if (err != 0) {
919 		switch (err) {
920 		case EINVAL:
921 			dl_err = DL_BADADDR;
922 			err = 0;
923 			break;
924 
925 		default:
926 			dl_err = DL_SYSERR;
927 			break;
928 		}
929 		if (dsp->ds_passivestate == DLD_UNINITIALIZED)
930 			dls_active_clear(dsp);
931 
932 		goto failed2;
933 
934 	}
935 
936 	mac_perim_exit(mph);
937 
938 	if (dsp->ds_passivestate == DLD_UNINITIALIZED)
939 		dsp->ds_passivestate = DLD_ACTIVE;
940 	dlokack(q, mp, DL_SET_PHYS_ADDR_REQ);
941 	return;
942 
943 failed2:
944 	mac_perim_exit(mph);
945 failed:
946 	dlerrorack(q, mp, DL_SET_PHYS_ADDR_REQ, dl_err, (t_uscalar_t)err);
947 }
948 
949 /*
950  * DL_UDQOS_REQ
951  */
952 static void
953 proto_udqos_req(dld_str_t *dsp, mblk_t *mp)
954 {
955 	dl_udqos_req_t *dlp = (dl_udqos_req_t *)mp->b_rptr;
956 	dl_qos_cl_sel1_t *selp;
957 	int		off, len;
958 	t_uscalar_t	dl_err;
959 	queue_t		*q = dsp->ds_wq;
960 
961 	off = dlp->dl_qos_offset;
962 	len = dlp->dl_qos_length;
963 
964 	if (MBLKL(mp) < sizeof (dl_udqos_req_t) || !MBLKIN(mp, off, len)) {
965 		dl_err = DL_BADPRIM;
966 		goto failed;
967 	}
968 
969 	selp = (dl_qos_cl_sel1_t *)(mp->b_rptr + off);
970 	if (selp->dl_qos_type != DL_QOS_CL_SEL1) {
971 		dl_err = DL_BADQOSTYPE;
972 		goto failed;
973 	}
974 
975 	if (selp->dl_priority > (1 << VLAN_PRI_SIZE) - 1 ||
976 	    selp->dl_priority < 0) {
977 		dl_err = DL_BADQOSPARAM;
978 		goto failed;
979 	}
980 
981 	dsp->ds_pri = selp->dl_priority;
982 	dlokack(q, mp, DL_UDQOS_REQ);
983 	return;
984 failed:
985 	dlerrorack(q, mp, DL_UDQOS_REQ, dl_err, 0);
986 }
987 
988 static boolean_t
989 check_ip_above(queue_t *q)
990 {
991 	queue_t		*next_q;
992 	boolean_t	ret = B_TRUE;
993 
994 	claimstr(q);
995 	next_q = q->q_next;
996 	if (strcmp(next_q->q_qinfo->qi_minfo->mi_idname, "ip") != 0)
997 		ret = B_FALSE;
998 	releasestr(q);
999 	return (ret);
1000 }
1001 
1002 /*
1003  * DL_CAPABILITY_REQ
1004  */
1005 static void
1006 proto_capability_req(dld_str_t *dsp, mblk_t *mp)
1007 {
1008 	dl_capability_req_t *dlp = (dl_capability_req_t *)mp->b_rptr;
1009 	dl_capability_sub_t *sp;
1010 	size_t		size, len;
1011 	offset_t	off, end;
1012 	t_uscalar_t	dl_err;
1013 	queue_t		*q = dsp->ds_wq;
1014 
1015 	if (MBLKL(mp) < sizeof (dl_capability_req_t)) {
1016 		dl_err = DL_BADPRIM;
1017 		goto failed;
1018 	}
1019 
1020 	if (dsp->ds_dlstate == DL_UNATTACHED ||
1021 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
1022 		dl_err = DL_OUTSTATE;
1023 		goto failed;
1024 	}
1025 
1026 	/*
1027 	 * This request is overloaded. If there are no requested capabilities
1028 	 * then we just want to acknowledge with all the capabilities we
1029 	 * support. Otherwise we enable the set of capabilities requested.
1030 	 */
1031 	if (dlp->dl_sub_length == 0) {
1032 		proto_capability_advertise(dsp, mp);
1033 		return;
1034 	}
1035 
1036 	if (!MBLKIN(mp, dlp->dl_sub_offset, dlp->dl_sub_length)) {
1037 		dl_err = DL_BADPRIM;
1038 		goto failed;
1039 	}
1040 
1041 	dlp->dl_primitive = DL_CAPABILITY_ACK;
1042 
1043 	off = dlp->dl_sub_offset;
1044 	len = dlp->dl_sub_length;
1045 
1046 	/*
1047 	 * Walk the list of capabilities to be enabled.
1048 	 */
1049 	for (end = off + len; off < end; ) {
1050 		sp = (dl_capability_sub_t *)(mp->b_rptr + off);
1051 		size = sizeof (dl_capability_sub_t) + sp->dl_length;
1052 
1053 		if (off + size > end ||
1054 		    !IS_P2ALIGNED(off, sizeof (uint32_t))) {
1055 			dl_err = DL_BADPRIM;
1056 			goto failed;
1057 		}
1058 
1059 		switch (sp->dl_cap) {
1060 		/*
1061 		 * TCP/IP checksum offload to hardware.
1062 		 */
1063 		case DL_CAPAB_HCKSUM: {
1064 			dl_capab_hcksum_t *hcksump;
1065 			dl_capab_hcksum_t hcksum;
1066 
1067 			hcksump = (dl_capab_hcksum_t *)&sp[1];
1068 			/*
1069 			 * Copy for alignment.
1070 			 */
1071 			bcopy(hcksump, &hcksum, sizeof (dl_capab_hcksum_t));
1072 			dlcapabsetqid(&(hcksum.hcksum_mid), dsp->ds_rq);
1073 			bcopy(&hcksum, hcksump, sizeof (dl_capab_hcksum_t));
1074 			break;
1075 		}
1076 
1077 		case DL_CAPAB_DLD: {
1078 			dl_capab_dld_t	*dldp;
1079 			dl_capab_dld_t	dld;
1080 
1081 			dldp = (dl_capab_dld_t *)&sp[1];
1082 			/*
1083 			 * Copy for alignment.
1084 			 */
1085 			bcopy(dldp, &dld, sizeof (dl_capab_dld_t));
1086 			dlcapabsetqid(&(dld.dld_mid), dsp->ds_rq);
1087 			bcopy(&dld, dldp, sizeof (dl_capab_dld_t));
1088 			break;
1089 		}
1090 		default:
1091 			break;
1092 		}
1093 		off += size;
1094 	}
1095 	qreply(q, mp);
1096 	return;
1097 failed:
1098 	dlerrorack(q, mp, DL_CAPABILITY_REQ, dl_err, 0);
1099 }
1100 
1101 /*
1102  * DL_NOTIFY_REQ
1103  */
1104 static void
1105 proto_notify_req(dld_str_t *dsp, mblk_t *mp)
1106 {
1107 	dl_notify_req_t	*dlp = (dl_notify_req_t *)mp->b_rptr;
1108 	t_uscalar_t	dl_err;
1109 	queue_t		*q = dsp->ds_wq;
1110 	uint_t		note =
1111 	    DL_NOTE_PROMISC_ON_PHYS |
1112 	    DL_NOTE_PROMISC_OFF_PHYS |
1113 	    DL_NOTE_PHYS_ADDR |
1114 	    DL_NOTE_LINK_UP |
1115 	    DL_NOTE_LINK_DOWN |
1116 	    DL_NOTE_CAPAB_RENEG |
1117 	    DL_NOTE_SPEED;
1118 
1119 	if (MBLKL(mp) < sizeof (dl_notify_req_t)) {
1120 		dl_err = DL_BADPRIM;
1121 		goto failed;
1122 	}
1123 
1124 	if (dsp->ds_dlstate == DL_UNATTACHED ||
1125 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
1126 		dl_err = DL_OUTSTATE;
1127 		goto failed;
1128 	}
1129 
1130 	note &= ~(mac_no_notification(dsp->ds_mh));
1131 
1132 	/*
1133 	 * Cache the notifications that are being enabled.
1134 	 */
1135 	dsp->ds_notifications = dlp->dl_notifications & note;
1136 	/*
1137 	 * The ACK carries all notifications regardless of which set is
1138 	 * being enabled.
1139 	 */
1140 	dlnotifyack(q, mp, note);
1141 
1142 	/*
1143 	 * Generate DL_NOTIFY_IND messages for each enabled notification.
1144 	 */
1145 	if (dsp->ds_notifications != 0) {
1146 		dld_str_notify_ind(dsp);
1147 	}
1148 	return;
1149 failed:
1150 	dlerrorack(q, mp, DL_NOTIFY_REQ, dl_err, 0);
1151 }
1152 
1153 /*
1154  * DL_UINTDATA_REQ
1155  */
1156 void
1157 proto_unitdata_req(dld_str_t *dsp, mblk_t *mp)
1158 {
1159 	queue_t			*q = dsp->ds_wq;
1160 	dl_unitdata_req_t	*dlp = (dl_unitdata_req_t *)mp->b_rptr;
1161 	off_t			off;
1162 	size_t			len, size;
1163 	const uint8_t		*addr;
1164 	uint16_t		sap;
1165 	uint_t			addr_length;
1166 	mblk_t			*bp, *payload;
1167 	uint32_t		start, stuff, end, value, flags;
1168 	t_uscalar_t		dl_err;
1169 	uint_t			max_sdu;
1170 
1171 	if (MBLKL(mp) < sizeof (dl_unitdata_req_t) || mp->b_cont == NULL) {
1172 		dlerrorack(q, mp, DL_UNITDATA_REQ, DL_BADPRIM, 0);
1173 		return;
1174 	}
1175 
1176 	mutex_enter(&dsp->ds_lock);
1177 	if (dsp->ds_dlstate != DL_IDLE) {
1178 		mutex_exit(&dsp->ds_lock);
1179 		dlerrorack(q, mp, DL_UNITDATA_REQ, DL_OUTSTATE, 0);
1180 		return;
1181 	}
1182 	DLD_DATATHR_INC(dsp);
1183 	mutex_exit(&dsp->ds_lock);
1184 
1185 	addr_length = dsp->ds_mip->mi_addr_length;
1186 
1187 	off = dlp->dl_dest_addr_offset;
1188 	len = dlp->dl_dest_addr_length;
1189 
1190 	if (!MBLKIN(mp, off, len) || !IS_P2ALIGNED(off, sizeof (uint16_t))) {
1191 		dl_err = DL_BADPRIM;
1192 		goto failed;
1193 	}
1194 
1195 	if (len != addr_length + sizeof (uint16_t)) {
1196 		dl_err = DL_BADADDR;
1197 		goto failed;
1198 	}
1199 
1200 	addr = mp->b_rptr + off;
1201 	sap = *(uint16_t *)(mp->b_rptr + off + addr_length);
1202 
1203 	/*
1204 	 * Check the length of the packet and the block types.
1205 	 */
1206 	size = 0;
1207 	payload = mp->b_cont;
1208 	for (bp = payload; bp != NULL; bp = bp->b_cont) {
1209 		if (DB_TYPE(bp) != M_DATA)
1210 			goto baddata;
1211 
1212 		size += MBLKL(bp);
1213 	}
1214 
1215 	mac_sdu_get(dsp->ds_mh, NULL, &max_sdu);
1216 	if (size > max_sdu)
1217 		goto baddata;
1218 
1219 	/*
1220 	 * Build a packet header.
1221 	 */
1222 	if ((bp = dls_header(dsp, addr, sap, dlp->dl_priority.dl_max,
1223 	    &payload)) == NULL) {
1224 		dl_err = DL_BADADDR;
1225 		goto failed;
1226 	}
1227 
1228 	/*
1229 	 * We no longer need the M_PROTO header, so free it.
1230 	 */
1231 	freeb(mp);
1232 
1233 	/*
1234 	 * Transfer the checksum offload information if it is present.
1235 	 */
1236 	hcksum_retrieve(payload, NULL, NULL, &start, &stuff, &end, &value,
1237 	    &flags);
1238 	(void) hcksum_assoc(bp, NULL, NULL, start, stuff, end, value, flags, 0);
1239 
1240 	/*
1241 	 * Link the payload onto the new header.
1242 	 */
1243 	ASSERT(bp->b_cont == NULL);
1244 	bp->b_cont = payload;
1245 
1246 	/*
1247 	 * No lock can be held across modules and putnext()'s,
1248 	 * which can happen here with the call from DLD_TX().
1249 	 */
1250 	if (DLD_TX(dsp, bp, 0, 0) != NULL) {
1251 		/* flow-controlled */
1252 		DLD_SETQFULL(dsp);
1253 	}
1254 	DLD_DATATHR_DCR(dsp);
1255 	return;
1256 
1257 failed:
1258 	dlerrorack(q, mp, DL_UNITDATA_REQ, dl_err, 0);
1259 	DLD_DATATHR_DCR(dsp);
1260 	return;
1261 
1262 baddata:
1263 	dluderrorind(q, mp, (void *)addr, len, DL_BADDATA, 0);
1264 	DLD_DATATHR_DCR(dsp);
1265 }
1266 
1267 /*
1268  * DL_PASSIVE_REQ
1269  */
1270 static void
1271 proto_passive_req(dld_str_t *dsp, mblk_t *mp)
1272 {
1273 	t_uscalar_t dl_err;
1274 
1275 	/*
1276 	 * If we've already become active by issuing an active primitive,
1277 	 * then it's too late to try to become passive.
1278 	 */
1279 	if (dsp->ds_passivestate == DLD_ACTIVE) {
1280 		dl_err = DL_OUTSTATE;
1281 		goto failed;
1282 	}
1283 
1284 	if (MBLKL(mp) < sizeof (dl_passive_req_t)) {
1285 		dl_err = DL_BADPRIM;
1286 		goto failed;
1287 	}
1288 
1289 	dsp->ds_passivestate = DLD_PASSIVE;
1290 	dlokack(dsp->ds_wq, mp, DL_PASSIVE_REQ);
1291 	return;
1292 failed:
1293 	dlerrorack(dsp->ds_wq, mp, DL_PASSIVE_REQ, dl_err, 0);
1294 }
1295 
1296 
1297 /*
1298  * Catch-all handler.
1299  */
1300 static void
1301 proto_req(dld_str_t *dsp, mblk_t *mp)
1302 {
1303 	union DL_primitives	*dlp = (union DL_primitives *)mp->b_rptr;
1304 
1305 	dlerrorack(dsp->ds_wq, mp, dlp->dl_primitive, DL_UNSUPPORTED, 0);
1306 }
1307 
1308 static int
1309 dld_capab_perim(dld_str_t *dsp, void *data, uint_t flags)
1310 {
1311 	switch (flags) {
1312 	case DLD_ENABLE:
1313 		mac_perim_enter_by_mh(dsp->ds_mh, (mac_perim_handle_t *)data);
1314 		return (0);
1315 
1316 	case DLD_DISABLE:
1317 		mac_perim_exit((mac_perim_handle_t)data);
1318 		return (0);
1319 
1320 	case DLD_QUERY:
1321 		return (mac_perim_held(dsp->ds_mh));
1322 	}
1323 	return (0);
1324 }
1325 
1326 static int
1327 dld_capab_direct(dld_str_t *dsp, void *data, uint_t flags)
1328 {
1329 	dld_capab_direct_t	*direct = data;
1330 
1331 	ASSERT(MAC_PERIM_HELD(dsp->ds_mh));
1332 
1333 	switch (flags) {
1334 	case DLD_ENABLE:
1335 		dls_rx_set(dsp, (dls_rx_t)direct->di_rx_cf,
1336 		    direct->di_rx_ch);
1337 
1338 		direct->di_tx_df = (uintptr_t)str_mdata_fastpath_put;
1339 		direct->di_tx_dh = dsp;
1340 		direct->di_tx_cb_df = (uintptr_t)mac_client_tx_notify;
1341 		direct->di_tx_cb_dh = dsp->ds_mch;
1342 		direct->di_tx_fctl_df = (uintptr_t)mac_tx_is_flow_blocked;
1343 		direct->di_tx_fctl_dh = dsp->ds_mch;
1344 
1345 		dsp->ds_direct = B_TRUE;
1346 
1347 		return (0);
1348 
1349 	case DLD_DISABLE:
1350 		dls_rx_set(dsp, (dsp->ds_mode == DLD_FASTPATH) ?
1351 		    dld_str_rx_fastpath : dld_str_rx_unitdata, (void *)dsp);
1352 		dsp->ds_direct = B_FALSE;
1353 
1354 		return (0);
1355 	}
1356 	return (ENOTSUP);
1357 }
1358 
1359 /*
1360  * dld_capab_poll_enable()
1361  *
1362  * This function is misnamed. All polling  and fanouts are run out of the
1363  * lower mac (in case of VNIC and the only mac in case of NICs). The
1364  * availability of Rx ring and promiscous mode is all taken care between
1365  * the soft ring set (mac_srs), the Rx ring, and S/W classifier. Any
1366  * fanout necessary is done by the soft rings that are part of the
1367  * mac_srs (by default mac_srs sends the packets up via a TCP and
1368  * non TCP soft ring).
1369  *
1370  * The mac_srs (or its associated soft rings) always store the ill_rx_ring
1371  * (the cookie returned when they registered with IP during plumb) as their
1372  * 2nd argument which is passed up as mac_resource_handle_t. The upcall
1373  * function and 1st argument is what the caller registered when they
1374  * called mac_rx_classify_flow_add() to register the flow. For VNIC,
1375  * the function is vnic_rx and argument is vnic_t. For regular NIC
1376  * case, it mac_rx_default and mac_handle_t. As explained above, the
1377  * mac_srs (or its soft ring) will add the ill_rx_ring (mac_resource_handle_t)
1378  * from its stored 2nd argument.
1379  */
1380 static int
1381 dld_capab_poll_enable(dld_str_t *dsp, dld_capab_poll_t *poll)
1382 {
1383 	if (dsp->ds_polling)
1384 		return (EINVAL);
1385 
1386 	if ((dld_opt & DLD_OPT_NO_POLL) != 0 || dsp->ds_mode == DLD_RAW)
1387 		return (ENOTSUP);
1388 
1389 	/*
1390 	 * Enable client polling if and only if DLS bypass is possible.
1391 	 * Special cases like VLANs need DLS processing in the Rx data path.
1392 	 * In such a case we can neither allow the client (IP) to directly
1393 	 * poll the softring (since DLS processing hasn't been done) nor can
1394 	 * we allow DLS bypass.
1395 	 */
1396 	if (!mac_rx_bypass_set(dsp->ds_mch, dsp->ds_rx, dsp->ds_rx_arg))
1397 		return (ENOTSUP);
1398 
1399 	/*
1400 	 * Register soft ring resources. This will come in handy later if
1401 	 * the user decides to modify CPU bindings to use more CPUs for the
1402 	 * device in which case we will switch to fanout using soft rings.
1403 	 */
1404 	mac_resource_set_common(dsp->ds_mch,
1405 	    (mac_resource_add_t)poll->poll_ring_add_cf,
1406 	    (mac_resource_remove_t)poll->poll_ring_remove_cf,
1407 	    (mac_resource_quiesce_t)poll->poll_ring_quiesce_cf,
1408 	    (mac_resource_restart_t)poll->poll_ring_restart_cf,
1409 	    (mac_resource_bind_t)poll->poll_ring_bind_cf,
1410 	    poll->poll_ring_ch);
1411 
1412 	mac_client_poll_enable(dsp->ds_mch);
1413 
1414 	dsp->ds_polling = B_TRUE;
1415 	return (0);
1416 }
1417 
1418 /* ARGSUSED */
1419 static int
1420 dld_capab_poll_disable(dld_str_t *dsp, dld_capab_poll_t *poll)
1421 {
1422 	if (!dsp->ds_polling)
1423 		return (EINVAL);
1424 
1425 	mac_client_poll_disable(dsp->ds_mch);
1426 	mac_resource_set(dsp->ds_mch, NULL, NULL);
1427 
1428 	dsp->ds_polling = B_FALSE;
1429 	return (0);
1430 }
1431 
1432 static int
1433 dld_capab_poll(dld_str_t *dsp, void *data, uint_t flags)
1434 {
1435 	dld_capab_poll_t	*poll = data;
1436 
1437 	ASSERT(MAC_PERIM_HELD(dsp->ds_mh));
1438 
1439 	switch (flags) {
1440 	case DLD_ENABLE:
1441 		return (dld_capab_poll_enable(dsp, poll));
1442 	case DLD_DISABLE:
1443 		return (dld_capab_poll_disable(dsp, poll));
1444 	}
1445 	return (ENOTSUP);
1446 }
1447 
1448 static int
1449 dld_capab_lso(dld_str_t *dsp, void *data, uint_t flags)
1450 {
1451 	dld_capab_lso_t		*lso = data;
1452 
1453 	ASSERT(MAC_PERIM_HELD(dsp->ds_mh));
1454 
1455 	switch (flags) {
1456 	case DLD_ENABLE: {
1457 		mac_capab_lso_t		mac_lso;
1458 
1459 		/*
1460 		 * Check if LSO is supported on this MAC & enable LSO
1461 		 * accordingly.
1462 		 */
1463 		if (mac_capab_get(dsp->ds_mh, MAC_CAPAB_LSO, &mac_lso)) {
1464 			lso->lso_max = mac_lso.lso_basic_tcp_ipv4.lso_max;
1465 			lso->lso_flags = 0;
1466 			/* translate the flag for mac clients */
1467 			if ((mac_lso.lso_flags & LSO_TX_BASIC_TCP_IPV4) != 0)
1468 				lso->lso_flags |= DLD_LSO_TX_BASIC_TCP_IPV4;
1469 			dsp->ds_lso = B_TRUE;
1470 			dsp->ds_lso_max = lso->lso_max;
1471 		} else {
1472 			dsp->ds_lso = B_FALSE;
1473 			dsp->ds_lso_max = 0;
1474 			return (ENOTSUP);
1475 		}
1476 		return (0);
1477 	}
1478 	case DLD_DISABLE: {
1479 		dsp->ds_lso = B_FALSE;
1480 		dsp->ds_lso_max = 0;
1481 		return (0);
1482 	}
1483 	}
1484 	return (ENOTSUP);
1485 }
1486 
1487 static int
1488 dld_capab(dld_str_t *dsp, uint_t type, void *data, uint_t flags)
1489 {
1490 	int	err;
1491 
1492 	/*
1493 	 * Don't enable direct callback capabilities unless the caller is
1494 	 * the IP client. When a module is inserted in a stream (_I_INSERT)
1495 	 * the stack initiates capability disable, but due to races, the
1496 	 * module insertion may complete before the capability disable
1497 	 * completes. So we limit the check to DLD_ENABLE case.
1498 	 */
1499 	if ((flags == DLD_ENABLE && type != DLD_CAPAB_PERIM) &&
1500 	    (dsp->ds_sap != ETHERTYPE_IP || !check_ip_above(dsp->ds_rq))) {
1501 		return (ENOTSUP);
1502 	}
1503 
1504 	switch (type) {
1505 	case DLD_CAPAB_DIRECT:
1506 		err = dld_capab_direct(dsp, data, flags);
1507 		break;
1508 
1509 	case DLD_CAPAB_POLL:
1510 		err =  dld_capab_poll(dsp, data, flags);
1511 		break;
1512 
1513 	case DLD_CAPAB_PERIM:
1514 		err = dld_capab_perim(dsp, data, flags);
1515 		break;
1516 
1517 	case DLD_CAPAB_LSO:
1518 		err = dld_capab_lso(dsp, data, flags);
1519 		break;
1520 
1521 	default:
1522 		err = ENOTSUP;
1523 		break;
1524 	}
1525 
1526 	return (err);
1527 }
1528 
1529 /*
1530  * DL_CAPABILITY_ACK/DL_ERROR_ACK
1531  */
1532 static void
1533 proto_capability_advertise(dld_str_t *dsp, mblk_t *mp)
1534 {
1535 	dl_capability_ack_t	*dlap;
1536 	dl_capability_sub_t	*dlsp;
1537 	size_t			subsize;
1538 	dl_capab_dld_t		dld;
1539 	dl_capab_hcksum_t	hcksum;
1540 	dl_capab_zerocopy_t	zcopy;
1541 	uint8_t			*ptr;
1542 	queue_t			*q = dsp->ds_wq;
1543 	mblk_t			*mp1;
1544 	boolean_t		is_vlan;
1545 	boolean_t		hcksum_capable = B_FALSE;
1546 	boolean_t		zcopy_capable = B_FALSE;
1547 	boolean_t		dld_capable = B_FALSE;
1548 
1549 	/*
1550 	 * Initially assume no capabilities.
1551 	 */
1552 	subsize = 0;
1553 	is_vlan = (mac_client_vid(dsp->ds_mch) != VLAN_ID_NONE);
1554 
1555 	/*
1556 	 * Check if checksum offload is supported on this MAC.  Don't
1557 	 * advertise DL_CAPAB_HCKSUM if the underlying MAC is VLAN incapable,
1558 	 * since it might not be able to do the hardware checksum offload
1559 	 * with the correct offset.
1560 	 */
1561 	bzero(&hcksum, sizeof (dl_capab_hcksum_t));
1562 	if ((!is_vlan || (!mac_capab_get(dsp->ds_mh, MAC_CAPAB_NO_NATIVEVLAN,
1563 	    NULL))) && mac_capab_get(dsp->ds_mh, MAC_CAPAB_HCKSUM,
1564 	    &hcksum.hcksum_txflags)) {
1565 		if (hcksum.hcksum_txflags != 0) {
1566 			hcksum_capable = B_TRUE;
1567 			subsize += sizeof (dl_capability_sub_t) +
1568 			    sizeof (dl_capab_hcksum_t);
1569 		}
1570 	}
1571 
1572 	/*
1573 	 * Check if zerocopy is supported on this interface.
1574 	 * If advertising DL_CAPAB_ZEROCOPY has not been explicitly disabled
1575 	 * then reserve space for that capability.
1576 	 */
1577 	if (!mac_capab_get(dsp->ds_mh, MAC_CAPAB_NO_ZCOPY, NULL) &&
1578 	    !(dld_opt & DLD_OPT_NO_ZEROCOPY)) {
1579 		zcopy_capable = B_TRUE;
1580 		subsize += sizeof (dl_capability_sub_t) +
1581 		    sizeof (dl_capab_zerocopy_t);
1582 	}
1583 
1584 	/*
1585 	 * Direct capability negotiation interface between IP and DLD
1586 	 */
1587 	if (dsp->ds_sap == ETHERTYPE_IP && check_ip_above(dsp->ds_rq)) {
1588 		dld_capable = B_TRUE;
1589 		subsize += sizeof (dl_capability_sub_t) +
1590 		    sizeof (dl_capab_dld_t);
1591 	}
1592 
1593 	/*
1594 	 * If there are no capabilities to advertise or if we
1595 	 * can't allocate a response, send a DL_ERROR_ACK.
1596 	 */
1597 	if ((mp1 = reallocb(mp,
1598 	    sizeof (dl_capability_ack_t) + subsize, 0)) == NULL) {
1599 		dlerrorack(q, mp, DL_CAPABILITY_REQ, DL_NOTSUPPORTED, 0);
1600 		return;
1601 	}
1602 
1603 	mp = mp1;
1604 	DB_TYPE(mp) = M_PROTO;
1605 	mp->b_wptr = mp->b_rptr + sizeof (dl_capability_ack_t) + subsize;
1606 	bzero(mp->b_rptr, MBLKL(mp));
1607 	dlap = (dl_capability_ack_t *)mp->b_rptr;
1608 	dlap->dl_primitive = DL_CAPABILITY_ACK;
1609 	dlap->dl_sub_offset = sizeof (dl_capability_ack_t);
1610 	dlap->dl_sub_length = subsize;
1611 	ptr = (uint8_t *)&dlap[1];
1612 
1613 	/*
1614 	 * TCP/IP checksum offload.
1615 	 */
1616 	if (hcksum_capable) {
1617 		dlsp = (dl_capability_sub_t *)ptr;
1618 
1619 		dlsp->dl_cap = DL_CAPAB_HCKSUM;
1620 		dlsp->dl_length = sizeof (dl_capab_hcksum_t);
1621 		ptr += sizeof (dl_capability_sub_t);
1622 
1623 		hcksum.hcksum_version = HCKSUM_VERSION_1;
1624 		dlcapabsetqid(&(hcksum.hcksum_mid), dsp->ds_rq);
1625 		bcopy(&hcksum, ptr, sizeof (dl_capab_hcksum_t));
1626 		ptr += sizeof (dl_capab_hcksum_t);
1627 	}
1628 
1629 	/*
1630 	 * Zero copy
1631 	 */
1632 	if (zcopy_capable) {
1633 		dlsp = (dl_capability_sub_t *)ptr;
1634 
1635 		dlsp->dl_cap = DL_CAPAB_ZEROCOPY;
1636 		dlsp->dl_length = sizeof (dl_capab_zerocopy_t);
1637 		ptr += sizeof (dl_capability_sub_t);
1638 
1639 		bzero(&zcopy, sizeof (dl_capab_zerocopy_t));
1640 		zcopy.zerocopy_version = ZEROCOPY_VERSION_1;
1641 		zcopy.zerocopy_flags = DL_CAPAB_VMSAFE_MEM;
1642 
1643 		dlcapabsetqid(&(zcopy.zerocopy_mid), dsp->ds_rq);
1644 		bcopy(&zcopy, ptr, sizeof (dl_capab_zerocopy_t));
1645 		ptr += sizeof (dl_capab_zerocopy_t);
1646 	}
1647 
1648 	/*
1649 	 * Direct capability negotiation interface between IP and DLD.
1650 	 * Refer to dld.h for details.
1651 	 */
1652 	if (dld_capable) {
1653 		dlsp = (dl_capability_sub_t *)ptr;
1654 		dlsp->dl_cap = DL_CAPAB_DLD;
1655 		dlsp->dl_length = sizeof (dl_capab_dld_t);
1656 		ptr += sizeof (dl_capability_sub_t);
1657 
1658 		bzero(&dld, sizeof (dl_capab_dld_t));
1659 		dld.dld_version = DLD_CURRENT_VERSION;
1660 		dld.dld_capab = (uintptr_t)dld_capab;
1661 		dld.dld_capab_handle = (uintptr_t)dsp;
1662 
1663 		dlcapabsetqid(&(dld.dld_mid), dsp->ds_rq);
1664 		bcopy(&dld, ptr, sizeof (dl_capab_dld_t));
1665 		ptr += sizeof (dl_capab_dld_t);
1666 	}
1667 
1668 	ASSERT(ptr == mp->b_rptr + sizeof (dl_capability_ack_t) + subsize);
1669 	qreply(q, mp);
1670 }
1671 
1672 /*
1673  * Disable any enabled capabilities.
1674  */
1675 void
1676 dld_capabilities_disable(dld_str_t *dsp)
1677 {
1678 	if (dsp->ds_polling)
1679 		(void) dld_capab_poll_disable(dsp, NULL);
1680 }
1681