xref: /illumos-gate/usr/src/uts/common/io/dld/dld_proto.c (revision 5328fc53d11d7151861fa272e4fb0248b8f0e145)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright 2012, Nexenta Systems, Inc. All rights reserved.
24  * Copyright (c) 2018, Joyent, Inc.
25  */
26 
27 /*
28  * Data-Link Driver
29  */
30 #include <sys/sysmacros.h>
31 #include <sys/strsubr.h>
32 #include <sys/strsun.h>
33 #include <sys/vlan.h>
34 #include <sys/dld_impl.h>
35 #include <sys/mac_client.h>
36 #include <sys/mac_client_impl.h>
37 #include <sys/mac_client_priv.h>
38 
39 typedef void proto_reqfunc_t(dld_str_t *, mblk_t *);
40 
41 static proto_reqfunc_t proto_info_req, proto_attach_req, proto_detach_req,
42     proto_bind_req, proto_unbind_req, proto_promiscon_req, proto_promiscoff_req,
43     proto_enabmulti_req, proto_disabmulti_req, proto_physaddr_req,
44     proto_setphysaddr_req, proto_udqos_req, proto_req, proto_capability_req,
45     proto_notify_req, proto_passive_req;
46 
47 static void proto_capability_advertise(dld_str_t *, mblk_t *);
48 static int dld_capab_poll_disable(dld_str_t *, dld_capab_poll_t *);
49 static boolean_t check_mod_above(queue_t *, const char *);
50 
/*
 * Evaluates to non-zero when `state' is one of the four transitional
 * DLPI states listed below (attach, detach, bind or unbind pending).
 * The argument is expanded more than once, so it must be free of side
 * effects.
 */
#define	DL_ACK_PENDING(state)			\
	((state) == DL_ATTACH_PENDING ||	\
	(state) == DL_DETACH_PENDING ||		\
	(state) == DL_BIND_PENDING ||		\
	(state) == DL_UNBIND_PENDING)
56 
57 /*
58  * Process a DLPI protocol message.
59  * The primitives DL_BIND_REQ, DL_ENABMULTI_REQ, DL_PROMISCON_REQ,
60  * DL_SET_PHYS_ADDR_REQ put the data link below our dld_str_t into an
61  * 'active' state. The primitive DL_PASSIVE_REQ marks our dld_str_t
62  * as 'passive' and forbids it from being subsequently made 'active'
63  * by the above primitives.
64  */
65 void
66 dld_proto(dld_str_t *dsp, mblk_t *mp)
67 {
68 	t_uscalar_t		prim;
69 
70 	if (MBLKL(mp) < sizeof (t_uscalar_t)) {
71 		freemsg(mp);
72 		return;
73 	}
74 	prim = ((union DL_primitives *)mp->b_rptr)->dl_primitive;
75 
76 	switch (prim) {
77 	case DL_INFO_REQ:
78 		proto_info_req(dsp, mp);
79 		break;
80 	case DL_BIND_REQ:
81 		proto_bind_req(dsp, mp);
82 		break;
83 	case DL_UNBIND_REQ:
84 		proto_unbind_req(dsp, mp);
85 		break;
86 	case DL_UNITDATA_REQ:
87 		proto_unitdata_req(dsp, mp);
88 		break;
89 	case DL_UDQOS_REQ:
90 		proto_udqos_req(dsp, mp);
91 		break;
92 	case DL_ATTACH_REQ:
93 		proto_attach_req(dsp, mp);
94 		break;
95 	case DL_DETACH_REQ:
96 		proto_detach_req(dsp, mp);
97 		break;
98 	case DL_ENABMULTI_REQ:
99 		proto_enabmulti_req(dsp, mp);
100 		break;
101 	case DL_DISABMULTI_REQ:
102 		proto_disabmulti_req(dsp, mp);
103 		break;
104 	case DL_PROMISCON_REQ:
105 		proto_promiscon_req(dsp, mp);
106 		break;
107 	case DL_PROMISCOFF_REQ:
108 		proto_promiscoff_req(dsp, mp);
109 		break;
110 	case DL_PHYS_ADDR_REQ:
111 		proto_physaddr_req(dsp, mp);
112 		break;
113 	case DL_SET_PHYS_ADDR_REQ:
114 		proto_setphysaddr_req(dsp, mp);
115 		break;
116 	case DL_NOTIFY_REQ:
117 		proto_notify_req(dsp, mp);
118 		break;
119 	case DL_CAPABILITY_REQ:
120 		proto_capability_req(dsp, mp);
121 		break;
122 	case DL_PASSIVE_REQ:
123 		proto_passive_req(dsp, mp);
124 		break;
125 	default:
126 		proto_req(dsp, mp);
127 		break;
128 	}
129 }
130 
/*
 * Arithmetic negation of `x'.  Both the argument and the whole expansion
 * are parenthesized so the macro behaves as a single primary expression
 * wherever it is used.
 */
#define	NEG(x)	(-(x))
132 typedef struct dl_info_ack_wrapper {
133 	dl_info_ack_t		dl_info;
134 	uint8_t			dl_addr[MAXMACADDRLEN + sizeof (uint16_t)];
135 	uint8_t			dl_brdcst_addr[MAXMACADDRLEN];
136 	dl_qos_cl_range1_t	dl_qos_range1;
137 	dl_qos_cl_sel1_t	dl_qos_sel1;
138 } dl_info_ack_wrapper_t;
139 
140 /*
141  * DL_INFO_REQ
142  */
143 static void
144 proto_info_req(dld_str_t *dsp, mblk_t *mp)
145 {
146 	dl_info_ack_wrapper_t	*dlwp;
147 	dl_info_ack_t		*dlp;
148 	dl_qos_cl_sel1_t	*selp;
149 	dl_qos_cl_range1_t	*rangep;
150 	uint8_t			*addr;
151 	uint8_t			*brdcst_addr;
152 	uint_t			addr_length;
153 	uint_t			sap_length;
154 	mac_info_t		minfo;
155 	mac_info_t		*minfop;
156 	queue_t			*q = dsp->ds_wq;
157 
158 	/*
159 	 * Swap the request message for one large enough to contain the
160 	 * wrapper structure defined above.
161 	 */
162 	if ((mp = mexchange(q, mp, sizeof (dl_info_ack_wrapper_t),
163 	    M_PCPROTO, 0)) == NULL)
164 		return;
165 
166 	bzero(mp->b_rptr, sizeof (dl_info_ack_wrapper_t));
167 	dlwp = (dl_info_ack_wrapper_t *)mp->b_rptr;
168 
169 	dlp = &(dlwp->dl_info);
170 	ASSERT(dlp == (dl_info_ack_t *)mp->b_rptr);
171 
172 	dlp->dl_primitive = DL_INFO_ACK;
173 
174 	/*
175 	 * Set up the sub-structure pointers.
176 	 */
177 	addr = dlwp->dl_addr;
178 	brdcst_addr = dlwp->dl_brdcst_addr;
179 	rangep = &(dlwp->dl_qos_range1);
180 	selp = &(dlwp->dl_qos_sel1);
181 
182 	/*
183 	 * This driver supports only version 2 connectionless DLPI provider
184 	 * nodes.
185 	 */
186 	dlp->dl_service_mode = DL_CLDLS;
187 	dlp->dl_version = DL_VERSION_2;
188 
189 	/*
190 	 * Set the style of the provider
191 	 */
192 	dlp->dl_provider_style = dsp->ds_style;
193 	ASSERT(dlp->dl_provider_style == DL_STYLE1 ||
194 	    dlp->dl_provider_style == DL_STYLE2);
195 
196 	/*
197 	 * Set the current DLPI state.
198 	 */
199 	dlp->dl_current_state = dsp->ds_dlstate;
200 
201 	/*
202 	 * Gratuitously set the media type. This is to deal with modules
203 	 * that assume the media type is known prior to DL_ATTACH_REQ
204 	 * being completed.
205 	 */
206 	dlp->dl_mac_type = DL_ETHER;
207 
208 	/*
209 	 * If the stream is not at least attached we try to retrieve the
210 	 * mac_info using mac_info_get()
211 	 */
212 	if (dsp->ds_dlstate == DL_UNATTACHED ||
213 	    dsp->ds_dlstate == DL_ATTACH_PENDING ||
214 	    dsp->ds_dlstate == DL_DETACH_PENDING) {
215 		if (!mac_info_get(ddi_major_to_name(dsp->ds_major), &minfo)) {
216 			/*
217 			 * Cannot find mac_info. giving up.
218 			 */
219 			goto done;
220 		}
221 		minfop = &minfo;
222 	} else {
223 		minfop = (mac_info_t *)dsp->ds_mip;
224 		/* We can only get the sdu if we're attached. */
225 		mac_sdu_get(dsp->ds_mh, &dlp->dl_min_sdu, &dlp->dl_max_sdu);
226 	}
227 
228 	/*
229 	 * Set the media type (properly this time).
230 	 */
231 	if (dsp->ds_native)
232 		dlp->dl_mac_type = minfop->mi_nativemedia;
233 	else
234 		dlp->dl_mac_type = minfop->mi_media;
235 
236 	/*
237 	 * Set the DLSAP length. We only support 16 bit values and they
238 	 * appear after the MAC address portion of DLSAP addresses.
239 	 */
240 	sap_length = sizeof (uint16_t);
241 	dlp->dl_sap_length = NEG(sap_length);
242 
243 	addr_length = minfop->mi_addr_length;
244 
245 	/*
246 	 * Copy in the media broadcast address.
247 	 */
248 	if (minfop->mi_brdcst_addr != NULL) {
249 		dlp->dl_brdcst_addr_offset =
250 		    (uintptr_t)brdcst_addr - (uintptr_t)dlp;
251 		bcopy(minfop->mi_brdcst_addr, brdcst_addr, addr_length);
252 		dlp->dl_brdcst_addr_length = addr_length;
253 	}
254 
255 	/* Only VLAN links and links that have a normal tag mode support QOS. */
256 	if ((dsp->ds_mch != NULL &&
257 	    mac_client_vid(dsp->ds_mch) != VLAN_ID_NONE) ||
258 	    (dsp->ds_dlp != NULL &&
259 	    dsp->ds_dlp->dl_tagmode == LINK_TAGMODE_NORMAL)) {
260 		dlp->dl_qos_range_offset = (uintptr_t)rangep - (uintptr_t)dlp;
261 		dlp->dl_qos_range_length = sizeof (dl_qos_cl_range1_t);
262 
263 		rangep->dl_qos_type = DL_QOS_CL_RANGE1;
264 		rangep->dl_trans_delay.dl_target_value = DL_UNKNOWN;
265 		rangep->dl_trans_delay.dl_accept_value = DL_UNKNOWN;
266 		rangep->dl_protection.dl_min = DL_UNKNOWN;
267 		rangep->dl_protection.dl_max = DL_UNKNOWN;
268 		rangep->dl_residual_error = DL_UNKNOWN;
269 
270 		/*
271 		 * Specify the supported range of priorities.
272 		 */
273 		rangep->dl_priority.dl_min = 0;
274 		rangep->dl_priority.dl_max = (1 << VLAN_PRI_SIZE) - 1;
275 
276 		dlp->dl_qos_offset = (uintptr_t)selp - (uintptr_t)dlp;
277 		dlp->dl_qos_length = sizeof (dl_qos_cl_sel1_t);
278 
279 		selp->dl_qos_type = DL_QOS_CL_SEL1;
280 		selp->dl_trans_delay = DL_UNKNOWN;
281 		selp->dl_protection = DL_UNKNOWN;
282 		selp->dl_residual_error = DL_UNKNOWN;
283 
284 		/*
285 		 * Specify the current priority (which can be changed by
286 		 * the DL_UDQOS_REQ primitive).
287 		 */
288 		selp->dl_priority = dsp->ds_pri;
289 	}
290 
291 	dlp->dl_addr_length = addr_length + sizeof (uint16_t);
292 	if (dsp->ds_dlstate == DL_IDLE) {
293 		/*
294 		 * The stream is bound. Therefore we can formulate a valid
295 		 * DLSAP address.
296 		 */
297 		dlp->dl_addr_offset = (uintptr_t)addr - (uintptr_t)dlp;
298 		if (addr_length > 0)
299 			mac_unicast_primary_get(dsp->ds_mh, addr);
300 
301 		*(uint16_t *)(addr + addr_length) = dsp->ds_sap;
302 	}
303 
304 done:
305 	IMPLY(dlp->dl_qos_offset != 0, dlp->dl_qos_length != 0);
306 	IMPLY(dlp->dl_qos_range_offset != 0,
307 	    dlp->dl_qos_range_length != 0);
308 	IMPLY(dlp->dl_addr_offset != 0, dlp->dl_addr_length != 0);
309 	IMPLY(dlp->dl_brdcst_addr_offset != 0,
310 	    dlp->dl_brdcst_addr_length != 0);
311 
312 	qreply(q, mp);
313 }
314 
315 /*
316  * DL_ATTACH_REQ
317  */
318 static void
319 proto_attach_req(dld_str_t *dsp, mblk_t *mp)
320 {
321 	dl_attach_req_t	*dlp = (dl_attach_req_t *)mp->b_rptr;
322 	int		err = 0;
323 	t_uscalar_t	dl_err;
324 	queue_t		*q = dsp->ds_wq;
325 
326 	if (MBLKL(mp) < sizeof (dl_attach_req_t) ||
327 	    dlp->dl_ppa < 0 || dsp->ds_style == DL_STYLE1) {
328 		dl_err = DL_BADPRIM;
329 		goto failed;
330 	}
331 
332 	if (dsp->ds_dlstate != DL_UNATTACHED) {
333 		dl_err = DL_OUTSTATE;
334 		goto failed;
335 	}
336 
337 	dsp->ds_dlstate = DL_ATTACH_PENDING;
338 
339 	err = dld_str_attach(dsp, dlp->dl_ppa);
340 	if (err != 0) {
341 		switch (err) {
342 		case ENOENT:
343 			dl_err = DL_BADPPA;
344 			err = 0;
345 			break;
346 		default:
347 			dl_err = DL_SYSERR;
348 			break;
349 		}
350 		dsp->ds_dlstate = DL_UNATTACHED;
351 		goto failed;
352 	}
353 	ASSERT(dsp->ds_dlstate == DL_UNBOUND);
354 	dlokack(q, mp, DL_ATTACH_REQ);
355 	return;
356 
357 failed:
358 	dlerrorack(q, mp, DL_ATTACH_REQ, dl_err, (t_uscalar_t)err);
359 }
360 
361 /*
362  * DL_DETACH_REQ
363  */
364 static void
365 proto_detach_req(dld_str_t *dsp, mblk_t *mp)
366 {
367 	queue_t		*q = dsp->ds_wq;
368 	t_uscalar_t	dl_err;
369 
370 	if (MBLKL(mp) < sizeof (dl_detach_req_t)) {
371 		dl_err = DL_BADPRIM;
372 		goto failed;
373 	}
374 
375 	if (dsp->ds_dlstate != DL_UNBOUND) {
376 		dl_err = DL_OUTSTATE;
377 		goto failed;
378 	}
379 
380 	if (dsp->ds_style == DL_STYLE1) {
381 		dl_err = DL_BADPRIM;
382 		goto failed;
383 	}
384 
385 	ASSERT(dsp->ds_datathr_cnt == 0);
386 	dsp->ds_dlstate = DL_DETACH_PENDING;
387 
388 	dld_str_detach(dsp);
389 	dlokack(dsp->ds_wq, mp, DL_DETACH_REQ);
390 	return;
391 
392 failed:
393 	dlerrorack(q, mp, DL_DETACH_REQ, dl_err, 0);
394 }
395 
396 /*
397  * DL_BIND_REQ
398  */
399 static void
400 proto_bind_req(dld_str_t *dsp, mblk_t *mp)
401 {
402 	dl_bind_req_t	*dlp = (dl_bind_req_t *)mp->b_rptr;
403 	int		err = 0;
404 	uint8_t		dlsap_addr[MAXMACADDRLEN + sizeof (uint16_t)];
405 	uint_t		dlsap_addr_length;
406 	t_uscalar_t	dl_err;
407 	t_scalar_t	sap;
408 	queue_t		*q = dsp->ds_wq;
409 	mac_perim_handle_t	mph;
410 	void		*mdip;
411 	int32_t		intr_cpu;
412 
413 	if (MBLKL(mp) < sizeof (dl_bind_req_t)) {
414 		dl_err = DL_BADPRIM;
415 		goto failed;
416 	}
417 
418 	if (dlp->dl_xidtest_flg != 0) {
419 		dl_err = DL_NOAUTO;
420 		goto failed;
421 	}
422 
423 	if (dlp->dl_service_mode != DL_CLDLS) {
424 		dl_err = DL_UNSUPPORTED;
425 		goto failed;
426 	}
427 
428 	if (dsp->ds_dlstate != DL_UNBOUND) {
429 		dl_err = DL_OUTSTATE;
430 		goto failed;
431 	}
432 
433 	mac_perim_enter_by_mh(dsp->ds_mh, &mph);
434 
435 	if ((err = dls_active_set(dsp)) != 0) {
436 		dl_err = DL_SYSERR;
437 		goto failed2;
438 	}
439 
440 	dsp->ds_dlstate = DL_BIND_PENDING;
441 	/*
442 	 * Set the receive callback.
443 	 */
444 	dls_rx_set(dsp, (dsp->ds_mode == DLD_RAW) ?
445 	    dld_str_rx_raw : dld_str_rx_unitdata, dsp);
446 
447 	/*
448 	 * Bind the channel such that it can receive packets.
449 	 */
450 	sap = dlp->dl_sap;
451 	dsp->ds_nonip = !check_mod_above(dsp->ds_rq, "ip") &&
452 	    !check_mod_above(dsp->ds_rq, "arp");
453 
454 	err = dls_bind(dsp, sap);
455 	if (err != 0) {
456 		switch (err) {
457 		case EINVAL:
458 			dl_err = DL_BADADDR;
459 			err = 0;
460 			break;
461 		default:
462 			dl_err = DL_SYSERR;
463 			break;
464 		}
465 
466 		dsp->ds_dlstate = DL_UNBOUND;
467 		dls_active_clear(dsp, B_FALSE);
468 		goto failed2;
469 	}
470 
471 	intr_cpu = mac_client_intr_cpu(dsp->ds_mch);
472 	mdip = mac_get_devinfo(dsp->ds_mh);
473 	mac_perim_exit(mph);
474 
475 	/*
476 	 * We do this after we get out of the perim to avoid deadlocks
477 	 * etc. since part of mac_client_retarget_intr is to walk the
478 	 * device tree in order to find and retarget the interrupts.
479 	 */
480 	if (intr_cpu != -1)
481 		mac_client_set_intr_cpu(mdip, dsp->ds_mch, intr_cpu);
482 
483 	/*
484 	 * Copy in MAC address.
485 	 */
486 	dlsap_addr_length = dsp->ds_mip->mi_addr_length;
487 	mac_unicast_primary_get(dsp->ds_mh, dlsap_addr);
488 
489 	/*
490 	 * Copy in the SAP.
491 	 */
492 	*(uint16_t *)(dlsap_addr + dlsap_addr_length) = sap;
493 	dlsap_addr_length += sizeof (uint16_t);
494 
495 	dsp->ds_dlstate = DL_IDLE;
496 	dlbindack(q, mp, sap, dlsap_addr, dlsap_addr_length, 0, 0);
497 	return;
498 
499 failed2:
500 	mac_perim_exit(mph);
501 failed:
502 	dlerrorack(q, mp, DL_BIND_REQ, dl_err, (t_uscalar_t)err);
503 }
504 
505 /*
506  * DL_UNBIND_REQ
507  */
508 static void
509 proto_unbind_req(dld_str_t *dsp, mblk_t *mp)
510 {
511 	queue_t		*q = dsp->ds_wq;
512 	t_uscalar_t	dl_err;
513 	mac_perim_handle_t	mph;
514 
515 	if (MBLKL(mp) < sizeof (dl_unbind_req_t)) {
516 		dl_err = DL_BADPRIM;
517 		goto failed;
518 	}
519 
520 	if (dsp->ds_dlstate != DL_IDLE) {
521 		dl_err = DL_OUTSTATE;
522 		goto failed;
523 	}
524 
525 	mutex_enter(&dsp->ds_lock);
526 	while (dsp->ds_datathr_cnt != 0)
527 		cv_wait(&dsp->ds_datathr_cv, &dsp->ds_lock);
528 
529 	dsp->ds_dlstate = DL_UNBIND_PENDING;
530 	mutex_exit(&dsp->ds_lock);
531 
532 	mac_perim_enter_by_mh(dsp->ds_mh, &mph);
533 	/*
534 	 * Unbind the channel to stop packets being received.
535 	 */
536 	dls_unbind(dsp);
537 
538 	/*
539 	 * Disable polling mode, if it is enabled.
540 	 */
541 	(void) dld_capab_poll_disable(dsp, NULL);
542 
543 	/*
544 	 * Clear LSO flags.
545 	 */
546 	dsp->ds_lso = B_FALSE;
547 	dsp->ds_lso_max = 0;
548 
549 	/*
550 	 * Clear the receive callback.
551 	 */
552 	dls_rx_set(dsp, NULL, NULL);
553 	dsp->ds_direct = B_FALSE;
554 
555 	/*
556 	 * Set the mode back to the default (unitdata).
557 	 */
558 	dsp->ds_mode = DLD_UNITDATA;
559 	dsp->ds_dlstate = DL_UNBOUND;
560 
561 	dls_active_clear(dsp, B_FALSE);
562 	mac_perim_exit(mph);
563 	dlokack(dsp->ds_wq, mp, DL_UNBIND_REQ);
564 	return;
565 failed:
566 	dlerrorack(q, mp, DL_UNBIND_REQ, dl_err, 0);
567 }
568 
569 /*
570  * DL_PROMISCON_REQ
571  */
572 static void
573 proto_promiscon_req(dld_str_t *dsp, mblk_t *mp)
574 {
575 	dl_promiscon_req_t *dlp = (dl_promiscon_req_t *)mp->b_rptr;
576 	int		err = 0;
577 	t_uscalar_t	dl_err;
578 	uint32_t	new_flags, promisc_saved;
579 	queue_t		*q = dsp->ds_wq;
580 	mac_perim_handle_t	mph;
581 
582 	if (MBLKL(mp) < sizeof (dl_promiscon_req_t)) {
583 		dl_err = DL_BADPRIM;
584 		goto failed;
585 	}
586 
587 	if (dsp->ds_dlstate == DL_UNATTACHED ||
588 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
589 		dl_err = DL_OUTSTATE;
590 		goto failed;
591 	}
592 
593 	mac_perim_enter_by_mh(dsp->ds_mh, &mph);
594 
595 	new_flags = promisc_saved = dsp->ds_promisc;
596 	switch (dlp->dl_level) {
597 	case DL_PROMISC_SAP:
598 		new_flags |= DLS_PROMISC_SAP;
599 		break;
600 
601 	case DL_PROMISC_MULTI:
602 		new_flags |= DLS_PROMISC_MULTI;
603 		break;
604 
605 	case DL_PROMISC_PHYS:
606 		new_flags |= DLS_PROMISC_PHYS;
607 		break;
608 
609 	default:
610 		dl_err = DL_NOTSUPPORTED;
611 		goto failed2;
612 	}
613 
614 	if ((promisc_saved == 0) && (err = dls_active_set(dsp)) != 0) {
615 		ASSERT(dsp->ds_promisc == promisc_saved);
616 		dl_err = DL_SYSERR;
617 		goto failed2;
618 	}
619 
620 	/*
621 	 * Adjust channel promiscuity.
622 	 */
623 	err = dls_promisc(dsp, new_flags);
624 
625 	if (err != 0) {
626 		dl_err = DL_SYSERR;
627 		dsp->ds_promisc = promisc_saved;
628 		if (promisc_saved == 0)
629 			dls_active_clear(dsp, B_FALSE);
630 		goto failed2;
631 	}
632 
633 	mac_perim_exit(mph);
634 
635 	dlokack(q, mp, DL_PROMISCON_REQ);
636 	return;
637 
638 failed2:
639 	mac_perim_exit(mph);
640 failed:
641 	dlerrorack(q, mp, DL_PROMISCON_REQ, dl_err, (t_uscalar_t)err);
642 }
643 
644 /*
645  * DL_PROMISCOFF_REQ
646  */
647 static void
648 proto_promiscoff_req(dld_str_t *dsp, mblk_t *mp)
649 {
650 	dl_promiscoff_req_t *dlp = (dl_promiscoff_req_t *)mp->b_rptr;
651 	int		err = 0;
652 	t_uscalar_t	dl_err;
653 	uint32_t	new_flags;
654 	queue_t		*q = dsp->ds_wq;
655 	mac_perim_handle_t	mph;
656 
657 	if (MBLKL(mp) < sizeof (dl_promiscoff_req_t)) {
658 		dl_err = DL_BADPRIM;
659 		goto failed;
660 	}
661 
662 	if (dsp->ds_dlstate == DL_UNATTACHED ||
663 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
664 		dl_err = DL_OUTSTATE;
665 		goto failed;
666 	}
667 
668 	mac_perim_enter_by_mh(dsp->ds_mh, &mph);
669 
670 	new_flags = dsp->ds_promisc;
671 	switch (dlp->dl_level) {
672 	case DL_PROMISC_SAP:
673 		if (!(dsp->ds_promisc & DLS_PROMISC_SAP)) {
674 			dl_err = DL_NOTENAB;
675 			goto failed2;
676 		}
677 		new_flags &= ~DLS_PROMISC_SAP;
678 		break;
679 
680 	case DL_PROMISC_MULTI:
681 		if (!(dsp->ds_promisc & DLS_PROMISC_MULTI)) {
682 			dl_err = DL_NOTENAB;
683 			goto failed2;
684 		}
685 		new_flags &= ~DLS_PROMISC_MULTI;
686 		break;
687 
688 	case DL_PROMISC_PHYS:
689 		if (!(dsp->ds_promisc & DLS_PROMISC_PHYS)) {
690 			dl_err = DL_NOTENAB;
691 			goto failed2;
692 		}
693 		new_flags &= ~DLS_PROMISC_PHYS;
694 		break;
695 
696 	default:
697 		dl_err = DL_NOTSUPPORTED;
698 		goto failed2;
699 	}
700 
701 	/*
702 	 * Adjust channel promiscuity.
703 	 */
704 	err = dls_promisc(dsp, new_flags);
705 
706 	if (err != 0) {
707 		dl_err = DL_SYSERR;
708 		goto failed2;
709 	}
710 
711 	ASSERT(dsp->ds_promisc == new_flags);
712 	if (dsp->ds_promisc == 0)
713 		dls_active_clear(dsp, B_FALSE);
714 
715 	mac_perim_exit(mph);
716 
717 	dlokack(q, mp, DL_PROMISCOFF_REQ);
718 	return;
719 failed2:
720 	mac_perim_exit(mph);
721 failed:
722 	dlerrorack(q, mp, DL_PROMISCOFF_REQ, dl_err, (t_uscalar_t)err);
723 }
724 
725 /*
726  * DL_ENABMULTI_REQ
727  */
728 static void
729 proto_enabmulti_req(dld_str_t *dsp, mblk_t *mp)
730 {
731 	dl_enabmulti_req_t *dlp = (dl_enabmulti_req_t *)mp->b_rptr;
732 	int		err = 0;
733 	t_uscalar_t	dl_err;
734 	queue_t		*q = dsp->ds_wq;
735 	mac_perim_handle_t	mph;
736 
737 	if (dsp->ds_dlstate == DL_UNATTACHED ||
738 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
739 		dl_err = DL_OUTSTATE;
740 		goto failed;
741 	}
742 
743 	if (MBLKL(mp) < sizeof (dl_enabmulti_req_t) ||
744 	    !MBLKIN(mp, dlp->dl_addr_offset, dlp->dl_addr_length) ||
745 	    dlp->dl_addr_length != dsp->ds_mip->mi_addr_length) {
746 		dl_err = DL_BADPRIM;
747 		goto failed;
748 	}
749 
750 	mac_perim_enter_by_mh(dsp->ds_mh, &mph);
751 
752 	if ((dsp->ds_dmap == NULL) && (err = dls_active_set(dsp)) != 0) {
753 		dl_err = DL_SYSERR;
754 		goto failed2;
755 	}
756 
757 	err = dls_multicst_add(dsp, mp->b_rptr + dlp->dl_addr_offset);
758 	if (err != 0) {
759 		switch (err) {
760 		case EINVAL:
761 			dl_err = DL_BADADDR;
762 			err = 0;
763 			break;
764 		case ENOSPC:
765 			dl_err = DL_TOOMANY;
766 			err = 0;
767 			break;
768 		default:
769 			dl_err = DL_SYSERR;
770 			break;
771 		}
772 		if (dsp->ds_dmap == NULL)
773 			dls_active_clear(dsp, B_FALSE);
774 		goto failed2;
775 	}
776 
777 	mac_perim_exit(mph);
778 
779 	dlokack(q, mp, DL_ENABMULTI_REQ);
780 	return;
781 
782 failed2:
783 	mac_perim_exit(mph);
784 failed:
785 	dlerrorack(q, mp, DL_ENABMULTI_REQ, dl_err, (t_uscalar_t)err);
786 }
787 
788 /*
789  * DL_DISABMULTI_REQ
790  */
791 static void
792 proto_disabmulti_req(dld_str_t *dsp, mblk_t *mp)
793 {
794 	dl_disabmulti_req_t *dlp = (dl_disabmulti_req_t *)mp->b_rptr;
795 	int		err = 0;
796 	t_uscalar_t	dl_err;
797 	queue_t		*q = dsp->ds_wq;
798 	mac_perim_handle_t	mph;
799 
800 	if (dsp->ds_dlstate == DL_UNATTACHED ||
801 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
802 		dl_err = DL_OUTSTATE;
803 		goto failed;
804 	}
805 
806 	if (MBLKL(mp) < sizeof (dl_disabmulti_req_t) ||
807 	    !MBLKIN(mp, dlp->dl_addr_offset, dlp->dl_addr_length) ||
808 	    dlp->dl_addr_length != dsp->ds_mip->mi_addr_length) {
809 		dl_err = DL_BADPRIM;
810 		goto failed;
811 	}
812 
813 	mac_perim_enter_by_mh(dsp->ds_mh, &mph);
814 	err = dls_multicst_remove(dsp, mp->b_rptr + dlp->dl_addr_offset);
815 	if ((err == 0) && (dsp->ds_dmap == NULL))
816 		dls_active_clear(dsp, B_FALSE);
817 	mac_perim_exit(mph);
818 
819 	if (err != 0) {
820 		switch (err) {
821 		case EINVAL:
822 			dl_err = DL_BADADDR;
823 			err = 0;
824 			break;
825 
826 		case ENOENT:
827 			dl_err = DL_NOTENAB;
828 			err = 0;
829 			break;
830 
831 		default:
832 			dl_err = DL_SYSERR;
833 			break;
834 		}
835 		goto failed;
836 	}
837 	dlokack(q, mp, DL_DISABMULTI_REQ);
838 	return;
839 failed:
840 	dlerrorack(q, mp, DL_DISABMULTI_REQ, dl_err, (t_uscalar_t)err);
841 }
842 
843 /*
844  * DL_PHYS_ADDR_REQ
845  */
846 static void
847 proto_physaddr_req(dld_str_t *dsp, mblk_t *mp)
848 {
849 	dl_phys_addr_req_t *dlp = (dl_phys_addr_req_t *)mp->b_rptr;
850 	queue_t		*q = dsp->ds_wq;
851 	t_uscalar_t	dl_err = 0;
852 	char		*addr = NULL;
853 	uint_t		addr_length;
854 
855 	if (MBLKL(mp) < sizeof (dl_phys_addr_req_t)) {
856 		dl_err = DL_BADPRIM;
857 		goto done;
858 	}
859 
860 	if (dsp->ds_dlstate == DL_UNATTACHED ||
861 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
862 		dl_err = DL_OUTSTATE;
863 		goto done;
864 	}
865 
866 	addr_length = dsp->ds_mip->mi_addr_length;
867 	if (addr_length > 0) {
868 		addr = kmem_alloc(addr_length, KM_SLEEP);
869 		switch (dlp->dl_addr_type) {
870 		case DL_CURR_PHYS_ADDR:
871 			mac_unicast_primary_get(dsp->ds_mh, (uint8_t *)addr);
872 			break;
873 		case DL_FACT_PHYS_ADDR:
874 			bcopy(dsp->ds_mip->mi_unicst_addr, addr, addr_length);
875 			break;
876 		case DL_CURR_DEST_ADDR:
877 			if (!mac_dst_get(dsp->ds_mh, (uint8_t *)addr))
878 				dl_err = DL_NOTSUPPORTED;
879 			break;
880 		default:
881 			dl_err = DL_UNSUPPORTED;
882 		}
883 	}
884 done:
885 	if (dl_err == 0)
886 		dlphysaddrack(q, mp, addr, (t_uscalar_t)addr_length);
887 	else
888 		dlerrorack(q, mp, DL_PHYS_ADDR_REQ, dl_err, 0);
889 	if (addr != NULL)
890 		kmem_free(addr, addr_length);
891 }
892 
893 /*
894  * DL_SET_PHYS_ADDR_REQ
895  */
896 static void
897 proto_setphysaddr_req(dld_str_t *dsp, mblk_t *mp)
898 {
899 	dl_set_phys_addr_req_t *dlp = (dl_set_phys_addr_req_t *)mp->b_rptr;
900 	int		err = 0;
901 	t_uscalar_t	dl_err;
902 	queue_t		*q = dsp->ds_wq;
903 	mac_perim_handle_t	mph;
904 
905 	if (dsp->ds_dlstate == DL_UNATTACHED ||
906 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
907 		dl_err = DL_OUTSTATE;
908 		goto failed;
909 	}
910 
911 	if (MBLKL(mp) < sizeof (dl_set_phys_addr_req_t) ||
912 	    !MBLKIN(mp, dlp->dl_addr_offset, dlp->dl_addr_length) ||
913 	    dlp->dl_addr_length != dsp->ds_mip->mi_addr_length) {
914 		dl_err = DL_BADPRIM;
915 		goto failed;
916 	}
917 
918 	mac_perim_enter_by_mh(dsp->ds_mh, &mph);
919 
920 	if ((err = dls_active_set(dsp)) != 0) {
921 		dl_err = DL_SYSERR;
922 		goto failed2;
923 	}
924 
925 	/*
926 	 * If mac-nospoof is enabled and the link is owned by a
927 	 * non-global zone, changing the mac address is not allowed.
928 	 */
929 	if (dsp->ds_dlp->dl_zid != GLOBAL_ZONEID &&
930 	    mac_protect_enabled(dsp->ds_mch, MPT_MACNOSPOOF)) {
931 		dls_active_clear(dsp, B_FALSE);
932 		err = EACCES;
933 		goto failed2;
934 	}
935 
936 	err = mac_unicast_primary_set(dsp->ds_mh,
937 	    mp->b_rptr + dlp->dl_addr_offset);
938 	if (err != 0) {
939 		switch (err) {
940 		case EINVAL:
941 			dl_err = DL_BADADDR;
942 			err = 0;
943 			break;
944 
945 		default:
946 			dl_err = DL_SYSERR;
947 			break;
948 		}
949 		dls_active_clear(dsp, B_FALSE);
950 		goto failed2;
951 
952 	}
953 
954 	mac_perim_exit(mph);
955 
956 	dlokack(q, mp, DL_SET_PHYS_ADDR_REQ);
957 	return;
958 
959 failed2:
960 	mac_perim_exit(mph);
961 failed:
962 	dlerrorack(q, mp, DL_SET_PHYS_ADDR_REQ, dl_err, (t_uscalar_t)err);
963 }
964 
965 /*
966  * DL_UDQOS_REQ
967  */
968 static void
969 proto_udqos_req(dld_str_t *dsp, mblk_t *mp)
970 {
971 	dl_udqos_req_t *dlp = (dl_udqos_req_t *)mp->b_rptr;
972 	dl_qos_cl_sel1_t *selp;
973 	int		off, len;
974 	t_uscalar_t	dl_err;
975 	queue_t		*q = dsp->ds_wq;
976 
977 	off = dlp->dl_qos_offset;
978 	len = dlp->dl_qos_length;
979 
980 	if (MBLKL(mp) < sizeof (dl_udqos_req_t) || !MBLKIN(mp, off, len)) {
981 		dl_err = DL_BADPRIM;
982 		goto failed;
983 	}
984 
985 	selp = (dl_qos_cl_sel1_t *)(mp->b_rptr + off);
986 	if (selp->dl_qos_type != DL_QOS_CL_SEL1) {
987 		dl_err = DL_BADQOSTYPE;
988 		goto failed;
989 	}
990 
991 	if (selp->dl_priority > (1 << VLAN_PRI_SIZE) - 1 ||
992 	    selp->dl_priority < 0) {
993 		dl_err = DL_BADQOSPARAM;
994 		goto failed;
995 	}
996 
997 	dsp->ds_pri = selp->dl_priority;
998 	dlokack(q, mp, DL_UDQOS_REQ);
999 	return;
1000 failed:
1001 	dlerrorack(q, mp, DL_UDQOS_REQ, dl_err, 0);
1002 }
1003 
1004 static boolean_t
1005 check_mod_above(queue_t *q, const char *mod)
1006 {
1007 	queue_t		*next_q;
1008 	boolean_t	ret = B_TRUE;
1009 
1010 	claimstr(q);
1011 	next_q = q->q_next;
1012 	if (strcmp(next_q->q_qinfo->qi_minfo->mi_idname, mod) != 0)
1013 		ret = B_FALSE;
1014 	releasestr(q);
1015 	return (ret);
1016 }
1017 
1018 /*
1019  * DL_CAPABILITY_REQ
1020  */
1021 static void
1022 proto_capability_req(dld_str_t *dsp, mblk_t *mp)
1023 {
1024 	dl_capability_req_t *dlp = (dl_capability_req_t *)mp->b_rptr;
1025 	dl_capability_sub_t *sp;
1026 	size_t		size, len;
1027 	offset_t	off, end;
1028 	t_uscalar_t	dl_err;
1029 	queue_t		*q = dsp->ds_wq;
1030 
1031 	if (MBLKL(mp) < sizeof (dl_capability_req_t)) {
1032 		dl_err = DL_BADPRIM;
1033 		goto failed;
1034 	}
1035 
1036 	if (dsp->ds_dlstate == DL_UNATTACHED ||
1037 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
1038 		dl_err = DL_OUTSTATE;
1039 		goto failed;
1040 	}
1041 
1042 	/*
1043 	 * This request is overloaded. If there are no requested capabilities
1044 	 * then we just want to acknowledge with all the capabilities we
1045 	 * support. Otherwise we enable the set of capabilities requested.
1046 	 */
1047 	if (dlp->dl_sub_length == 0) {
1048 		proto_capability_advertise(dsp, mp);
1049 		return;
1050 	}
1051 
1052 	if (!MBLKIN(mp, dlp->dl_sub_offset, dlp->dl_sub_length)) {
1053 		dl_err = DL_BADPRIM;
1054 		goto failed;
1055 	}
1056 
1057 	dlp->dl_primitive = DL_CAPABILITY_ACK;
1058 
1059 	off = dlp->dl_sub_offset;
1060 	len = dlp->dl_sub_length;
1061 
1062 	/*
1063 	 * Walk the list of capabilities to be enabled.
1064 	 */
1065 	for (end = off + len; off < end; ) {
1066 		sp = (dl_capability_sub_t *)(mp->b_rptr + off);
1067 		size = sizeof (dl_capability_sub_t) + sp->dl_length;
1068 
1069 		if (off + size > end ||
1070 		    !IS_P2ALIGNED(off, sizeof (uint32_t))) {
1071 			dl_err = DL_BADPRIM;
1072 			goto failed;
1073 		}
1074 
1075 		switch (sp->dl_cap) {
1076 		/*
1077 		 * TCP/IP checksum offload to hardware.
1078 		 */
1079 		case DL_CAPAB_HCKSUM: {
1080 			dl_capab_hcksum_t *hcksump;
1081 			dl_capab_hcksum_t hcksum;
1082 
1083 			hcksump = (dl_capab_hcksum_t *)&sp[1];
1084 			/*
1085 			 * Copy for alignment.
1086 			 */
1087 			bcopy(hcksump, &hcksum, sizeof (dl_capab_hcksum_t));
1088 			dlcapabsetqid(&(hcksum.hcksum_mid), dsp->ds_rq);
1089 			bcopy(&hcksum, hcksump, sizeof (dl_capab_hcksum_t));
1090 			break;
1091 		}
1092 
1093 		case DL_CAPAB_DLD: {
1094 			dl_capab_dld_t	*dldp;
1095 			dl_capab_dld_t	dld;
1096 
1097 			dldp = (dl_capab_dld_t *)&sp[1];
1098 			/*
1099 			 * Copy for alignment.
1100 			 */
1101 			bcopy(dldp, &dld, sizeof (dl_capab_dld_t));
1102 			dlcapabsetqid(&(dld.dld_mid), dsp->ds_rq);
1103 			bcopy(&dld, dldp, sizeof (dl_capab_dld_t));
1104 			break;
1105 		}
1106 		default:
1107 			break;
1108 		}
1109 		off += size;
1110 	}
1111 	qreply(q, mp);
1112 	return;
1113 failed:
1114 	dlerrorack(q, mp, DL_CAPABILITY_REQ, dl_err, 0);
1115 }
1116 
1117 /*
1118  * DL_NOTIFY_REQ
1119  */
1120 static void
1121 proto_notify_req(dld_str_t *dsp, mblk_t *mp)
1122 {
1123 	dl_notify_req_t	*dlp = (dl_notify_req_t *)mp->b_rptr;
1124 	t_uscalar_t	dl_err;
1125 	queue_t		*q = dsp->ds_wq;
1126 	uint_t		note =
1127 	    DL_NOTE_PROMISC_ON_PHYS |
1128 	    DL_NOTE_PROMISC_OFF_PHYS |
1129 	    DL_NOTE_PHYS_ADDR |
1130 	    DL_NOTE_LINK_UP |
1131 	    DL_NOTE_LINK_DOWN |
1132 	    DL_NOTE_CAPAB_RENEG |
1133 	    DL_NOTE_FASTPATH_FLUSH |
1134 	    DL_NOTE_SPEED |
1135 	    DL_NOTE_SDU_SIZE|
1136 	    DL_NOTE_SDU_SIZE2|
1137 	    DL_NOTE_ALLOWED_IPS;
1138 
1139 	if (MBLKL(mp) < sizeof (dl_notify_req_t)) {
1140 		dl_err = DL_BADPRIM;
1141 		goto failed;
1142 	}
1143 
1144 	if (dsp->ds_dlstate == DL_UNATTACHED ||
1145 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
1146 		dl_err = DL_OUTSTATE;
1147 		goto failed;
1148 	}
1149 
1150 	note &= ~(mac_no_notification(dsp->ds_mh));
1151 
1152 	/*
1153 	 * Cache the notifications that are being enabled.
1154 	 */
1155 	dsp->ds_notifications = dlp->dl_notifications & note;
1156 	/*
1157 	 * The ACK carries all notifications regardless of which set is
1158 	 * being enabled.
1159 	 */
1160 	dlnotifyack(q, mp, note);
1161 
1162 	/*
1163 	 * Generate DL_NOTIFY_IND messages for each enabled notification.
1164 	 */
1165 	if (dsp->ds_notifications != 0) {
1166 		dld_str_notify_ind(dsp);
1167 	}
1168 	return;
1169 failed:
1170 	dlerrorack(q, mp, DL_NOTIFY_REQ, dl_err, 0);
1171 }
1172 
1173 /*
1174  * DL_UINTDATA_REQ
1175  */
1176 void
1177 proto_unitdata_req(dld_str_t *dsp, mblk_t *mp)
1178 {
1179 	queue_t			*q = dsp->ds_wq;
1180 	dl_unitdata_req_t	*dlp = (dl_unitdata_req_t *)mp->b_rptr;
1181 	off_t			off;
1182 	size_t			len, size;
1183 	const uint8_t		*addr;
1184 	uint16_t		sap;
1185 	uint_t			addr_length;
1186 	mblk_t			*bp, *payload;
1187 	uint32_t		start, stuff, end, value, flags;
1188 	t_uscalar_t		dl_err;
1189 	uint_t			max_sdu;
1190 
1191 	if (MBLKL(mp) < sizeof (dl_unitdata_req_t) || mp->b_cont == NULL) {
1192 		dlerrorack(q, mp, DL_UNITDATA_REQ, DL_BADPRIM, 0);
1193 		return;
1194 	}
1195 
1196 	mutex_enter(&dsp->ds_lock);
1197 	if (dsp->ds_dlstate != DL_IDLE) {
1198 		mutex_exit(&dsp->ds_lock);
1199 		dlerrorack(q, mp, DL_UNITDATA_REQ, DL_OUTSTATE, 0);
1200 		return;
1201 	}
1202 	DLD_DATATHR_INC(dsp);
1203 	mutex_exit(&dsp->ds_lock);
1204 
1205 	addr_length = dsp->ds_mip->mi_addr_length;
1206 
1207 	off = dlp->dl_dest_addr_offset;
1208 	len = dlp->dl_dest_addr_length;
1209 
1210 	if (!MBLKIN(mp, off, len) || !IS_P2ALIGNED(off, sizeof (uint16_t))) {
1211 		dl_err = DL_BADPRIM;
1212 		goto failed;
1213 	}
1214 
1215 	if (len != addr_length + sizeof (uint16_t)) {
1216 		dl_err = DL_BADADDR;
1217 		goto failed;
1218 	}
1219 
1220 	addr = mp->b_rptr + off;
1221 	sap = *(uint16_t *)(mp->b_rptr + off + addr_length);
1222 
1223 	/*
1224 	 * Check the length of the packet and the block types.
1225 	 */
1226 	size = 0;
1227 	payload = mp->b_cont;
1228 	for (bp = payload; bp != NULL; bp = bp->b_cont) {
1229 		if (DB_TYPE(bp) != M_DATA)
1230 			goto baddata;
1231 
1232 		size += MBLKL(bp);
1233 	}
1234 
1235 	mac_sdu_get(dsp->ds_mh, NULL, &max_sdu);
1236 	if (size > max_sdu)
1237 		goto baddata;
1238 
1239 	/*
1240 	 * Build a packet header.
1241 	 */
1242 	if ((bp = dls_header(dsp, addr, sap, dlp->dl_priority.dl_max,
1243 	    &payload)) == NULL) {
1244 		dl_err = DL_BADADDR;
1245 		goto failed;
1246 	}
1247 
1248 	/*
1249 	 * We no longer need the M_PROTO header, so free it.
1250 	 */
1251 	freeb(mp);
1252 
1253 	/*
1254 	 * Transfer the checksum offload information if it is present.
1255 	 */
1256 	hcksum_retrieve(payload, NULL, NULL, &start, &stuff, &end, &value,
1257 	    &flags);
1258 	(void) hcksum_assoc(bp, NULL, NULL, start, stuff, end, value, flags, 0);
1259 
1260 	/*
1261 	 * Link the payload onto the new header.
1262 	 */
1263 	ASSERT(bp->b_cont == NULL);
1264 	bp->b_cont = payload;
1265 
1266 	/*
1267 	 * No lock can be held across modules and putnext()'s,
1268 	 * which can happen here with the call from DLD_TX().
1269 	 */
1270 	if (DLD_TX(dsp, bp, 0, 0) != 0) {
1271 		/* flow-controlled */
1272 		DLD_SETQFULL(dsp);
1273 	}
1274 	DLD_DATATHR_DCR(dsp);
1275 	return;
1276 
1277 failed:
1278 	dlerrorack(q, mp, DL_UNITDATA_REQ, dl_err, 0);
1279 	DLD_DATATHR_DCR(dsp);
1280 	return;
1281 
1282 baddata:
1283 	dluderrorind(q, mp, (void *)addr, len, DL_BADDATA, 0);
1284 	DLD_DATATHR_DCR(dsp);
1285 }
1286 
1287 /*
1288  * DL_PASSIVE_REQ
1289  */
1290 static void
1291 proto_passive_req(dld_str_t *dsp, mblk_t *mp)
1292 {
1293 	t_uscalar_t dl_err;
1294 
1295 	/*
1296 	 * If we've already become active by issuing an active primitive,
1297 	 * then it's too late to try to become passive.
1298 	 */
1299 	if (dsp->ds_passivestate == DLD_ACTIVE) {
1300 		dl_err = DL_OUTSTATE;
1301 		goto failed;
1302 	}
1303 
1304 	if (MBLKL(mp) < sizeof (dl_passive_req_t)) {
1305 		dl_err = DL_BADPRIM;
1306 		goto failed;
1307 	}
1308 
1309 	dsp->ds_passivestate = DLD_PASSIVE;
1310 	dlokack(dsp->ds_wq, mp, DL_PASSIVE_REQ);
1311 	return;
1312 failed:
1313 	dlerrorack(dsp->ds_wq, mp, DL_PASSIVE_REQ, dl_err, 0);
1314 }
1315 
1316 
1317 /*
1318  * Catch-all handler.
1319  */
1320 static void
1321 proto_req(dld_str_t *dsp, mblk_t *mp)
1322 {
1323 	union DL_primitives	*dlp = (union DL_primitives *)mp->b_rptr;
1324 
1325 	dlerrorack(dsp->ds_wq, mp, dlp->dl_primitive, DL_UNSUPPORTED, 0);
1326 }
1327 
1328 static int
1329 dld_capab_perim(dld_str_t *dsp, void *data, uint_t flags)
1330 {
1331 	switch (flags) {
1332 	case DLD_ENABLE:
1333 		mac_perim_enter_by_mh(dsp->ds_mh, (mac_perim_handle_t *)data);
1334 		return (0);
1335 
1336 	case DLD_DISABLE:
1337 		mac_perim_exit((mac_perim_handle_t)data);
1338 		return (0);
1339 
1340 	case DLD_QUERY:
1341 		return (mac_perim_held(dsp->ds_mh));
1342 	}
1343 	return (0);
1344 }
1345 
1346 static int
1347 dld_capab_direct(dld_str_t *dsp, void *data, uint_t flags)
1348 {
1349 	dld_capab_direct_t	*direct = data;
1350 
1351 	ASSERT(MAC_PERIM_HELD(dsp->ds_mh));
1352 
1353 	switch (flags) {
1354 	case DLD_ENABLE:
1355 		dls_rx_set(dsp, (dls_rx_t)direct->di_rx_cf,
1356 		    direct->di_rx_ch);
1357 
1358 		direct->di_tx_df = (uintptr_t)str_mdata_fastpath_put;
1359 		direct->di_tx_dh = dsp;
1360 		direct->di_tx_cb_df = (uintptr_t)mac_client_tx_notify;
1361 		direct->di_tx_cb_dh = dsp->ds_mch;
1362 		direct->di_tx_fctl_df = (uintptr_t)mac_tx_is_flow_blocked;
1363 		direct->di_tx_fctl_dh = dsp->ds_mch;
1364 
1365 		dsp->ds_direct = B_TRUE;
1366 
1367 		return (0);
1368 
1369 	case DLD_DISABLE:
1370 		dls_rx_set(dsp, (dsp->ds_mode == DLD_FASTPATH) ?
1371 		    dld_str_rx_fastpath : dld_str_rx_unitdata, (void *)dsp);
1372 		dsp->ds_direct = B_FALSE;
1373 
1374 		return (0);
1375 	}
1376 	return (ENOTSUP);
1377 }
1378 
1379 /*
1380  * dld_capab_poll_enable()
1381  *
1382  * This function is misnamed. All polling  and fanouts are run out of the
1383  * lower mac (in case of VNIC and the only mac in case of NICs). The
1384  * availability of Rx ring and promiscous mode is all taken care between
1385  * the soft ring set (mac_srs), the Rx ring, and S/W classifier. Any
1386  * fanout necessary is done by the soft rings that are part of the
1387  * mac_srs (by default mac_srs sends the packets up via a TCP and
1388  * non TCP soft ring).
1389  *
1390  * The mac_srs (or its associated soft rings) always store the ill_rx_ring
1391  * (the cookie returned when they registered with IP during plumb) as their
1392  * 2nd argument which is passed up as mac_resource_handle_t. The upcall
1393  * function and 1st argument is what the caller registered when they
1394  * called mac_rx_classify_flow_add() to register the flow. For VNIC,
1395  * the function is vnic_rx and argument is vnic_t. For regular NIC
1396  * case, it mac_rx_default and mac_handle_t. As explained above, the
1397  * mac_srs (or its soft ring) will add the ill_rx_ring (mac_resource_handle_t)
1398  * from its stored 2nd argument.
1399  */
1400 static int
1401 dld_capab_poll_enable(dld_str_t *dsp, dld_capab_poll_t *poll)
1402 {
1403 	if (dsp->ds_polling)
1404 		return (EINVAL);
1405 
1406 	if ((dld_opt & DLD_OPT_NO_POLL) != 0 || dsp->ds_mode == DLD_RAW)
1407 		return (ENOTSUP);
1408 
1409 	/*
1410 	 * Enable client polling if and only if DLS bypass is possible.
1411 	 * Special cases like VLANs need DLS processing in the Rx data path.
1412 	 * In such a case we can neither allow the client (IP) to directly
1413 	 * poll the softring (since DLS processing hasn't been done) nor can
1414 	 * we allow DLS bypass.
1415 	 */
1416 	if (!mac_rx_bypass_set(dsp->ds_mch, dsp->ds_rx, dsp->ds_rx_arg))
1417 		return (ENOTSUP);
1418 
1419 	/*
1420 	 * Register soft ring resources. This will come in handy later if
1421 	 * the user decides to modify CPU bindings to use more CPUs for the
1422 	 * device in which case we will switch to fanout using soft rings.
1423 	 */
1424 	mac_resource_set_common(dsp->ds_mch,
1425 	    (mac_resource_add_t)poll->poll_ring_add_cf,
1426 	    (mac_resource_remove_t)poll->poll_ring_remove_cf,
1427 	    (mac_resource_quiesce_t)poll->poll_ring_quiesce_cf,
1428 	    (mac_resource_restart_t)poll->poll_ring_restart_cf,
1429 	    (mac_resource_bind_t)poll->poll_ring_bind_cf,
1430 	    poll->poll_ring_ch);
1431 
1432 	mac_client_poll_enable(dsp->ds_mch);
1433 
1434 	dsp->ds_polling = B_TRUE;
1435 	return (0);
1436 }
1437 
1438 /* ARGSUSED */
1439 static int
1440 dld_capab_poll_disable(dld_str_t *dsp, dld_capab_poll_t *poll)
1441 {
1442 	if (!dsp->ds_polling)
1443 		return (EINVAL);
1444 
1445 	mac_client_poll_disable(dsp->ds_mch);
1446 	mac_resource_set(dsp->ds_mch, NULL, NULL);
1447 
1448 	dsp->ds_polling = B_FALSE;
1449 	return (0);
1450 }
1451 
1452 static int
1453 dld_capab_poll(dld_str_t *dsp, void *data, uint_t flags)
1454 {
1455 	dld_capab_poll_t	*poll = data;
1456 
1457 	ASSERT(MAC_PERIM_HELD(dsp->ds_mh));
1458 
1459 	switch (flags) {
1460 	case DLD_ENABLE:
1461 		return (dld_capab_poll_enable(dsp, poll));
1462 	case DLD_DISABLE:
1463 		return (dld_capab_poll_disable(dsp, poll));
1464 	}
1465 	return (ENOTSUP);
1466 }
1467 
1468 static int
1469 dld_capab_lso(dld_str_t *dsp, void *data, uint_t flags)
1470 {
1471 	dld_capab_lso_t		*lso = data;
1472 
1473 	ASSERT(MAC_PERIM_HELD(dsp->ds_mh));
1474 
1475 	switch (flags) {
1476 	case DLD_ENABLE: {
1477 		mac_capab_lso_t		mac_lso;
1478 
1479 		/*
1480 		 * Check if LSO is supported on this MAC & enable LSO
1481 		 * accordingly.
1482 		 */
1483 		if (mac_capab_get(dsp->ds_mh, MAC_CAPAB_LSO, &mac_lso)) {
1484 			lso->lso_max = mac_lso.lso_basic_tcp_ipv4.lso_max;
1485 			lso->lso_flags = 0;
1486 			/* translate the flag for mac clients */
1487 			if ((mac_lso.lso_flags & LSO_TX_BASIC_TCP_IPV4) != 0)
1488 				lso->lso_flags |= DLD_LSO_BASIC_TCP_IPV4;
1489 			dsp->ds_lso = B_TRUE;
1490 			dsp->ds_lso_max = lso->lso_max;
1491 		} else {
1492 			dsp->ds_lso = B_FALSE;
1493 			dsp->ds_lso_max = 0;
1494 			return (ENOTSUP);
1495 		}
1496 		return (0);
1497 	}
1498 	case DLD_DISABLE: {
1499 		dsp->ds_lso = B_FALSE;
1500 		dsp->ds_lso_max = 0;
1501 		return (0);
1502 	}
1503 	}
1504 	return (ENOTSUP);
1505 }
1506 
1507 static int
1508 dld_capab(dld_str_t *dsp, uint_t type, void *data, uint_t flags)
1509 {
1510 	int	err;
1511 
1512 	/*
1513 	 * Don't enable direct callback capabilities unless the caller is
1514 	 * the IP client. When a module is inserted in a stream (_I_INSERT)
1515 	 * the stack initiates capability disable, but due to races, the
1516 	 * module insertion may complete before the capability disable
1517 	 * completes. So we limit the check to DLD_ENABLE case.
1518 	 */
1519 	if ((flags == DLD_ENABLE && type != DLD_CAPAB_PERIM) &&
1520 	    (dsp->ds_sap != ETHERTYPE_IP ||
1521 	    !check_mod_above(dsp->ds_rq, "ip"))) {
1522 		return (ENOTSUP);
1523 	}
1524 
1525 	switch (type) {
1526 	case DLD_CAPAB_DIRECT:
1527 		err = dld_capab_direct(dsp, data, flags);
1528 		break;
1529 
1530 	case DLD_CAPAB_POLL:
1531 		err =  dld_capab_poll(dsp, data, flags);
1532 		break;
1533 
1534 	case DLD_CAPAB_PERIM:
1535 		err = dld_capab_perim(dsp, data, flags);
1536 		break;
1537 
1538 	case DLD_CAPAB_LSO:
1539 		err = dld_capab_lso(dsp, data, flags);
1540 		break;
1541 
1542 	default:
1543 		err = ENOTSUP;
1544 		break;
1545 	}
1546 
1547 	return (err);
1548 }
1549 
1550 /*
1551  * DL_CAPABILITY_ACK/DL_ERROR_ACK
1552  */
1553 static void
1554 proto_capability_advertise(dld_str_t *dsp, mblk_t *mp)
1555 {
1556 	dl_capability_ack_t	*dlap;
1557 	dl_capability_sub_t	*dlsp;
1558 	size_t			subsize;
1559 	dl_capab_dld_t		dld;
1560 	dl_capab_hcksum_t	hcksum;
1561 	dl_capab_zerocopy_t	zcopy;
1562 	dl_capab_vrrp_t		vrrp;
1563 	mac_capab_vrrp_t	vrrp_capab;
1564 	uint8_t			*ptr;
1565 	queue_t			*q = dsp->ds_wq;
1566 	mblk_t			*mp1;
1567 	boolean_t		hcksum_capable = B_FALSE;
1568 	boolean_t		zcopy_capable = B_FALSE;
1569 	boolean_t		dld_capable = B_FALSE;
1570 	boolean_t		vrrp_capable = B_FALSE;
1571 
1572 	/*
1573 	 * Initially assume no capabilities.
1574 	 */
1575 	subsize = 0;
1576 
1577 	/*
1578 	 * Check if checksum offload is supported on this MAC.
1579 	 */
1580 	bzero(&hcksum, sizeof (dl_capab_hcksum_t));
1581 	if (mac_capab_get(dsp->ds_mh, MAC_CAPAB_HCKSUM,
1582 	    &hcksum.hcksum_txflags)) {
1583 		if (hcksum.hcksum_txflags != 0) {
1584 			hcksum_capable = B_TRUE;
1585 			subsize += sizeof (dl_capability_sub_t) +
1586 			    sizeof (dl_capab_hcksum_t);
1587 		}
1588 	}
1589 
1590 	/*
1591 	 * Check if zerocopy is supported on this interface.
1592 	 * If advertising DL_CAPAB_ZEROCOPY has not been explicitly disabled
1593 	 * then reserve space for that capability.
1594 	 */
1595 	if (!mac_capab_get(dsp->ds_mh, MAC_CAPAB_NO_ZCOPY, NULL) &&
1596 	    !(dld_opt & DLD_OPT_NO_ZEROCOPY)) {
1597 		zcopy_capable = B_TRUE;
1598 		subsize += sizeof (dl_capability_sub_t) +
1599 		    sizeof (dl_capab_zerocopy_t);
1600 	}
1601 
1602 	/*
1603 	 * Direct capability negotiation interface between IP and DLD
1604 	 */
1605 	if (dsp->ds_sap == ETHERTYPE_IP && check_mod_above(dsp->ds_rq, "ip")) {
1606 		dld_capable = B_TRUE;
1607 		subsize += sizeof (dl_capability_sub_t) +
1608 		    sizeof (dl_capab_dld_t);
1609 	}
1610 
1611 	/*
1612 	 * Check if vrrp is supported on this interface. If so, reserve
1613 	 * space for that capability.
1614 	 */
1615 	if (mac_capab_get(dsp->ds_mh, MAC_CAPAB_VRRP, &vrrp_capab)) {
1616 		vrrp_capable = B_TRUE;
1617 		subsize += sizeof (dl_capability_sub_t) +
1618 		    sizeof (dl_capab_vrrp_t);
1619 	}
1620 
1621 	/*
1622 	 * If there are no capabilities to advertise or if we
1623 	 * can't allocate a response, send a DL_ERROR_ACK.
1624 	 */
1625 	if ((mp1 = reallocb(mp,
1626 	    sizeof (dl_capability_ack_t) + subsize, 0)) == NULL) {
1627 		dlerrorack(q, mp, DL_CAPABILITY_REQ, DL_NOTSUPPORTED, 0);
1628 		return;
1629 	}
1630 
1631 	mp = mp1;
1632 	DB_TYPE(mp) = M_PROTO;
1633 	mp->b_wptr = mp->b_rptr + sizeof (dl_capability_ack_t) + subsize;
1634 	bzero(mp->b_rptr, MBLKL(mp));
1635 	dlap = (dl_capability_ack_t *)mp->b_rptr;
1636 	dlap->dl_primitive = DL_CAPABILITY_ACK;
1637 	dlap->dl_sub_offset = sizeof (dl_capability_ack_t);
1638 	dlap->dl_sub_length = subsize;
1639 	ptr = (uint8_t *)&dlap[1];
1640 
1641 	/*
1642 	 * TCP/IP checksum offload.
1643 	 */
1644 	if (hcksum_capable) {
1645 		dlsp = (dl_capability_sub_t *)ptr;
1646 
1647 		dlsp->dl_cap = DL_CAPAB_HCKSUM;
1648 		dlsp->dl_length = sizeof (dl_capab_hcksum_t);
1649 		ptr += sizeof (dl_capability_sub_t);
1650 
1651 		hcksum.hcksum_version = HCKSUM_VERSION_1;
1652 		dlcapabsetqid(&(hcksum.hcksum_mid), dsp->ds_rq);
1653 		bcopy(&hcksum, ptr, sizeof (dl_capab_hcksum_t));
1654 		ptr += sizeof (dl_capab_hcksum_t);
1655 	}
1656 
1657 	/*
1658 	 * Zero copy
1659 	 */
1660 	if (zcopy_capable) {
1661 		dlsp = (dl_capability_sub_t *)ptr;
1662 
1663 		dlsp->dl_cap = DL_CAPAB_ZEROCOPY;
1664 		dlsp->dl_length = sizeof (dl_capab_zerocopy_t);
1665 		ptr += sizeof (dl_capability_sub_t);
1666 
1667 		bzero(&zcopy, sizeof (dl_capab_zerocopy_t));
1668 		zcopy.zerocopy_version = ZEROCOPY_VERSION_1;
1669 		zcopy.zerocopy_flags = DL_CAPAB_VMSAFE_MEM;
1670 
1671 		dlcapabsetqid(&(zcopy.zerocopy_mid), dsp->ds_rq);
1672 		bcopy(&zcopy, ptr, sizeof (dl_capab_zerocopy_t));
1673 		ptr += sizeof (dl_capab_zerocopy_t);
1674 	}
1675 
1676 	/*
1677 	 * VRRP capability negotiation
1678 	 */
1679 	if (vrrp_capable) {
1680 		dlsp = (dl_capability_sub_t *)ptr;
1681 		dlsp->dl_cap = DL_CAPAB_VRRP;
1682 		dlsp->dl_length = sizeof (dl_capab_vrrp_t);
1683 		ptr += sizeof (dl_capability_sub_t);
1684 
1685 		bzero(&vrrp, sizeof (dl_capab_vrrp_t));
1686 		vrrp.vrrp_af = vrrp_capab.mcv_af;
1687 		bcopy(&vrrp, ptr, sizeof (dl_capab_vrrp_t));
1688 		ptr += sizeof (dl_capab_vrrp_t);
1689 	}
1690 
1691 	/*
1692 	 * Direct capability negotiation interface between IP and DLD.
1693 	 * Refer to dld.h for details.
1694 	 */
1695 	if (dld_capable) {
1696 		dlsp = (dl_capability_sub_t *)ptr;
1697 		dlsp->dl_cap = DL_CAPAB_DLD;
1698 		dlsp->dl_length = sizeof (dl_capab_dld_t);
1699 		ptr += sizeof (dl_capability_sub_t);
1700 
1701 		bzero(&dld, sizeof (dl_capab_dld_t));
1702 		dld.dld_version = DLD_CURRENT_VERSION;
1703 		dld.dld_capab = (uintptr_t)dld_capab;
1704 		dld.dld_capab_handle = (uintptr_t)dsp;
1705 
1706 		dlcapabsetqid(&(dld.dld_mid), dsp->ds_rq);
1707 		bcopy(&dld, ptr, sizeof (dl_capab_dld_t));
1708 		ptr += sizeof (dl_capab_dld_t);
1709 	}
1710 
1711 	ASSERT(ptr == mp->b_rptr + sizeof (dl_capability_ack_t) + subsize);
1712 	qreply(q, mp);
1713 }
1714 
1715 /*
1716  * Disable any enabled capabilities.
1717  */
1718 void
1719 dld_capabilities_disable(dld_str_t *dsp)
1720 {
1721 	if (dsp->ds_polling)
1722 		(void) dld_capab_poll_disable(dsp, NULL);
1723 }
1724