xref: /illumos-gate/usr/src/uts/common/io/dld/dld_proto.c (revision 8a5251963032143e4d8c00bee444d848a79095cc)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright 2012, Nexenta Systems, Inc. All rights reserved.
24  * Copyright (c) 2018, Joyent, Inc.
25  * Copyright 2025 Oxide Computer Company
26  */
27 
28 /*
29  * Data-Link Driver
30  */
31 #include <sys/sysmacros.h>
32 #include <sys/strsubr.h>
33 #include <sys/strsun.h>
34 #include <sys/vlan.h>
35 #include <sys/dld_impl.h>
36 #include <sys/mac_client.h>
37 #include <sys/mac_client_impl.h>
38 #include <sys/mac_client_priv.h>
39 
40 typedef void proto_reqfunc_t(dld_str_t *, mblk_t *);
41 
42 static proto_reqfunc_t proto_info_req, proto_attach_req, proto_detach_req,
43     proto_bind_req, proto_unbind_req, proto_promiscon_req, proto_promiscoff_req,
44     proto_enabmulti_req, proto_disabmulti_req, proto_physaddr_req,
45     proto_setphysaddr_req, proto_udqos_req, proto_req, proto_capability_req,
46     proto_notify_req, proto_passive_req;
47 
48 static void proto_capability_advertise(dld_str_t *, mblk_t *);
49 static int dld_capab_poll_disable(dld_str_t *, dld_capab_poll_t *);
50 static boolean_t check_mod_above(queue_t *, const char *);
51 
/*
 * True when the given DLPI state is one of the transitional states:
 * attach, detach, bind or unbind pending.  The argument is evaluated
 * more than once, so it must be free of side effects.
 */
#define	DL_ACK_PENDING(state)			\
	((state) == DL_ATTACH_PENDING ||	\
	(state) == DL_DETACH_PENDING ||		\
	(state) == DL_BIND_PENDING ||		\
	(state) == DL_UNBIND_PENDING)
57 
58 /*
59  * Process a DLPI protocol message.
60  * The primitives DL_BIND_REQ, DL_ENABMULTI_REQ, DL_PROMISCON_REQ,
61  * DL_SET_PHYS_ADDR_REQ put the data link below our dld_str_t into an
62  * 'active' state. The primitive DL_PASSIVE_REQ marks our dld_str_t
63  * as 'passive' and forbids it from being subsequently made 'active'
64  * by the above primitives.
65  */
66 void
dld_proto(dld_str_t * dsp,mblk_t * mp)67 dld_proto(dld_str_t *dsp, mblk_t *mp)
68 {
69 	t_uscalar_t		prim;
70 
71 	if (MBLKL(mp) < sizeof (t_uscalar_t)) {
72 		freemsg(mp);
73 		return;
74 	}
75 	prim = ((union DL_primitives *)mp->b_rptr)->dl_primitive;
76 
77 	switch (prim) {
78 	case DL_INFO_REQ:
79 		proto_info_req(dsp, mp);
80 		break;
81 	case DL_BIND_REQ:
82 		proto_bind_req(dsp, mp);
83 		break;
84 	case DL_UNBIND_REQ:
85 		proto_unbind_req(dsp, mp);
86 		break;
87 	case DL_UNITDATA_REQ:
88 		proto_unitdata_req(dsp, mp);
89 		break;
90 	case DL_UDQOS_REQ:
91 		proto_udqos_req(dsp, mp);
92 		break;
93 	case DL_ATTACH_REQ:
94 		proto_attach_req(dsp, mp);
95 		break;
96 	case DL_DETACH_REQ:
97 		proto_detach_req(dsp, mp);
98 		break;
99 	case DL_ENABMULTI_REQ:
100 		proto_enabmulti_req(dsp, mp);
101 		break;
102 	case DL_DISABMULTI_REQ:
103 		proto_disabmulti_req(dsp, mp);
104 		break;
105 	case DL_PROMISCON_REQ:
106 		proto_promiscon_req(dsp, mp);
107 		break;
108 	case DL_PROMISCOFF_REQ:
109 		proto_promiscoff_req(dsp, mp);
110 		break;
111 	case DL_PHYS_ADDR_REQ:
112 		proto_physaddr_req(dsp, mp);
113 		break;
114 	case DL_SET_PHYS_ADDR_REQ:
115 		proto_setphysaddr_req(dsp, mp);
116 		break;
117 	case DL_NOTIFY_REQ:
118 		proto_notify_req(dsp, mp);
119 		break;
120 	case DL_CAPABILITY_REQ:
121 		proto_capability_req(dsp, mp);
122 		break;
123 	case DL_PASSIVE_REQ:
124 		proto_passive_req(dsp, mp);
125 		break;
126 	default:
127 		proto_req(dsp, mp);
128 		break;
129 	}
130 }
131 
/*
 * Arithmetic negation of x.  The whole expansion is parenthesized so that
 * it always forms a single operand; without the outer parentheses a use
 * such as "a NEG(b)" would quietly compile as the subtraction "a - (b)".
 */
#define	NEG(x)	(-(x))
133 typedef struct dl_info_ack_wrapper {
134 	dl_info_ack_t		dl_info;
135 	uint8_t			dl_addr[MAXMACADDRLEN + sizeof (uint16_t)];
136 	uint8_t			dl_brdcst_addr[MAXMACADDRLEN];
137 	dl_qos_cl_range1_t	dl_qos_range1;
138 	dl_qos_cl_sel1_t	dl_qos_sel1;
139 } dl_info_ack_wrapper_t;
140 
141 /*
142  * DL_INFO_REQ
143  */
144 static void
proto_info_req(dld_str_t * dsp,mblk_t * mp)145 proto_info_req(dld_str_t *dsp, mblk_t *mp)
146 {
147 	dl_info_ack_wrapper_t	*dlwp;
148 	dl_info_ack_t		*dlp;
149 	dl_qos_cl_sel1_t	*selp;
150 	dl_qos_cl_range1_t	*rangep;
151 	uint8_t			*addr;
152 	uint8_t			*brdcst_addr;
153 	uint_t			addr_length;
154 	uint_t			sap_length;
155 	mac_info_t		minfo;
156 	mac_info_t		*minfop;
157 	queue_t			*q = dsp->ds_wq;
158 
159 	/*
160 	 * Swap the request message for one large enough to contain the
161 	 * wrapper structure defined above.
162 	 */
163 	if ((mp = mexchange(q, mp, sizeof (dl_info_ack_wrapper_t),
164 	    M_PCPROTO, 0)) == NULL)
165 		return;
166 
167 	bzero(mp->b_rptr, sizeof (dl_info_ack_wrapper_t));
168 	dlwp = (dl_info_ack_wrapper_t *)mp->b_rptr;
169 
170 	dlp = &(dlwp->dl_info);
171 	ASSERT(dlp == (dl_info_ack_t *)mp->b_rptr);
172 
173 	dlp->dl_primitive = DL_INFO_ACK;
174 
175 	/*
176 	 * Set up the sub-structure pointers.
177 	 */
178 	addr = dlwp->dl_addr;
179 	brdcst_addr = dlwp->dl_brdcst_addr;
180 	rangep = &(dlwp->dl_qos_range1);
181 	selp = &(dlwp->dl_qos_sel1);
182 
183 	/*
184 	 * This driver supports only version 2 connectionless DLPI provider
185 	 * nodes.
186 	 */
187 	dlp->dl_service_mode = DL_CLDLS;
188 	dlp->dl_version = DL_VERSION_2;
189 
190 	/*
191 	 * Set the style of the provider
192 	 */
193 	dlp->dl_provider_style = dsp->ds_style;
194 	ASSERT(dlp->dl_provider_style == DL_STYLE1 ||
195 	    dlp->dl_provider_style == DL_STYLE2);
196 
197 	/*
198 	 * Set the current DLPI state.
199 	 */
200 	dlp->dl_current_state = dsp->ds_dlstate;
201 
202 	/*
203 	 * Gratuitously set the media type. This is to deal with modules
204 	 * that assume the media type is known prior to DL_ATTACH_REQ
205 	 * being completed.
206 	 */
207 	dlp->dl_mac_type = DL_ETHER;
208 
209 	/*
210 	 * If the stream is not at least attached we try to retrieve the
211 	 * mac_info using mac_info_get()
212 	 */
213 	if (dsp->ds_dlstate == DL_UNATTACHED ||
214 	    dsp->ds_dlstate == DL_ATTACH_PENDING ||
215 	    dsp->ds_dlstate == DL_DETACH_PENDING) {
216 		if (!mac_info_get(ddi_major_to_name(dsp->ds_major), &minfo)) {
217 			/*
218 			 * Cannot find mac_info. giving up.
219 			 */
220 			goto done;
221 		}
222 		minfop = &minfo;
223 	} else {
224 		minfop = (mac_info_t *)dsp->ds_mip;
225 		/* We can only get the sdu if we're attached. */
226 		mac_sdu_get(dsp->ds_mh, &dlp->dl_min_sdu, &dlp->dl_max_sdu);
227 	}
228 
229 	/*
230 	 * Set the media type (properly this time).
231 	 */
232 	if (dsp->ds_native)
233 		dlp->dl_mac_type = minfop->mi_nativemedia;
234 	else
235 		dlp->dl_mac_type = minfop->mi_media;
236 
237 	/*
238 	 * Set the DLSAP length. We only support 16 bit values and they
239 	 * appear after the MAC address portion of DLSAP addresses.
240 	 */
241 	sap_length = sizeof (uint16_t);
242 	dlp->dl_sap_length = NEG(sap_length);
243 
244 	addr_length = minfop->mi_addr_length;
245 
246 	/*
247 	 * Copy in the media broadcast address.
248 	 */
249 	if (minfop->mi_brdcst_addr != NULL) {
250 		dlp->dl_brdcst_addr_offset =
251 		    (uintptr_t)brdcst_addr - (uintptr_t)dlp;
252 		bcopy(minfop->mi_brdcst_addr, brdcst_addr, addr_length);
253 		dlp->dl_brdcst_addr_length = addr_length;
254 	}
255 
256 	/* Only VLAN links and links that have a normal tag mode support QOS. */
257 	if ((dsp->ds_mch != NULL &&
258 	    mac_client_vid(dsp->ds_mch) != VLAN_ID_NONE) ||
259 	    (dsp->ds_dlp != NULL &&
260 	    dsp->ds_dlp->dl_tagmode == LINK_TAGMODE_NORMAL)) {
261 		dlp->dl_qos_range_offset = (uintptr_t)rangep - (uintptr_t)dlp;
262 		dlp->dl_qos_range_length = sizeof (dl_qos_cl_range1_t);
263 
264 		rangep->dl_qos_type = DL_QOS_CL_RANGE1;
265 		rangep->dl_trans_delay.dl_target_value = DL_UNKNOWN;
266 		rangep->dl_trans_delay.dl_accept_value = DL_UNKNOWN;
267 		rangep->dl_protection.dl_min = DL_UNKNOWN;
268 		rangep->dl_protection.dl_max = DL_UNKNOWN;
269 		rangep->dl_residual_error = DL_UNKNOWN;
270 
271 		/*
272 		 * Specify the supported range of priorities.
273 		 */
274 		rangep->dl_priority.dl_min = 0;
275 		rangep->dl_priority.dl_max = (1 << VLAN_PRI_SIZE) - 1;
276 
277 		dlp->dl_qos_offset = (uintptr_t)selp - (uintptr_t)dlp;
278 		dlp->dl_qos_length = sizeof (dl_qos_cl_sel1_t);
279 
280 		selp->dl_qos_type = DL_QOS_CL_SEL1;
281 		selp->dl_trans_delay = DL_UNKNOWN;
282 		selp->dl_protection = DL_UNKNOWN;
283 		selp->dl_residual_error = DL_UNKNOWN;
284 
285 		/*
286 		 * Specify the current priority (which can be changed by
287 		 * the DL_UDQOS_REQ primitive).
288 		 */
289 		selp->dl_priority = dsp->ds_pri;
290 	}
291 
292 	dlp->dl_addr_length = addr_length + sizeof (uint16_t);
293 	if (dsp->ds_dlstate == DL_IDLE) {
294 		/*
295 		 * The stream is bound. Therefore we can formulate a valid
296 		 * DLSAP address.
297 		 */
298 		dlp->dl_addr_offset = (uintptr_t)addr - (uintptr_t)dlp;
299 		if (addr_length > 0)
300 			mac_unicast_primary_get(dsp->ds_mh, addr);
301 
302 		*(uint16_t *)(addr + addr_length) = dsp->ds_sap;
303 	}
304 
305 done:
306 	IMPLY(dlp->dl_qos_offset != 0, dlp->dl_qos_length != 0);
307 	IMPLY(dlp->dl_qos_range_offset != 0,
308 	    dlp->dl_qos_range_length != 0);
309 	IMPLY(dlp->dl_addr_offset != 0, dlp->dl_addr_length != 0);
310 	IMPLY(dlp->dl_brdcst_addr_offset != 0,
311 	    dlp->dl_brdcst_addr_length != 0);
312 
313 	qreply(q, mp);
314 }
315 
316 /*
317  * DL_ATTACH_REQ
318  */
319 static void
proto_attach_req(dld_str_t * dsp,mblk_t * mp)320 proto_attach_req(dld_str_t *dsp, mblk_t *mp)
321 {
322 	dl_attach_req_t	*dlp = (dl_attach_req_t *)mp->b_rptr;
323 	int		err = 0;
324 	t_uscalar_t	dl_err;
325 	queue_t		*q = dsp->ds_wq;
326 
327 	if (MBLKL(mp) < sizeof (dl_attach_req_t) ||
328 	    dsp->ds_style == DL_STYLE1) {
329 		dl_err = DL_BADPRIM;
330 		goto failed;
331 	}
332 
333 	if (dsp->ds_dlstate != DL_UNATTACHED) {
334 		dl_err = DL_OUTSTATE;
335 		goto failed;
336 	}
337 
338 	dsp->ds_dlstate = DL_ATTACH_PENDING;
339 
340 	err = dld_str_attach(dsp, dlp->dl_ppa);
341 	if (err != 0) {
342 		switch (err) {
343 		case ENOENT:
344 			dl_err = DL_BADPPA;
345 			err = 0;
346 			break;
347 		default:
348 			dl_err = DL_SYSERR;
349 			break;
350 		}
351 		dsp->ds_dlstate = DL_UNATTACHED;
352 		goto failed;
353 	}
354 	ASSERT(dsp->ds_dlstate == DL_UNBOUND);
355 	dlokack(q, mp, DL_ATTACH_REQ);
356 	return;
357 
358 failed:
359 	dlerrorack(q, mp, DL_ATTACH_REQ, dl_err, (t_uscalar_t)err);
360 }
361 
362 /*
363  * DL_DETACH_REQ
364  */
365 static void
proto_detach_req(dld_str_t * dsp,mblk_t * mp)366 proto_detach_req(dld_str_t *dsp, mblk_t *mp)
367 {
368 	queue_t		*q = dsp->ds_wq;
369 	t_uscalar_t	dl_err;
370 
371 	if (MBLKL(mp) < sizeof (dl_detach_req_t)) {
372 		dl_err = DL_BADPRIM;
373 		goto failed;
374 	}
375 
376 	if (dsp->ds_dlstate != DL_UNBOUND) {
377 		dl_err = DL_OUTSTATE;
378 		goto failed;
379 	}
380 
381 	if (dsp->ds_style == DL_STYLE1) {
382 		dl_err = DL_BADPRIM;
383 		goto failed;
384 	}
385 
386 	ASSERT(dsp->ds_datathr_cnt == 0);
387 	dsp->ds_dlstate = DL_DETACH_PENDING;
388 
389 	dld_str_detach(dsp);
390 	dlokack(dsp->ds_wq, mp, DL_DETACH_REQ);
391 	return;
392 
393 failed:
394 	dlerrorack(q, mp, DL_DETACH_REQ, dl_err, 0);
395 }
396 
397 /*
398  * DL_BIND_REQ
399  */
400 static void
proto_bind_req(dld_str_t * dsp,mblk_t * mp)401 proto_bind_req(dld_str_t *dsp, mblk_t *mp)
402 {
403 	dl_bind_req_t	*dlp = (dl_bind_req_t *)mp->b_rptr;
404 	int		err = 0;
405 	uint8_t		dlsap_addr[MAXMACADDRLEN + sizeof (uint16_t)];
406 	uint_t		dlsap_addr_length;
407 	t_uscalar_t	dl_err;
408 	t_scalar_t	sap;
409 	queue_t		*q = dsp->ds_wq;
410 	mac_perim_handle_t	mph;
411 	void		*mdip;
412 	int32_t		intr_cpu;
413 
414 	if (MBLKL(mp) < sizeof (dl_bind_req_t)) {
415 		dl_err = DL_BADPRIM;
416 		goto failed;
417 	}
418 
419 	if (dlp->dl_xidtest_flg != 0) {
420 		dl_err = DL_NOAUTO;
421 		goto failed;
422 	}
423 
424 	if (dlp->dl_service_mode != DL_CLDLS) {
425 		dl_err = DL_UNSUPPORTED;
426 		goto failed;
427 	}
428 
429 	if (dsp->ds_dlstate != DL_UNBOUND) {
430 		dl_err = DL_OUTSTATE;
431 		goto failed;
432 	}
433 
434 	mac_perim_enter_by_mh(dsp->ds_mh, &mph);
435 
436 	if ((err = dls_active_set(dsp)) != 0) {
437 		dl_err = DL_SYSERR;
438 		goto failed2;
439 	}
440 
441 	dsp->ds_dlstate = DL_BIND_PENDING;
442 	/*
443 	 * Set the receive callback.
444 	 */
445 	dls_rx_set(dsp, (dsp->ds_mode == DLD_RAW) ?
446 	    dld_str_rx_raw : dld_str_rx_unitdata, dsp);
447 
448 	/*
449 	 * Bind the channel such that it can receive packets.
450 	 */
451 	sap = dlp->dl_sap;
452 	dsp->ds_nonip = !check_mod_above(dsp->ds_rq, "ip") &&
453 	    !check_mod_above(dsp->ds_rq, "arp");
454 
455 	err = dls_bind(dsp, sap);
456 	if (err != 0) {
457 		switch (err) {
458 		case EINVAL:
459 			dl_err = DL_BADADDR;
460 			err = 0;
461 			break;
462 		default:
463 			dl_err = DL_SYSERR;
464 			break;
465 		}
466 
467 		dsp->ds_dlstate = DL_UNBOUND;
468 		dls_active_clear(dsp, B_FALSE);
469 		goto failed2;
470 	}
471 
472 	intr_cpu = mac_client_intr_cpu(dsp->ds_mch);
473 	mdip = mac_get_devinfo(dsp->ds_mh);
474 	mac_perim_exit(mph);
475 
476 	/*
477 	 * We do this after we get out of the perim to avoid deadlocks
478 	 * etc. since part of mac_client_retarget_intr is to walk the
479 	 * device tree in order to find and retarget the interrupts.
480 	 */
481 	if (intr_cpu != -1)
482 		mac_client_set_intr_cpu(mdip, dsp->ds_mch, intr_cpu);
483 
484 	/*
485 	 * Copy in MAC address.
486 	 */
487 	dlsap_addr_length = dsp->ds_mip->mi_addr_length;
488 	mac_unicast_primary_get(dsp->ds_mh, dlsap_addr);
489 
490 	/*
491 	 * Copy in the SAP.
492 	 */
493 	*(uint16_t *)(dlsap_addr + dlsap_addr_length) = sap;
494 	dlsap_addr_length += sizeof (uint16_t);
495 
496 	dsp->ds_dlstate = DL_IDLE;
497 	dlbindack(q, mp, sap, dlsap_addr, dlsap_addr_length, 0, 0);
498 	return;
499 
500 failed2:
501 	mac_perim_exit(mph);
502 failed:
503 	dlerrorack(q, mp, DL_BIND_REQ, dl_err, (t_uscalar_t)err);
504 }
505 
506 /*
507  * DL_UNBIND_REQ
508  */
509 static void
proto_unbind_req(dld_str_t * dsp,mblk_t * mp)510 proto_unbind_req(dld_str_t *dsp, mblk_t *mp)
511 {
512 	queue_t		*q = dsp->ds_wq;
513 	t_uscalar_t	dl_err;
514 	mac_perim_handle_t	mph;
515 
516 	if (MBLKL(mp) < sizeof (dl_unbind_req_t)) {
517 		dl_err = DL_BADPRIM;
518 		goto failed;
519 	}
520 
521 	if (dsp->ds_dlstate != DL_IDLE) {
522 		dl_err = DL_OUTSTATE;
523 		goto failed;
524 	}
525 
526 	mutex_enter(&dsp->ds_lock);
527 	while (dsp->ds_datathr_cnt != 0)
528 		cv_wait(&dsp->ds_datathr_cv, &dsp->ds_lock);
529 
530 	dsp->ds_dlstate = DL_UNBIND_PENDING;
531 	mutex_exit(&dsp->ds_lock);
532 
533 	mac_perim_enter_by_mh(dsp->ds_mh, &mph);
534 	/*
535 	 * Unbind the channel to stop packets being received.
536 	 */
537 	dls_unbind(dsp);
538 
539 	/*
540 	 * Disable polling mode, if it is enabled.
541 	 */
542 	(void) dld_capab_poll_disable(dsp, NULL);
543 
544 	/*
545 	 * Clear LSO flags.
546 	 */
547 	dsp->ds_lso = B_FALSE;
548 	dsp->ds_lso_max = 0;
549 
550 	/*
551 	 * Clear the receive callback.
552 	 */
553 	dls_rx_set(dsp, NULL, NULL);
554 	dsp->ds_direct = B_FALSE;
555 
556 	/*
557 	 * Set the mode back to the default (unitdata).
558 	 */
559 	dsp->ds_mode = DLD_UNITDATA;
560 	dsp->ds_dlstate = DL_UNBOUND;
561 
562 	dls_active_clear(dsp, B_FALSE);
563 	mac_perim_exit(mph);
564 	dlokack(dsp->ds_wq, mp, DL_UNBIND_REQ);
565 	return;
566 failed:
567 	dlerrorack(q, mp, DL_UNBIND_REQ, dl_err, 0);
568 }
569 
570 /*
571  * DL_PROMISCON_REQ
572  */
573 static void
proto_promiscon_req(dld_str_t * dsp,mblk_t * mp)574 proto_promiscon_req(dld_str_t *dsp, mblk_t *mp)
575 {
576 	dl_promiscon_req_t *dlp = (dl_promiscon_req_t *)mp->b_rptr;
577 	int		err = 0;
578 	t_uscalar_t	dl_err;
579 	uint32_t	new_flags, promisc_saved;
580 	queue_t		*q = dsp->ds_wq;
581 	mac_perim_handle_t	mph;
582 
583 	if (MBLKL(mp) < sizeof (dl_promiscon_req_t)) {
584 		dl_err = DL_BADPRIM;
585 		goto failed;
586 	}
587 
588 	if (dsp->ds_dlstate == DL_UNATTACHED ||
589 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
590 		dl_err = DL_OUTSTATE;
591 		goto failed;
592 	}
593 
594 	mac_perim_enter_by_mh(dsp->ds_mh, &mph);
595 
596 	new_flags = promisc_saved = dsp->ds_promisc;
597 	switch (dlp->dl_level) {
598 	case DL_PROMISC_SAP:
599 		new_flags |= DLS_PROMISC_SAP;
600 		break;
601 
602 	case DL_PROMISC_MULTI:
603 		new_flags |= DLS_PROMISC_MULTI;
604 		break;
605 
606 	case DL_PROMISC_PHYS:
607 		new_flags |= DLS_PROMISC_PHYS;
608 		break;
609 
610 	case DL_PROMISC_RX_ONLY:
611 		new_flags |= DLS_PROMISC_RX_ONLY;
612 		break;
613 
614 	case DL_PROMISC_INCOMING:
615 		new_flags |= DLS_PROMISC_INCOMING;
616 		break;
617 
618 	case DL_PROMISC_OUTGOING:
619 		new_flags |= DLS_PROMISC_OUTGOING;
620 		break;
621 
622 	default:
623 		dl_err = DL_NOTSUPPORTED;
624 		goto failed2;
625 	}
626 
627 	if ((promisc_saved == 0) && (err = dls_active_set(dsp)) != 0) {
628 		ASSERT(dsp->ds_promisc == promisc_saved);
629 		dl_err = DL_SYSERR;
630 		goto failed2;
631 	}
632 
633 	/*
634 	 * Adjust channel promiscuity.
635 	 */
636 	err = dls_promisc(dsp, new_flags);
637 
638 	if (err != 0) {
639 		dl_err = DL_SYSERR;
640 		dsp->ds_promisc = promisc_saved;
641 		if (promisc_saved == 0)
642 			dls_active_clear(dsp, B_FALSE);
643 		goto failed2;
644 	}
645 
646 	mac_perim_exit(mph);
647 
648 	dlokack(q, mp, DL_PROMISCON_REQ);
649 	return;
650 
651 failed2:
652 	mac_perim_exit(mph);
653 failed:
654 	dlerrorack(q, mp, DL_PROMISCON_REQ, dl_err, (t_uscalar_t)err);
655 }
656 
657 /*
658  * DL_PROMISCOFF_REQ
659  */
660 static void
proto_promiscoff_req(dld_str_t * dsp,mblk_t * mp)661 proto_promiscoff_req(dld_str_t *dsp, mblk_t *mp)
662 {
663 	dl_promiscoff_req_t *dlp = (dl_promiscoff_req_t *)mp->b_rptr;
664 	int		err = 0;
665 	t_uscalar_t	dl_err;
666 	uint32_t	new_flags;
667 	queue_t		*q = dsp->ds_wq;
668 	mac_perim_handle_t	mph;
669 
670 	if (MBLKL(mp) < sizeof (dl_promiscoff_req_t)) {
671 		dl_err = DL_BADPRIM;
672 		goto failed;
673 	}
674 
675 	if (dsp->ds_dlstate == DL_UNATTACHED ||
676 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
677 		dl_err = DL_OUTSTATE;
678 		goto failed;
679 	}
680 
681 	mac_perim_enter_by_mh(dsp->ds_mh, &mph);
682 
683 	new_flags = dsp->ds_promisc;
684 	switch (dlp->dl_level) {
685 	case DL_PROMISC_SAP:
686 		if (!(dsp->ds_promisc & DLS_PROMISC_SAP)) {
687 			dl_err = DL_NOTENAB;
688 			goto failed2;
689 		}
690 		new_flags &= ~DLS_PROMISC_SAP;
691 		break;
692 
693 	case DL_PROMISC_MULTI:
694 		if (!(dsp->ds_promisc & DLS_PROMISC_MULTI)) {
695 			dl_err = DL_NOTENAB;
696 			goto failed2;
697 		}
698 		new_flags &= ~DLS_PROMISC_MULTI;
699 		break;
700 
701 	case DL_PROMISC_PHYS:
702 		if (!(dsp->ds_promisc & DLS_PROMISC_PHYS)) {
703 			dl_err = DL_NOTENAB;
704 			goto failed2;
705 		}
706 		new_flags &= ~DLS_PROMISC_PHYS;
707 		break;
708 
709 	case DL_PROMISC_RX_ONLY:
710 		if (!(dsp->ds_promisc & DLS_PROMISC_RX_ONLY)) {
711 			dl_err = DL_NOTENAB;
712 			goto failed2;
713 		}
714 		new_flags &= ~DLS_PROMISC_RX_ONLY;
715 		break;
716 
717 	case DL_PROMISC_INCOMING:
718 		if (!(dsp->ds_promisc & DLS_PROMISC_INCOMING)) {
719 			dl_err = DL_NOTENAB;
720 			goto failed2;
721 		}
722 		new_flags &= ~DLS_PROMISC_INCOMING;
723 		break;
724 
725 	case DL_PROMISC_OUTGOING:
726 		if (!(dsp->ds_promisc & DLS_PROMISC_OUTGOING)) {
727 			dl_err = DL_NOTENAB;
728 			goto failed2;
729 		}
730 		new_flags &= ~DLS_PROMISC_OUTGOING;
731 		break;
732 
733 	default:
734 		dl_err = DL_NOTSUPPORTED;
735 		goto failed2;
736 	}
737 
738 	/*
739 	 * Adjust channel promiscuity.
740 	 */
741 	err = dls_promisc(dsp, new_flags);
742 
743 	if (err != 0) {
744 		dl_err = DL_SYSERR;
745 		goto failed2;
746 	}
747 
748 	ASSERT(dsp->ds_promisc == new_flags);
749 	if (dsp->ds_promisc == 0)
750 		dls_active_clear(dsp, B_FALSE);
751 
752 	mac_perim_exit(mph);
753 
754 	dlokack(q, mp, DL_PROMISCOFF_REQ);
755 	return;
756 failed2:
757 	mac_perim_exit(mph);
758 failed:
759 	dlerrorack(q, mp, DL_PROMISCOFF_REQ, dl_err, (t_uscalar_t)err);
760 }
761 
762 /*
763  * DL_ENABMULTI_REQ
764  */
765 static void
proto_enabmulti_req(dld_str_t * dsp,mblk_t * mp)766 proto_enabmulti_req(dld_str_t *dsp, mblk_t *mp)
767 {
768 	dl_enabmulti_req_t *dlp = (dl_enabmulti_req_t *)mp->b_rptr;
769 	int		err = 0;
770 	t_uscalar_t	dl_err;
771 	queue_t		*q = dsp->ds_wq;
772 	mac_perim_handle_t	mph;
773 
774 	if (dsp->ds_dlstate == DL_UNATTACHED ||
775 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
776 		dl_err = DL_OUTSTATE;
777 		goto failed;
778 	}
779 
780 	if (MBLKL(mp) < sizeof (dl_enabmulti_req_t) ||
781 	    !MBLKIN(mp, dlp->dl_addr_offset, dlp->dl_addr_length) ||
782 	    dlp->dl_addr_length != dsp->ds_mip->mi_addr_length) {
783 		dl_err = DL_BADPRIM;
784 		goto failed;
785 	}
786 
787 	mac_perim_enter_by_mh(dsp->ds_mh, &mph);
788 
789 	if ((dsp->ds_dmap == NULL) && (err = dls_active_set(dsp)) != 0) {
790 		dl_err = DL_SYSERR;
791 		goto failed2;
792 	}
793 
794 	err = dls_multicst_add(dsp, mp->b_rptr + dlp->dl_addr_offset);
795 	if (err != 0) {
796 		switch (err) {
797 		case EINVAL:
798 			dl_err = DL_BADADDR;
799 			err = 0;
800 			break;
801 		case ENOSPC:
802 			dl_err = DL_TOOMANY;
803 			err = 0;
804 			break;
805 		default:
806 			dl_err = DL_SYSERR;
807 			break;
808 		}
809 		if (dsp->ds_dmap == NULL)
810 			dls_active_clear(dsp, B_FALSE);
811 		goto failed2;
812 	}
813 
814 	mac_perim_exit(mph);
815 
816 	dlokack(q, mp, DL_ENABMULTI_REQ);
817 	return;
818 
819 failed2:
820 	mac_perim_exit(mph);
821 failed:
822 	dlerrorack(q, mp, DL_ENABMULTI_REQ, dl_err, (t_uscalar_t)err);
823 }
824 
825 /*
826  * DL_DISABMULTI_REQ
827  */
828 static void
proto_disabmulti_req(dld_str_t * dsp,mblk_t * mp)829 proto_disabmulti_req(dld_str_t *dsp, mblk_t *mp)
830 {
831 	dl_disabmulti_req_t *dlp = (dl_disabmulti_req_t *)mp->b_rptr;
832 	int		err = 0;
833 	t_uscalar_t	dl_err;
834 	queue_t		*q = dsp->ds_wq;
835 	mac_perim_handle_t	mph;
836 
837 	if (dsp->ds_dlstate == DL_UNATTACHED ||
838 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
839 		dl_err = DL_OUTSTATE;
840 		goto failed;
841 	}
842 
843 	if (MBLKL(mp) < sizeof (dl_disabmulti_req_t) ||
844 	    !MBLKIN(mp, dlp->dl_addr_offset, dlp->dl_addr_length) ||
845 	    dlp->dl_addr_length != dsp->ds_mip->mi_addr_length) {
846 		dl_err = DL_BADPRIM;
847 		goto failed;
848 	}
849 
850 	mac_perim_enter_by_mh(dsp->ds_mh, &mph);
851 	err = dls_multicst_remove(dsp, mp->b_rptr + dlp->dl_addr_offset);
852 	if ((err == 0) && (dsp->ds_dmap == NULL))
853 		dls_active_clear(dsp, B_FALSE);
854 	mac_perim_exit(mph);
855 
856 	if (err != 0) {
857 		switch (err) {
858 		case EINVAL:
859 			dl_err = DL_BADADDR;
860 			err = 0;
861 			break;
862 
863 		case ENOENT:
864 			dl_err = DL_NOTENAB;
865 			err = 0;
866 			break;
867 
868 		default:
869 			dl_err = DL_SYSERR;
870 			break;
871 		}
872 		goto failed;
873 	}
874 	dlokack(q, mp, DL_DISABMULTI_REQ);
875 	return;
876 failed:
877 	dlerrorack(q, mp, DL_DISABMULTI_REQ, dl_err, (t_uscalar_t)err);
878 }
879 
880 /*
881  * DL_PHYS_ADDR_REQ
882  */
883 static void
proto_physaddr_req(dld_str_t * dsp,mblk_t * mp)884 proto_physaddr_req(dld_str_t *dsp, mblk_t *mp)
885 {
886 	dl_phys_addr_req_t *dlp = (dl_phys_addr_req_t *)mp->b_rptr;
887 	queue_t		*q = dsp->ds_wq;
888 	t_uscalar_t	dl_err = 0;
889 	char		*addr = NULL;
890 	uint_t		addr_length;
891 
892 	if (MBLKL(mp) < sizeof (dl_phys_addr_req_t)) {
893 		dl_err = DL_BADPRIM;
894 		goto done;
895 	}
896 
897 	if (dsp->ds_dlstate == DL_UNATTACHED ||
898 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
899 		dl_err = DL_OUTSTATE;
900 		goto done;
901 	}
902 
903 	addr_length = dsp->ds_mip->mi_addr_length;
904 	if (addr_length > 0) {
905 		addr = kmem_alloc(addr_length, KM_SLEEP);
906 		switch (dlp->dl_addr_type) {
907 		case DL_CURR_PHYS_ADDR:
908 			mac_unicast_primary_get(dsp->ds_mh, (uint8_t *)addr);
909 			break;
910 		case DL_FACT_PHYS_ADDR:
911 			bcopy(dsp->ds_mip->mi_unicst_addr, addr, addr_length);
912 			break;
913 		case DL_CURR_DEST_ADDR:
914 			if (!mac_dst_get(dsp->ds_mh, (uint8_t *)addr))
915 				dl_err = DL_NOTSUPPORTED;
916 			break;
917 		default:
918 			dl_err = DL_UNSUPPORTED;
919 		}
920 	}
921 done:
922 	if (dl_err == 0)
923 		dlphysaddrack(q, mp, addr, (t_uscalar_t)addr_length);
924 	else
925 		dlerrorack(q, mp, DL_PHYS_ADDR_REQ, dl_err, 0);
926 	if (addr != NULL)
927 		kmem_free(addr, addr_length);
928 }
929 
930 /*
931  * DL_SET_PHYS_ADDR_REQ
932  */
933 static void
proto_setphysaddr_req(dld_str_t * dsp,mblk_t * mp)934 proto_setphysaddr_req(dld_str_t *dsp, mblk_t *mp)
935 {
936 	dl_set_phys_addr_req_t *dlp = (dl_set_phys_addr_req_t *)mp->b_rptr;
937 	int		err = 0;
938 	t_uscalar_t	dl_err;
939 	queue_t		*q = dsp->ds_wq;
940 	mac_perim_handle_t	mph;
941 
942 	if (dsp->ds_dlstate == DL_UNATTACHED ||
943 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
944 		dl_err = DL_OUTSTATE;
945 		goto failed;
946 	}
947 
948 	if (MBLKL(mp) < sizeof (dl_set_phys_addr_req_t) ||
949 	    !MBLKIN(mp, dlp->dl_addr_offset, dlp->dl_addr_length) ||
950 	    dlp->dl_addr_length != dsp->ds_mip->mi_addr_length) {
951 		dl_err = DL_BADPRIM;
952 		goto failed;
953 	}
954 
955 	mac_perim_enter_by_mh(dsp->ds_mh, &mph);
956 
957 	if ((err = dls_active_set(dsp)) != 0) {
958 		dl_err = DL_SYSERR;
959 		goto failed2;
960 	}
961 
962 	/*
963 	 * If mac-nospoof is enabled and the link is owned by a
964 	 * non-global zone, changing the mac address is not allowed.
965 	 */
966 	if (dsp->ds_dlp->dl_zid != GLOBAL_ZONEID &&
967 	    mac_protect_enabled(dsp->ds_mch, MPT_MACNOSPOOF)) {
968 		dls_active_clear(dsp, B_FALSE);
969 		err = EACCES;
970 		goto failed2;
971 	}
972 
973 	err = mac_unicast_primary_set(dsp->ds_mh,
974 	    mp->b_rptr + dlp->dl_addr_offset);
975 	if (err != 0) {
976 		switch (err) {
977 		case EINVAL:
978 			dl_err = DL_BADADDR;
979 			err = 0;
980 			break;
981 
982 		default:
983 			dl_err = DL_SYSERR;
984 			break;
985 		}
986 		dls_active_clear(dsp, B_FALSE);
987 		goto failed2;
988 
989 	}
990 
991 	mac_perim_exit(mph);
992 
993 	dlokack(q, mp, DL_SET_PHYS_ADDR_REQ);
994 	return;
995 
996 failed2:
997 	mac_perim_exit(mph);
998 failed:
999 	dlerrorack(q, mp, DL_SET_PHYS_ADDR_REQ, dl_err, (t_uscalar_t)err);
1000 }
1001 
1002 /*
1003  * DL_UDQOS_REQ
1004  */
1005 static void
proto_udqos_req(dld_str_t * dsp,mblk_t * mp)1006 proto_udqos_req(dld_str_t *dsp, mblk_t *mp)
1007 {
1008 	dl_udqos_req_t *dlp = (dl_udqos_req_t *)mp->b_rptr;
1009 	dl_qos_cl_sel1_t *selp;
1010 	int		off, len;
1011 	t_uscalar_t	dl_err;
1012 	queue_t		*q = dsp->ds_wq;
1013 
1014 	off = dlp->dl_qos_offset;
1015 	len = dlp->dl_qos_length;
1016 
1017 	if (MBLKL(mp) < sizeof (dl_udqos_req_t) || !MBLKIN(mp, off, len)) {
1018 		dl_err = DL_BADPRIM;
1019 		goto failed;
1020 	}
1021 
1022 	selp = (dl_qos_cl_sel1_t *)(mp->b_rptr + off);
1023 	if (selp->dl_qos_type != DL_QOS_CL_SEL1) {
1024 		dl_err = DL_BADQOSTYPE;
1025 		goto failed;
1026 	}
1027 
1028 	if (selp->dl_priority > (1 << VLAN_PRI_SIZE) - 1 ||
1029 	    selp->dl_priority < 0) {
1030 		dl_err = DL_BADQOSPARAM;
1031 		goto failed;
1032 	}
1033 
1034 	dsp->ds_pri = selp->dl_priority;
1035 	dlokack(q, mp, DL_UDQOS_REQ);
1036 	return;
1037 failed:
1038 	dlerrorack(q, mp, DL_UDQOS_REQ, dl_err, 0);
1039 }
1040 
1041 static boolean_t
check_mod_above(queue_t * q,const char * mod)1042 check_mod_above(queue_t *q, const char *mod)
1043 {
1044 	queue_t		*next_q;
1045 	boolean_t	ret = B_TRUE;
1046 
1047 	claimstr(q);
1048 	next_q = q->q_next;
1049 	if (strcmp(next_q->q_qinfo->qi_minfo->mi_idname, mod) != 0)
1050 		ret = B_FALSE;
1051 	releasestr(q);
1052 	return (ret);
1053 }
1054 
1055 /*
1056  * DL_CAPABILITY_REQ
1057  */
1058 static void
proto_capability_req(dld_str_t * dsp,mblk_t * mp)1059 proto_capability_req(dld_str_t *dsp, mblk_t *mp)
1060 {
1061 	dl_capability_req_t *dlp = (dl_capability_req_t *)mp->b_rptr;
1062 	dl_capability_sub_t *sp;
1063 	size_t		size, len;
1064 	offset_t	off, end;
1065 	t_uscalar_t	dl_err;
1066 	queue_t		*q = dsp->ds_wq;
1067 
1068 	if (MBLKL(mp) < sizeof (dl_capability_req_t)) {
1069 		dl_err = DL_BADPRIM;
1070 		goto failed;
1071 	}
1072 
1073 	if (dsp->ds_dlstate == DL_UNATTACHED ||
1074 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
1075 		dl_err = DL_OUTSTATE;
1076 		goto failed;
1077 	}
1078 
1079 	/*
1080 	 * This request is overloaded. If there are no requested capabilities
1081 	 * then we just want to acknowledge with all the capabilities we
1082 	 * support. Otherwise we enable the set of capabilities requested.
1083 	 */
1084 	if (dlp->dl_sub_length == 0) {
1085 		proto_capability_advertise(dsp, mp);
1086 		return;
1087 	}
1088 
1089 	if (!MBLKIN(mp, dlp->dl_sub_offset, dlp->dl_sub_length)) {
1090 		dl_err = DL_BADPRIM;
1091 		goto failed;
1092 	}
1093 
1094 	dlp->dl_primitive = DL_CAPABILITY_ACK;
1095 
1096 	off = dlp->dl_sub_offset;
1097 	len = dlp->dl_sub_length;
1098 
1099 	/*
1100 	 * Walk the list of capabilities to be enabled.
1101 	 */
1102 	for (end = off + len; off < end; ) {
1103 		sp = (dl_capability_sub_t *)(mp->b_rptr + off);
1104 		size = sizeof (dl_capability_sub_t) + sp->dl_length;
1105 
1106 		if (off + size > end ||
1107 		    !IS_P2ALIGNED(off, sizeof (uint32_t))) {
1108 			dl_err = DL_BADPRIM;
1109 			goto failed;
1110 		}
1111 
1112 		switch (sp->dl_cap) {
1113 		/*
1114 		 * TCP/IP checksum offload to hardware.
1115 		 */
1116 		case DL_CAPAB_HCKSUM: {
1117 			dl_capab_hcksum_t *hcksump;
1118 			dl_capab_hcksum_t hcksum;
1119 
1120 			hcksump = (dl_capab_hcksum_t *)&sp[1];
1121 			/*
1122 			 * Copy for alignment.
1123 			 */
1124 			bcopy(hcksump, &hcksum, sizeof (dl_capab_hcksum_t));
1125 			dlcapabsetqid(&(hcksum.hcksum_mid), dsp->ds_rq);
1126 			bcopy(&hcksum, hcksump, sizeof (dl_capab_hcksum_t));
1127 			break;
1128 		}
1129 
1130 		case DL_CAPAB_DLD: {
1131 			dl_capab_dld_t	*dldp;
1132 			dl_capab_dld_t	dld;
1133 
1134 			dldp = (dl_capab_dld_t *)&sp[1];
1135 			/*
1136 			 * Copy for alignment.
1137 			 */
1138 			bcopy(dldp, &dld, sizeof (dl_capab_dld_t));
1139 			dlcapabsetqid(&(dld.dld_mid), dsp->ds_rq);
1140 			bcopy(&dld, dldp, sizeof (dl_capab_dld_t));
1141 			break;
1142 		}
1143 		default:
1144 			break;
1145 		}
1146 		off += size;
1147 	}
1148 	qreply(q, mp);
1149 	return;
1150 failed:
1151 	dlerrorack(q, mp, DL_CAPABILITY_REQ, dl_err, 0);
1152 }
1153 
1154 /*
1155  * DL_NOTIFY_REQ
1156  */
1157 static void
proto_notify_req(dld_str_t * dsp,mblk_t * mp)1158 proto_notify_req(dld_str_t *dsp, mblk_t *mp)
1159 {
1160 	dl_notify_req_t	*dlp = (dl_notify_req_t *)mp->b_rptr;
1161 	t_uscalar_t	dl_err;
1162 	queue_t		*q = dsp->ds_wq;
1163 	uint_t		note =
1164 	    DL_NOTE_PROMISC_ON_PHYS |
1165 	    DL_NOTE_PROMISC_OFF_PHYS |
1166 	    DL_NOTE_PHYS_ADDR |
1167 	    DL_NOTE_LINK_UP |
1168 	    DL_NOTE_LINK_DOWN |
1169 	    DL_NOTE_CAPAB_RENEG |
1170 	    DL_NOTE_FASTPATH_FLUSH |
1171 	    DL_NOTE_SPEED |
1172 	    DL_NOTE_SDU_SIZE|
1173 	    DL_NOTE_SDU_SIZE2|
1174 	    DL_NOTE_ALLOWED_IPS;
1175 
1176 	if (MBLKL(mp) < sizeof (dl_notify_req_t)) {
1177 		dl_err = DL_BADPRIM;
1178 		goto failed;
1179 	}
1180 
1181 	if (dsp->ds_dlstate == DL_UNATTACHED ||
1182 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
1183 		dl_err = DL_OUTSTATE;
1184 		goto failed;
1185 	}
1186 
1187 	note &= ~(mac_no_notification(dsp->ds_mh));
1188 
1189 	/*
1190 	 * Cache the notifications that are being enabled.
1191 	 */
1192 	dsp->ds_notifications = dlp->dl_notifications & note;
1193 	/*
1194 	 * The ACK carries all notifications regardless of which set is
1195 	 * being enabled.
1196 	 */
1197 	dlnotifyack(q, mp, note);
1198 
1199 	/*
1200 	 * Generate DL_NOTIFY_IND messages for each enabled notification.
1201 	 */
1202 	if (dsp->ds_notifications != 0) {
1203 		dld_str_notify_ind(dsp);
1204 	}
1205 	return;
1206 failed:
1207 	dlerrorack(q, mp, DL_NOTIFY_REQ, dl_err, 0);
1208 }
1209 
1210 /*
1211  * DL_UINTDATA_REQ
1212  */
1213 void
proto_unitdata_req(dld_str_t * dsp,mblk_t * mp)1214 proto_unitdata_req(dld_str_t *dsp, mblk_t *mp)
1215 {
1216 	queue_t			*q = dsp->ds_wq;
1217 	dl_unitdata_req_t	*dlp = (dl_unitdata_req_t *)mp->b_rptr;
1218 	off_t			off;
1219 	size_t			len, size;
1220 	const uint8_t		*addr;
1221 	uint16_t		sap;
1222 	uint_t			addr_length;
1223 	mblk_t			*bp, *payload;
1224 	t_uscalar_t		dl_err;
1225 	uint_t			max_sdu;
1226 
1227 	if (MBLKL(mp) < sizeof (dl_unitdata_req_t) || mp->b_cont == NULL) {
1228 		dlerrorack(q, mp, DL_UNITDATA_REQ, DL_BADPRIM, 0);
1229 		return;
1230 	}
1231 
1232 	mutex_enter(&dsp->ds_lock);
1233 	if (dsp->ds_dlstate != DL_IDLE) {
1234 		mutex_exit(&dsp->ds_lock);
1235 		dlerrorack(q, mp, DL_UNITDATA_REQ, DL_OUTSTATE, 0);
1236 		return;
1237 	}
1238 	DLD_DATATHR_INC(dsp);
1239 	mutex_exit(&dsp->ds_lock);
1240 
1241 	addr_length = dsp->ds_mip->mi_addr_length;
1242 
1243 	off = dlp->dl_dest_addr_offset;
1244 	len = dlp->dl_dest_addr_length;
1245 
1246 	if (!MBLKIN(mp, off, len) || !IS_P2ALIGNED(off, sizeof (uint16_t))) {
1247 		dl_err = DL_BADPRIM;
1248 		goto failed;
1249 	}
1250 
1251 	if (len != addr_length + sizeof (uint16_t)) {
1252 		dl_err = DL_BADADDR;
1253 		goto failed;
1254 	}
1255 
1256 	addr = mp->b_rptr + off;
1257 	sap = *(uint16_t *)(mp->b_rptr + off + addr_length);
1258 
1259 	/*
1260 	 * Check the length of the packet and the block types.
1261 	 */
1262 	size = 0;
1263 	payload = mp->b_cont;
1264 	for (bp = payload; bp != NULL; bp = bp->b_cont) {
1265 		if (DB_TYPE(bp) != M_DATA)
1266 			goto baddata;
1267 
1268 		size += MBLKL(bp);
1269 	}
1270 
1271 	mac_sdu_get(dsp->ds_mh, NULL, &max_sdu);
1272 	if (size > max_sdu)
1273 		goto baddata;
1274 
1275 	/*
1276 	 * Build a packet header.
1277 	 */
1278 	if ((bp = dls_header(dsp, addr, sap, dlp->dl_priority.dl_max,
1279 	    &payload)) == NULL) {
1280 		dl_err = DL_BADADDR;
1281 		goto failed;
1282 	}
1283 
1284 	/*
1285 	 * We no longer need the M_PROTO header, so free it.
1286 	 */
1287 	freeb(mp);
1288 
1289 	/*
1290 	 * Transfer the checksum offload information if it is present.
1291 	 */
1292 	mac_hcksum_clone(payload, bp);
1293 
1294 	/*
1295 	 * Link the payload onto the new header.
1296 	 */
1297 	ASSERT(bp->b_cont == NULL);
1298 	bp->b_cont = payload;
1299 
1300 	/*
1301 	 * No lock can be held across modules and putnext()'s,
1302 	 * which can happen here with the call from DLD_TX().
1303 	 */
1304 	if (DLD_TX(dsp, bp, 0, 0) != 0) {
1305 		/* flow-controlled */
1306 		DLD_SETQFULL(dsp);
1307 	}
1308 	DLD_DATATHR_DCR(dsp);
1309 	return;
1310 
1311 failed:
1312 	dlerrorack(q, mp, DL_UNITDATA_REQ, dl_err, 0);
1313 	DLD_DATATHR_DCR(dsp);
1314 	return;
1315 
1316 baddata:
1317 	dluderrorind(q, mp, (void *)addr, len, DL_BADDATA, 0);
1318 	DLD_DATATHR_DCR(dsp);
1319 }
1320 
1321 /*
1322  * DL_PASSIVE_REQ
1323  */
1324 static void
proto_passive_req(dld_str_t * dsp,mblk_t * mp)1325 proto_passive_req(dld_str_t *dsp, mblk_t *mp)
1326 {
1327 	t_uscalar_t dl_err;
1328 
1329 	/*
1330 	 * If we've already become active by issuing an active primitive,
1331 	 * then it's too late to try to become passive.
1332 	 */
1333 	if (dsp->ds_passivestate == DLD_ACTIVE) {
1334 		dl_err = DL_OUTSTATE;
1335 		goto failed;
1336 	}
1337 
1338 	if (MBLKL(mp) < sizeof (dl_passive_req_t)) {
1339 		dl_err = DL_BADPRIM;
1340 		goto failed;
1341 	}
1342 
1343 	dsp->ds_passivestate = DLD_PASSIVE;
1344 	dlokack(dsp->ds_wq, mp, DL_PASSIVE_REQ);
1345 	return;
1346 failed:
1347 	dlerrorack(dsp->ds_wq, mp, DL_PASSIVE_REQ, dl_err, 0);
1348 }
1349 
1350 
1351 /*
1352  * Catch-all handler.
1353  */
1354 static void
proto_req(dld_str_t * dsp,mblk_t * mp)1355 proto_req(dld_str_t *dsp, mblk_t *mp)
1356 {
1357 	union DL_primitives	*dlp = (union DL_primitives *)mp->b_rptr;
1358 
1359 	dlerrorack(dsp->ds_wq, mp, dlp->dl_primitive, DL_UNSUPPORTED, 0);
1360 }
1361 
1362 static int
dld_capab_perim(dld_str_t * dsp,void * data,uint_t flags)1363 dld_capab_perim(dld_str_t *dsp, void *data, uint_t flags)
1364 {
1365 	switch (flags) {
1366 	case DLD_ENABLE:
1367 		mac_perim_enter_by_mh(dsp->ds_mh, (mac_perim_handle_t *)data);
1368 		return (0);
1369 
1370 	case DLD_DISABLE:
1371 		mac_perim_exit((mac_perim_handle_t)data);
1372 		return (0);
1373 
1374 	case DLD_QUERY:
1375 		return (mac_perim_held(dsp->ds_mh));
1376 	}
1377 	return (0);
1378 }
1379 
1380 static int
dld_capab_direct(dld_str_t * dsp,void * data,uint_t flags)1381 dld_capab_direct(dld_str_t *dsp, void *data, uint_t flags)
1382 {
1383 	dld_capab_direct_t	*direct = data;
1384 
1385 	ASSERT(MAC_PERIM_HELD(dsp->ds_mh));
1386 
1387 	switch (flags) {
1388 	case DLD_ENABLE:
1389 		dls_rx_set(dsp, (dls_rx_t)direct->di_rx_cf,
1390 		    direct->di_rx_ch);
1391 
1392 		direct->di_tx_df = (uintptr_t)str_mdata_fastpath_put;
1393 		direct->di_tx_dh = dsp;
1394 		direct->di_tx_cb_df = (uintptr_t)mac_client_tx_notify;
1395 		direct->di_tx_cb_dh = dsp->ds_mch;
1396 		direct->di_tx_fctl_df = (uintptr_t)mac_tx_is_flow_blocked;
1397 		direct->di_tx_fctl_dh = dsp->ds_mch;
1398 
1399 		dsp->ds_direct = B_TRUE;
1400 
1401 		return (0);
1402 
1403 	case DLD_DISABLE:
1404 		dls_rx_set(dsp, (dsp->ds_mode == DLD_FASTPATH) ?
1405 		    dld_str_rx_fastpath : dld_str_rx_unitdata, (void *)dsp);
1406 		dsp->ds_direct = B_FALSE;
1407 
1408 		return (0);
1409 	}
1410 	return (ENOTSUP);
1411 }
1412 
1413 /*
1414  * This function is misnamed. All polling and fanouts are run out of
1415  * the lower MAC for VNICs and out of the MAC for NICs. The
1416  * availability of Rx rings and promiscous mode is taken care of
1417  * between the soft ring set (mac_srs), the Rx ring, and the SW
1418  * classifier. Fanout, if necessary, is done by the soft rings that
1419  * are part of the SRS. By default the SRS divvies up the packets
1420  * based on protocol: TCP, UDP, or Other (OTH).
1421  *
1422  * The SRS (or its associated soft rings) always store the ill_rx_ring
1423  * (the cookie returned when they registered with IP during plumb) as their
1424  * 2nd argument which is passed up as mac_resource_handle_t. The upcall
1425  * function and 1st argument is what the caller registered when they
1426  * called mac_rx_classify_flow_add() to register the flow. For VNIC,
1427  * the function is vnic_rx and argument is vnic_t. For regular NIC
1428  * case, it mac_rx_default and mac_handle_t. As explained above, the
1429  * SRS (or its soft ring) will add the ill_rx_ring (mac_resource_handle_t)
1430  * from its stored 2nd argument.
1431  */
1432 static int
dld_capab_poll_enable(dld_str_t * dsp,dld_capab_poll_t * poll)1433 dld_capab_poll_enable(dld_str_t *dsp, dld_capab_poll_t *poll)
1434 {
1435 	if (dsp->ds_polling)
1436 		return (EINVAL);
1437 
1438 	if ((dld_opt & DLD_OPT_NO_POLL) != 0 || dsp->ds_mode == DLD_RAW)
1439 		return (ENOTSUP);
1440 
1441 	/*
1442 	 * Enable client polling if and only if DLS bypass is
1443 	 * possible. Some traffic requires DLS processing in the Rx
1444 	 * data path. In such a case we can neither allow the client
1445 	 * (IP) to directly poll the soft ring (since DLS processing
1446 	 * hasn't been done) nor can we allow DLS bypass.
1447 	 */
1448 	if (!mac_rx_bypass_set(dsp->ds_mch, dsp->ds_rx, dsp->ds_rx_arg,
1449 	    dsp->ds_sap == ETHERTYPE_IPV6))
1450 		return (ENOTSUP);
1451 
1452 	/*
1453 	 * Register soft ring resources. This will come in handy later if
1454 	 * the user decides to modify CPU bindings to use more CPUs for the
1455 	 * device in which case we will switch to fanout using soft rings.
1456 	 */
1457 	mac_resource_set_common(dsp->ds_mch,
1458 	    (mac_resource_add_t)poll->poll_ring_add_cf,
1459 	    (mac_resource_remove_t)poll->poll_ring_remove_cf,
1460 	    (mac_resource_quiesce_t)poll->poll_ring_quiesce_cf,
1461 	    (mac_resource_restart_t)poll->poll_ring_restart_cf,
1462 	    (mac_resource_bind_t)poll->poll_ring_bind_cf,
1463 	    poll->poll_ring_ch);
1464 
1465 	mac_client_poll_enable(dsp->ds_mch);
1466 
1467 	dsp->ds_polling = B_TRUE;
1468 	return (0);
1469 }
1470 
1471 /* ARGSUSED */
1472 static int
dld_capab_poll_disable(dld_str_t * dsp,dld_capab_poll_t * poll)1473 dld_capab_poll_disable(dld_str_t *dsp, dld_capab_poll_t *poll)
1474 {
1475 	if (!dsp->ds_polling)
1476 		return (EINVAL);
1477 
1478 	mac_client_poll_disable(dsp->ds_mch);
1479 	mac_resource_set(dsp->ds_mch, NULL, NULL);
1480 
1481 	dsp->ds_polling = B_FALSE;
1482 	return (0);
1483 }
1484 
1485 static int
dld_capab_poll(dld_str_t * dsp,void * data,uint_t flags)1486 dld_capab_poll(dld_str_t *dsp, void *data, uint_t flags)
1487 {
1488 	dld_capab_poll_t	*poll = data;
1489 
1490 	ASSERT(MAC_PERIM_HELD(dsp->ds_mh));
1491 
1492 	switch (flags) {
1493 	case DLD_ENABLE:
1494 		return (dld_capab_poll_enable(dsp, poll));
1495 	case DLD_DISABLE:
1496 		return (dld_capab_poll_disable(dsp, poll));
1497 	}
1498 	return (ENOTSUP);
1499 }
1500 
1501 static int
dld_capab_lso(dld_str_t * dsp,void * data,uint_t flags)1502 dld_capab_lso(dld_str_t *dsp, void *data, uint_t flags)
1503 {
1504 	dld_capab_lso_t		*lso = data;
1505 
1506 	ASSERT(MAC_PERIM_HELD(dsp->ds_mh));
1507 
1508 	switch (flags) {
1509 	case DLD_ENABLE: {
1510 		mac_capab_lso_t		mac_lso;
1511 
1512 		/*
1513 		 * Check if LSO is supported on this MAC & enable LSO
1514 		 * accordingly.
1515 		 */
1516 		if (mac_capab_get(dsp->ds_mh, MAC_CAPAB_LSO, &mac_lso)) {
1517 			lso->lso_max_tcpv4 = mac_lso.lso_basic_tcp_ipv4.lso_max;
1518 			lso->lso_max_tcpv6 = mac_lso.lso_basic_tcp_ipv6.lso_max;
1519 			lso->lso_flags = 0;
1520 			/* translate the flag for mac clients */
1521 			if ((mac_lso.lso_flags & LSO_TX_BASIC_TCP_IPV4) != 0)
1522 				lso->lso_flags |= DLD_LSO_BASIC_TCP_IPV4;
1523 			if ((mac_lso.lso_flags & LSO_TX_BASIC_TCP_IPV6) != 0)
1524 				lso->lso_flags |= DLD_LSO_BASIC_TCP_IPV6;
1525 			dsp->ds_lso = lso->lso_flags != 0;
1526 			/*
1527 			 * DLS uses this to try and make sure that a raw ioctl
1528 			 * doesn't send too much data, but doesn't currently
1529 			 * check the actual SAP that is sending this (or that
1530 			 * it's TCP). So for now, just use the max value here.
1531 			 */
1532 			dsp->ds_lso_max = MAX(lso->lso_max_tcpv4,
1533 			    lso->lso_max_tcpv6);
1534 		} else {
1535 			dsp->ds_lso = B_FALSE;
1536 			dsp->ds_lso_max = 0;
1537 			return (ENOTSUP);
1538 		}
1539 		return (0);
1540 	}
1541 	case DLD_DISABLE: {
1542 		dsp->ds_lso = B_FALSE;
1543 		dsp->ds_lso_max = 0;
1544 		return (0);
1545 	}
1546 	}
1547 	return (ENOTSUP);
1548 }
1549 
1550 static int
dld_capab(dld_str_t * dsp,uint_t type,void * data,uint_t flags)1551 dld_capab(dld_str_t *dsp, uint_t type, void *data, uint_t flags)
1552 {
1553 	int	err;
1554 
1555 	/*
1556 	 * Don't enable direct callback capabilities unless the caller is
1557 	 * the IP client. When a module is inserted in a stream (_I_INSERT)
1558 	 * the stack initiates capability disable, but due to races, the
1559 	 * module insertion may complete before the capability disable
1560 	 * completes. So we limit the check to DLD_ENABLE case.
1561 	 */
1562 	if ((flags == DLD_ENABLE && type != DLD_CAPAB_PERIM) &&
1563 	    (!(dsp->ds_sap == ETHERTYPE_IP || dsp->ds_sap == ETHERTYPE_IPV6) ||
1564 	    !check_mod_above(dsp->ds_rq, "ip"))) {
1565 		return (ENOTSUP);
1566 	}
1567 
1568 	switch (type) {
1569 	case DLD_CAPAB_DIRECT:
1570 		err = dld_capab_direct(dsp, data, flags);
1571 		break;
1572 
1573 	case DLD_CAPAB_POLL:
1574 		err =  dld_capab_poll(dsp, data, flags);
1575 		break;
1576 
1577 	case DLD_CAPAB_PERIM:
1578 		err = dld_capab_perim(dsp, data, flags);
1579 		break;
1580 
1581 	case DLD_CAPAB_LSO:
1582 		err = dld_capab_lso(dsp, data, flags);
1583 		break;
1584 
1585 	default:
1586 		err = ENOTSUP;
1587 		break;
1588 	}
1589 
1590 	return (err);
1591 }
1592 
1593 /*
1594  * DL_CAPABILITY_ACK/DL_ERROR_ACK
1595  */
1596 static void
proto_capability_advertise(dld_str_t * dsp,mblk_t * mp)1597 proto_capability_advertise(dld_str_t *dsp, mblk_t *mp)
1598 {
1599 	dl_capability_ack_t	*dlap;
1600 	dl_capability_sub_t	*dlsp;
1601 	size_t			subsize;
1602 	dl_capab_dld_t		dld;
1603 	dl_capab_hcksum_t	hcksum;
1604 	dl_capab_zerocopy_t	zcopy;
1605 	dl_capab_vrrp_t		vrrp;
1606 	mac_capab_vrrp_t	vrrp_capab;
1607 	uint8_t			*ptr;
1608 	queue_t			*q = dsp->ds_wq;
1609 	mblk_t			*mp1;
1610 	boolean_t		hcksum_capable = B_FALSE;
1611 	boolean_t		zcopy_capable = B_FALSE;
1612 	boolean_t		dld_capable = B_FALSE;
1613 	boolean_t		vrrp_capable = B_FALSE;
1614 
1615 	/*
1616 	 * Initially assume no capabilities.
1617 	 */
1618 	subsize = 0;
1619 
1620 	/*
1621 	 * Check if checksum offload is supported on this MAC.
1622 	 */
1623 	bzero(&hcksum, sizeof (dl_capab_hcksum_t));
1624 	if (mac_capab_get(dsp->ds_mh, MAC_CAPAB_HCKSUM,
1625 	    &hcksum.hcksum_txflags)) {
1626 		if (hcksum.hcksum_txflags != 0) {
1627 			hcksum_capable = B_TRUE;
1628 			subsize += sizeof (dl_capability_sub_t) +
1629 			    sizeof (dl_capab_hcksum_t);
1630 		}
1631 	}
1632 
1633 	/*
1634 	 * Check if zerocopy is supported on this interface.
1635 	 * If advertising DL_CAPAB_ZEROCOPY has not been explicitly disabled
1636 	 * then reserve space for that capability.
1637 	 */
1638 	if (!mac_capab_get(dsp->ds_mh, MAC_CAPAB_NO_ZCOPY, NULL) &&
1639 	    !(dld_opt & DLD_OPT_NO_ZEROCOPY)) {
1640 		zcopy_capable = B_TRUE;
1641 		subsize += sizeof (dl_capability_sub_t) +
1642 		    sizeof (dl_capab_zerocopy_t);
1643 	}
1644 
1645 	/*
1646 	 * Direct capability negotiation interface between IP and DLD
1647 	 */
1648 	if ((dsp->ds_sap == ETHERTYPE_IP || dsp->ds_sap == ETHERTYPE_IPV6) &&
1649 	    check_mod_above(dsp->ds_rq, "ip")) {
1650 		dld_capable = B_TRUE;
1651 		subsize += sizeof (dl_capability_sub_t) +
1652 		    sizeof (dl_capab_dld_t);
1653 	}
1654 
1655 	/*
1656 	 * Check if vrrp is supported on this interface. If so, reserve
1657 	 * space for that capability.
1658 	 */
1659 	if (mac_capab_get(dsp->ds_mh, MAC_CAPAB_VRRP, &vrrp_capab)) {
1660 		vrrp_capable = B_TRUE;
1661 		subsize += sizeof (dl_capability_sub_t) +
1662 		    sizeof (dl_capab_vrrp_t);
1663 	}
1664 
1665 	/*
1666 	 * If there are no capabilities to advertise or if we
1667 	 * can't allocate a response, send a DL_ERROR_ACK.
1668 	 */
1669 	if ((mp1 = reallocb(mp,
1670 	    sizeof (dl_capability_ack_t) + subsize, 0)) == NULL) {
1671 		dlerrorack(q, mp, DL_CAPABILITY_REQ, DL_NOTSUPPORTED, 0);
1672 		return;
1673 	}
1674 
1675 	mp = mp1;
1676 	DB_TYPE(mp) = M_PROTO;
1677 	mp->b_wptr = mp->b_rptr + sizeof (dl_capability_ack_t) + subsize;
1678 	bzero(mp->b_rptr, MBLKL(mp));
1679 	dlap = (dl_capability_ack_t *)mp->b_rptr;
1680 	dlap->dl_primitive = DL_CAPABILITY_ACK;
1681 	dlap->dl_sub_offset = sizeof (dl_capability_ack_t);
1682 	dlap->dl_sub_length = subsize;
1683 	ptr = (uint8_t *)&dlap[1];
1684 
1685 	/*
1686 	 * TCP/IP checksum offload.
1687 	 */
1688 	if (hcksum_capable) {
1689 		dlsp = (dl_capability_sub_t *)ptr;
1690 
1691 		dlsp->dl_cap = DL_CAPAB_HCKSUM;
1692 		dlsp->dl_length = sizeof (dl_capab_hcksum_t);
1693 		ptr += sizeof (dl_capability_sub_t);
1694 
1695 		hcksum.hcksum_version = HCKSUM_VERSION_1;
1696 		dlcapabsetqid(&(hcksum.hcksum_mid), dsp->ds_rq);
1697 		bcopy(&hcksum, ptr, sizeof (dl_capab_hcksum_t));
1698 		ptr += sizeof (dl_capab_hcksum_t);
1699 	}
1700 
1701 	/*
1702 	 * Zero copy
1703 	 */
1704 	if (zcopy_capable) {
1705 		dlsp = (dl_capability_sub_t *)ptr;
1706 
1707 		dlsp->dl_cap = DL_CAPAB_ZEROCOPY;
1708 		dlsp->dl_length = sizeof (dl_capab_zerocopy_t);
1709 		ptr += sizeof (dl_capability_sub_t);
1710 
1711 		bzero(&zcopy, sizeof (dl_capab_zerocopy_t));
1712 		zcopy.zerocopy_version = ZEROCOPY_VERSION_1;
1713 		zcopy.zerocopy_flags = DL_CAPAB_VMSAFE_MEM;
1714 
1715 		dlcapabsetqid(&(zcopy.zerocopy_mid), dsp->ds_rq);
1716 		bcopy(&zcopy, ptr, sizeof (dl_capab_zerocopy_t));
1717 		ptr += sizeof (dl_capab_zerocopy_t);
1718 	}
1719 
1720 	/*
1721 	 * VRRP capability negotiation
1722 	 */
1723 	if (vrrp_capable) {
1724 		dlsp = (dl_capability_sub_t *)ptr;
1725 		dlsp->dl_cap = DL_CAPAB_VRRP;
1726 		dlsp->dl_length = sizeof (dl_capab_vrrp_t);
1727 		ptr += sizeof (dl_capability_sub_t);
1728 
1729 		bzero(&vrrp, sizeof (dl_capab_vrrp_t));
1730 		vrrp.vrrp_af = vrrp_capab.mcv_af;
1731 		bcopy(&vrrp, ptr, sizeof (dl_capab_vrrp_t));
1732 		ptr += sizeof (dl_capab_vrrp_t);
1733 	}
1734 
1735 	/*
1736 	 * Direct capability negotiation interface between IP and DLD.
1737 	 * Refer to dld.h for details.
1738 	 */
1739 	if (dld_capable) {
1740 		dlsp = (dl_capability_sub_t *)ptr;
1741 		dlsp->dl_cap = DL_CAPAB_DLD;
1742 		dlsp->dl_length = sizeof (dl_capab_dld_t);
1743 		ptr += sizeof (dl_capability_sub_t);
1744 
1745 		bzero(&dld, sizeof (dl_capab_dld_t));
1746 		dld.dld_version = DLD_CURRENT_VERSION;
1747 		dld.dld_capab = (uintptr_t)dld_capab;
1748 		dld.dld_capab_handle = (uintptr_t)dsp;
1749 
1750 		dlcapabsetqid(&(dld.dld_mid), dsp->ds_rq);
1751 		bcopy(&dld, ptr, sizeof (dl_capab_dld_t));
1752 		ptr += sizeof (dl_capab_dld_t);
1753 	}
1754 
1755 	ASSERT(ptr == mp->b_rptr + sizeof (dl_capability_ack_t) + subsize);
1756 	qreply(q, mp);
1757 }
1758 
1759 /*
1760  * Disable any enabled capabilities.
1761  */
1762 void
dld_capabilities_disable(dld_str_t * dsp)1763 dld_capabilities_disable(dld_str_t *dsp)
1764 {
1765 	if (dsp->ds_polling)
1766 		(void) dld_capab_poll_disable(dsp, NULL);
1767 }
1768