1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright 2012, Nexenta Systems, Inc. All rights reserved.
24 * Copyright (c) 2018, Joyent, Inc.
25 * Copyright 2025 Oxide Computer Company
26 */
27
28 /*
29 * Data-Link Driver
30 */
31 #include <sys/sysmacros.h>
32 #include <sys/strsubr.h>
33 #include <sys/strsun.h>
34 #include <sys/vlan.h>
35 #include <sys/dld_impl.h>
36 #include <sys/mac_client.h>
37 #include <sys/mac_client_impl.h>
38 #include <sys/mac_client_priv.h>
39
40 typedef void proto_reqfunc_t(dld_str_t *, mblk_t *);
41
42 static proto_reqfunc_t proto_info_req, proto_attach_req, proto_detach_req,
43 proto_bind_req, proto_unbind_req, proto_promiscon_req, proto_promiscoff_req,
44 proto_enabmulti_req, proto_disabmulti_req, proto_physaddr_req,
45 proto_setphysaddr_req, proto_udqos_req, proto_req, proto_capability_req,
46 proto_notify_req, proto_passive_req;
47
48 static void proto_capability_advertise(dld_str_t *, mblk_t *);
49 static int dld_capab_poll_disable(dld_str_t *, dld_capab_poll_t *);
50 static boolean_t check_mod_above(queue_t *, const char *);
51
52 #define DL_ACK_PENDING(state) \
53 ((state) == DL_ATTACH_PENDING || \
54 (state) == DL_DETACH_PENDING || \
55 (state) == DL_BIND_PENDING || \
56 (state) == DL_UNBIND_PENDING)
57
58 /*
59 * Process a DLPI protocol message.
60 * The primitives DL_BIND_REQ, DL_ENABMULTI_REQ, DL_PROMISCON_REQ,
61 * DL_SET_PHYS_ADDR_REQ put the data link below our dld_str_t into an
62 * 'active' state. The primitive DL_PASSIVE_REQ marks our dld_str_t
63 * as 'passive' and forbids it from being subsequently made 'active'
64 * by the above primitives.
65 */
66 void
dld_proto(dld_str_t * dsp,mblk_t * mp)67 dld_proto(dld_str_t *dsp, mblk_t *mp)
68 {
69 t_uscalar_t prim;
70
71 if (MBLKL(mp) < sizeof (t_uscalar_t)) {
72 freemsg(mp);
73 return;
74 }
75 prim = ((union DL_primitives *)mp->b_rptr)->dl_primitive;
76
77 switch (prim) {
78 case DL_INFO_REQ:
79 proto_info_req(dsp, mp);
80 break;
81 case DL_BIND_REQ:
82 proto_bind_req(dsp, mp);
83 break;
84 case DL_UNBIND_REQ:
85 proto_unbind_req(dsp, mp);
86 break;
87 case DL_UNITDATA_REQ:
88 proto_unitdata_req(dsp, mp);
89 break;
90 case DL_UDQOS_REQ:
91 proto_udqos_req(dsp, mp);
92 break;
93 case DL_ATTACH_REQ:
94 proto_attach_req(dsp, mp);
95 break;
96 case DL_DETACH_REQ:
97 proto_detach_req(dsp, mp);
98 break;
99 case DL_ENABMULTI_REQ:
100 proto_enabmulti_req(dsp, mp);
101 break;
102 case DL_DISABMULTI_REQ:
103 proto_disabmulti_req(dsp, mp);
104 break;
105 case DL_PROMISCON_REQ:
106 proto_promiscon_req(dsp, mp);
107 break;
108 case DL_PROMISCOFF_REQ:
109 proto_promiscoff_req(dsp, mp);
110 break;
111 case DL_PHYS_ADDR_REQ:
112 proto_physaddr_req(dsp, mp);
113 break;
114 case DL_SET_PHYS_ADDR_REQ:
115 proto_setphysaddr_req(dsp, mp);
116 break;
117 case DL_NOTIFY_REQ:
118 proto_notify_req(dsp, mp);
119 break;
120 case DL_CAPABILITY_REQ:
121 proto_capability_req(dsp, mp);
122 break;
123 case DL_PASSIVE_REQ:
124 proto_passive_req(dsp, mp);
125 break;
126 default:
127 proto_req(dsp, mp);
128 break;
129 }
130 }
131
132 #define NEG(x) -(x)
133 typedef struct dl_info_ack_wrapper {
134 dl_info_ack_t dl_info;
135 uint8_t dl_addr[MAXMACADDRLEN + sizeof (uint16_t)];
136 uint8_t dl_brdcst_addr[MAXMACADDRLEN];
137 dl_qos_cl_range1_t dl_qos_range1;
138 dl_qos_cl_sel1_t dl_qos_sel1;
139 } dl_info_ack_wrapper_t;
140
141 /*
142 * DL_INFO_REQ
143 */
144 static void
proto_info_req(dld_str_t * dsp,mblk_t * mp)145 proto_info_req(dld_str_t *dsp, mblk_t *mp)
146 {
147 dl_info_ack_wrapper_t *dlwp;
148 dl_info_ack_t *dlp;
149 dl_qos_cl_sel1_t *selp;
150 dl_qos_cl_range1_t *rangep;
151 uint8_t *addr;
152 uint8_t *brdcst_addr;
153 uint_t addr_length;
154 uint_t sap_length;
155 mac_info_t minfo;
156 mac_info_t *minfop;
157 queue_t *q = dsp->ds_wq;
158
159 /*
160 * Swap the request message for one large enough to contain the
161 * wrapper structure defined above.
162 */
163 if ((mp = mexchange(q, mp, sizeof (dl_info_ack_wrapper_t),
164 M_PCPROTO, 0)) == NULL)
165 return;
166
167 bzero(mp->b_rptr, sizeof (dl_info_ack_wrapper_t));
168 dlwp = (dl_info_ack_wrapper_t *)mp->b_rptr;
169
170 dlp = &(dlwp->dl_info);
171 ASSERT(dlp == (dl_info_ack_t *)mp->b_rptr);
172
173 dlp->dl_primitive = DL_INFO_ACK;
174
175 /*
176 * Set up the sub-structure pointers.
177 */
178 addr = dlwp->dl_addr;
179 brdcst_addr = dlwp->dl_brdcst_addr;
180 rangep = &(dlwp->dl_qos_range1);
181 selp = &(dlwp->dl_qos_sel1);
182
183 /*
184 * This driver supports only version 2 connectionless DLPI provider
185 * nodes.
186 */
187 dlp->dl_service_mode = DL_CLDLS;
188 dlp->dl_version = DL_VERSION_2;
189
190 /*
191 * Set the style of the provider
192 */
193 dlp->dl_provider_style = dsp->ds_style;
194 ASSERT(dlp->dl_provider_style == DL_STYLE1 ||
195 dlp->dl_provider_style == DL_STYLE2);
196
197 /*
198 * Set the current DLPI state.
199 */
200 dlp->dl_current_state = dsp->ds_dlstate;
201
202 /*
203 * Gratuitously set the media type. This is to deal with modules
204 * that assume the media type is known prior to DL_ATTACH_REQ
205 * being completed.
206 */
207 dlp->dl_mac_type = DL_ETHER;
208
209 /*
210 * If the stream is not at least attached we try to retrieve the
211 * mac_info using mac_info_get()
212 */
213 if (dsp->ds_dlstate == DL_UNATTACHED ||
214 dsp->ds_dlstate == DL_ATTACH_PENDING ||
215 dsp->ds_dlstate == DL_DETACH_PENDING) {
216 if (!mac_info_get(ddi_major_to_name(dsp->ds_major), &minfo)) {
217 /*
218 * Cannot find mac_info. giving up.
219 */
220 goto done;
221 }
222 minfop = &minfo;
223 } else {
224 minfop = (mac_info_t *)dsp->ds_mip;
225 /* We can only get the sdu if we're attached. */
226 mac_sdu_get(dsp->ds_mh, &dlp->dl_min_sdu, &dlp->dl_max_sdu);
227 }
228
229 /*
230 * Set the media type (properly this time).
231 */
232 if (dsp->ds_native)
233 dlp->dl_mac_type = minfop->mi_nativemedia;
234 else
235 dlp->dl_mac_type = minfop->mi_media;
236
237 /*
238 * Set the DLSAP length. We only support 16 bit values and they
239 * appear after the MAC address portion of DLSAP addresses.
240 */
241 sap_length = sizeof (uint16_t);
242 dlp->dl_sap_length = NEG(sap_length);
243
244 addr_length = minfop->mi_addr_length;
245
246 /*
247 * Copy in the media broadcast address.
248 */
249 if (minfop->mi_brdcst_addr != NULL) {
250 dlp->dl_brdcst_addr_offset =
251 (uintptr_t)brdcst_addr - (uintptr_t)dlp;
252 bcopy(minfop->mi_brdcst_addr, brdcst_addr, addr_length);
253 dlp->dl_brdcst_addr_length = addr_length;
254 }
255
256 /* Only VLAN links and links that have a normal tag mode support QOS. */
257 if ((dsp->ds_mch != NULL &&
258 mac_client_vid(dsp->ds_mch) != VLAN_ID_NONE) ||
259 (dsp->ds_dlp != NULL &&
260 dsp->ds_dlp->dl_tagmode == LINK_TAGMODE_NORMAL)) {
261 dlp->dl_qos_range_offset = (uintptr_t)rangep - (uintptr_t)dlp;
262 dlp->dl_qos_range_length = sizeof (dl_qos_cl_range1_t);
263
264 rangep->dl_qos_type = DL_QOS_CL_RANGE1;
265 rangep->dl_trans_delay.dl_target_value = DL_UNKNOWN;
266 rangep->dl_trans_delay.dl_accept_value = DL_UNKNOWN;
267 rangep->dl_protection.dl_min = DL_UNKNOWN;
268 rangep->dl_protection.dl_max = DL_UNKNOWN;
269 rangep->dl_residual_error = DL_UNKNOWN;
270
271 /*
272 * Specify the supported range of priorities.
273 */
274 rangep->dl_priority.dl_min = 0;
275 rangep->dl_priority.dl_max = (1 << VLAN_PRI_SIZE) - 1;
276
277 dlp->dl_qos_offset = (uintptr_t)selp - (uintptr_t)dlp;
278 dlp->dl_qos_length = sizeof (dl_qos_cl_sel1_t);
279
280 selp->dl_qos_type = DL_QOS_CL_SEL1;
281 selp->dl_trans_delay = DL_UNKNOWN;
282 selp->dl_protection = DL_UNKNOWN;
283 selp->dl_residual_error = DL_UNKNOWN;
284
285 /*
286 * Specify the current priority (which can be changed by
287 * the DL_UDQOS_REQ primitive).
288 */
289 selp->dl_priority = dsp->ds_pri;
290 }
291
292 dlp->dl_addr_length = addr_length + sizeof (uint16_t);
293 if (dsp->ds_dlstate == DL_IDLE) {
294 /*
295 * The stream is bound. Therefore we can formulate a valid
296 * DLSAP address.
297 */
298 dlp->dl_addr_offset = (uintptr_t)addr - (uintptr_t)dlp;
299 if (addr_length > 0)
300 mac_unicast_primary_get(dsp->ds_mh, addr);
301
302 *(uint16_t *)(addr + addr_length) = dsp->ds_sap;
303 }
304
305 done:
306 IMPLY(dlp->dl_qos_offset != 0, dlp->dl_qos_length != 0);
307 IMPLY(dlp->dl_qos_range_offset != 0,
308 dlp->dl_qos_range_length != 0);
309 IMPLY(dlp->dl_addr_offset != 0, dlp->dl_addr_length != 0);
310 IMPLY(dlp->dl_brdcst_addr_offset != 0,
311 dlp->dl_brdcst_addr_length != 0);
312
313 qreply(q, mp);
314 }
315
316 /*
317 * DL_ATTACH_REQ
318 */
319 static void
proto_attach_req(dld_str_t * dsp,mblk_t * mp)320 proto_attach_req(dld_str_t *dsp, mblk_t *mp)
321 {
322 dl_attach_req_t *dlp = (dl_attach_req_t *)mp->b_rptr;
323 int err = 0;
324 t_uscalar_t dl_err;
325 queue_t *q = dsp->ds_wq;
326
327 if (MBLKL(mp) < sizeof (dl_attach_req_t) ||
328 dsp->ds_style == DL_STYLE1) {
329 dl_err = DL_BADPRIM;
330 goto failed;
331 }
332
333 if (dsp->ds_dlstate != DL_UNATTACHED) {
334 dl_err = DL_OUTSTATE;
335 goto failed;
336 }
337
338 dsp->ds_dlstate = DL_ATTACH_PENDING;
339
340 err = dld_str_attach(dsp, dlp->dl_ppa);
341 if (err != 0) {
342 switch (err) {
343 case ENOENT:
344 dl_err = DL_BADPPA;
345 err = 0;
346 break;
347 default:
348 dl_err = DL_SYSERR;
349 break;
350 }
351 dsp->ds_dlstate = DL_UNATTACHED;
352 goto failed;
353 }
354 ASSERT(dsp->ds_dlstate == DL_UNBOUND);
355 dlokack(q, mp, DL_ATTACH_REQ);
356 return;
357
358 failed:
359 dlerrorack(q, mp, DL_ATTACH_REQ, dl_err, (t_uscalar_t)err);
360 }
361
362 /*
363 * DL_DETACH_REQ
364 */
365 static void
proto_detach_req(dld_str_t * dsp,mblk_t * mp)366 proto_detach_req(dld_str_t *dsp, mblk_t *mp)
367 {
368 queue_t *q = dsp->ds_wq;
369 t_uscalar_t dl_err;
370
371 if (MBLKL(mp) < sizeof (dl_detach_req_t)) {
372 dl_err = DL_BADPRIM;
373 goto failed;
374 }
375
376 if (dsp->ds_dlstate != DL_UNBOUND) {
377 dl_err = DL_OUTSTATE;
378 goto failed;
379 }
380
381 if (dsp->ds_style == DL_STYLE1) {
382 dl_err = DL_BADPRIM;
383 goto failed;
384 }
385
386 ASSERT(dsp->ds_datathr_cnt == 0);
387 dsp->ds_dlstate = DL_DETACH_PENDING;
388
389 dld_str_detach(dsp);
390 dlokack(dsp->ds_wq, mp, DL_DETACH_REQ);
391 return;
392
393 failed:
394 dlerrorack(q, mp, DL_DETACH_REQ, dl_err, 0);
395 }
396
397 /*
398 * DL_BIND_REQ
399 */
400 static void
proto_bind_req(dld_str_t * dsp,mblk_t * mp)401 proto_bind_req(dld_str_t *dsp, mblk_t *mp)
402 {
403 dl_bind_req_t *dlp = (dl_bind_req_t *)mp->b_rptr;
404 int err = 0;
405 uint8_t dlsap_addr[MAXMACADDRLEN + sizeof (uint16_t)];
406 uint_t dlsap_addr_length;
407 t_uscalar_t dl_err;
408 t_scalar_t sap;
409 queue_t *q = dsp->ds_wq;
410 mac_perim_handle_t mph;
411 void *mdip;
412 int32_t intr_cpu;
413
414 if (MBLKL(mp) < sizeof (dl_bind_req_t)) {
415 dl_err = DL_BADPRIM;
416 goto failed;
417 }
418
419 if (dlp->dl_xidtest_flg != 0) {
420 dl_err = DL_NOAUTO;
421 goto failed;
422 }
423
424 if (dlp->dl_service_mode != DL_CLDLS) {
425 dl_err = DL_UNSUPPORTED;
426 goto failed;
427 }
428
429 if (dsp->ds_dlstate != DL_UNBOUND) {
430 dl_err = DL_OUTSTATE;
431 goto failed;
432 }
433
434 mac_perim_enter_by_mh(dsp->ds_mh, &mph);
435
436 if ((err = dls_active_set(dsp)) != 0) {
437 dl_err = DL_SYSERR;
438 goto failed2;
439 }
440
441 dsp->ds_dlstate = DL_BIND_PENDING;
442 /*
443 * Set the receive callback.
444 */
445 dls_rx_set(dsp, (dsp->ds_mode == DLD_RAW) ?
446 dld_str_rx_raw : dld_str_rx_unitdata, dsp);
447
448 /*
449 * Bind the channel such that it can receive packets.
450 */
451 sap = dlp->dl_sap;
452 dsp->ds_nonip = !check_mod_above(dsp->ds_rq, "ip") &&
453 !check_mod_above(dsp->ds_rq, "arp");
454
455 err = dls_bind(dsp, sap);
456 if (err != 0) {
457 switch (err) {
458 case EINVAL:
459 dl_err = DL_BADADDR;
460 err = 0;
461 break;
462 default:
463 dl_err = DL_SYSERR;
464 break;
465 }
466
467 dsp->ds_dlstate = DL_UNBOUND;
468 dls_active_clear(dsp, B_FALSE);
469 goto failed2;
470 }
471
472 intr_cpu = mac_client_intr_cpu(dsp->ds_mch);
473 mdip = mac_get_devinfo(dsp->ds_mh);
474 mac_perim_exit(mph);
475
476 /*
477 * We do this after we get out of the perim to avoid deadlocks
478 * etc. since part of mac_client_retarget_intr is to walk the
479 * device tree in order to find and retarget the interrupts.
480 */
481 if (intr_cpu != -1)
482 mac_client_set_intr_cpu(mdip, dsp->ds_mch, intr_cpu);
483
484 /*
485 * Copy in MAC address.
486 */
487 dlsap_addr_length = dsp->ds_mip->mi_addr_length;
488 mac_unicast_primary_get(dsp->ds_mh, dlsap_addr);
489
490 /*
491 * Copy in the SAP.
492 */
493 *(uint16_t *)(dlsap_addr + dlsap_addr_length) = sap;
494 dlsap_addr_length += sizeof (uint16_t);
495
496 dsp->ds_dlstate = DL_IDLE;
497 dlbindack(q, mp, sap, dlsap_addr, dlsap_addr_length, 0, 0);
498 return;
499
500 failed2:
501 mac_perim_exit(mph);
502 failed:
503 dlerrorack(q, mp, DL_BIND_REQ, dl_err, (t_uscalar_t)err);
504 }
505
506 /*
507 * DL_UNBIND_REQ
508 */
509 static void
proto_unbind_req(dld_str_t * dsp,mblk_t * mp)510 proto_unbind_req(dld_str_t *dsp, mblk_t *mp)
511 {
512 queue_t *q = dsp->ds_wq;
513 t_uscalar_t dl_err;
514 mac_perim_handle_t mph;
515
516 if (MBLKL(mp) < sizeof (dl_unbind_req_t)) {
517 dl_err = DL_BADPRIM;
518 goto failed;
519 }
520
521 if (dsp->ds_dlstate != DL_IDLE) {
522 dl_err = DL_OUTSTATE;
523 goto failed;
524 }
525
526 mutex_enter(&dsp->ds_lock);
527 while (dsp->ds_datathr_cnt != 0)
528 cv_wait(&dsp->ds_datathr_cv, &dsp->ds_lock);
529
530 dsp->ds_dlstate = DL_UNBIND_PENDING;
531 mutex_exit(&dsp->ds_lock);
532
533 mac_perim_enter_by_mh(dsp->ds_mh, &mph);
534 /*
535 * Unbind the channel to stop packets being received.
536 */
537 dls_unbind(dsp);
538
539 /*
540 * Disable polling mode, if it is enabled.
541 */
542 (void) dld_capab_poll_disable(dsp, NULL);
543
544 /*
545 * Clear LSO flags.
546 */
547 dsp->ds_lso = B_FALSE;
548 dsp->ds_lso_max = 0;
549
550 /*
551 * Clear the receive callback.
552 */
553 dls_rx_set(dsp, NULL, NULL);
554 dsp->ds_direct = B_FALSE;
555
556 /*
557 * Set the mode back to the default (unitdata).
558 */
559 dsp->ds_mode = DLD_UNITDATA;
560 dsp->ds_dlstate = DL_UNBOUND;
561
562 dls_active_clear(dsp, B_FALSE);
563 mac_perim_exit(mph);
564 dlokack(dsp->ds_wq, mp, DL_UNBIND_REQ);
565 return;
566 failed:
567 dlerrorack(q, mp, DL_UNBIND_REQ, dl_err, 0);
568 }
569
570 /*
571 * DL_PROMISCON_REQ
572 */
573 static void
proto_promiscon_req(dld_str_t * dsp,mblk_t * mp)574 proto_promiscon_req(dld_str_t *dsp, mblk_t *mp)
575 {
576 dl_promiscon_req_t *dlp = (dl_promiscon_req_t *)mp->b_rptr;
577 int err = 0;
578 t_uscalar_t dl_err;
579 uint32_t new_flags, promisc_saved;
580 queue_t *q = dsp->ds_wq;
581 mac_perim_handle_t mph;
582
583 if (MBLKL(mp) < sizeof (dl_promiscon_req_t)) {
584 dl_err = DL_BADPRIM;
585 goto failed;
586 }
587
588 if (dsp->ds_dlstate == DL_UNATTACHED ||
589 DL_ACK_PENDING(dsp->ds_dlstate)) {
590 dl_err = DL_OUTSTATE;
591 goto failed;
592 }
593
594 mac_perim_enter_by_mh(dsp->ds_mh, &mph);
595
596 new_flags = promisc_saved = dsp->ds_promisc;
597 switch (dlp->dl_level) {
598 case DL_PROMISC_SAP:
599 new_flags |= DLS_PROMISC_SAP;
600 break;
601
602 case DL_PROMISC_MULTI:
603 new_flags |= DLS_PROMISC_MULTI;
604 break;
605
606 case DL_PROMISC_PHYS:
607 new_flags |= DLS_PROMISC_PHYS;
608 break;
609
610 case DL_PROMISC_RX_ONLY:
611 new_flags |= DLS_PROMISC_RX_ONLY;
612 break;
613
614 case DL_PROMISC_INCOMING:
615 new_flags |= DLS_PROMISC_INCOMING;
616 break;
617
618 case DL_PROMISC_OUTGOING:
619 new_flags |= DLS_PROMISC_OUTGOING;
620 break;
621
622 default:
623 dl_err = DL_NOTSUPPORTED;
624 goto failed2;
625 }
626
627 if ((promisc_saved == 0) && (err = dls_active_set(dsp)) != 0) {
628 ASSERT(dsp->ds_promisc == promisc_saved);
629 dl_err = DL_SYSERR;
630 goto failed2;
631 }
632
633 /*
634 * Adjust channel promiscuity.
635 */
636 err = dls_promisc(dsp, new_flags);
637
638 if (err != 0) {
639 dl_err = DL_SYSERR;
640 dsp->ds_promisc = promisc_saved;
641 if (promisc_saved == 0)
642 dls_active_clear(dsp, B_FALSE);
643 goto failed2;
644 }
645
646 mac_perim_exit(mph);
647
648 dlokack(q, mp, DL_PROMISCON_REQ);
649 return;
650
651 failed2:
652 mac_perim_exit(mph);
653 failed:
654 dlerrorack(q, mp, DL_PROMISCON_REQ, dl_err, (t_uscalar_t)err);
655 }
656
657 /*
658 * DL_PROMISCOFF_REQ
659 */
660 static void
proto_promiscoff_req(dld_str_t * dsp,mblk_t * mp)661 proto_promiscoff_req(dld_str_t *dsp, mblk_t *mp)
662 {
663 dl_promiscoff_req_t *dlp = (dl_promiscoff_req_t *)mp->b_rptr;
664 int err = 0;
665 t_uscalar_t dl_err;
666 uint32_t new_flags;
667 queue_t *q = dsp->ds_wq;
668 mac_perim_handle_t mph;
669
670 if (MBLKL(mp) < sizeof (dl_promiscoff_req_t)) {
671 dl_err = DL_BADPRIM;
672 goto failed;
673 }
674
675 if (dsp->ds_dlstate == DL_UNATTACHED ||
676 DL_ACK_PENDING(dsp->ds_dlstate)) {
677 dl_err = DL_OUTSTATE;
678 goto failed;
679 }
680
681 mac_perim_enter_by_mh(dsp->ds_mh, &mph);
682
683 new_flags = dsp->ds_promisc;
684 switch (dlp->dl_level) {
685 case DL_PROMISC_SAP:
686 if (!(dsp->ds_promisc & DLS_PROMISC_SAP)) {
687 dl_err = DL_NOTENAB;
688 goto failed2;
689 }
690 new_flags &= ~DLS_PROMISC_SAP;
691 break;
692
693 case DL_PROMISC_MULTI:
694 if (!(dsp->ds_promisc & DLS_PROMISC_MULTI)) {
695 dl_err = DL_NOTENAB;
696 goto failed2;
697 }
698 new_flags &= ~DLS_PROMISC_MULTI;
699 break;
700
701 case DL_PROMISC_PHYS:
702 if (!(dsp->ds_promisc & DLS_PROMISC_PHYS)) {
703 dl_err = DL_NOTENAB;
704 goto failed2;
705 }
706 new_flags &= ~DLS_PROMISC_PHYS;
707 break;
708
709 case DL_PROMISC_RX_ONLY:
710 if (!(dsp->ds_promisc & DLS_PROMISC_RX_ONLY)) {
711 dl_err = DL_NOTENAB;
712 goto failed2;
713 }
714 new_flags &= ~DLS_PROMISC_RX_ONLY;
715 break;
716
717 case DL_PROMISC_INCOMING:
718 if (!(dsp->ds_promisc & DLS_PROMISC_INCOMING)) {
719 dl_err = DL_NOTENAB;
720 goto failed2;
721 }
722 new_flags &= ~DLS_PROMISC_INCOMING;
723 break;
724
725 case DL_PROMISC_OUTGOING:
726 if (!(dsp->ds_promisc & DLS_PROMISC_OUTGOING)) {
727 dl_err = DL_NOTENAB;
728 goto failed2;
729 }
730 new_flags &= ~DLS_PROMISC_OUTGOING;
731 break;
732
733 default:
734 dl_err = DL_NOTSUPPORTED;
735 goto failed2;
736 }
737
738 /*
739 * Adjust channel promiscuity.
740 */
741 err = dls_promisc(dsp, new_flags);
742
743 if (err != 0) {
744 dl_err = DL_SYSERR;
745 goto failed2;
746 }
747
748 ASSERT(dsp->ds_promisc == new_flags);
749 if (dsp->ds_promisc == 0)
750 dls_active_clear(dsp, B_FALSE);
751
752 mac_perim_exit(mph);
753
754 dlokack(q, mp, DL_PROMISCOFF_REQ);
755 return;
756 failed2:
757 mac_perim_exit(mph);
758 failed:
759 dlerrorack(q, mp, DL_PROMISCOFF_REQ, dl_err, (t_uscalar_t)err);
760 }
761
762 /*
763 * DL_ENABMULTI_REQ
764 */
765 static void
proto_enabmulti_req(dld_str_t * dsp,mblk_t * mp)766 proto_enabmulti_req(dld_str_t *dsp, mblk_t *mp)
767 {
768 dl_enabmulti_req_t *dlp = (dl_enabmulti_req_t *)mp->b_rptr;
769 int err = 0;
770 t_uscalar_t dl_err;
771 queue_t *q = dsp->ds_wq;
772 mac_perim_handle_t mph;
773
774 if (dsp->ds_dlstate == DL_UNATTACHED ||
775 DL_ACK_PENDING(dsp->ds_dlstate)) {
776 dl_err = DL_OUTSTATE;
777 goto failed;
778 }
779
780 if (MBLKL(mp) < sizeof (dl_enabmulti_req_t) ||
781 !MBLKIN(mp, dlp->dl_addr_offset, dlp->dl_addr_length) ||
782 dlp->dl_addr_length != dsp->ds_mip->mi_addr_length) {
783 dl_err = DL_BADPRIM;
784 goto failed;
785 }
786
787 mac_perim_enter_by_mh(dsp->ds_mh, &mph);
788
789 if ((dsp->ds_dmap == NULL) && (err = dls_active_set(dsp)) != 0) {
790 dl_err = DL_SYSERR;
791 goto failed2;
792 }
793
794 err = dls_multicst_add(dsp, mp->b_rptr + dlp->dl_addr_offset);
795 if (err != 0) {
796 switch (err) {
797 case EINVAL:
798 dl_err = DL_BADADDR;
799 err = 0;
800 break;
801 case ENOSPC:
802 dl_err = DL_TOOMANY;
803 err = 0;
804 break;
805 default:
806 dl_err = DL_SYSERR;
807 break;
808 }
809 if (dsp->ds_dmap == NULL)
810 dls_active_clear(dsp, B_FALSE);
811 goto failed2;
812 }
813
814 mac_perim_exit(mph);
815
816 dlokack(q, mp, DL_ENABMULTI_REQ);
817 return;
818
819 failed2:
820 mac_perim_exit(mph);
821 failed:
822 dlerrorack(q, mp, DL_ENABMULTI_REQ, dl_err, (t_uscalar_t)err);
823 }
824
825 /*
826 * DL_DISABMULTI_REQ
827 */
828 static void
proto_disabmulti_req(dld_str_t * dsp,mblk_t * mp)829 proto_disabmulti_req(dld_str_t *dsp, mblk_t *mp)
830 {
831 dl_disabmulti_req_t *dlp = (dl_disabmulti_req_t *)mp->b_rptr;
832 int err = 0;
833 t_uscalar_t dl_err;
834 queue_t *q = dsp->ds_wq;
835 mac_perim_handle_t mph;
836
837 if (dsp->ds_dlstate == DL_UNATTACHED ||
838 DL_ACK_PENDING(dsp->ds_dlstate)) {
839 dl_err = DL_OUTSTATE;
840 goto failed;
841 }
842
843 if (MBLKL(mp) < sizeof (dl_disabmulti_req_t) ||
844 !MBLKIN(mp, dlp->dl_addr_offset, dlp->dl_addr_length) ||
845 dlp->dl_addr_length != dsp->ds_mip->mi_addr_length) {
846 dl_err = DL_BADPRIM;
847 goto failed;
848 }
849
850 mac_perim_enter_by_mh(dsp->ds_mh, &mph);
851 err = dls_multicst_remove(dsp, mp->b_rptr + dlp->dl_addr_offset);
852 if ((err == 0) && (dsp->ds_dmap == NULL))
853 dls_active_clear(dsp, B_FALSE);
854 mac_perim_exit(mph);
855
856 if (err != 0) {
857 switch (err) {
858 case EINVAL:
859 dl_err = DL_BADADDR;
860 err = 0;
861 break;
862
863 case ENOENT:
864 dl_err = DL_NOTENAB;
865 err = 0;
866 break;
867
868 default:
869 dl_err = DL_SYSERR;
870 break;
871 }
872 goto failed;
873 }
874 dlokack(q, mp, DL_DISABMULTI_REQ);
875 return;
876 failed:
877 dlerrorack(q, mp, DL_DISABMULTI_REQ, dl_err, (t_uscalar_t)err);
878 }
879
880 /*
881 * DL_PHYS_ADDR_REQ
882 */
883 static void
proto_physaddr_req(dld_str_t * dsp,mblk_t * mp)884 proto_physaddr_req(dld_str_t *dsp, mblk_t *mp)
885 {
886 dl_phys_addr_req_t *dlp = (dl_phys_addr_req_t *)mp->b_rptr;
887 queue_t *q = dsp->ds_wq;
888 t_uscalar_t dl_err = 0;
889 char *addr = NULL;
890 uint_t addr_length;
891
892 if (MBLKL(mp) < sizeof (dl_phys_addr_req_t)) {
893 dl_err = DL_BADPRIM;
894 goto done;
895 }
896
897 if (dsp->ds_dlstate == DL_UNATTACHED ||
898 DL_ACK_PENDING(dsp->ds_dlstate)) {
899 dl_err = DL_OUTSTATE;
900 goto done;
901 }
902
903 addr_length = dsp->ds_mip->mi_addr_length;
904 if (addr_length > 0) {
905 addr = kmem_alloc(addr_length, KM_SLEEP);
906 switch (dlp->dl_addr_type) {
907 case DL_CURR_PHYS_ADDR:
908 mac_unicast_primary_get(dsp->ds_mh, (uint8_t *)addr);
909 break;
910 case DL_FACT_PHYS_ADDR:
911 bcopy(dsp->ds_mip->mi_unicst_addr, addr, addr_length);
912 break;
913 case DL_CURR_DEST_ADDR:
914 if (!mac_dst_get(dsp->ds_mh, (uint8_t *)addr))
915 dl_err = DL_NOTSUPPORTED;
916 break;
917 default:
918 dl_err = DL_UNSUPPORTED;
919 }
920 }
921 done:
922 if (dl_err == 0)
923 dlphysaddrack(q, mp, addr, (t_uscalar_t)addr_length);
924 else
925 dlerrorack(q, mp, DL_PHYS_ADDR_REQ, dl_err, 0);
926 if (addr != NULL)
927 kmem_free(addr, addr_length);
928 }
929
930 /*
931 * DL_SET_PHYS_ADDR_REQ
932 */
933 static void
proto_setphysaddr_req(dld_str_t * dsp,mblk_t * mp)934 proto_setphysaddr_req(dld_str_t *dsp, mblk_t *mp)
935 {
936 dl_set_phys_addr_req_t *dlp = (dl_set_phys_addr_req_t *)mp->b_rptr;
937 int err = 0;
938 t_uscalar_t dl_err;
939 queue_t *q = dsp->ds_wq;
940 mac_perim_handle_t mph;
941
942 if (dsp->ds_dlstate == DL_UNATTACHED ||
943 DL_ACK_PENDING(dsp->ds_dlstate)) {
944 dl_err = DL_OUTSTATE;
945 goto failed;
946 }
947
948 if (MBLKL(mp) < sizeof (dl_set_phys_addr_req_t) ||
949 !MBLKIN(mp, dlp->dl_addr_offset, dlp->dl_addr_length) ||
950 dlp->dl_addr_length != dsp->ds_mip->mi_addr_length) {
951 dl_err = DL_BADPRIM;
952 goto failed;
953 }
954
955 mac_perim_enter_by_mh(dsp->ds_mh, &mph);
956
957 if ((err = dls_active_set(dsp)) != 0) {
958 dl_err = DL_SYSERR;
959 goto failed2;
960 }
961
962 /*
963 * If mac-nospoof is enabled and the link is owned by a
964 * non-global zone, changing the mac address is not allowed.
965 */
966 if (dsp->ds_dlp->dl_zid != GLOBAL_ZONEID &&
967 mac_protect_enabled(dsp->ds_mch, MPT_MACNOSPOOF)) {
968 dls_active_clear(dsp, B_FALSE);
969 err = EACCES;
970 goto failed2;
971 }
972
973 err = mac_unicast_primary_set(dsp->ds_mh,
974 mp->b_rptr + dlp->dl_addr_offset);
975 if (err != 0) {
976 switch (err) {
977 case EINVAL:
978 dl_err = DL_BADADDR;
979 err = 0;
980 break;
981
982 default:
983 dl_err = DL_SYSERR;
984 break;
985 }
986 dls_active_clear(dsp, B_FALSE);
987 goto failed2;
988
989 }
990
991 mac_perim_exit(mph);
992
993 dlokack(q, mp, DL_SET_PHYS_ADDR_REQ);
994 return;
995
996 failed2:
997 mac_perim_exit(mph);
998 failed:
999 dlerrorack(q, mp, DL_SET_PHYS_ADDR_REQ, dl_err, (t_uscalar_t)err);
1000 }
1001
1002 /*
1003 * DL_UDQOS_REQ
1004 */
1005 static void
proto_udqos_req(dld_str_t * dsp,mblk_t * mp)1006 proto_udqos_req(dld_str_t *dsp, mblk_t *mp)
1007 {
1008 dl_udqos_req_t *dlp = (dl_udqos_req_t *)mp->b_rptr;
1009 dl_qos_cl_sel1_t *selp;
1010 int off, len;
1011 t_uscalar_t dl_err;
1012 queue_t *q = dsp->ds_wq;
1013
1014 off = dlp->dl_qos_offset;
1015 len = dlp->dl_qos_length;
1016
1017 if (MBLKL(mp) < sizeof (dl_udqos_req_t) || !MBLKIN(mp, off, len)) {
1018 dl_err = DL_BADPRIM;
1019 goto failed;
1020 }
1021
1022 selp = (dl_qos_cl_sel1_t *)(mp->b_rptr + off);
1023 if (selp->dl_qos_type != DL_QOS_CL_SEL1) {
1024 dl_err = DL_BADQOSTYPE;
1025 goto failed;
1026 }
1027
1028 if (selp->dl_priority > (1 << VLAN_PRI_SIZE) - 1 ||
1029 selp->dl_priority < 0) {
1030 dl_err = DL_BADQOSPARAM;
1031 goto failed;
1032 }
1033
1034 dsp->ds_pri = selp->dl_priority;
1035 dlokack(q, mp, DL_UDQOS_REQ);
1036 return;
1037 failed:
1038 dlerrorack(q, mp, DL_UDQOS_REQ, dl_err, 0);
1039 }
1040
1041 static boolean_t
check_mod_above(queue_t * q,const char * mod)1042 check_mod_above(queue_t *q, const char *mod)
1043 {
1044 queue_t *next_q;
1045 boolean_t ret = B_TRUE;
1046
1047 claimstr(q);
1048 next_q = q->q_next;
1049 if (strcmp(next_q->q_qinfo->qi_minfo->mi_idname, mod) != 0)
1050 ret = B_FALSE;
1051 releasestr(q);
1052 return (ret);
1053 }
1054
1055 /*
1056 * DL_CAPABILITY_REQ
1057 */
1058 static void
proto_capability_req(dld_str_t * dsp,mblk_t * mp)1059 proto_capability_req(dld_str_t *dsp, mblk_t *mp)
1060 {
1061 dl_capability_req_t *dlp = (dl_capability_req_t *)mp->b_rptr;
1062 dl_capability_sub_t *sp;
1063 size_t size, len;
1064 offset_t off, end;
1065 t_uscalar_t dl_err;
1066 queue_t *q = dsp->ds_wq;
1067
1068 if (MBLKL(mp) < sizeof (dl_capability_req_t)) {
1069 dl_err = DL_BADPRIM;
1070 goto failed;
1071 }
1072
1073 if (dsp->ds_dlstate == DL_UNATTACHED ||
1074 DL_ACK_PENDING(dsp->ds_dlstate)) {
1075 dl_err = DL_OUTSTATE;
1076 goto failed;
1077 }
1078
1079 /*
1080 * This request is overloaded. If there are no requested capabilities
1081 * then we just want to acknowledge with all the capabilities we
1082 * support. Otherwise we enable the set of capabilities requested.
1083 */
1084 if (dlp->dl_sub_length == 0) {
1085 proto_capability_advertise(dsp, mp);
1086 return;
1087 }
1088
1089 if (!MBLKIN(mp, dlp->dl_sub_offset, dlp->dl_sub_length)) {
1090 dl_err = DL_BADPRIM;
1091 goto failed;
1092 }
1093
1094 dlp->dl_primitive = DL_CAPABILITY_ACK;
1095
1096 off = dlp->dl_sub_offset;
1097 len = dlp->dl_sub_length;
1098
1099 /*
1100 * Walk the list of capabilities to be enabled.
1101 */
1102 for (end = off + len; off < end; ) {
1103 sp = (dl_capability_sub_t *)(mp->b_rptr + off);
1104 size = sizeof (dl_capability_sub_t) + sp->dl_length;
1105
1106 if (off + size > end ||
1107 !IS_P2ALIGNED(off, sizeof (uint32_t))) {
1108 dl_err = DL_BADPRIM;
1109 goto failed;
1110 }
1111
1112 switch (sp->dl_cap) {
1113 /*
1114 * TCP/IP checksum offload to hardware.
1115 */
1116 case DL_CAPAB_HCKSUM: {
1117 dl_capab_hcksum_t *hcksump;
1118 dl_capab_hcksum_t hcksum;
1119
1120 hcksump = (dl_capab_hcksum_t *)&sp[1];
1121 /*
1122 * Copy for alignment.
1123 */
1124 bcopy(hcksump, &hcksum, sizeof (dl_capab_hcksum_t));
1125 dlcapabsetqid(&(hcksum.hcksum_mid), dsp->ds_rq);
1126 bcopy(&hcksum, hcksump, sizeof (dl_capab_hcksum_t));
1127 break;
1128 }
1129
1130 case DL_CAPAB_DLD: {
1131 dl_capab_dld_t *dldp;
1132 dl_capab_dld_t dld;
1133
1134 dldp = (dl_capab_dld_t *)&sp[1];
1135 /*
1136 * Copy for alignment.
1137 */
1138 bcopy(dldp, &dld, sizeof (dl_capab_dld_t));
1139 dlcapabsetqid(&(dld.dld_mid), dsp->ds_rq);
1140 bcopy(&dld, dldp, sizeof (dl_capab_dld_t));
1141 break;
1142 }
1143 default:
1144 break;
1145 }
1146 off += size;
1147 }
1148 qreply(q, mp);
1149 return;
1150 failed:
1151 dlerrorack(q, mp, DL_CAPABILITY_REQ, dl_err, 0);
1152 }
1153
1154 /*
1155 * DL_NOTIFY_REQ
1156 */
1157 static void
proto_notify_req(dld_str_t * dsp,mblk_t * mp)1158 proto_notify_req(dld_str_t *dsp, mblk_t *mp)
1159 {
1160 dl_notify_req_t *dlp = (dl_notify_req_t *)mp->b_rptr;
1161 t_uscalar_t dl_err;
1162 queue_t *q = dsp->ds_wq;
1163 uint_t note =
1164 DL_NOTE_PROMISC_ON_PHYS |
1165 DL_NOTE_PROMISC_OFF_PHYS |
1166 DL_NOTE_PHYS_ADDR |
1167 DL_NOTE_LINK_UP |
1168 DL_NOTE_LINK_DOWN |
1169 DL_NOTE_CAPAB_RENEG |
1170 DL_NOTE_FASTPATH_FLUSH |
1171 DL_NOTE_SPEED |
1172 DL_NOTE_SDU_SIZE|
1173 DL_NOTE_SDU_SIZE2|
1174 DL_NOTE_ALLOWED_IPS;
1175
1176 if (MBLKL(mp) < sizeof (dl_notify_req_t)) {
1177 dl_err = DL_BADPRIM;
1178 goto failed;
1179 }
1180
1181 if (dsp->ds_dlstate == DL_UNATTACHED ||
1182 DL_ACK_PENDING(dsp->ds_dlstate)) {
1183 dl_err = DL_OUTSTATE;
1184 goto failed;
1185 }
1186
1187 note &= ~(mac_no_notification(dsp->ds_mh));
1188
1189 /*
1190 * Cache the notifications that are being enabled.
1191 */
1192 dsp->ds_notifications = dlp->dl_notifications & note;
1193 /*
1194 * The ACK carries all notifications regardless of which set is
1195 * being enabled.
1196 */
1197 dlnotifyack(q, mp, note);
1198
1199 /*
1200 * Generate DL_NOTIFY_IND messages for each enabled notification.
1201 */
1202 if (dsp->ds_notifications != 0) {
1203 dld_str_notify_ind(dsp);
1204 }
1205 return;
1206 failed:
1207 dlerrorack(q, mp, DL_NOTIFY_REQ, dl_err, 0);
1208 }
1209
1210 /*
1211 * DL_UINTDATA_REQ
1212 */
1213 void
proto_unitdata_req(dld_str_t * dsp,mblk_t * mp)1214 proto_unitdata_req(dld_str_t *dsp, mblk_t *mp)
1215 {
1216 queue_t *q = dsp->ds_wq;
1217 dl_unitdata_req_t *dlp = (dl_unitdata_req_t *)mp->b_rptr;
1218 off_t off;
1219 size_t len, size;
1220 const uint8_t *addr;
1221 uint16_t sap;
1222 uint_t addr_length;
1223 mblk_t *bp, *payload;
1224 t_uscalar_t dl_err;
1225 uint_t max_sdu;
1226
1227 if (MBLKL(mp) < sizeof (dl_unitdata_req_t) || mp->b_cont == NULL) {
1228 dlerrorack(q, mp, DL_UNITDATA_REQ, DL_BADPRIM, 0);
1229 return;
1230 }
1231
1232 mutex_enter(&dsp->ds_lock);
1233 if (dsp->ds_dlstate != DL_IDLE) {
1234 mutex_exit(&dsp->ds_lock);
1235 dlerrorack(q, mp, DL_UNITDATA_REQ, DL_OUTSTATE, 0);
1236 return;
1237 }
1238 DLD_DATATHR_INC(dsp);
1239 mutex_exit(&dsp->ds_lock);
1240
1241 addr_length = dsp->ds_mip->mi_addr_length;
1242
1243 off = dlp->dl_dest_addr_offset;
1244 len = dlp->dl_dest_addr_length;
1245
1246 if (!MBLKIN(mp, off, len) || !IS_P2ALIGNED(off, sizeof (uint16_t))) {
1247 dl_err = DL_BADPRIM;
1248 goto failed;
1249 }
1250
1251 if (len != addr_length + sizeof (uint16_t)) {
1252 dl_err = DL_BADADDR;
1253 goto failed;
1254 }
1255
1256 addr = mp->b_rptr + off;
1257 sap = *(uint16_t *)(mp->b_rptr + off + addr_length);
1258
1259 /*
1260 * Check the length of the packet and the block types.
1261 */
1262 size = 0;
1263 payload = mp->b_cont;
1264 for (bp = payload; bp != NULL; bp = bp->b_cont) {
1265 if (DB_TYPE(bp) != M_DATA)
1266 goto baddata;
1267
1268 size += MBLKL(bp);
1269 }
1270
1271 mac_sdu_get(dsp->ds_mh, NULL, &max_sdu);
1272 if (size > max_sdu)
1273 goto baddata;
1274
1275 /*
1276 * Build a packet header.
1277 */
1278 if ((bp = dls_header(dsp, addr, sap, dlp->dl_priority.dl_max,
1279 &payload)) == NULL) {
1280 dl_err = DL_BADADDR;
1281 goto failed;
1282 }
1283
1284 /*
1285 * We no longer need the M_PROTO header, so free it.
1286 */
1287 freeb(mp);
1288
1289 /*
1290 * Transfer the checksum offload information if it is present.
1291 */
1292 mac_hcksum_clone(payload, bp);
1293
1294 /*
1295 * Link the payload onto the new header.
1296 */
1297 ASSERT(bp->b_cont == NULL);
1298 bp->b_cont = payload;
1299
1300 /*
1301 * No lock can be held across modules and putnext()'s,
1302 * which can happen here with the call from DLD_TX().
1303 */
1304 if (DLD_TX(dsp, bp, 0, 0) != 0) {
1305 /* flow-controlled */
1306 DLD_SETQFULL(dsp);
1307 }
1308 DLD_DATATHR_DCR(dsp);
1309 return;
1310
1311 failed:
1312 dlerrorack(q, mp, DL_UNITDATA_REQ, dl_err, 0);
1313 DLD_DATATHR_DCR(dsp);
1314 return;
1315
1316 baddata:
1317 dluderrorind(q, mp, (void *)addr, len, DL_BADDATA, 0);
1318 DLD_DATATHR_DCR(dsp);
1319 }
1320
1321 /*
1322 * DL_PASSIVE_REQ
1323 */
1324 static void
proto_passive_req(dld_str_t * dsp,mblk_t * mp)1325 proto_passive_req(dld_str_t *dsp, mblk_t *mp)
1326 {
1327 t_uscalar_t dl_err;
1328
1329 /*
1330 * If we've already become active by issuing an active primitive,
1331 * then it's too late to try to become passive.
1332 */
1333 if (dsp->ds_passivestate == DLD_ACTIVE) {
1334 dl_err = DL_OUTSTATE;
1335 goto failed;
1336 }
1337
1338 if (MBLKL(mp) < sizeof (dl_passive_req_t)) {
1339 dl_err = DL_BADPRIM;
1340 goto failed;
1341 }
1342
1343 dsp->ds_passivestate = DLD_PASSIVE;
1344 dlokack(dsp->ds_wq, mp, DL_PASSIVE_REQ);
1345 return;
1346 failed:
1347 dlerrorack(dsp->ds_wq, mp, DL_PASSIVE_REQ, dl_err, 0);
1348 }
1349
1350
1351 /*
1352 * Catch-all handler.
1353 */
1354 static void
proto_req(dld_str_t * dsp,mblk_t * mp)1355 proto_req(dld_str_t *dsp, mblk_t *mp)
1356 {
1357 union DL_primitives *dlp = (union DL_primitives *)mp->b_rptr;
1358
1359 dlerrorack(dsp->ds_wq, mp, dlp->dl_primitive, DL_UNSUPPORTED, 0);
1360 }
1361
1362 static int
dld_capab_perim(dld_str_t * dsp,void * data,uint_t flags)1363 dld_capab_perim(dld_str_t *dsp, void *data, uint_t flags)
1364 {
1365 switch (flags) {
1366 case DLD_ENABLE:
1367 mac_perim_enter_by_mh(dsp->ds_mh, (mac_perim_handle_t *)data);
1368 return (0);
1369
1370 case DLD_DISABLE:
1371 mac_perim_exit((mac_perim_handle_t)data);
1372 return (0);
1373
1374 case DLD_QUERY:
1375 return (mac_perim_held(dsp->ds_mh));
1376 }
1377 return (0);
1378 }
1379
1380 static int
dld_capab_direct(dld_str_t * dsp,void * data,uint_t flags)1381 dld_capab_direct(dld_str_t *dsp, void *data, uint_t flags)
1382 {
1383 dld_capab_direct_t *direct = data;
1384
1385 ASSERT(MAC_PERIM_HELD(dsp->ds_mh));
1386
1387 switch (flags) {
1388 case DLD_ENABLE:
1389 dls_rx_set(dsp, (dls_rx_t)direct->di_rx_cf,
1390 direct->di_rx_ch);
1391
1392 direct->di_tx_df = (uintptr_t)str_mdata_fastpath_put;
1393 direct->di_tx_dh = dsp;
1394 direct->di_tx_cb_df = (uintptr_t)mac_client_tx_notify;
1395 direct->di_tx_cb_dh = dsp->ds_mch;
1396 direct->di_tx_fctl_df = (uintptr_t)mac_tx_is_flow_blocked;
1397 direct->di_tx_fctl_dh = dsp->ds_mch;
1398
1399 dsp->ds_direct = B_TRUE;
1400
1401 return (0);
1402
1403 case DLD_DISABLE:
1404 dls_rx_set(dsp, (dsp->ds_mode == DLD_FASTPATH) ?
1405 dld_str_rx_fastpath : dld_str_rx_unitdata, (void *)dsp);
1406 dsp->ds_direct = B_FALSE;
1407
1408 return (0);
1409 }
1410 return (ENOTSUP);
1411 }
1412
1413 /*
1414 * This function is misnamed. All polling and fanouts are run out of
1415 * the lower MAC for VNICs and out of the MAC for NICs. The
1416 * availability of Rx rings and promiscous mode is taken care of
1417 * between the soft ring set (mac_srs), the Rx ring, and the SW
1418 * classifier. Fanout, if necessary, is done by the soft rings that
1419 * are part of the SRS. By default the SRS divvies up the packets
1420 * based on protocol: TCP, UDP, or Other (OTH).
1421 *
1422 * The SRS (or its associated soft rings) always store the ill_rx_ring
1423 * (the cookie returned when they registered with IP during plumb) as their
1424 * 2nd argument which is passed up as mac_resource_handle_t. The upcall
1425 * function and 1st argument is what the caller registered when they
1426 * called mac_rx_classify_flow_add() to register the flow. For VNIC,
1427 * the function is vnic_rx and argument is vnic_t. For regular NIC
1428 * case, it mac_rx_default and mac_handle_t. As explained above, the
1429 * SRS (or its soft ring) will add the ill_rx_ring (mac_resource_handle_t)
1430 * from its stored 2nd argument.
1431 */
1432 static int
dld_capab_poll_enable(dld_str_t * dsp,dld_capab_poll_t * poll)1433 dld_capab_poll_enable(dld_str_t *dsp, dld_capab_poll_t *poll)
1434 {
1435 if (dsp->ds_polling)
1436 return (EINVAL);
1437
1438 if ((dld_opt & DLD_OPT_NO_POLL) != 0 || dsp->ds_mode == DLD_RAW)
1439 return (ENOTSUP);
1440
1441 /*
1442 * Enable client polling if and only if DLS bypass is
1443 * possible. Some traffic requires DLS processing in the Rx
1444 * data path. In such a case we can neither allow the client
1445 * (IP) to directly poll the soft ring (since DLS processing
1446 * hasn't been done) nor can we allow DLS bypass.
1447 */
1448 if (!mac_rx_bypass_set(dsp->ds_mch, dsp->ds_rx, dsp->ds_rx_arg,
1449 dsp->ds_sap == ETHERTYPE_IPV6))
1450 return (ENOTSUP);
1451
1452 /*
1453 * Register soft ring resources. This will come in handy later if
1454 * the user decides to modify CPU bindings to use more CPUs for the
1455 * device in which case we will switch to fanout using soft rings.
1456 */
1457 mac_resource_set_common(dsp->ds_mch,
1458 (mac_resource_add_t)poll->poll_ring_add_cf,
1459 (mac_resource_remove_t)poll->poll_ring_remove_cf,
1460 (mac_resource_quiesce_t)poll->poll_ring_quiesce_cf,
1461 (mac_resource_restart_t)poll->poll_ring_restart_cf,
1462 (mac_resource_bind_t)poll->poll_ring_bind_cf,
1463 poll->poll_ring_ch);
1464
1465 mac_client_poll_enable(dsp->ds_mch);
1466
1467 dsp->ds_polling = B_TRUE;
1468 return (0);
1469 }
1470
1471 /* ARGSUSED */
1472 static int
dld_capab_poll_disable(dld_str_t * dsp,dld_capab_poll_t * poll)1473 dld_capab_poll_disable(dld_str_t *dsp, dld_capab_poll_t *poll)
1474 {
1475 if (!dsp->ds_polling)
1476 return (EINVAL);
1477
1478 mac_client_poll_disable(dsp->ds_mch);
1479 mac_resource_set(dsp->ds_mch, NULL, NULL);
1480
1481 dsp->ds_polling = B_FALSE;
1482 return (0);
1483 }
1484
1485 static int
dld_capab_poll(dld_str_t * dsp,void * data,uint_t flags)1486 dld_capab_poll(dld_str_t *dsp, void *data, uint_t flags)
1487 {
1488 dld_capab_poll_t *poll = data;
1489
1490 ASSERT(MAC_PERIM_HELD(dsp->ds_mh));
1491
1492 switch (flags) {
1493 case DLD_ENABLE:
1494 return (dld_capab_poll_enable(dsp, poll));
1495 case DLD_DISABLE:
1496 return (dld_capab_poll_disable(dsp, poll));
1497 }
1498 return (ENOTSUP);
1499 }
1500
1501 static int
dld_capab_lso(dld_str_t * dsp,void * data,uint_t flags)1502 dld_capab_lso(dld_str_t *dsp, void *data, uint_t flags)
1503 {
1504 dld_capab_lso_t *lso = data;
1505
1506 ASSERT(MAC_PERIM_HELD(dsp->ds_mh));
1507
1508 switch (flags) {
1509 case DLD_ENABLE: {
1510 mac_capab_lso_t mac_lso;
1511
1512 /*
1513 * Check if LSO is supported on this MAC & enable LSO
1514 * accordingly.
1515 */
1516 if (mac_capab_get(dsp->ds_mh, MAC_CAPAB_LSO, &mac_lso)) {
1517 lso->lso_max_tcpv4 = mac_lso.lso_basic_tcp_ipv4.lso_max;
1518 lso->lso_max_tcpv6 = mac_lso.lso_basic_tcp_ipv6.lso_max;
1519 lso->lso_flags = 0;
1520 /* translate the flag for mac clients */
1521 if ((mac_lso.lso_flags & LSO_TX_BASIC_TCP_IPV4) != 0)
1522 lso->lso_flags |= DLD_LSO_BASIC_TCP_IPV4;
1523 if ((mac_lso.lso_flags & LSO_TX_BASIC_TCP_IPV6) != 0)
1524 lso->lso_flags |= DLD_LSO_BASIC_TCP_IPV6;
1525 dsp->ds_lso = lso->lso_flags != 0;
1526 /*
1527 * DLS uses this to try and make sure that a raw ioctl
1528 * doesn't send too much data, but doesn't currently
1529 * check the actual SAP that is sending this (or that
1530 * it's TCP). So for now, just use the max value here.
1531 */
1532 dsp->ds_lso_max = MAX(lso->lso_max_tcpv4,
1533 lso->lso_max_tcpv6);
1534 } else {
1535 dsp->ds_lso = B_FALSE;
1536 dsp->ds_lso_max = 0;
1537 return (ENOTSUP);
1538 }
1539 return (0);
1540 }
1541 case DLD_DISABLE: {
1542 dsp->ds_lso = B_FALSE;
1543 dsp->ds_lso_max = 0;
1544 return (0);
1545 }
1546 }
1547 return (ENOTSUP);
1548 }
1549
1550 static int
dld_capab(dld_str_t * dsp,uint_t type,void * data,uint_t flags)1551 dld_capab(dld_str_t *dsp, uint_t type, void *data, uint_t flags)
1552 {
1553 int err;
1554
1555 /*
1556 * Don't enable direct callback capabilities unless the caller is
1557 * the IP client. When a module is inserted in a stream (_I_INSERT)
1558 * the stack initiates capability disable, but due to races, the
1559 * module insertion may complete before the capability disable
1560 * completes. So we limit the check to DLD_ENABLE case.
1561 */
1562 if ((flags == DLD_ENABLE && type != DLD_CAPAB_PERIM) &&
1563 (!(dsp->ds_sap == ETHERTYPE_IP || dsp->ds_sap == ETHERTYPE_IPV6) ||
1564 !check_mod_above(dsp->ds_rq, "ip"))) {
1565 return (ENOTSUP);
1566 }
1567
1568 switch (type) {
1569 case DLD_CAPAB_DIRECT:
1570 err = dld_capab_direct(dsp, data, flags);
1571 break;
1572
1573 case DLD_CAPAB_POLL:
1574 err = dld_capab_poll(dsp, data, flags);
1575 break;
1576
1577 case DLD_CAPAB_PERIM:
1578 err = dld_capab_perim(dsp, data, flags);
1579 break;
1580
1581 case DLD_CAPAB_LSO:
1582 err = dld_capab_lso(dsp, data, flags);
1583 break;
1584
1585 default:
1586 err = ENOTSUP;
1587 break;
1588 }
1589
1590 return (err);
1591 }
1592
1593 /*
1594 * DL_CAPABILITY_ACK/DL_ERROR_ACK
1595 */
1596 static void
proto_capability_advertise(dld_str_t * dsp,mblk_t * mp)1597 proto_capability_advertise(dld_str_t *dsp, mblk_t *mp)
1598 {
1599 dl_capability_ack_t *dlap;
1600 dl_capability_sub_t *dlsp;
1601 size_t subsize;
1602 dl_capab_dld_t dld;
1603 dl_capab_hcksum_t hcksum;
1604 dl_capab_zerocopy_t zcopy;
1605 dl_capab_vrrp_t vrrp;
1606 mac_capab_vrrp_t vrrp_capab;
1607 uint8_t *ptr;
1608 queue_t *q = dsp->ds_wq;
1609 mblk_t *mp1;
1610 boolean_t hcksum_capable = B_FALSE;
1611 boolean_t zcopy_capable = B_FALSE;
1612 boolean_t dld_capable = B_FALSE;
1613 boolean_t vrrp_capable = B_FALSE;
1614
1615 /*
1616 * Initially assume no capabilities.
1617 */
1618 subsize = 0;
1619
1620 /*
1621 * Check if checksum offload is supported on this MAC.
1622 */
1623 bzero(&hcksum, sizeof (dl_capab_hcksum_t));
1624 if (mac_capab_get(dsp->ds_mh, MAC_CAPAB_HCKSUM,
1625 &hcksum.hcksum_txflags)) {
1626 if (hcksum.hcksum_txflags != 0) {
1627 hcksum_capable = B_TRUE;
1628 subsize += sizeof (dl_capability_sub_t) +
1629 sizeof (dl_capab_hcksum_t);
1630 }
1631 }
1632
1633 /*
1634 * Check if zerocopy is supported on this interface.
1635 * If advertising DL_CAPAB_ZEROCOPY has not been explicitly disabled
1636 * then reserve space for that capability.
1637 */
1638 if (!mac_capab_get(dsp->ds_mh, MAC_CAPAB_NO_ZCOPY, NULL) &&
1639 !(dld_opt & DLD_OPT_NO_ZEROCOPY)) {
1640 zcopy_capable = B_TRUE;
1641 subsize += sizeof (dl_capability_sub_t) +
1642 sizeof (dl_capab_zerocopy_t);
1643 }
1644
1645 /*
1646 * Direct capability negotiation interface between IP and DLD
1647 */
1648 if ((dsp->ds_sap == ETHERTYPE_IP || dsp->ds_sap == ETHERTYPE_IPV6) &&
1649 check_mod_above(dsp->ds_rq, "ip")) {
1650 dld_capable = B_TRUE;
1651 subsize += sizeof (dl_capability_sub_t) +
1652 sizeof (dl_capab_dld_t);
1653 }
1654
1655 /*
1656 * Check if vrrp is supported on this interface. If so, reserve
1657 * space for that capability.
1658 */
1659 if (mac_capab_get(dsp->ds_mh, MAC_CAPAB_VRRP, &vrrp_capab)) {
1660 vrrp_capable = B_TRUE;
1661 subsize += sizeof (dl_capability_sub_t) +
1662 sizeof (dl_capab_vrrp_t);
1663 }
1664
1665 /*
1666 * If there are no capabilities to advertise or if we
1667 * can't allocate a response, send a DL_ERROR_ACK.
1668 */
1669 if ((mp1 = reallocb(mp,
1670 sizeof (dl_capability_ack_t) + subsize, 0)) == NULL) {
1671 dlerrorack(q, mp, DL_CAPABILITY_REQ, DL_NOTSUPPORTED, 0);
1672 return;
1673 }
1674
1675 mp = mp1;
1676 DB_TYPE(mp) = M_PROTO;
1677 mp->b_wptr = mp->b_rptr + sizeof (dl_capability_ack_t) + subsize;
1678 bzero(mp->b_rptr, MBLKL(mp));
1679 dlap = (dl_capability_ack_t *)mp->b_rptr;
1680 dlap->dl_primitive = DL_CAPABILITY_ACK;
1681 dlap->dl_sub_offset = sizeof (dl_capability_ack_t);
1682 dlap->dl_sub_length = subsize;
1683 ptr = (uint8_t *)&dlap[1];
1684
1685 /*
1686 * TCP/IP checksum offload.
1687 */
1688 if (hcksum_capable) {
1689 dlsp = (dl_capability_sub_t *)ptr;
1690
1691 dlsp->dl_cap = DL_CAPAB_HCKSUM;
1692 dlsp->dl_length = sizeof (dl_capab_hcksum_t);
1693 ptr += sizeof (dl_capability_sub_t);
1694
1695 hcksum.hcksum_version = HCKSUM_VERSION_1;
1696 dlcapabsetqid(&(hcksum.hcksum_mid), dsp->ds_rq);
1697 bcopy(&hcksum, ptr, sizeof (dl_capab_hcksum_t));
1698 ptr += sizeof (dl_capab_hcksum_t);
1699 }
1700
1701 /*
1702 * Zero copy
1703 */
1704 if (zcopy_capable) {
1705 dlsp = (dl_capability_sub_t *)ptr;
1706
1707 dlsp->dl_cap = DL_CAPAB_ZEROCOPY;
1708 dlsp->dl_length = sizeof (dl_capab_zerocopy_t);
1709 ptr += sizeof (dl_capability_sub_t);
1710
1711 bzero(&zcopy, sizeof (dl_capab_zerocopy_t));
1712 zcopy.zerocopy_version = ZEROCOPY_VERSION_1;
1713 zcopy.zerocopy_flags = DL_CAPAB_VMSAFE_MEM;
1714
1715 dlcapabsetqid(&(zcopy.zerocopy_mid), dsp->ds_rq);
1716 bcopy(&zcopy, ptr, sizeof (dl_capab_zerocopy_t));
1717 ptr += sizeof (dl_capab_zerocopy_t);
1718 }
1719
1720 /*
1721 * VRRP capability negotiation
1722 */
1723 if (vrrp_capable) {
1724 dlsp = (dl_capability_sub_t *)ptr;
1725 dlsp->dl_cap = DL_CAPAB_VRRP;
1726 dlsp->dl_length = sizeof (dl_capab_vrrp_t);
1727 ptr += sizeof (dl_capability_sub_t);
1728
1729 bzero(&vrrp, sizeof (dl_capab_vrrp_t));
1730 vrrp.vrrp_af = vrrp_capab.mcv_af;
1731 bcopy(&vrrp, ptr, sizeof (dl_capab_vrrp_t));
1732 ptr += sizeof (dl_capab_vrrp_t);
1733 }
1734
1735 /*
1736 * Direct capability negotiation interface between IP and DLD.
1737 * Refer to dld.h for details.
1738 */
1739 if (dld_capable) {
1740 dlsp = (dl_capability_sub_t *)ptr;
1741 dlsp->dl_cap = DL_CAPAB_DLD;
1742 dlsp->dl_length = sizeof (dl_capab_dld_t);
1743 ptr += sizeof (dl_capability_sub_t);
1744
1745 bzero(&dld, sizeof (dl_capab_dld_t));
1746 dld.dld_version = DLD_CURRENT_VERSION;
1747 dld.dld_capab = (uintptr_t)dld_capab;
1748 dld.dld_capab_handle = (uintptr_t)dsp;
1749
1750 dlcapabsetqid(&(dld.dld_mid), dsp->ds_rq);
1751 bcopy(&dld, ptr, sizeof (dl_capab_dld_t));
1752 ptr += sizeof (dl_capab_dld_t);
1753 }
1754
1755 ASSERT(ptr == mp->b_rptr + sizeof (dl_capability_ack_t) + subsize);
1756 qreply(q, mp);
1757 }
1758
1759 /*
1760 * Disable any enabled capabilities.
1761 */
1762 void
dld_capabilities_disable(dld_str_t * dsp)1763 dld_capabilities_disable(dld_str_t *dsp)
1764 {
1765 if (dsp->ds_polling)
1766 (void) dld_capab_poll_disable(dsp, NULL);
1767 }
1768