1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24
25 /*
26 * Data-Link Driver
27 */
28
29 #include <inet/common.h>
30 #include <sys/strsubr.h>
31 #include <sys/stropts.h>
32 #include <sys/strsun.h>
33 #include <sys/vlan.h>
34 #include <sys/dld_impl.h>
35 #include <sys/cpuvar.h>
36 #include <sys/callb.h>
37 #include <sys/list.h>
38 #include <sys/mac_client.h>
39 #include <sys/mac_client_priv.h>
40 #include <sys/mac_flow.h>
41
42 static int str_constructor(void *, void *, int);
43 static void str_destructor(void *, void *);
44 static mblk_t *str_unitdata_ind(dld_str_t *, mblk_t *, boolean_t);
45 static void str_notify_promisc_on_phys(dld_str_t *);
46 static void str_notify_promisc_off_phys(dld_str_t *);
47 static void str_notify_phys_addr(dld_str_t *, uint_t, const uint8_t *);
48 static void str_notify_link_up(dld_str_t *);
49 static void str_notify_link_down(dld_str_t *);
50 static void str_notify_capab_reneg(dld_str_t *);
51 static void str_notify_speed(dld_str_t *, uint32_t);
52
53 static void ioc_native(dld_str_t *, mblk_t *);
54 static void ioc_margin(dld_str_t *, mblk_t *);
55 static void ioc_raw(dld_str_t *, mblk_t *);
56 static void ioc_fast(dld_str_t *, mblk_t *);
57 static void ioc_lowlink(dld_str_t *, mblk_t *);
58 static void ioc(dld_str_t *, mblk_t *);
59 static void dld_ioc(dld_str_t *, mblk_t *);
60 static void dld_wput_nondata(dld_str_t *, mblk_t *);
61
62 static void str_mdata_raw_put(dld_str_t *, mblk_t *);
63 static mblk_t *i_dld_ether_header_update_tag(mblk_t *, uint_t, uint16_t,
64 link_tagmode_t);
65 static mblk_t *i_dld_ether_header_strip_tag(mblk_t *, boolean_t);
66
67 static uint32_t str_count;
68 static kmem_cache_t *str_cachep;
69 static mod_hash_t *str_hashp;
70
71 #define STR_HASHSZ 64
72 #define STR_HASH_KEY(key) ((mod_hash_key_t)(uintptr_t)(key))
73
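/* Control-message processing is dispatched onto the system taskq. */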
74 #define dld_taskq system_taskq
75
76 static kmutex_t dld_taskq_lock;
77 static kcondvar_t dld_taskq_cv;
78 static list_t dld_taskq_list; /* List of dld_str_t */
79 boolean_t dld_taskq_quit;
80 boolean_t dld_taskq_done;
81
82 static void dld_taskq_dispatch(void);
83
84 /*
85 * Some notes on entry points, flow-control, queueing.
86 *
87 * This driver exports the traditional STREAMS put entry point as well as
88 * the non-STREAMS fast-path transmit routine which is provided to IP via
89 * the DL_CAPAB_POLL negotiation. The put procedure handles all control
90 * and data operations, while the fast-path routine deals only with M_DATA
91 * fast-path packets. Regardless of the entry point, all outbound packets
92 * will end up in DLD_TX(), where they will be delivered to the MAC layer.
93 *
94 * The transmit logic operates in the following way: All packets coming
95 * into DLD will be sent to the MAC layer through DLD_TX(). Flow-control
96 * happens when the MAC layer indicates the packets couldn't be
97 * transmitted due to 1) lack of resources (e.g. running out of
98 * descriptors), or 2) reaching the allowed bandwidth limit for this
99 * particular flow. The indication comes in the form of a Tx cookie that
100 * identifies the blocked ring. In such a case, DLD will place a
101 * dummy message on its write-side STREAMS queue so that the queue is
102 * marked as "full". Any subsequent packets arriving at the driver will
103 * still be sent to the MAC layer, where they either get queued in the Tx
104 * SRS or discarded if the queue limit is exceeded. The write-side STREAMS
105 * queue gets enabled when the MAC layer notifies DLD through MAC_NOTE_TX.
106 * When the write service procedure runs, it will remove the dummy
107 * message from the write-side STREAMS queue; in effect this will trigger
108 * backenabling. The sizes of q_hiwat and q_lowat are set to 1 and 0,
109 * respectively, for the reasons above.
110 *
111 * All non-data operations, both DLPI and ioctls, are single-threaded on a per
112 * dld_str_t endpoint. This is done using a taskq so that the control operation
113 * has kernel context and can cv_wait for resources. In addition, all set-type
114 * operations that involve mac level state modification are serialized on a
115 * per mac end point using the perimeter mechanism provided by the mac layer.
116 * This serializes all mac clients trying to modify a single mac end point over
117 * the entire sequence of mac calls made by that client as an atomic unit. The
118 * mac framework locking is described in mac.c. A critical element is that
119 * DLD/DLS does not hold any locks across the mac perimeter.
120 *
121 * dld_finddevinfo() returns the dev_info_t * corresponding to a particular
122 * dev_t. It searches str_hashp (a table of dld_str_t's) for streams that
123 * match dev_t. If a stream is found and it is attached, its dev_info_t *
124 * is returned. If the mac handle is non-null, it can be safely accessed
125 * below. The mac handle won't be freed until the mac_unregister which
126 * won't happen until the driver detaches. The DDI framework ensures that
127 * the detach won't happen while a getinfo is in progress.
128 */
129 typedef struct i_dld_str_state_s {
130 major_t ds_major;
131 minor_t ds_minor;
132 int ds_instance;
133 dev_info_t *ds_dip;
134 } i_dld_str_state_t;
135
136 /* ARGSUSED */
137 static uint_t
138 i_dld_str_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg)
139 {
140 i_dld_str_state_t *statep = arg;
141 dld_str_t *dsp = (dld_str_t *)val;
142 mac_handle_t mh;
143
144 if (statep->ds_major != dsp->ds_major)
145 return (MH_WALK_CONTINUE);
146
147 ASSERT(statep->ds_minor != 0);
148 mh = dsp->ds_mh;
149
150 if (statep->ds_minor == dsp->ds_minor) {
151 /*
152 * Clone: a clone minor is unique. We can terminate the
153 * walk if we find a matching stream -- even if we fail
154 * to obtain the devinfo.
155 */
156 if (mh != NULL) {
157 statep->ds_dip = mac_devinfo_get(mh);
158 statep->ds_instance = DLS_MINOR2INST(mac_minor(mh));
159 }
160 return (MH_WALK_TERMINATE);
161 }
162 return (MH_WALK_CONTINUE);
163 }
164
165 static dev_info_t *
166 dld_finddevinfo(dev_t dev)
167 {
168 dev_info_t *dip;
169 i_dld_str_state_t state;
170
171 if (getminor(dev) == 0)
172 return (NULL);
173
174 /*
175 * See if it's a minor node of a link
176 */
177 if ((dip = dls_link_devinfo(dev)) != NULL)
178 return (dip);
179
180 state.ds_minor = getminor(dev);
181 state.ds_major = getmajor(dev);
182 state.ds_dip = NULL;
183 state.ds_instance = -1;
184
185 mod_hash_walk(str_hashp, i_dld_str_walker, &state);
186 return (state.ds_dip);
187 }
188
189 int
190 dld_devt_to_instance(dev_t dev)
191 {
192 minor_t minor;
193 i_dld_str_state_t state;
194
195 /*
196 * GLDv3 numbers the DLPI style 1 node as the instance number + 1.
197 * Minor number 0 is reserved for the DLPI style 2 unattached
198 * node.
199 */
200
201 if ((minor = getminor(dev)) == 0)
202 return (-1);
203
204 /*
205 * Check for unopened style 1 node.
206 * Note that this doesn't *necessarily* work for legacy
207 * devices, but this code is only called within the
208 * getinfo(9e) implementation for true GLDv3 devices, so it
209 * doesn't matter.
210 */
211 if (minor > 0 && minor <= DLS_MAX_MINOR) {
212 return (DLS_MINOR2INST(minor));
213 }
214
215 state.ds_minor = getminor(dev);
216 state.ds_major = getmajor(dev);
217 state.ds_dip = NULL;
218 state.ds_instance = -1;
219
220 mod_hash_walk(str_hashp, i_dld_str_walker, &state);
221 return (state.ds_instance);
222 }
223
224 /*
225 * devo_getinfo: getinfo(9e)
226 *
227 * NB: This may be called for a provider before the provider's
228 * instances are attached. Hence, if a particular provider needs a
229 * special mapping (the mac instance != ddi_get_instance()), then it
230 * may need to provide its own implementation using the
231 * mac_devt_to_instance() function, and translating the returned mac
232 * instance to a devinfo instance. For dev_t's where the minor number
233 * is too large (i.e. > MAC_MAX_MINOR), the provider can call this
234 * function indirectly via the mac_getinfo() function.
235 */
236 /*ARGSUSED*/
237 int
238 dld_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **resp)
239 {
240 dev_info_t *devinfo;
241 minor_t minor = getminor((dev_t)arg);
242 int rc = DDI_FAILURE;
243
244 switch (cmd) {
245 case DDI_INFO_DEVT2DEVINFO:
246 if ((devinfo = dld_finddevinfo((dev_t)arg)) != NULL) {
247 *(dev_info_t **)resp = devinfo;
248 rc = DDI_SUCCESS;
249 }
250 break;
251 case DDI_INFO_DEVT2INSTANCE:
252 if (minor > 0 && minor <= DLS_MAX_MINOR) {
253 *resp = (void *)(uintptr_t)DLS_MINOR2INST(minor);
254 rc = DDI_SUCCESS;
255 } else if (minor > DLS_MAX_MINOR &&
256 (devinfo = dld_finddevinfo((dev_t)arg)) != NULL) {
257 *resp = (void *)(uintptr_t)ddi_get_instance(devinfo);
258 rc = DDI_SUCCESS;
259 }
260 break;
261 }
262 return (rc);
263 }
264
265 void *
266 dld_str_private(queue_t *q)
267 {
268 return (((dld_str_t *)(q->q_ptr))->ds_private);
269 }
270
271 int
272 dld_str_open(queue_t *rq, dev_t *devp, void *private)
273 {
274 dld_str_t *dsp;
275 major_t major;
276 minor_t minor;
277 int err;
278
279 major = getmajor(*devp);
280 minor = getminor(*devp);
281
282 /*
283 * Create a new dld_str_t for the stream. This will grab a new minor
284 * number that will be handed back in the cloned dev_t. Creation may
285 * fail if we can't allocate the dummy mblk used for flow-control.
286 */
287 dsp = dld_str_create(rq, DLD_DLPI, major,
288 ((minor == 0) ? DL_STYLE2 : DL_STYLE1));
289 if (dsp == NULL)
290 return (ENOSR);
291
292 ASSERT(dsp->ds_dlstate == DL_UNATTACHED);
293 dsp->ds_private = private;
294 if (minor != 0) {
295 /*
296 * Style 1 open
297 */
298 if ((err = dld_str_attach(dsp, (t_uscalar_t)minor - 1)) != 0)
299 goto failed;
300
301 ASSERT(dsp->ds_dlstate == DL_UNBOUND);
302 } else {
303 (void) qassociate(rq, -1);
304 }
305
306 /*
307 * Enable the queue srv(9e) routine.
308 */
309 qprocson(rq);
310
311 /*
312 * Construct a cloned dev_t to hand back.
313 */
314 *devp = makedevice(getmajor(*devp), dsp->ds_minor);
315 return (0);
316
317 failed:
318 dld_str_destroy(dsp);
319 return (err);
320 }
321
322 int
323 dld_str_close(queue_t *rq)
324 {
325 dld_str_t *dsp = rq->q_ptr;
326
327 /*
328 * All modules on top have been popped off. So there can't be any
329 * threads from the top.
330 */
331 ASSERT(dsp->ds_datathr_cnt == 0);
332
333 /*
334 * Wait until pending DLPI requests are processed.
335 */
336 mutex_enter(&dsp->ds_lock);
337 while (dsp->ds_dlpi_pending)
338 cv_wait(&dsp->ds_dlpi_pending_cv, &dsp->ds_lock);
339 mutex_exit(&dsp->ds_lock);
340
341
342 /*
343 * This stream was open to a provider node. Check to see
344 * if it has been cleanly shut down.
345 */
346 if (dsp->ds_dlstate != DL_UNATTACHED) {
347 /*
348 * The stream is either open to a style 1 provider or
349 * this is not a clean shutdown. Detach from the PPA.
350 * (This is still ok even in the style 1 case).
351 */
352 dld_str_detach(dsp);
353 }
354
355 dld_str_destroy(dsp);
356 return (0);
357 }
358
359 /*
360 * qi_qopen: open(9e)
361 */
362 /*ARGSUSED*/
363 int
364 dld_open(queue_t *rq, dev_t *devp, int flag, int sflag, cred_t *credp)
365 {
366 if (sflag == MODOPEN)
367 return (ENOTSUP);
368
369 /*
370 * This is a cloning driver and therefore each queue should only
371 * ever get opened once.
372 */
373 if (rq->q_ptr != NULL)
374 return (EBUSY);
375
376 return (dld_str_open(rq, devp, NULL));
377 }
378
379 /*
380 * qi_qclose: close(9e)
381 */
382 int
383 dld_close(queue_t *rq)
384 {
385 /*
386 * Disable the queue srv(9e) routine.
387 */
388 qprocsoff(rq);
389
390 return (dld_str_close(rq));
391 }
392
393 /*
394 * qi_qputp: put(9e)
395 */
396 void
397 dld_wput(queue_t *wq, mblk_t *mp)
398 {
399 dld_str_t *dsp = (dld_str_t *)wq->q_ptr;
400 dld_str_mode_t mode;
401
402 switch (DB_TYPE(mp)) {
403 case M_DATA:
404 mutex_enter(&dsp->ds_lock);
405 mode = dsp->ds_mode;
406 if ((dsp->ds_dlstate != DL_IDLE) ||
407 (mode != DLD_FASTPATH && mode != DLD_RAW)) {
408 mutex_exit(&dsp->ds_lock);
409 freemsg(mp);
410 break;
411 }
412
413 DLD_DATATHR_INC(dsp);
414 mutex_exit(&dsp->ds_lock);
415 if (mode == DLD_FASTPATH) {
416 if (dsp->ds_mip->mi_media == DL_ETHER &&
417 (MBLKL(mp) < sizeof (struct ether_header))) {
418 freemsg(mp);
419 } else {
420 (void) str_mdata_fastpath_put(dsp, mp, 0, 0);
421 }
422 } else {
423 str_mdata_raw_put(dsp, mp);
424 }
425 DLD_DATATHR_DCR(dsp);
426 break;
427 case M_PROTO:
428 case M_PCPROTO: {
429 t_uscalar_t prim;
430
431 if (MBLKL(mp) < sizeof (t_uscalar_t))
432 break;
433
434 prim = ((union DL_primitives *)mp->b_rptr)->dl_primitive;
435
436 if (prim == DL_UNITDATA_REQ) {
437 proto_unitdata_req(dsp, mp);
438 } else {
439 dld_wput_nondata(dsp, mp);
440 }
441 break;
442 }
443
444 case M_IOCTL:
445 dld_wput_nondata(dsp, mp);
446 break;
447
448 case M_FLUSH:
449 if (*mp->b_rptr & FLUSHW) {
450 DLD_CLRQFULL(dsp);
451 *mp->b_rptr &= ~FLUSHW;
452 }
453
454 if (*mp->b_rptr & FLUSHR) {
455 qreply(wq, mp);
456 } else {
457 freemsg(mp);
458 }
459 break;
460
461 default:
462 freemsg(mp);
463 break;
464 }
465 }
466
467 /*
468 * qi_srvp: srv(9e)
469 */
470 void
471 dld_wsrv(queue_t *wq)
472 {
473 dld_str_t *dsp = wq->q_ptr;
474
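	/*
	 * The service procedure runs once the stream has been enabled again
	 * (see MAC_NOTE_TX in str_notify()). Remove the dummy flow-control
	 * mblk from the write-side queue, which in effect backenables the
	 * stream.
	 */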
475 DLD_CLRQFULL(dsp);
476 }
477
478 void
479 dld_init_ops(struct dev_ops *ops, const char *name)
480 {
481 struct streamtab *stream;
482 struct qinit *rq, *wq;
483 struct module_info *modinfo;
484
485 modinfo = kmem_zalloc(sizeof (struct module_info), KM_SLEEP);
486 modinfo->mi_idname = kmem_zalloc(FMNAMESZ, KM_SLEEP);
487 (void) snprintf(modinfo->mi_idname, FMNAMESZ, "%s", name);
488 modinfo->mi_minpsz = 0;
489 modinfo->mi_maxpsz = 64*1024;
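	/*
	 * A high-water mark of 1 and a low-water mark of 0 let the single
	 * dummy flow-control mblk mark the write-side queue as full; see
	 * the flow-control notes at the top of this file.
	 */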
490 modinfo->mi_hiwat = 1;
491 modinfo->mi_lowat = 0;
492
493 rq = kmem_zalloc(sizeof (struct qinit), KM_SLEEP);
494 rq->qi_qopen = dld_open;
495 rq->qi_qclose = dld_close;
496 rq->qi_minfo = modinfo;
497
498 wq = kmem_zalloc(sizeof (struct qinit), KM_SLEEP);
499 wq->qi_putp = (pfi_t)dld_wput;
500 wq->qi_srvp = (pfi_t)dld_wsrv;
501 wq->qi_minfo = modinfo;
502
503 stream = kmem_zalloc(sizeof (struct streamtab), KM_SLEEP);
504 stream->st_rdinit = rq;
505 stream->st_wrinit = wq;
506 ops->devo_cb_ops->cb_str = stream;
507
508 if (ops->devo_getinfo == NULL)
509 ops->devo_getinfo = &dld_getinfo;
510 }
511
512 void
513 dld_fini_ops(struct dev_ops *ops)
514 {
515 struct streamtab *stream;
516 struct qinit *rq, *wq;
517 struct module_info *modinfo;
518
519 stream = ops->devo_cb_ops->cb_str;
520 rq = stream->st_rdinit;
521 wq = stream->st_wrinit;
522 modinfo = rq->qi_minfo;
523 ASSERT(wq->qi_minfo == modinfo);
524
525 kmem_free(stream, sizeof (struct streamtab));
526 kmem_free(wq, sizeof (struct qinit));
527 kmem_free(rq, sizeof (struct qinit));
528 kmem_free(modinfo->mi_idname, FMNAMESZ);
529 kmem_free(modinfo, sizeof (struct module_info));
530 }
531
532 /*
533 * Initialize this module's data structures.
534 */
535 void
536 dld_str_init(void)
537 {
538 /*
539 * Create dld_str_t object cache.
540 */
541 str_cachep = kmem_cache_create("dld_str_cache", sizeof (dld_str_t),
542 0, str_constructor, str_destructor, NULL, NULL, NULL, 0);
543 ASSERT(str_cachep != NULL);
544
545 /*
546 * Create a hash table for maintaining dld_str_t's.
547 * The ds_minor field (the clone minor number) of a dld_str_t
548 * is used as a key for this hash table because this number is
549 * globally unique (allocated from "dls_minor_arena").
550 */
551 str_hashp = mod_hash_create_idhash("dld_str_hash", STR_HASHSZ,
552 mod_hash_null_valdtor);
553
554 mutex_init(&dld_taskq_lock, NULL, MUTEX_DRIVER, NULL);
555 cv_init(&dld_taskq_cv, NULL, CV_DRIVER, NULL);
556
557 dld_taskq_quit = B_FALSE;
558 dld_taskq_done = B_FALSE;
559 list_create(&dld_taskq_list, sizeof (dld_str_t),
560 offsetof(dld_str_t, ds_tqlist));
561 (void) thread_create(NULL, 0, dld_taskq_dispatch, NULL, 0,
562 &p0, TS_RUN, minclsyspri);
563 }
564
565 /*
566 * Tear down this module's data structures.
567 */
568 int
569 dld_str_fini(void)
570 {
571 /*
572 * Make sure that there are no objects in use.
573 */
574 if (str_count != 0)
575 return (EBUSY);
576
577 /*
578 * Ask the dld_taskq thread to quit and wait for it to be done
579 */
580 mutex_enter(&dld_taskq_lock);
581 dld_taskq_quit = B_TRUE;
582 cv_signal(&dld_taskq_cv);
583 while (!dld_taskq_done)
584 cv_wait(&dld_taskq_cv, &dld_taskq_lock);
585 mutex_exit(&dld_taskq_lock);
586 list_destroy(&dld_taskq_list);
587 /*
588 * Destroy object cache.
589 */
590 kmem_cache_destroy(str_cachep);
591 mod_hash_destroy_idhash(str_hashp);
592 return (0);
593 }
594
595 /*
596 * Create a new dld_str_t object.
597 */
598 dld_str_t *
599 dld_str_create(queue_t *rq, uint_t type, major_t major, t_uscalar_t style)
600 {
601 dld_str_t *dsp;
602 int err;
603
604 /*
605 * Allocate an object from the cache.
606 */
607 atomic_inc_32(&str_count);
608 dsp = kmem_cache_alloc(str_cachep, KM_SLEEP);
609
610 /*
611 * Allocate the dummy mblk for flow-control.
612 */
613 dsp->ds_tx_flow_mp = allocb(1, BPRI_HI);
614 if (dsp->ds_tx_flow_mp == NULL) {
615 kmem_cache_free(str_cachep, dsp);
616 atomic_dec_32(&str_count);
617 return (NULL);
618 }
619 dsp->ds_type = type;
620 dsp->ds_major = major;
621 dsp->ds_style = style;
622
623 /*
624 * Initialize the queue pointers.
625 */
626 ASSERT(RD(rq) == rq);
627 dsp->ds_rq = rq;
628 dsp->ds_wq = WR(rq);
629 rq->q_ptr = WR(rq)->q_ptr = (void *)dsp;
630
631 /*
632 * We want explicit control over our write-side STREAMS queue
633 * where the dummy mblk gets added/removed for flow-control.
634 */
635 noenable(WR(rq));
636
637 err = mod_hash_insert(str_hashp, STR_HASH_KEY(dsp->ds_minor),
638 (mod_hash_val_t)dsp);
639 ASSERT(err == 0);
640 return (dsp);
641 }
642
643 /*
644 * Destroy a dld_str_t object.
645 */
646 void
647 dld_str_destroy(dld_str_t *dsp)
648 {
649 queue_t *rq;
650 queue_t *wq;
651 mod_hash_val_t val;
652
653 /*
654 * Clear the queue pointers.
655 */
656 rq = dsp->ds_rq;
657 wq = dsp->ds_wq;
658 ASSERT(wq == WR(rq));
659 rq->q_ptr = wq->q_ptr = NULL;
660 dsp->ds_rq = dsp->ds_wq = NULL;
661
662 ASSERT(dsp->ds_dlstate == DL_UNATTACHED);
663 ASSERT(dsp->ds_sap == 0);
664 ASSERT(dsp->ds_mh == NULL);
665 ASSERT(dsp->ds_mch == NULL);
666 ASSERT(dsp->ds_promisc == 0);
667 ASSERT(dsp->ds_mph == NULL);
668 ASSERT(dsp->ds_mip == NULL);
669 ASSERT(dsp->ds_mnh == NULL);
670
671 ASSERT(dsp->ds_polling == B_FALSE);
672 ASSERT(dsp->ds_direct == B_FALSE);
673 ASSERT(dsp->ds_lso == B_FALSE);
674 ASSERT(dsp->ds_lso_max == 0);
675 ASSERT(dsp->ds_passivestate != DLD_ACTIVE);
676
677 /*
678 * Reinitialize all the flags.
679 */
680 dsp->ds_notifications = 0;
681 dsp->ds_passivestate = DLD_UNINITIALIZED;
682 dsp->ds_mode = DLD_UNITDATA;
683 dsp->ds_native = B_FALSE;
684 dsp->ds_nonip = B_FALSE;
685
686 ASSERT(dsp->ds_datathr_cnt == 0);
687 ASSERT(dsp->ds_pending_head == NULL);
688 ASSERT(dsp->ds_pending_tail == NULL);
689 ASSERT(!dsp->ds_dlpi_pending);
690
691 ASSERT(dsp->ds_dlp == NULL);
692 ASSERT(dsp->ds_dmap == NULL);
693 ASSERT(dsp->ds_rx == NULL);
694 ASSERT(dsp->ds_rx_arg == NULL);
695 ASSERT(dsp->ds_next == NULL);
696 ASSERT(dsp->ds_head == NULL);
697
698 /*
699 * Free the dummy mblk if it exists.
700 */
701 if (dsp->ds_tx_flow_mp != NULL) {
702 freeb(dsp->ds_tx_flow_mp);
703 dsp->ds_tx_flow_mp = NULL;
704 }
705
706 (void) mod_hash_remove(str_hashp, STR_HASH_KEY(dsp->ds_minor), &val);
707 ASSERT(dsp == (dld_str_t *)val);
708
709 /*
710 * Free the object back to the cache.
711 */
712 kmem_cache_free(str_cachep, dsp);
713 atomic_dec_32(&str_count);
714 }
715
716 /*
717 * kmem_cache constructor function: see kmem_cache_create(9f).
718 */
719 /*ARGSUSED*/
720 static int
721 str_constructor(void *buf, void *cdrarg, int kmflags)
722 {
723 dld_str_t *dsp = buf;
724
725 bzero(buf, sizeof (dld_str_t));
726
727 /*
728 * Allocate a new minor number.
729 */
730 if ((dsp->ds_minor = mac_minor_hold(kmflags == KM_SLEEP)) == 0)
731 return (-1);
732
733 /*
734 * Initialize the DLPI state machine.
735 */
736 dsp->ds_dlstate = DL_UNATTACHED;
737
738 mutex_init(&dsp->ds_lock, NULL, MUTEX_DRIVER, NULL);
739 cv_init(&dsp->ds_datathr_cv, NULL, CV_DRIVER, NULL);
740 cv_init(&dsp->ds_dlpi_pending_cv, NULL, CV_DRIVER, NULL);
741
742 return (0);
743 }
744
745 /*
746 * kmem_cache destructor function.
747 */
748 /*ARGSUSED*/
749 static void
750 str_destructor(void *buf, void *cdrarg)
751 {
752 dld_str_t *dsp = buf;
753
754 /*
755 * Release the minor number.
756 */
757 mac_minor_rele(dsp->ds_minor);
758
759 ASSERT(dsp->ds_tx_flow_mp == NULL);
760
761 mutex_destroy(&dsp->ds_lock);
762 cv_destroy(&dsp->ds_datathr_cv);
763 cv_destroy(&dsp->ds_dlpi_pending_cv);
764 }
765
766 /*
767 * Update the priority bits and VID (we may need to insert a tag if mp
768 * points to an untagged packet).
769 * If vid is VLAN_ID_NONE, use the VID encoded in the packet.
770 */
771 static mblk_t *
772 i_dld_ether_header_update_tag(mblk_t *mp, uint_t pri, uint16_t vid,
773 link_tagmode_t tagmode)
774 {
775 mblk_t *hmp;
776 struct ether_vlan_header *evhp;
777 struct ether_header *ehp;
778 uint16_t old_tci = 0;
779 size_t len;
780
781 ASSERT(pri != 0 || vid != VLAN_ID_NONE);
782
783 evhp = (struct ether_vlan_header *)mp->b_rptr;
784 if (ntohs(evhp->ether_tpid) == ETHERTYPE_VLAN) {
785 /*
786 * Tagged packet, update the priority bits.
787 */
788 len = sizeof (struct ether_vlan_header);
789
790 if ((DB_REF(mp) > 1) || (MBLKL(mp) < len)) {
791 /*
792 * In case some drivers only check the db_ref
793 * count of the first mblk, we pullup the
794 * message into a single mblk.
795 */
796 hmp = msgpullup(mp, -1);
797 if ((hmp == NULL) || (MBLKL(hmp) < len)) {
798 freemsg(hmp);
799 return (NULL);
800 } else {
801 freemsg(mp);
802 mp = hmp;
803 }
804 }
805
806 evhp = (struct ether_vlan_header *)mp->b_rptr;
807 old_tci = ntohs(evhp->ether_tci);
808 } else {
809 /*
810 * Untagged packet. Two factors will cause us to insert a
811 * VLAN header:
812 * - This is a VLAN link (vid is specified)
813 * - The link supports user priority tagging and the priority
814 * is non-zero.
815 */
816 if (vid == VLAN_ID_NONE && tagmode == LINK_TAGMODE_VLANONLY)
817 return (mp);
818
819 hmp = allocb(sizeof (struct ether_vlan_header), BPRI_MED);
820 if (hmp == NULL)
821 return (NULL);
822
823 evhp = (struct ether_vlan_header *)hmp->b_rptr;
824 ehp = (struct ether_header *)mp->b_rptr;
825
826 /*
827 * Copy the MAC addresses and typelen
828 */
829 bcopy(ehp, evhp, (ETHERADDRL * 2));
830 evhp->ether_type = ehp->ether_type;
831 evhp->ether_tpid = htons(ETHERTYPE_VLAN);
832
833 hmp->b_wptr += sizeof (struct ether_vlan_header);
834 mp->b_rptr += sizeof (struct ether_header);
835
836 /*
837 * Free the original message if it's now empty. Link the
838 * rest of the messages to the header message.
839 */
840 if (MBLKL(mp) == 0) {
841 hmp->b_cont = mp->b_cont;
842 freeb(mp);
843 } else {
844 hmp->b_cont = mp;
845 }
846 mp = hmp;
847 }
848
849 if (pri == 0)
850 pri = VLAN_PRI(old_tci);
851 if (vid == VLAN_ID_NONE)
852 vid = VLAN_ID(old_tci);
853 evhp->ether_tci = htons(VLAN_TCI(pri, VLAN_CFI(old_tci), vid));
854 return (mp);
855 }
856
857 /*
858 * M_DATA put (IP fast-path mode)
859 */
860 mac_tx_cookie_t
861 str_mdata_fastpath_put(dld_str_t *dsp, mblk_t *mp, uintptr_t f_hint,
862 uint16_t flag)
863 {
864 boolean_t is_ethernet = (dsp->ds_mip->mi_media == DL_ETHER);
865 mblk_t *newmp;
866 uint_t pri;
867 mac_tx_cookie_t cookie;
868
869 if (is_ethernet) {
870 /*
871 * Update the priority bits to the assigned priority.
872 */
873 pri = (VLAN_MBLKPRI(mp) == 0) ? dsp->ds_pri : VLAN_MBLKPRI(mp);
874
875 if (pri != 0) {
876 newmp = i_dld_ether_header_update_tag(mp, pri,
877 VLAN_ID_NONE, dsp->ds_dlp->dl_tagmode);
878 if (newmp == NULL)
879 goto discard;
880 mp = newmp;
881 }
882 }
883
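	/*
	 * A non-NULL Tx cookie means the MAC layer could not transmit the
	 * packet (flow-control); mark the write-side queue as full until
	 * MAC_NOTE_TX re-enables it.
	 */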
884 if ((cookie = DLD_TX(dsp, mp, f_hint, flag)) != NULL) {
885 DLD_SETQFULL(dsp);
886 }
887 return (cookie);
888
889 discard:
890 /* TODO: bump kstat? */
891 freemsg(mp);
892 return (NULL);
893 }
894
895 /*
896 * M_DATA put (DLIOCRAW mode)
897 */
898 static void
899 str_mdata_raw_put(dld_str_t *dsp, mblk_t *mp)
900 {
901 boolean_t is_ethernet = (dsp->ds_mip->mi_media == DL_ETHER);
902 mblk_t *bp, *newmp;
903 size_t size;
904 mac_header_info_t mhi;
905 uint_t pri, vid, dvid;
906 uint_t max_sdu;
907
908 /*
909 * Certain MAC type plugins provide an illusion for raw DLPI
910 * consumers. They pretend that the MAC layer is something that
911 * it's not for the benefit of observability tools. For example,
912 * mac_wifi pretends that it's Ethernet for such consumers.
913 * Here, unless native mode is enabled, we call into the MAC layer so
914 * that this illusion can be maintained. The plugin will optionally
915 * transform the MAC header here into something that can be passed
916 * down. The header goes from raw mode to "cooked" mode.
917 */
918 if (!dsp->ds_native) {
919 if ((newmp = mac_header_cook(dsp->ds_mh, mp)) == NULL)
920 goto discard;
921 mp = newmp;
922 }
923
924 size = MBLKL(mp);
925
926 /*
927 * Check that the packet is not too big and that any remaining
928 * fragment list is composed entirely of M_DATA messages. (We
929 * know the first fragment was M_DATA, otherwise we could not
930 * have got here.)
931 */
932 for (bp = mp->b_cont; bp != NULL; bp = bp->b_cont) {
933 if (DB_TYPE(bp) != M_DATA)
934 goto discard;
935 size += MBLKL(bp);
936 }
937
938 if (mac_vlan_header_info(dsp->ds_mh, mp, &mhi) != 0)
939 goto discard;
940
941 mac_sdu_get(dsp->ds_mh, NULL, &max_sdu);
942 /*
943 * If LSO is enabled, check the size against lso_max. Otherwise,
944 * compare the packet size with max_sdu.
945 */
946 max_sdu = dsp->ds_lso ? dsp->ds_lso_max : max_sdu;
947 if (size > max_sdu + mhi.mhi_hdrsize)
948 goto discard;
949
950 if (is_ethernet) {
951 dvid = mac_client_vid(dsp->ds_mch);
952
953 /*
954 * Discard the packet if this is a VLAN stream but the VID in
955 * the packet is not correct.
956 */
957 vid = VLAN_ID(mhi.mhi_tci);
958 if ((dvid != VLAN_ID_NONE) && (vid != VLAN_ID_NONE))
959 goto discard;
960
961 /*
962 * Discard the packet if this packet is a tagged packet
963 * but both pri and VID are 0.
964 */
965 pri = VLAN_PRI(mhi.mhi_tci);
966 if (mhi.mhi_istagged && !mhi.mhi_ispvid && pri == 0 &&
967 vid == VLAN_ID_NONE)
968 goto discard;
969
970 /*
971 * Update the priority bits to the per-stream priority if
972 * priority is not set in the packet. Update the VID for
973 * packets on a VLAN stream.
974 */
975 pri = (pri == 0) ? dsp->ds_pri : 0;
976 if ((pri != 0) || (dvid != VLAN_ID_NONE)) {
977 if ((newmp = i_dld_ether_header_update_tag(mp, pri,
978 dvid, dsp->ds_dlp->dl_tagmode)) == NULL) {
979 goto discard;
980 }
981 mp = newmp;
982 }
983 }
984
985 if (DLD_TX(dsp, mp, 0, 0) != NULL) {
986 /* Turn on flow-control for dld */
987 DLD_SETQFULL(dsp);
988 }
989 return;
990
991 discard:
992 /* TODO: bump kstat? */
993 freemsg(mp);
994 }
995
996 /*
997 * Process DL_ATTACH_REQ (style 2) or open(2) (style 1).
998 */
999 int
1000 dld_str_attach(dld_str_t *dsp, t_uscalar_t ppa)
1001 {
1002 dev_t dev;
1003 int err;
1004 const char *drvname;
1005 mac_perim_handle_t mph = NULL;
1006 boolean_t qassociated = B_FALSE;
1007 dls_link_t *dlp = NULL;
1008 dls_dl_handle_t ddp = NULL;
1009
1010 if ((drvname = ddi_major_to_name(dsp->ds_major)) == NULL)
1011 return (EINVAL);
1012
1013 if (dsp->ds_style == DL_STYLE2 && ppa > DLS_MAX_PPA)
1014 return (ENOTSUP);
1015
1016 /*
1017 * /dev node access. This will still be supported for backward
1018 * compatibility reasons.
1019 */
1020 if ((dsp->ds_style == DL_STYLE2) && (strcmp(drvname, "aggr") != 0) &&
1021 (strcmp(drvname, "vnic") != 0)) {
1022 if (qassociate(dsp->ds_wq, DLS_PPA2INST(ppa)) != 0)
1023 return (EINVAL);
1024 qassociated = B_TRUE;
1025 }
1026
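	/*
	 * The minor number of a style 1 node is the PPA (instance) + 1;
	 * minor 0 is reserved for the style 2 unattached node.
	 */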
1027 dev = makedevice(dsp->ds_major, (minor_t)ppa + 1);
1028 if ((err = dls_devnet_hold_by_dev(dev, &ddp)) != 0)
1029 goto failed;
1030
1031 if ((err = mac_perim_enter_by_macname(dls_devnet_mac(ddp), &mph)) != 0)
1032 goto failed;
1033
1034 /*
1035 * Open a channel.
1036 */
1037 if ((err = dls_link_hold(dls_devnet_mac(ddp), &dlp)) != 0)
1038 goto failed;
1039
1040 if ((err = dls_open(dlp, ddp, dsp)) != 0)
1041 goto failed;
1042
1043 /*
1044 * Set the default packet priority.
1045 */
1046 dsp->ds_pri = 0;
1047
1048 /*
1049 * Add a notify function so that we get updates from the MAC.
1050 */
1051 dsp->ds_mnh = mac_notify_add(dsp->ds_mh, str_notify, dsp);
1052 dsp->ds_dlstate = DL_UNBOUND;
1053 mac_perim_exit(mph);
1054 return (0);
1055
1056 failed:
1057 if (dlp != NULL)
1058 dls_link_rele(dlp);
1059 if (mph != NULL)
1060 mac_perim_exit(mph);
1061 if (ddp != NULL)
1062 dls_devnet_rele(ddp);
1063 if (qassociated)
1064 (void) qassociate(dsp->ds_wq, -1);
1065
1066 return (err);
1067 }
1068
1069 /*
1070 * Process DL_DETACH_REQ (style 2) or close(2) (style 1). Can also be called
1071 * from close(2) for style 2.
1072 */
1073 void
1074 dld_str_detach(dld_str_t *dsp)
1075 {
1076 mac_perim_handle_t mph;
1077 int err;
1078
1079 ASSERT(dsp->ds_datathr_cnt == 0);
1080
1081 mac_perim_enter_by_mh(dsp->ds_mh, &mph);
1082 /*
1083 * Remove the notify function.
1084 *
1085 * Note that we cannot wait for the notification callback to be removed
1086 * since that could cause a deadlock with str_notify(), as they both
1087 * need the mac perimeter. Continue if we cannot remove the
1088 * notification callback right now and wait after we leave the
1089 * perimeter.
1090 */
1091 err = mac_notify_remove(dsp->ds_mnh, B_FALSE);
1092 dsp->ds_mnh = NULL;
1093
1094 /*
1095 * Disable the capabilities
1096 */
1097 dld_capabilities_disable(dsp);
1098
1099 /*
1100 * Clear LSO flags.
1101 */
1102 dsp->ds_lso = B_FALSE;
1103 dsp->ds_lso_max = 0;
1104
1105 dls_close(dsp);
1106 mac_perim_exit(mph);
1107
1108 /*
1109 * Now we leave the mac perimeter. If mac_notify_remove() failed
1110 * because the notification callback was in progress, wait for
1111 * it to finish before we proceed.
1112 */
1113 if (err != 0)
1114 mac_notify_remove_wait(dsp->ds_mh);
1115
1116 /*
1117 * An unreferenced tagged (non-persistent) vlan gets destroyed
1118 * automatically in the call to dls_devnet_rele.
1119 */
1120 dls_devnet_rele(dsp->ds_ddh);
1121
1122 dsp->ds_sap = 0;
1123 dsp->ds_mh = NULL;
1124 dsp->ds_mch = NULL;
1125 dsp->ds_mip = NULL;
1126
1127 if (dsp->ds_style == DL_STYLE2)
1128 (void) qassociate(dsp->ds_wq, -1);
1129
1130 /*
1131 * Re-initialize the DLPI state machine.
1132 */
1133 dsp->ds_dlstate = DL_UNATTACHED;
1134 }
1135
1136 /*
1137 * This function is only called for VLAN streams. In raw mode, we strip VLAN
1138 * tags before sending packets up to the DLS clients, with the exception of
1139 * special priority-tagged packets; in that case, we set the VID to 0.
1140 * mp must be a VLAN-tagged packet.
1141 */
1142 static mblk_t *
1143 i_dld_ether_header_strip_tag(mblk_t *mp, boolean_t keep_pri)
1144 {
1145 mblk_t *newmp;
1146 struct ether_vlan_header *evhp;
1147 uint16_t tci, new_tci;
1148
1149 ASSERT(MBLKL(mp) >= sizeof (struct ether_vlan_header));
1150 if (DB_REF(mp) > 1) {
1151 newmp = copymsg(mp);
1152 if (newmp == NULL)
1153 return (NULL);
1154 freemsg(mp);
1155 mp = newmp;
1156 }
1157 evhp = (struct ether_vlan_header *)mp->b_rptr;
1158
1159 tci = ntohs(evhp->ether_tci);
1160 if (VLAN_PRI(tci) == 0 || !keep_pri) {
1161 /*
1162 * Priority is 0 or we are not keeping it; strip the tag.
1163 */
1164 ovbcopy(mp->b_rptr, mp->b_rptr + VLAN_TAGSZ, 2 * ETHERADDRL);
1165 mp->b_rptr += VLAN_TAGSZ;
1166 } else {
1167 /*
1168 * Priority is not 0, update the VID to 0.
1169 */
1170 new_tci = VLAN_TCI(VLAN_PRI(tci), VLAN_CFI(tci), VLAN_ID_NONE);
1171 evhp->ether_tci = htons(new_tci);
1172 }
1173 return (mp);
1174 }
1175
1176 /*
1177 * Raw mode receive function.
1178 */
1179 /*ARGSUSED*/
1180 void
1181 dld_str_rx_raw(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
1182 mac_header_info_t *mhip)
1183 {
1184 dld_str_t *dsp = (dld_str_t *)arg;
1185 boolean_t is_ethernet = (dsp->ds_mip->mi_media == DL_ETHER);
1186 mblk_t *next, *newmp;
1187
1188 ASSERT(mp != NULL);
1189 do {
1190 /*
1191 * Get the pointer to the next packet in the chain and then
1192 * clear b_next before the packet gets passed on.
1193 */
1194 next = mp->b_next;
1195 mp->b_next = NULL;
1196
1197 /*
1198 * Wind back b_rptr to point at the MAC header.
1199 */
1200 ASSERT(mp->b_rptr >= DB_BASE(mp) + mhip->mhi_hdrsize);
1201 mp->b_rptr -= mhip->mhi_hdrsize;
1202
1203 /*
1204 * Certain MAC type plugins provide an illusion for raw
1205 * DLPI consumers. They pretend that the MAC layer is
1206 * something that it's not for the benefit of observability
1207 * tools. For example, mac_wifi pretends that it's Ethernet
1208 * for such consumers. Here, unless native mode is enabled,
1209 * we call into the MAC layer so that this illusion can be
1210 * maintained. The plugin will optionally transform the MAC
1211 * header here into something that can be passed up to raw
1212 * consumers. The header goes from "cooked" mode to raw mode.
1213 */
1214 if (!dsp->ds_native) {
1215 newmp = mac_header_uncook(dsp->ds_mh, mp);
1216 if (newmp == NULL) {
1217 freemsg(mp);
1218 goto next;
1219 }
1220 mp = newmp;
1221 }
1222
1223 /*
1224 * Strip the VLAN tag for VLAN streams.
1225 */
1226 if (is_ethernet &&
1227 mac_client_vid(dsp->ds_mch) != VLAN_ID_NONE) {
1228 /*
1229 * The priority should be kept only for VLAN
1230 * data-links.
1231 */
1232 newmp = i_dld_ether_header_strip_tag(mp,
1233 mac_client_is_vlan_vnic(dsp->ds_mch));
1234 if (newmp == NULL) {
1235 freemsg(mp);
1236 goto next;
1237 }
1238 mp = newmp;
1239 }
1240
1241 /*
1242 * Pass the packet on.
1243 */
1244 if (canputnext(dsp->ds_rq))
1245 putnext(dsp->ds_rq, mp);
1246 else
1247 freemsg(mp);
1248
1249 next:
1250 /*
1251 * Move on to the next packet in the chain.
1252 */
1253 mp = next;
1254 } while (mp != NULL);
1255 }
1256
1257 /*
1258 * Fast-path receive function.
1259 */
1260 /*ARGSUSED*/
1261 void
1262 dld_str_rx_fastpath(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
1263 mac_header_info_t *mhip)
1264 {
1265 dld_str_t *dsp = (dld_str_t *)arg;
1266 mblk_t *next;
1267 size_t offset = 0;
1268
1269 /*
1270 * MAC header stripping rules:
1271 * - Tagged packets:
1272 * a. VLAN streams. Strip the whole VLAN header including the tag.
1273 * b. Physical streams
1274 * - VLAN packets (non-zero VID). The stream must be either a
1275 * DL_PROMISC_SAP listener or a ETHERTYPE_VLAN listener.
1276 * Strip the Ethernet header but keep the VLAN header.
1277 * - Special tagged packets (zero VID)
1278 * * The stream is either a DL_PROMISC_SAP listener or a
1279 * ETHERTYPE_VLAN listener, strip the Ethernet header but
1280 * keep the VLAN header.
1281 * * Otherwise, strip the whole VLAN header.
1282 * - Untagged packets. Strip the whole MAC header.
1283 */
1284 if (mhip->mhi_istagged &&
1285 (mac_client_vid(dsp->ds_mch) == VLAN_ID_NONE) &&
1286 ((dsp->ds_sap == ETHERTYPE_VLAN) ||
1287 (dsp->ds_promisc & DLS_PROMISC_SAP))) {
1288 offset = VLAN_TAGSZ;
1289 }
1290
1291 ASSERT(mp != NULL);
1292 do {
1293 /*
1294 * Get the pointer to the next packet in the chain and then
1295 * clear b_next before the packet gets passed on.
1296 */
1297 next = mp->b_next;
1298 mp->b_next = NULL;
1299
1300 /*
1301 * Wind back b_rptr to point at the VLAN header.
1302 */
1303 ASSERT(mp->b_rptr >= DB_BASE(mp) + offset);
1304 mp->b_rptr -= offset;
1305
1306 /*
1307 * Pass the packet on.
1308 */
1309 if (canputnext(dsp->ds_rq))
1310 putnext(dsp->ds_rq, mp);
1311 else
1312 freemsg(mp);
1313 /*
1314 * Move on to the next packet in the chain.
1315 */
1316 mp = next;
1317 } while (mp != NULL);
1318 }
1319
1320 /*
1321 * Default receive function (send DL_UNITDATA_IND messages).
1322 */
1323 /*ARGSUSED*/
1324 void
1325 dld_str_rx_unitdata(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
1326 mac_header_info_t *mhip)
1327 {
1328 dld_str_t *dsp = (dld_str_t *)arg;
1329 mblk_t *ud_mp;
1330 mblk_t *next;
1331 size_t offset = 0;
1332 boolean_t strip_vlan = B_TRUE;
1333
1334 /*
1335 * See MAC header stripping rules in the dld_str_rx_fastpath() function.
1336 */
1337 if (mhip->mhi_istagged &&
1338 (mac_client_vid(dsp->ds_mch) == VLAN_ID_NONE) &&
1339 ((dsp->ds_sap == ETHERTYPE_VLAN) ||
1340 (dsp->ds_promisc & DLS_PROMISC_SAP))) {
1341 offset = VLAN_TAGSZ;
1342 strip_vlan = B_FALSE;
1343 }
1344
1345 ASSERT(mp != NULL);
1346 do {
1347 /*
1348 * Get the pointer to the next packet in the chain and then
1349 * clear b_next before the packet gets passed on.
1350 */
1351 next = mp->b_next;
1352 mp->b_next = NULL;
1353
1354 /*
1355 * Wind back b_rptr to point at the MAC header.
1356 */
1357 ASSERT(mp->b_rptr >= DB_BASE(mp) + mhip->mhi_hdrsize);
1358 mp->b_rptr -= mhip->mhi_hdrsize;
1359
1360 /*
1361 * Create the DL_UNITDATA_IND M_PROTO.
1362 */
1363 if ((ud_mp = str_unitdata_ind(dsp, mp, strip_vlan)) == NULL) {
1364 freemsgchain(mp);
1365 return;
1366 }
1367
1368 /*
1369 * Advance b_rptr to point at the payload (or the VLAN header).
1370 */
1371 mp->b_rptr += (mhip->mhi_hdrsize - offset);
1372
1373 /*
1374 * Prepend the DL_UNITDATA_IND.
1375 */
1376 ud_mp->b_cont = mp;
1377
1378 /*
1379 * Send the message.
1380 */
1381 if (canputnext(dsp->ds_rq))
1382 putnext(dsp->ds_rq, ud_mp);
1383 else
1384 freemsg(ud_mp);
1385
1386 /*
1387 * Move on to the next packet in the chain.
1388 */
1389 mp = next;
1390 } while (mp != NULL);
1391 }
1392
1393 /*
1394 * DL_NOTIFY_IND: DL_NOTE_SDU_SIZE
1395 */
1396 static void
1397 str_notify_sdu_size(dld_str_t *dsp, uint_t max_sdu, uint_t multicast_sdu)
1398 {
1399 mblk_t *mp;
1400 dl_notify_ind_t *dlip;
1401
1402 if (!(dsp->ds_notifications & (DL_NOTE_SDU_SIZE|DL_NOTE_SDU_SIZE2)))
1403 return;
1404
1405 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t),
1406 M_PROTO, 0)) == NULL)
1407 return;
1408
1409 bzero(mp->b_rptr, sizeof (dl_notify_ind_t));
1410 dlip = (dl_notify_ind_t *)mp->b_rptr;
1411 dlip->dl_primitive = DL_NOTIFY_IND;
1412 if (dsp->ds_notifications & DL_NOTE_SDU_SIZE2) {
1413 dlip->dl_notification = DL_NOTE_SDU_SIZE2;
1414 dlip->dl_data1 = max_sdu;
1415 dlip->dl_data2 = multicast_sdu;
1416 } else {
1417 dlip->dl_notification = DL_NOTE_SDU_SIZE;
1418 dlip->dl_data = max_sdu;
1419 }
1420
1421 qreply(dsp->ds_wq, mp);
1422 }
1423
1424 /*
1425 * Generate DL_NOTIFY_IND messages to notify the DLPI consumer of the
1426 * current state of the interface.
1427 */
1428 void
1429 dld_str_notify_ind(dld_str_t *dsp)
1430 {
1431 mac_notify_type_t type;
1432
1433 for (type = 0; type < MAC_NNOTE; type++)
1434 str_notify(dsp, type);
1435 }
1436
1437 typedef struct dl_unitdata_ind_wrapper {
1438 dl_unitdata_ind_t dl_unitdata;
1439 uint8_t dl_dest_addr[MAXMACADDRLEN + sizeof (uint16_t)];
1440 uint8_t dl_src_addr[MAXMACADDRLEN + sizeof (uint16_t)];
1441 } dl_unitdata_ind_wrapper_t;
1442
1443 /*
1444 * Create a DL_UNITDATA_IND M_PROTO message.
1445 */
1446 static mblk_t *
1447 str_unitdata_ind(dld_str_t *dsp, mblk_t *mp, boolean_t strip_vlan)
1448 {
1449 mblk_t *nmp;
1450 dl_unitdata_ind_wrapper_t *dlwp;
1451 dl_unitdata_ind_t *dlp;
1452 mac_header_info_t mhi;
1453 uint_t addr_length;
1454 uint8_t *daddr;
1455 uint8_t *saddr;
1456
1457 /*
1458 * Get the packet header information.
1459 */
1460 if (mac_vlan_header_info(dsp->ds_mh, mp, &mhi) != 0)
1461 return (NULL);
1462
1463 /*
1464 * Allocate a message large enough to contain the wrapper structure
1465 * defined above.
1466 */
1467 if ((nmp = mexchange(dsp->ds_wq, NULL,
1468 sizeof (dl_unitdata_ind_wrapper_t), M_PROTO,
1469 DL_UNITDATA_IND)) == NULL)
1470 return (NULL);
1471
1472 dlwp = (dl_unitdata_ind_wrapper_t *)nmp->b_rptr;
1473
1474 dlp = &(dlwp->dl_unitdata);
1475 ASSERT(dlp == (dl_unitdata_ind_t *)nmp->b_rptr);
1476 ASSERT(dlp->dl_primitive == DL_UNITDATA_IND);
1477
1478 /*
1479 * Copy in the destination address.
1480 */
1481 addr_length = dsp->ds_mip->mi_addr_length;
1482 daddr = dlwp->dl_dest_addr;
1483 dlp->dl_dest_addr_offset = (uintptr_t)daddr - (uintptr_t)dlp;
1484 bcopy(mhi.mhi_daddr, daddr, addr_length);
1485
1486 /*
1487 * Set the destination DLSAP to the SAP value encoded in the packet.
1488 */
1489 if (mhi.mhi_istagged && !strip_vlan)
1490 *(uint16_t *)(daddr + addr_length) = ETHERTYPE_VLAN;
1491 else
1492 *(uint16_t *)(daddr + addr_length) = mhi.mhi_bindsap;
1493 dlp->dl_dest_addr_length = addr_length + sizeof (uint16_t);
1494
1495 /*
1496 * If the destination address was multicast or broadcast then the
1497 * dl_group_address field should be non-zero.
1498 */
1499 dlp->dl_group_address = (mhi.mhi_dsttype == MAC_ADDRTYPE_MULTICAST) ||
1500 (mhi.mhi_dsttype == MAC_ADDRTYPE_BROADCAST);
1501
1502 /*
1503 * Copy in the source address if one exists. Some MAC types (DL_IB
1504 * for example) may not have access to source information.
1505 */
1506 if (mhi.mhi_saddr == NULL) {
1507 dlp->dl_src_addr_offset = dlp->dl_src_addr_length = 0;
1508 } else {
1509 saddr = dlwp->dl_src_addr;
1510 dlp->dl_src_addr_offset = (uintptr_t)saddr - (uintptr_t)dlp;
1511 bcopy(mhi.mhi_saddr, saddr, addr_length);
1512
1513 /*
1514 * Set the source DLSAP to the packet ethertype.
1515 */
1516 *(uint16_t *)(saddr + addr_length) = mhi.mhi_origsap;
1517 dlp->dl_src_addr_length = addr_length + sizeof (uint16_t);
1518 }
1519
1520 return (nmp);
1521 }
1522
1523 /*
1524 * DL_NOTIFY_IND: DL_NOTE_PROMISC_ON_PHYS
1525 */
1526 static void
1527 str_notify_promisc_on_phys(dld_str_t *dsp)
1528 {
1529 mblk_t *mp;
1530 dl_notify_ind_t *dlip;
1531
1532 if (!(dsp->ds_notifications & DL_NOTE_PROMISC_ON_PHYS))
1533 return;
1534
1535 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t),
1536 M_PROTO, 0)) == NULL)
1537 return;
1538
1539 bzero(mp->b_rptr, sizeof (dl_notify_ind_t));
1540 dlip = (dl_notify_ind_t *)mp->b_rptr;
1541 dlip->dl_primitive = DL_NOTIFY_IND;
1542 dlip->dl_notification = DL_NOTE_PROMISC_ON_PHYS;
1543
1544 qreply(dsp->ds_wq, mp);
1545 }
1546
1547 /*
1548 * DL_NOTIFY_IND: DL_NOTE_PROMISC_OFF_PHYS
1549 */
1550 static void
1551 str_notify_promisc_off_phys(dld_str_t *dsp)
1552 {
1553 mblk_t *mp;
1554 dl_notify_ind_t *dlip;
1555
1556 if (!(dsp->ds_notifications & DL_NOTE_PROMISC_OFF_PHYS))
1557 return;
1558
1559 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t),
1560 M_PROTO, 0)) == NULL)
1561 return;
1562
1563 bzero(mp->b_rptr, sizeof (dl_notify_ind_t));
1564 dlip = (dl_notify_ind_t *)mp->b_rptr;
1565 dlip->dl_primitive = DL_NOTIFY_IND;
1566 dlip->dl_notification = DL_NOTE_PROMISC_OFF_PHYS;
1567
1568 qreply(dsp->ds_wq, mp);
1569 }
1570
1571 /*
1572 * DL_NOTIFY_IND: DL_NOTE_PHYS_ADDR
1573 */
1574 static void
1575 str_notify_phys_addr(dld_str_t *dsp, uint_t addr_type, const uint8_t *addr)
1576 {
1577 mblk_t *mp;
1578 dl_notify_ind_t *dlip;
1579 uint_t addr_length;
1580 uint16_t ethertype;
1581
1582 if (!(dsp->ds_notifications & DL_NOTE_PHYS_ADDR))
1583 return;
1584
1585 addr_length = dsp->ds_mip->mi_addr_length;
1586 if ((mp = mexchange(dsp->ds_wq, NULL,
1587 sizeof (dl_notify_ind_t) + addr_length + sizeof (uint16_t),
1588 M_PROTO, 0)) == NULL)
1589 return;
1590
1591 bzero(mp->b_rptr, sizeof (dl_notify_ind_t));
1592 dlip = (dl_notify_ind_t *)mp->b_rptr;
1593 dlip->dl_primitive = DL_NOTIFY_IND;
1594 dlip->dl_notification = DL_NOTE_PHYS_ADDR;
1595 dlip->dl_data = addr_type;
1596 dlip->dl_addr_offset = sizeof (dl_notify_ind_t);
1597 dlip->dl_addr_length = addr_length + sizeof (uint16_t);
1598
1599 bcopy(addr, &dlip[1], addr_length);
1600
1601 ethertype = (dsp->ds_sap < ETHERTYPE_802_MIN) ? 0 : dsp->ds_sap;
1602 *(uint16_t *)((uchar_t *)(dlip + 1) + addr_length) = ethertype;
1603
1604 qreply(dsp->ds_wq, mp);
1605 }
1606
1607 /*
1608 * DL_NOTIFY_IND: DL_NOTE_LINK_UP
1609 */
1610 static void
1611 str_notify_link_up(dld_str_t *dsp)
1612 {
1613 mblk_t *mp;
1614 dl_notify_ind_t *dlip;
1615
1616 if (!(dsp->ds_notifications & DL_NOTE_LINK_UP))
1617 return;
1618
1619 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t),
1620 M_PROTO, 0)) == NULL)
1621 return;
1622
1623 bzero(mp->b_rptr, sizeof (dl_notify_ind_t));
1624 dlip = (dl_notify_ind_t *)mp->b_rptr;
1625 dlip->dl_primitive = DL_NOTIFY_IND;
1626 dlip->dl_notification = DL_NOTE_LINK_UP;
1627
1628 qreply(dsp->ds_wq, mp);
1629 }
1630
1631 /*
1632 * DL_NOTIFY_IND: DL_NOTE_LINK_DOWN
1633 */
1634 static void
1635 str_notify_link_down(dld_str_t *dsp)
1636 {
1637 mblk_t *mp;
1638 dl_notify_ind_t *dlip;
1639
1640 if (!(dsp->ds_notifications & DL_NOTE_LINK_DOWN))
1641 return;
1642
1643 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t),
1644 M_PROTO, 0)) == NULL)
1645 return;
1646
1647 bzero(mp->b_rptr, sizeof (dl_notify_ind_t));
1648 dlip = (dl_notify_ind_t *)mp->b_rptr;
1649 dlip->dl_primitive = DL_NOTIFY_IND;
1650 dlip->dl_notification = DL_NOTE_LINK_DOWN;
1651
1652 qreply(dsp->ds_wq, mp);
1653 }
1654
1655 /*
1656 * DL_NOTIFY_IND: DL_NOTE_SPEED
1657 */
1658 static void
1659 str_notify_speed(dld_str_t *dsp, uint32_t speed)
1660 {
1661 mblk_t *mp;
1662 dl_notify_ind_t *dlip;
1663
1664 if (!(dsp->ds_notifications & DL_NOTE_SPEED))
1665 return;
1666
1667 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t),
1668 M_PROTO, 0)) == NULL)
1669 return;
1670
1671 bzero(mp->b_rptr, sizeof (dl_notify_ind_t));
1672 dlip = (dl_notify_ind_t *)mp->b_rptr;
1673 dlip->dl_primitive = DL_NOTIFY_IND;
1674 dlip->dl_notification = DL_NOTE_SPEED;
1675 dlip->dl_data = speed;
1676
1677 qreply(dsp->ds_wq, mp);
1678 }
1679
1680 /*
1681 * DL_NOTIFY_IND: DL_NOTE_CAPAB_RENEG
1682 */
1683 static void
1684 str_notify_capab_reneg(dld_str_t *dsp)
1685 {
1686 mblk_t *mp;
1687 dl_notify_ind_t *dlip;
1688
1689 if (!(dsp->ds_notifications & DL_NOTE_CAPAB_RENEG))
1690 return;
1691
1692 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t),
1693 M_PROTO, 0)) == NULL)
1694 return;
1695
1696 bzero(mp->b_rptr, sizeof (dl_notify_ind_t));
1697 dlip = (dl_notify_ind_t *)mp->b_rptr;
1698 dlip->dl_primitive = DL_NOTIFY_IND;
1699 dlip->dl_notification = DL_NOTE_CAPAB_RENEG;
1700
1701 qreply(dsp->ds_wq, mp);
1702 }
1703
1704 /*
1705 * DL_NOTIFY_IND: DL_NOTE_FASTPATH_FLUSH
1706 */
1707 static void
1708 str_notify_fastpath_flush(dld_str_t *dsp)
1709 {
1710 mblk_t *mp;
1711 dl_notify_ind_t *dlip;
1712
1713 if (!(dsp->ds_notifications & DL_NOTE_FASTPATH_FLUSH))
1714 return;
1715
1716 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t),
1717 M_PROTO, 0)) == NULL)
1718 return;
1719
1720 bzero(mp->b_rptr, sizeof (dl_notify_ind_t));
1721 dlip = (dl_notify_ind_t *)mp->b_rptr;
1722 dlip->dl_primitive = DL_NOTIFY_IND;
1723 dlip->dl_notification = DL_NOTE_FASTPATH_FLUSH;
1724
1725 qreply(dsp->ds_wq, mp);
1726 }
1727
1728 static void
1729 str_notify_allowed_ips(dld_str_t *dsp)
1730 {
1731 mblk_t *mp;
1732 dl_notify_ind_t *dlip;
1733 size_t mp_size;
1734 mac_protect_t *mrp;
1735
1736 if (!(dsp->ds_notifications & DL_NOTE_ALLOWED_IPS))
1737 return;
1738
1739 mp_size = sizeof (mac_protect_t) + sizeof (dl_notify_ind_t);
1740 if ((mp = mexchange(dsp->ds_wq, NULL, mp_size, M_PROTO, 0)) == NULL)
1741 return;
1742
1743 mrp = mac_protect_get(dsp->ds_mh);
1744 bzero(mp->b_rptr, mp_size);
1745 dlip = (dl_notify_ind_t *)mp->b_rptr;
1746 dlip->dl_primitive = DL_NOTIFY_IND;
1747 dlip->dl_notification = DL_NOTE_ALLOWED_IPS;
1748 dlip->dl_data = 0;
1749 dlip->dl_addr_offset = sizeof (dl_notify_ind_t);
1750 dlip->dl_addr_length = sizeof (mac_protect_t);
1751 bcopy(mrp, mp->b_rptr + sizeof (dl_notify_ind_t),
1752 sizeof (mac_protect_t));
1753
1754 qreply(dsp->ds_wq, mp);
1755 }
1756
1757 /*
1758 * MAC notification callback.
1759 */
1760 void
1761 str_notify(void *arg, mac_notify_type_t type)
1762 {
1763 dld_str_t *dsp = (dld_str_t *)arg;
1764 queue_t *q = dsp->ds_wq;
1765 mac_handle_t mh = dsp->ds_mh;
1766 mac_client_handle_t mch = dsp->ds_mch;
1767 uint8_t addr[MAXMACADDRLEN];
1768
1769 switch (type) {
1770 case MAC_NOTE_TX:
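		/*
		 * The MAC layer is ready to accept packets again. Enable
		 * the write-side queue so that dld_wsrv() runs and clears
		 * the flow-control condition.
		 */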
1771 qenable(q);
1772 break;
1773
1774 case MAC_NOTE_DEVPROMISC:
1775 /*
1776 * Send the appropriate DL_NOTIFY_IND.
1777 */
1778 if (mac_promisc_get(mh))
1779 str_notify_promisc_on_phys(dsp);
1780 else
1781 str_notify_promisc_off_phys(dsp);
1782 break;
1783
1784 case MAC_NOTE_UNICST:
1785 /*
1786 * This notification is sent whenever the MAC unicast
1787 * address changes.
1788 */
1789 mac_unicast_primary_get(mh, addr);
1790
1791 /*
1792 * Send the appropriate DL_NOTIFY_IND.
1793 */
1794 str_notify_phys_addr(dsp, DL_CURR_PHYS_ADDR, addr);
1795 break;
1796
1797 case MAC_NOTE_DEST:
1798 /*
1799 * Only send up DL_NOTE_DEST_ADDR if the link has a
1800 * destination address.
1801 */
1802 if (mac_dst_get(dsp->ds_mh, addr))
1803 str_notify_phys_addr(dsp, DL_CURR_DEST_ADDR, addr);
1804 break;
1805
1806 case MAC_NOTE_LOWLINK:
1807 case MAC_NOTE_LINK:
1808 /*
1809 * LOWLINK refers to the actual link status. For links that
1810 * are not part of a bridge instance, LOWLINK and LINK state
1811 * are the same. But for a link that is part of a bridge instance,
1812 * LINK state refers to the aggregate link status: "up" when
1813 * at least one link in the bridge is up, and "down"
1814 * when all links in the bridge are down.
1815 *
1816 * Clients can request to be notified of the LOWLINK state
1817 * using the DLIOCLOWLINK ioctl. Clients such as the bridge
1818 * daemon request lowlink state changes, while upper-layer clients
1819 * receive notifications of the aggregate link state changes,
1820 * which is the default when requesting LINK UP/DOWN state
1821 * notifications.
1822 */
1823
1824 /*
1825 * Check that the notification type matches the one that we
1826 * want. If we want lower-level link notifications, and this
1827 * is upper, or if we want upper and this is lower, then
1828 * ignore.
1829 */
1830 if ((type == MAC_NOTE_LOWLINK) != dsp->ds_lowlink)
1831 break;
1832 /*
1833 * This notification is sent every time the MAC driver
1834 * updates the link state.
1835 */
1836 switch (mac_client_stat_get(mch, dsp->ds_lowlink ?
1837 MAC_STAT_LOWLINK_STATE : MAC_STAT_LINK_STATE)) {
1838 case LINK_STATE_UP: {
1839 uint64_t speed;
1840 /*
1841 * The link is up so send the appropriate
1842 * DL_NOTIFY_IND.
1843 */
1844 str_notify_link_up(dsp);
1845
1846 speed = mac_stat_get(mh, MAC_STAT_IFSPEED);
1847 str_notify_speed(dsp, (uint32_t)(speed / 1000ull));
1848 break;
1849 }
1850 case LINK_STATE_DOWN:
1851 /*
1852 * The link is down so send the appropriate
1853 * DL_NOTIFY_IND.
1854 */
1855 str_notify_link_down(dsp);
1856 break;
1857
1858 default:
1859 break;
1860 }
1861 break;
1862
1863 case MAC_NOTE_CAPAB_CHG:
1864 /*
1865 * This notification is sent whenever the MAC resources
1866 * change or capabilities change. We need to renegotiate
1867 * the capabilities. Send the appropriate DL_NOTIFY_IND.
1868 */
1869 str_notify_capab_reneg(dsp);
1870 break;
1871
1872 case MAC_NOTE_SDU_SIZE: {
1873 uint_t max_sdu;
1874 uint_t multicast_sdu;
1875 mac_sdu_get2(dsp->ds_mh, NULL, &max_sdu, &multicast_sdu);
1876 str_notify_sdu_size(dsp, max_sdu, multicast_sdu);
1877 break;
1878 }
1879
1880 case MAC_NOTE_FASTPATH_FLUSH:
1881 str_notify_fastpath_flush(dsp);
1882 break;
1883
1884 /* Unused notifications */
1885 case MAC_NOTE_MARGIN:
1886 break;
1887
1888 case MAC_NOTE_ALLOWED_IPS:
1889 str_notify_allowed_ips(dsp);
1890 break;
1891
1892 default:
1893 ASSERT(B_FALSE);
1894 break;
1895 }
1896 }
1897
1898 /*
1899 * This function is called via a taskq mechanism to process all control
1900 * messages on a per-'dsp' endpoint basis.
1901 */
1902 static void
1903 dld_wput_nondata_task(void *arg)
1904 {
1905 dld_str_t *dsp = arg;
1906 mblk_t *mp;
1907
1908 mutex_enter(&dsp->ds_lock);
1909 while (dsp->ds_pending_head != NULL) {
1910 mp = dsp->ds_pending_head;
1911 dsp->ds_pending_head = mp->b_next;
1912 mp->b_next = NULL;
1913 if (dsp->ds_pending_head == NULL)
1914 dsp->ds_pending_tail = NULL;
1915 mutex_exit(&dsp->ds_lock);
1916
1917 switch (DB_TYPE(mp)) {
1918 case M_PROTO:
1919 case M_PCPROTO:
1920 dld_proto(dsp, mp);
1921 break;
1922 case M_IOCTL:
1923 dld_ioc(dsp, mp);
1924 break;
1925 default:
1926 ASSERT(0);
1927 }
1928
1929 mutex_enter(&dsp->ds_lock);
1930 }
1931 ASSERT(dsp->ds_pending_tail == NULL);
1932 dsp->ds_dlpi_pending = 0;
1933 cv_broadcast(&dsp->ds_dlpi_pending_cv);
1934 mutex_exit(&dsp->ds_lock);
1935 }
1936
1937 /*
1938 * Kernel thread to handle taskq dispatch failures in dld_wput_nondata(). This
1939 * thread is started at boot time.
1940 */
1941 static void
1942 dld_taskq_dispatch(void)
1943 {
1944 callb_cpr_t cprinfo;
1945 dld_str_t *dsp;
1946
1947 CALLB_CPR_INIT(&cprinfo, &dld_taskq_lock, callb_generic_cpr,
1948 "dld_taskq_dispatch");
1949 mutex_enter(&dld_taskq_lock);
1950
1951 while (!dld_taskq_quit) {
1952 dsp = list_head(&dld_taskq_list);
1953 while (dsp != NULL) {
1954 list_remove(&dld_taskq_list, dsp);
1955 mutex_exit(&dld_taskq_lock);
1956 VERIFY(taskq_dispatch(dld_taskq, dld_wput_nondata_task,
1957 dsp, TQ_SLEEP) != 0);
1958 mutex_enter(&dld_taskq_lock);
1959 dsp = list_head(&dld_taskq_list);
1960 }
1961
1962 CALLB_CPR_SAFE_BEGIN(&cprinfo);
1963 cv_wait(&dld_taskq_cv, &dld_taskq_lock);
1964 CALLB_CPR_SAFE_END(&cprinfo, &dld_taskq_lock);
1965 }
1966
1967 dld_taskq_done = B_TRUE;
1968 cv_signal(&dld_taskq_cv);
1969 CALLB_CPR_EXIT(&cprinfo);
1970 thread_exit();
1971 }
1972
1973 /*
1974 * All control operations are serialized on the 'dsp' and are also funneled
1975 * through a taskq mechanism to ensure that subsequent processing has kernel
1976 * context and can safely use cv_wait.
1977 *
1978 * Mechanisms to handle taskq dispatch failures
1979 *
1980 * The only way to be sure that taskq dispatch does not fail is to either
1981 * specify TQ_SLEEP or to use a static taskq and prepopulate it with
1982 * some number of entries and make sure that the number of outstanding requests
1983 * is less than that number. We can't use TQ_SLEEP since we don't know the
1984 * context. Nor can we bound the total number of 'dsp' end points. So we are
1985 * unable to use either of the above schemes, and are forced to deal with
1986 * taskq dispatch failures. Note that even a dynamic taskq could fail in
1987 * dispatch if TQ_NOSLEEP is specified, since this flag is translated
1988 * eventually to KM_NOSLEEP and kmem allocations could fail in the taskq
1989 * framework.
1990 *
1991 * We maintain a queue of 'dsp's that encountered taskq dispatch failure.
1992 * We also have a single global thread to retry the taskq dispatch. This
1993 * thread loops in 'dld_taskq_dispatch' and retries the taskq dispatch, but
1994 * uses TQ_SLEEP to ensure eventual success of the dispatch operation.
1995 */
1996 static void
1997 dld_wput_nondata(dld_str_t *dsp, mblk_t *mp)
1998 {
1999 ASSERT(mp->b_next == NULL);
2000 mutex_enter(&dsp->ds_lock);
2001 if (dsp->ds_pending_head != NULL) {
2002 ASSERT(dsp->ds_dlpi_pending);
2003 dsp->ds_pending_tail->b_next = mp;
2004 dsp->ds_pending_tail = mp;
2005 mutex_exit(&dsp->ds_lock);
2006 return;
2007 }
2008 ASSERT(dsp->ds_pending_tail == NULL);
2009 dsp->ds_pending_head = dsp->ds_pending_tail = mp;
2010 /*
2011 * At this point if ds_dlpi_pending is set, it implies that the taskq
2012 * thread is still active and is processing the last message, though
2013 * the pending queue has been emptied.
2014 */
2015 if (dsp->ds_dlpi_pending) {
2016 mutex_exit(&dsp->ds_lock);
2017 return;
2018 }
2019
2020 dsp->ds_dlpi_pending = 1;
2021 mutex_exit(&dsp->ds_lock);
2022
2023 if (taskq_dispatch(dld_taskq, dld_wput_nondata_task, dsp,
2024 TQ_NOSLEEP) != 0)
2025 return;
2026
2027 mutex_enter(&dld_taskq_lock);
2028 list_insert_tail(&dld_taskq_list, dsp);
2029 cv_signal(&dld_taskq_cv);
2030 mutex_exit(&dld_taskq_lock);
2031 }
2032
2033 /*
2034 * Process an M_IOCTL message.
2035 */
2036 static void
2037 dld_ioc(dld_str_t *dsp, mblk_t *mp)
2038 {
2039 uint_t cmd;
2040
2041 cmd = ((struct iocblk *)mp->b_rptr)->ioc_cmd;
2042 ASSERT(dsp->ds_type == DLD_DLPI);
2043
2044 switch (cmd) {
2045 case DLIOCNATIVE:
2046 ioc_native(dsp, mp);
2047 break;
2048 case DLIOCMARGININFO:
2049 ioc_margin(dsp, mp);
2050 break;
2051 case DLIOCRAW:
2052 ioc_raw(dsp, mp);
2053 break;
2054 case DLIOCHDRINFO:
2055 ioc_fast(dsp, mp);
2056 break;
2057 case DLIOCLOWLINK:
2058 ioc_lowlink(dsp, mp);
2059 break;
2060 default:
2061 ioc(dsp, mp);
2062 }
2063 }
2064
2065 /*
2066 * DLIOCNATIVE
2067 */
2068 static void
2069 ioc_native(dld_str_t *dsp, mblk_t *mp)
2070 {
2071 queue_t *q = dsp->ds_wq;
2072 const mac_info_t *mip = dsp->ds_mip;
2073
2074 /*
2075 * Enable native mode if it is not already enabled and the
2076 * advertised media type differs from the native media type.
2077 */
2078 if (!dsp->ds_native && mip->mi_media != mip->mi_nativemedia)
2079 dsp->ds_native = B_TRUE;
2080
2081 if (dsp->ds_native)
2082 miocack(q, mp, 0, mip->mi_nativemedia);
2083 else
2084 miocnak(q, mp, 0, ENOTSUP);
2085 }
2086
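/*
 * Illustrative sketch (not part of the driver): a user-land DLPI client
 * could request native mode with an I_STR ioctl along the following
 * lines. Here 'fd' is assumed to be an open stream on the data link; on
 * success the ioctl() return value is the native media type supplied to
 * miocack() above.
 *
 *	struct strioctl sioc;
 *	int media;
 *
 *	sioc.ic_cmd = DLIOCNATIVE;
 *	sioc.ic_timout = 0;
 *	sioc.ic_len = 0;
 *	sioc.ic_dp = NULL;
 *	media = ioctl(fd, I_STR, &sioc);
 */
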
2087 /*
2088 * DLIOCMARGININFO
2089 */
2090 static void
2091 ioc_margin(dld_str_t *dsp, mblk_t *mp)
2092 {
2093 queue_t *q = dsp->ds_wq;
2094 uint32_t margin;
2095 int err;
2096
2097 if (dsp->ds_dlstate == DL_UNATTACHED) {
2098 err = EINVAL;
2099 goto failed;
2100 }
2101 if ((err = miocpullup(mp, sizeof (uint32_t))) != 0)
2102 goto failed;
2103
2104 mac_margin_get(dsp->ds_mh, &margin);
2105 *((uint32_t *)mp->b_cont->b_rptr) = margin;
2106 miocack(q, mp, sizeof (uint32_t), 0);
2107 return;
2108
2109 failed:
2110 miocnak(q, mp, 0, err);
2111 }
2112
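/*
 * Illustrative sketch (not part of the driver): a client could query the
 * link margin roughly as follows, where 'fd' is an assumed open stream
 * that has been attached to the link (otherwise the handler above nak's
 * the request with EINVAL). On success the handler copies the value
 * obtained from mac_margin_get() back into the supplied buffer.
 *
 *	uint32_t margin = 0;
 *	struct strioctl sioc;
 *
 *	sioc.ic_cmd = DLIOCMARGININFO;
 *	sioc.ic_timout = 0;
 *	sioc.ic_len = sizeof (margin);
 *	sioc.ic_dp = (char *)&margin;
 *	(void) ioctl(fd, I_STR, &sioc);
 */
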
2113 /*
2114 * DLIOCRAW
2115 */
2116 static void
2117 ioc_raw(dld_str_t *dsp, mblk_t *mp)
2118 {
2119 queue_t *q = dsp->ds_wq;
2120 mac_perim_handle_t mph;
2121
2122 if (dsp->ds_mh == NULL) {
2123 dsp->ds_mode = DLD_RAW;
2124 miocack(q, mp, 0, 0);
2125 return;
2126 }
2127
2128 mac_perim_enter_by_mh(dsp->ds_mh, &mph);
2129 if (dsp->ds_polling || dsp->ds_direct) {
2130 mac_perim_exit(mph);
2131 miocnak(q, mp, 0, EPROTO);
2132 return;
2133 }
2134
2135 if (dsp->ds_mode != DLD_RAW && dsp->ds_dlstate == DL_IDLE) {
2136 /*
2137 * Set the receive callback.
2138 */
2139 dls_rx_set(dsp, dld_str_rx_raw, dsp);
2140 }
2141
2142 /*
2143 * Note that raw mode is enabled.
2144 */
2145 dsp->ds_mode = DLD_RAW;
2146 mac_perim_exit(mph);
2147
2148 miocack(q, mp, 0, 0);
2149 }
2150
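/*
 * Illustrative sketch (not part of the driver): DLIOCRAW carries no
 * payload, so a user-land client (as libdlpi does when a link is opened
 * with DLPI_RAW) could enable raw mode along these lines; 'fd' is an
 * assumed open stream on the data link. Once raw mode is enabled,
 * inbound traffic is delivered via dld_str_rx_raw() and outbound M_DATA
 * messages are treated as complete link-layer frames.
 *
 *	struct strioctl sioc;
 *
 *	sioc.ic_cmd = DLIOCRAW;
 *	sioc.ic_timout = 0;
 *	sioc.ic_len = 0;
 *	sioc.ic_dp = NULL;
 *	(void) ioctl(fd, I_STR, &sioc);
 */
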
2151 /*
2152 * DLIOCHDRINFO
2153 */
2154 static void
2155 ioc_fast(dld_str_t *dsp, mblk_t *mp)
2156 {
2157 dl_unitdata_req_t *dlp;
2158 off_t off;
2159 size_t len;
2160 const uint8_t *addr;
2161 uint16_t sap;
2162 mblk_t *nmp;
2163 mblk_t *hmp;
2164 uint_t addr_length;
2165 queue_t *q = dsp->ds_wq;
2166 int err;
2167 mac_perim_handle_t mph;
2168
2169 if (dld_opt & DLD_OPT_NO_FASTPATH) {
2170 err = ENOTSUP;
2171 goto failed;
2172 }
2173
2174 /*
2175 * DLIOCHDRINFO should only come from IP; requests initiated from
2176 * user-land are not allowed.
2177 */
2178 if (((struct iocblk *)mp->b_rptr)->ioc_cr != kcred) {
2179 err = EINVAL;
2180 goto failed;
2181 }
2182
2183 nmp = mp->b_cont;
2184 if (nmp == NULL || MBLKL(nmp) < sizeof (dl_unitdata_req_t) ||
2185 (dlp = (dl_unitdata_req_t *)nmp->b_rptr,
2186 dlp->dl_primitive != DL_UNITDATA_REQ)) {
2187 err = EINVAL;
2188 goto failed;
2189 }
2190
2191 off = dlp->dl_dest_addr_offset;
2192 len = dlp->dl_dest_addr_length;
2193
2194 if (!MBLKIN(nmp, off, len)) {
2195 err = EINVAL;
2196 goto failed;
2197 }
2198
2199 if (dsp->ds_dlstate != DL_IDLE) {
2200 err = ENOTSUP;
2201 goto failed;
2202 }
2203
2204 addr_length = dsp->ds_mip->mi_addr_length;
2205 if (len != addr_length + sizeof (uint16_t)) {
2206 err = EINVAL;
2207 goto failed;
2208 }
2209
2210 addr = nmp->b_rptr + off;
2211 sap = *(uint16_t *)(nmp->b_rptr + off + addr_length);
2212
2213 if ((hmp = dls_header(dsp, addr, sap, 0, NULL)) == NULL) {
2214 err = ENOMEM;
2215 goto failed;
2216 }
2217
2218 /*
2219 * This ioctl might happen concurrently with a direct call to dld_capab
2220 * that tries to enable direct and/or poll capabilities. Since the
2221 * stack does not serialize them, we do so here to avoid mixing
2222 * the callbacks.
2223 */
2224 mac_perim_enter_by_mh(dsp->ds_mh, &mph);
2225 if (dsp->ds_mode != DLD_FASTPATH) {
2226 /*
2227 * Set the receive callback (unless polling is enabled).
2228 */
2229 if (!dsp->ds_polling && !dsp->ds_direct)
2230 dls_rx_set(dsp, dld_str_rx_fastpath, dsp);
2231
2232 /*
2233 * Note that fast-path mode is enabled.
2234 */
2235 dsp->ds_mode = DLD_FASTPATH;
2236 }
2237 mac_perim_exit(mph);
2238
2239 freemsg(nmp->b_cont);
2240 nmp->b_cont = hmp;
2241
2242 miocack(q, mp, MBLKL(nmp) + MBLKL(hmp), 0);
2243 return;
2244 failed:
2245 miocnak(q, mp, 0, err);
2246 }
2247
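/*
 * For reference, the M_IOCTL chain this handler expects from IP looks
 * roughly as follows (a sketch of the layout implied by the checks
 * above, not a normative definition):
 *
 *	M_IOCTL: struct iocblk, ioc_cmd == DLIOCHDRINFO, ioc_cr == kcred
 *	  b_cont: dl_unitdata_req_t (DL_UNITDATA_REQ) whose
 *	          dl_dest_addr_offset/dl_dest_addr_length describe the
 *	          destination MAC address followed by a 16-bit SAP
 *
 * On success the handler replaces nmp->b_cont with the header built by
 * dls_header(), and IP caches that header for fast-path transmits.
 */
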
2248 /*
2249 * DLIOCLOWLINK: request notification of actual (physical) link-state
2250 * changes. When the link is part of a bridge instance, the client
2251 * receives the physical link state rather than the aggregate bridge
2252 * link status. Used by the bridging daemon (bridged) for proper RSTP operation.
2253 */
2254 static void
2255 ioc_lowlink(dld_str_t *dsp, mblk_t *mp)
2256 {
2257 queue_t *q = dsp->ds_wq;
2258 int err;
2259
2260 if ((err = miocpullup(mp, sizeof (int))) != 0) {
2261 miocnak(q, mp, 0, err);
2262 } else {
2263 /* LINTED: alignment */
2264 dsp->ds_lowlink = *(boolean_t *)mp->b_cont->b_rptr;
2265 miocack(q, mp, 0, 0);
2266 }
2267 }
2268
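/*
 * Illustrative sketch (not part of the driver): bridged, or any other
 * client interested in the physical link state, could enable this
 * behavior with an I_STR ioctl along these lines; 'fd' is an assumed
 * open stream on the data link.
 *
 *	boolean_t lowlink = B_TRUE;
 *	struct strioctl sioc;
 *
 *	sioc.ic_cmd = DLIOCLOWLINK;
 *	sioc.ic_timout = 0;
 *	sioc.ic_len = sizeof (lowlink);
 *	sioc.ic_dp = (char *)&lowlink;
 *	(void) ioctl(fd, I_STR, &sioc);
 */
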
2269 /*
2270 * Catch-all handler.
2271 */
2272 static void
2273 ioc(dld_str_t *dsp, mblk_t *mp)
2274 {
2275 queue_t *q = dsp->ds_wq;
2276
2277 if (dsp->ds_dlstate == DL_UNATTACHED) {
2278 miocnak(q, mp, 0, EINVAL);
2279 return;
2280 }
2281 mac_ioctl(dsp->ds_mh, q, mp);
2282 }
2283