1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 * Copyright 2018 Joyent, Inc.
26 */
27
28 #include <sys/strsun.h>
29 #include <sys/sdt.h>
30 #include <sys/mac.h>
31 #include <sys/mac_impl.h>
32 #include <sys/mac_client_impl.h>
33 #include <sys/mac_stat.h>
34 #include <sys/dls.h>
35 #include <sys/dls_impl.h>
36 #include <sys/mac_soft_ring.h>
37 #include <sys/ethernet.h>
38 #include <sys/cpupart.h>
39 #include <sys/pool.h>
40 #include <sys/pool_pset.h>
41 #include <sys/vlan.h>
42 #include <inet/ip.h>
43 #include <inet/ip6.h>
44 #include <netinet/tcp.h>
45 #include <netinet/udp.h>
46 #include <netinet/sctp.h>
47
/*
 * Per-flow counters exported through the "flow" kstat. flow_stat_update()
 * fills in a zeroed copy of this structure and then copies each field out
 * using the offsets recorded in flow_stats_list[].
 */
typedef struct flow_stats_s {
	uint64_t	fs_obytes;	/* outbound bytes */
	uint64_t	fs_opackets;	/* outbound packets */
	uint64_t	fs_oerrors;	/* outbound errors */
	uint64_t	fs_ibytes;	/* inbound bytes */
	uint64_t	fs_ipackets;	/* inbound packets */
	uint64_t	fs_ierrors;	/* inbound errors */
} flow_stats_t;
56
57
58 /* global flow table, will be a per exclusive-zone table later */
59 static mod_hash_t *flow_hash;
60 static krwlock_t flow_tab_lock;
61
62 static kmem_cache_t *flow_cache;
63 static kmem_cache_t *flow_tab_cache;
64 static flow_ops_t flow_l2_ops;
65
66 typedef struct {
67 const char *fs_name;
68 uint_t fs_offset;
69 } flow_stats_info_t;
70
71 #define FS_OFF(f) (offsetof(flow_stats_t, f))
72 static flow_stats_info_t flow_stats_list[] = {
73 {"rbytes", FS_OFF(fs_ibytes)},
74 {"ipackets", FS_OFF(fs_ipackets)},
75 {"ierrors", FS_OFF(fs_ierrors)},
76 {"obytes", FS_OFF(fs_obytes)},
77 {"opackets", FS_OFF(fs_opackets)},
78 {"oerrors", FS_OFF(fs_oerrors)}
79 };
80 #define FS_SIZE (sizeof (flow_stats_list) / sizeof (flow_stats_info_t))
81
/*
 * Checks whether a flow mask is legal: returns the flow table info for the
 * given mask, or NULL if the mask is not supported.
 */
85 static flow_tab_info_t *mac_flow_tab_info_get(flow_mask_t);
86
87 static void
flow_stat_init(kstat_named_t * knp)88 flow_stat_init(kstat_named_t *knp)
89 {
90 int i;
91
92 for (i = 0; i < FS_SIZE; i++, knp++) {
93 kstat_named_init(knp, flow_stats_list[i].fs_name,
94 KSTAT_DATA_UINT64);
95 }
96 }
97
98 static int
flow_stat_update(kstat_t * ksp,int rw)99 flow_stat_update(kstat_t *ksp, int rw)
100 {
101 flow_entry_t *fep = ksp->ks_private;
102 kstat_named_t *knp = ksp->ks_data;
103 uint64_t *statp;
104 int i;
105 mac_rx_stats_t *mac_rx_stat;
106 mac_tx_stats_t *mac_tx_stat;
107 flow_stats_t flow_stats;
108 mac_soft_ring_set_t *mac_srs;
109
110 if (rw != KSTAT_READ)
111 return (EACCES);
112
113 bzero(&flow_stats, sizeof (flow_stats_t));
114
115 for (i = 0; i < fep->fe_rx_srs_cnt; i++) {
116 mac_srs = (mac_soft_ring_set_t *)fep->fe_rx_srs[i];
117 if (mac_srs == NULL) /* Multicast flow */
118 break;
119 mac_rx_stat = &mac_srs->srs_rx.sr_stat;
120
121 flow_stats.fs_ibytes += mac_rx_stat->mrs_intrbytes +
122 mac_rx_stat->mrs_pollbytes + mac_rx_stat->mrs_lclbytes;
123
124 flow_stats.fs_ipackets += mac_rx_stat->mrs_intrcnt +
125 mac_rx_stat->mrs_pollcnt + mac_rx_stat->mrs_lclcnt;
126
127 flow_stats.fs_ierrors += mac_rx_stat->mrs_ierrors;
128 }
129
130 mac_srs = (mac_soft_ring_set_t *)fep->fe_tx_srs;
131 if (mac_srs == NULL) /* Multicast flow */
132 goto done;
133 mac_tx_stat = &mac_srs->srs_tx.st_stat;
134
135 flow_stats.fs_obytes = mac_tx_stat->mts_obytes;
136 flow_stats.fs_opackets = mac_tx_stat->mts_opackets;
137 flow_stats.fs_oerrors = mac_tx_stat->mts_oerrors;
138
139 done:
140 for (i = 0; i < FS_SIZE; i++, knp++) {
141 statp = (uint64_t *)
142 ((uchar_t *)&flow_stats + flow_stats_list[i].fs_offset);
143 knp->value.ui64 = *statp;
144 }
145 return (0);
146 }
147
148 static void
flow_stat_create(flow_entry_t * fep)149 flow_stat_create(flow_entry_t *fep)
150 {
151 kstat_t *ksp;
152 kstat_named_t *knp;
153 uint_t nstats = FS_SIZE;
154
155 /*
156 * Fow now, flow entries are only manipulated and visible from the
157 * global zone.
158 */
159 ksp = kstat_create_zone("unix", 0, (char *)fep->fe_flow_name, "flow",
160 KSTAT_TYPE_NAMED, nstats, 0, GLOBAL_ZONEID);
161 if (ksp == NULL)
162 return;
163
164 ksp->ks_update = flow_stat_update;
165 ksp->ks_private = fep;
166 fep->fe_ksp = ksp;
167
168 knp = (kstat_named_t *)ksp->ks_data;
169 flow_stat_init(knp);
170 kstat_install(ksp);
171 }
172
173 void
flow_stat_destroy(flow_entry_t * fep)174 flow_stat_destroy(flow_entry_t *fep)
175 {
176 if (fep->fe_ksp != NULL) {
177 kstat_delete(fep->fe_ksp);
178 fep->fe_ksp = NULL;
179 }
180 }
181
182 /*
183 * Initialize the flow table
184 */
185 void
mac_flow_init()186 mac_flow_init()
187 {
188 flow_cache = kmem_cache_create("flow_entry_cache",
189 sizeof (flow_entry_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
190 flow_tab_cache = kmem_cache_create("flow_tab_cache",
191 sizeof (flow_tab_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
192 flow_hash = mod_hash_create_extended("flow_hash",
193 100, mod_hash_null_keydtor, mod_hash_null_valdtor,
194 mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);
195 rw_init(&flow_tab_lock, NULL, RW_DEFAULT, NULL);
196 }
197
198 /*
199 * Cleanup and release the flow table
200 */
201 void
mac_flow_fini()202 mac_flow_fini()
203 {
204 kmem_cache_destroy(flow_cache);
205 kmem_cache_destroy(flow_tab_cache);
206 mod_hash_destroy_hash(flow_hash);
207 rw_destroy(&flow_tab_lock);
208 }
209
210 /*
211 * mac_create_flow(): create a flow_entry_t.
212 */
213 int
mac_flow_create(flow_desc_t * fd,mac_resource_props_t * mrp,char * name,void * client_cookie,uint_t type,flow_entry_t ** flentp)214 mac_flow_create(flow_desc_t *fd, mac_resource_props_t *mrp, char *name,
215 void *client_cookie, uint_t type, flow_entry_t **flentp)
216 {
217 flow_entry_t *flent = *flentp;
218 int err = 0;
219
220 if (mrp != NULL) {
221 err = mac_validate_props(NULL, mrp);
222 if (err != 0)
223 return (err);
224 }
225
226 if (flent == NULL) {
227 flent = kmem_cache_alloc(flow_cache, KM_SLEEP);
228 bzero(flent, sizeof (*flent));
229 mutex_init(&flent->fe_lock, NULL, MUTEX_DEFAULT, NULL);
230 cv_init(&flent->fe_cv, NULL, CV_DEFAULT, NULL);
231
232 /* Initialize the receiver function to a safe routine */
233 flent->fe_cb_fn = (flow_fn_t)mac_rx_def;
234 flent->fe_index = -1;
235 }
236 (void) strlcpy(flent->fe_flow_name, name, MAXFLOWNAMELEN);
237
238 /* This is an initial flow, will be configured later */
239 if (fd == NULL) {
240 *flentp = flent;
241 return (0);
242 }
243
244 flent->fe_client_cookie = client_cookie;
245 flent->fe_type = type;
246
247 /* Save flow desc */
248 bcopy(fd, &flent->fe_flow_desc, sizeof (*fd));
249
250 if (mrp != NULL) {
251 /*
252 * We have already set fe_resource_props for a Link.
253 */
254 if (type & FLOW_USER) {
255 bcopy(mrp, &flent->fe_resource_props,
256 sizeof (mac_resource_props_t));
257 }
258 /*
259 * The effective resource list should reflect the priority
260 * that we set implicitly.
261 */
262 if (!(mrp->mrp_mask & MRP_PRIORITY))
263 mrp->mrp_mask |= MRP_PRIORITY;
264 if (type & FLOW_USER)
265 mrp->mrp_priority = MPL_SUBFLOW_DEFAULT;
266 else
267 mrp->mrp_priority = MPL_LINK_DEFAULT;
268 bzero(mrp->mrp_pool, MAXPATHLEN);
269 bzero(&mrp->mrp_cpus, sizeof (mac_cpus_t));
270 bcopy(mrp, &flent->fe_effective_props,
271 sizeof (mac_resource_props_t));
272 }
273 flow_stat_create(flent);
274
275 *flentp = flent;
276 return (0);
277 }
278
279 /*
280 * Validate flow entry and add it to a flow table.
281 */
282 int
mac_flow_add(flow_tab_t * ft,flow_entry_t * flent)283 mac_flow_add(flow_tab_t *ft, flow_entry_t *flent)
284 {
285 flow_entry_t **headp, **p;
286 flow_ops_t *ops = &ft->ft_ops;
287 flow_mask_t mask;
288 uint32_t index;
289 int err;
290
291 ASSERT(MAC_PERIM_HELD((mac_handle_t)ft->ft_mip));
292
293 /*
294 * Check for invalid bits in mask.
295 */
296 mask = flent->fe_flow_desc.fd_mask;
297 if ((mask & ft->ft_mask) == 0 || (mask & ~ft->ft_mask) != 0)
298 return (EOPNOTSUPP);
299
300 /*
301 * Validate flent.
302 */
303 if ((err = ops->fo_accept_fe(ft, flent)) != 0) {
304 DTRACE_PROBE3(accept_failed, flow_tab_t *, ft,
305 flow_entry_t *, flent, int, err);
306 return (err);
307 }
308
309 /*
310 * Flent is valid. now calculate hash and insert it
311 * into hash table.
312 */
313 index = ops->fo_hash_fe(ft, flent);
314
315 /*
316 * We do not need a lock up until now because we were
317 * not accessing the flow table.
318 */
319 rw_enter(&ft->ft_lock, RW_WRITER);
320 headp = &ft->ft_table[index];
321
322 /*
323 * Check for duplicate flow.
324 */
325 for (p = headp; *p != NULL; p = &(*p)->fe_next) {
326 if ((*p)->fe_flow_desc.fd_mask !=
327 flent->fe_flow_desc.fd_mask)
328 continue;
329
330 if (ft->ft_ops.fo_match_fe(ft, *p, flent)) {
331 rw_exit(&ft->ft_lock);
332 DTRACE_PROBE3(dup_flow, flow_tab_t *, ft,
333 flow_entry_t *, flent, int, err);
334 return (EALREADY);
335 }
336 }
337
338 /*
339 * Insert flow to hash list.
340 */
341 err = ops->fo_insert_fe(ft, headp, flent);
342 if (err != 0) {
343 rw_exit(&ft->ft_lock);
344 DTRACE_PROBE3(insert_failed, flow_tab_t *, ft,
345 flow_entry_t *, flent, int, err);
346 return (err);
347 }
348
349 /*
350 * Save the hash index so it can be used by mac_flow_remove().
351 */
352 flent->fe_index = (int)index;
353
354 /*
355 * Save the flow tab back reference.
356 */
357 flent->fe_flow_tab = ft;
358 FLOW_MARK(flent, FE_FLOW_TAB);
359 ft->ft_flow_count++;
360 rw_exit(&ft->ft_lock);
361 return (0);
362 }
363
364 /*
365 * Remove a flow from a mac client's subflow table
366 */
367 void
mac_flow_rem_subflow(flow_entry_t * flent)368 mac_flow_rem_subflow(flow_entry_t *flent)
369 {
370 flow_tab_t *ft = flent->fe_flow_tab;
371 mac_client_impl_t *mcip = ft->ft_mcip;
372 mac_handle_t mh = (mac_handle_t)ft->ft_mip;
373
374 ASSERT(MAC_PERIM_HELD(mh));
375
376 mac_flow_remove(ft, flent, B_FALSE);
377 if (flent->fe_mcip == NULL) {
378 /*
379 * The interface is not yet plumbed and mac_client_flow_add
380 * was not done.
381 */
382 if (FLOW_TAB_EMPTY(ft)) {
383 mac_flow_tab_destroy(ft);
384 mcip->mci_subflow_tab = NULL;
385 }
386 } else {
387 mac_flow_wait(flent, FLOW_DRIVER_UPCALL);
388 mac_link_flow_clean((mac_client_handle_t)mcip, flent);
389 }
390 mac_fastpath_enable(mh);
391 }
392
393 /*
394 * Add a flow to a mac client's subflow table and instantiate the flow
395 * in the mac by creating the associated SRSs etc.
396 */
397 int
mac_flow_add_subflow(mac_client_handle_t mch,flow_entry_t * flent,boolean_t instantiate_flow)398 mac_flow_add_subflow(mac_client_handle_t mch, flow_entry_t *flent,
399 boolean_t instantiate_flow)
400 {
401 mac_client_impl_t *mcip = (mac_client_impl_t *)mch;
402 mac_handle_t mh = (mac_handle_t)mcip->mci_mip;
403 flow_tab_info_t *ftinfo;
404 flow_mask_t mask;
405 flow_tab_t *ft;
406 int err;
407 boolean_t ft_created = B_FALSE;
408
409 ASSERT(MAC_PERIM_HELD(mh));
410
411 if ((err = mac_fastpath_disable(mh)) != 0)
412 return (err);
413
414 /*
415 * If the subflow table exists already just add the new subflow
416 * to the existing table, else we create a new subflow table below.
417 */
418 ft = mcip->mci_subflow_tab;
419 if (ft == NULL) {
420 mask = flent->fe_flow_desc.fd_mask;
421 /*
422 * Try to create a new table and then add the subflow to the
423 * newly created subflow table
424 */
425 if ((ftinfo = mac_flow_tab_info_get(mask)) == NULL) {
426 mac_fastpath_enable(mh);
427 return (EOPNOTSUPP);
428 }
429
430 mac_flow_tab_create(ftinfo->fti_ops, mask, ftinfo->fti_size,
431 mcip->mci_mip, &ft);
432 ft_created = B_TRUE;
433 }
434
435 err = mac_flow_add(ft, flent);
436 if (err != 0) {
437 if (ft_created)
438 mac_flow_tab_destroy(ft);
439 mac_fastpath_enable(mh);
440 return (err);
441 }
442
443 if (instantiate_flow) {
444 /* Now activate the flow by creating its SRSs */
445 ASSERT(MCIP_DATAPATH_SETUP(mcip));
446 err = mac_link_flow_init((mac_client_handle_t)mcip, flent);
447 if (err != 0) {
448 mac_flow_remove(ft, flent, B_FALSE);
449 if (ft_created)
450 mac_flow_tab_destroy(ft);
451 mac_fastpath_enable(mh);
452 return (err);
453 }
454 } else {
455 FLOW_MARK(flent, FE_UF_NO_DATAPATH);
456 }
457 if (ft_created) {
458 ASSERT(mcip->mci_subflow_tab == NULL);
459 ft->ft_mcip = mcip;
460 mcip->mci_subflow_tab = ft;
461 if (instantiate_flow)
462 mac_client_update_classifier(mcip, B_TRUE);
463 }
464 return (0);
465 }
466
467 /*
468 * Remove flow entry from flow table.
469 */
470 void
mac_flow_remove(flow_tab_t * ft,flow_entry_t * flent,boolean_t temp)471 mac_flow_remove(flow_tab_t *ft, flow_entry_t *flent, boolean_t temp)
472 {
473 flow_entry_t **fp;
474
475 ASSERT(MAC_PERIM_HELD((mac_handle_t)ft->ft_mip));
476 if (!(flent->fe_flags & FE_FLOW_TAB))
477 return;
478
479 rw_enter(&ft->ft_lock, RW_WRITER);
480 /*
481 * If this is a permanent removal from the flow table, mark it
482 * CONDEMNED to prevent future references. If this is a temporary
483 * removal from the table, say to update the flow descriptor then
484 * we don't mark it CONDEMNED
485 */
486 if (!temp)
487 FLOW_MARK(flent, FE_CONDEMNED);
488 /*
489 * Locate the specified flent.
490 */
491 fp = &ft->ft_table[flent->fe_index];
492 while (*fp != flent)
493 fp = &(*fp)->fe_next;
494
495 /*
496 * The flent must exist. Otherwise it's a bug.
497 */
498 ASSERT(fp != NULL);
499 *fp = flent->fe_next;
500 flent->fe_next = NULL;
501
502 /*
503 * Reset fe_index to -1 so any attempt to call mac_flow_remove()
504 * on a flent that is supposed to be in the table (FE_FLOW_TAB)
505 * will panic.
506 */
507 flent->fe_index = -1;
508 FLOW_UNMARK(flent, FE_FLOW_TAB);
509 ft->ft_flow_count--;
510 rw_exit(&ft->ft_lock);
511 }
512
513 /*
514 * This is the flow lookup routine used by the mac sw classifier engine.
515 */
516 int
mac_flow_lookup(flow_tab_t * ft,mblk_t * mp,uint_t flags,flow_entry_t ** flentp)517 mac_flow_lookup(flow_tab_t *ft, mblk_t *mp, uint_t flags, flow_entry_t **flentp)
518 {
519 flow_state_t s;
520 flow_entry_t *flent;
521 flow_ops_t *ops = &ft->ft_ops;
522 boolean_t retried = B_FALSE;
523 int i, err;
524
525 s.fs_flags = flags;
526 retry:
527 s.fs_mp = mp;
528
529 /*
530 * Walk the list of predeclared accept functions.
531 * Each of these would accumulate enough state to allow the next
532 * accept routine to make progress.
533 */
534 for (i = 0; i < FLOW_MAX_ACCEPT && ops->fo_accept[i] != NULL; i++) {
535 if ((err = (ops->fo_accept[i])(ft, &s)) != 0) {
536 mblk_t *last;
537
538 /*
539 * ENOBUFS indicates that the mp could be too short
540 * and may need a pullup.
541 */
542 if (err != ENOBUFS || retried)
543 return (err);
544
545 /*
546 * The pullup is done on the last processed mblk, not
547 * the starting one. pullup is not done if the mblk
548 * has references or if b_cont is NULL.
549 */
550 last = s.fs_mp;
551 if (DB_REF(last) > 1 || last->b_cont == NULL ||
552 pullupmsg(last, -1) == 0)
553 return (EINVAL);
554
555 retried = B_TRUE;
556 DTRACE_PROBE2(need_pullup, flow_tab_t *, ft,
557 flow_state_t *, &s);
558 goto retry;
559 }
560 }
561
562 /*
563 * The packet is considered sane. We may now attempt to
564 * find the corresponding flent.
565 */
566 rw_enter(&ft->ft_lock, RW_READER);
567 flent = ft->ft_table[ops->fo_hash(ft, &s)];
568 for (; flent != NULL; flent = flent->fe_next) {
569 if (flent->fe_match(ft, flent, &s)) {
570 FLOW_TRY_REFHOLD(flent, err);
571 if (err != 0)
572 continue;
573 *flentp = flent;
574 rw_exit(&ft->ft_lock);
575 return (0);
576 }
577 }
578 rw_exit(&ft->ft_lock);
579 return (ENOENT);
580 }
581
582 /*
583 * Walk flow table.
584 * The caller is assumed to have proper perimeter protection.
585 */
586 int
mac_flow_walk_nolock(flow_tab_t * ft,int (* fn)(flow_entry_t *,void *),void * arg)587 mac_flow_walk_nolock(flow_tab_t *ft, int (*fn)(flow_entry_t *, void *),
588 void *arg)
589 {
590 int err, i, cnt = 0;
591 flow_entry_t *flent;
592
593 if (ft == NULL)
594 return (0);
595
596 for (i = 0; i < ft->ft_size; i++) {
597 for (flent = ft->ft_table[i]; flent != NULL;
598 flent = flent->fe_next) {
599 cnt++;
600 err = (*fn)(flent, arg);
601 if (err != 0)
602 return (err);
603 }
604 }
605 VERIFY(cnt == ft->ft_flow_count);
606 return (0);
607 }
608
609 /*
610 * Same as the above except a mutex is used for protection here.
611 */
612 int
mac_flow_walk(flow_tab_t * ft,int (* fn)(flow_entry_t *,void *),void * arg)613 mac_flow_walk(flow_tab_t *ft, int (*fn)(flow_entry_t *, void *),
614 void *arg)
615 {
616 int err;
617
618 if (ft == NULL)
619 return (0);
620
621 rw_enter(&ft->ft_lock, RW_WRITER);
622 err = mac_flow_walk_nolock(ft, fn, arg);
623 rw_exit(&ft->ft_lock);
624 return (err);
625 }
626
627 static boolean_t mac_flow_clean(flow_entry_t *);
628
629 /*
630 * Destroy a flow entry. Called when the last reference on a flow is released.
631 */
632 void
mac_flow_destroy(flow_entry_t * flent)633 mac_flow_destroy(flow_entry_t *flent)
634 {
635 ASSERT(flent->fe_refcnt == 0);
636
637 if ((flent->fe_type & FLOW_USER) != 0) {
638 ASSERT(mac_flow_clean(flent));
639 } else {
640 mac_flow_cleanup(flent);
641 }
642 mac_misc_stat_delete(flent);
643 mutex_destroy(&flent->fe_lock);
644 cv_destroy(&flent->fe_cv);
645 flow_stat_destroy(flent);
646 kmem_cache_free(flow_cache, flent);
647 }
648
649 /*
650 * XXX eric
651 * The MAC_FLOW_PRIORITY checks in mac_resource_ctl_set() and
652 * mac_link_flow_modify() should really be moved/reworked into the
653 * two functions below. This would consolidate all the mac property
654 * checking in one place. I'm leaving this alone for now since it's
655 * out of scope of the new flows work.
656 */
657 /* ARGSUSED */
658 uint32_t
mac_flow_modify_props(flow_entry_t * flent,mac_resource_props_t * mrp)659 mac_flow_modify_props(flow_entry_t *flent, mac_resource_props_t *mrp)
660 {
661 uint32_t changed_mask = 0;
662 mac_resource_props_t *fmrp = &flent->fe_effective_props;
663 int i;
664
665 if ((mrp->mrp_mask & MRP_MAXBW) != 0 &&
666 (!(fmrp->mrp_mask & MRP_MAXBW) ||
667 (fmrp->mrp_maxbw != mrp->mrp_maxbw))) {
668 changed_mask |= MRP_MAXBW;
669 if (mrp->mrp_maxbw == MRP_MAXBW_RESETVAL) {
670 fmrp->mrp_mask &= ~MRP_MAXBW;
671 fmrp->mrp_maxbw = 0;
672 } else {
673 fmrp->mrp_mask |= MRP_MAXBW;
674 fmrp->mrp_maxbw = mrp->mrp_maxbw;
675 }
676 }
677
678 if ((mrp->mrp_mask & MRP_PRIORITY) != 0) {
679 if (fmrp->mrp_priority != mrp->mrp_priority)
680 changed_mask |= MRP_PRIORITY;
681 if (mrp->mrp_priority == MPL_RESET) {
682 fmrp->mrp_priority = MPL_SUBFLOW_DEFAULT;
683 fmrp->mrp_mask &= ~MRP_PRIORITY;
684 } else {
685 fmrp->mrp_priority = mrp->mrp_priority;
686 fmrp->mrp_mask |= MRP_PRIORITY;
687 }
688 }
689
690 /* modify fanout */
691 if ((mrp->mrp_mask & MRP_CPUS) != 0) {
692 if ((fmrp->mrp_ncpus == mrp->mrp_ncpus) &&
693 (fmrp->mrp_fanout_mode == mrp->mrp_fanout_mode)) {
694 for (i = 0; i < mrp->mrp_ncpus; i++) {
695 if (mrp->mrp_cpu[i] != fmrp->mrp_cpu[i])
696 break;
697 }
698 if (i == mrp->mrp_ncpus) {
699 /*
700 * The new set of cpus passed is exactly
701 * the same as the existing set.
702 */
703 return (changed_mask);
704 }
705 }
706 changed_mask |= MRP_CPUS;
707 MAC_COPY_CPUS(mrp, fmrp);
708 }
709
710 /*
711 * Modify the rings property.
712 */
713 if (mrp->mrp_mask & MRP_RX_RINGS || mrp->mrp_mask & MRP_TX_RINGS)
714 mac_set_rings_effective(flent->fe_mcip);
715
716 if ((mrp->mrp_mask & MRP_POOL) != 0) {
717 if (strcmp(fmrp->mrp_pool, mrp->mrp_pool) != 0)
718 changed_mask |= MRP_POOL;
719 if (strlen(mrp->mrp_pool) == 0)
720 fmrp->mrp_mask &= ~MRP_POOL;
721 else
722 fmrp->mrp_mask |= MRP_POOL;
723 (void) strncpy(fmrp->mrp_pool, mrp->mrp_pool, MAXPATHLEN);
724 }
725 return (changed_mask);
726 }
727
728 void
mac_flow_modify(flow_tab_t * ft,flow_entry_t * flent,mac_resource_props_t * mrp)729 mac_flow_modify(flow_tab_t *ft, flow_entry_t *flent, mac_resource_props_t *mrp)
730 {
731 uint32_t changed_mask;
732 mac_client_impl_t *mcip = flent->fe_mcip;
733 mac_resource_props_t *mcip_mrp = MCIP_RESOURCE_PROPS(mcip);
734 mac_resource_props_t *emrp = MCIP_EFFECTIVE_PROPS(mcip);
735 cpupart_t *cpupart = NULL;
736 boolean_t use_default = B_FALSE;
737
738 ASSERT(flent != NULL);
739 ASSERT(MAC_PERIM_HELD((mac_handle_t)ft->ft_mip));
740
741 rw_enter(&ft->ft_lock, RW_WRITER);
742
743 /* Update the cached values inside the subflow entry */
744 changed_mask = mac_flow_modify_props(flent, mrp);
745 rw_exit(&ft->ft_lock);
746 /*
747 * Push the changed parameters to the scheduling code in the
748 * SRS's, to take effect right away.
749 */
750 if (changed_mask & MRP_MAXBW) {
751 mac_srs_update_bwlimit(flent, mrp);
752 /*
753 * If bandwidth is changed, we may have to change
754 * the number of soft ring to be used for fanout.
755 * Call mac_flow_update_fanout() if MAC_BIND_CPU
756 * is not set and there is no user supplied cpu
757 * info. This applies only to link at this time.
758 */
759 if (!(flent->fe_type & FLOW_USER) &&
760 !(changed_mask & MRP_CPUS) &&
761 !(mcip_mrp->mrp_mask & MRP_CPUS_USERSPEC)) {
762 mac_fanout_setup(mcip, flent, mcip_mrp,
763 mac_rx_deliver, mcip, NULL, NULL);
764 }
765 }
766 if (mrp->mrp_mask & MRP_PRIORITY)
767 mac_flow_update_priority(mcip, flent);
768
769 if (changed_mask & MRP_CPUS)
770 mac_fanout_setup(mcip, flent, mrp, mac_rx_deliver, mcip, NULL,
771 NULL);
772
773 if (mrp->mrp_mask & MRP_POOL) {
774 pool_lock();
775 cpupart = mac_pset_find(mrp, &use_default);
776 mac_fanout_setup(mcip, flent, mrp, mac_rx_deliver, mcip, NULL,
777 cpupart);
778 mac_set_pool_effective(use_default, cpupart, mrp, emrp);
779 pool_unlock();
780 }
781 }
782
783 /*
784 * This function waits for a certain condition to be met and is generally
785 * used before a destructive or quiescing operation.
786 */
787 void
mac_flow_wait(flow_entry_t * flent,mac_flow_state_t event)788 mac_flow_wait(flow_entry_t *flent, mac_flow_state_t event)
789 {
790 mutex_enter(&flent->fe_lock);
791 flent->fe_flags |= FE_WAITER;
792
793 switch (event) {
794 case FLOW_DRIVER_UPCALL:
795 /*
796 * We want to make sure the driver upcalls have finished before
797 * we signal the Rx SRS worker to quit.
798 */
799 while (flent->fe_refcnt != 1)
800 cv_wait(&flent->fe_cv, &flent->fe_lock);
801 break;
802
803 case FLOW_USER_REF:
804 /*
805 * Wait for the fe_user_refcnt to drop to 0. The flow has
806 * been removed from the global flow hash.
807 */
808 ASSERT(!(flent->fe_flags & FE_G_FLOW_HASH));
809 while (flent->fe_user_refcnt != 0)
810 cv_wait(&flent->fe_cv, &flent->fe_lock);
811 break;
812
813 default:
814 ASSERT(0);
815 }
816
817 flent->fe_flags &= ~FE_WAITER;
818 mutex_exit(&flent->fe_lock);
819 }
820
821 static boolean_t
mac_flow_clean(flow_entry_t * flent)822 mac_flow_clean(flow_entry_t *flent)
823 {
824 ASSERT(flent->fe_next == NULL);
825 ASSERT(flent->fe_tx_srs == NULL);
826 ASSERT(flent->fe_rx_srs_cnt == 0 && flent->fe_rx_srs[0] == NULL);
827 ASSERT(flent->fe_mbg == NULL);
828
829 return (B_TRUE);
830 }
831
832 void
mac_flow_cleanup(flow_entry_t * flent)833 mac_flow_cleanup(flow_entry_t *flent)
834 {
835 if ((flent->fe_type & FLOW_USER) == 0) {
836 ASSERT((flent->fe_mbg == NULL && flent->fe_mcip != NULL) ||
837 (flent->fe_mbg != NULL && flent->fe_mcip == NULL));
838 ASSERT(flent->fe_refcnt == 0);
839 } else {
840 ASSERT(flent->fe_refcnt == 1);
841 }
842
843 if (flent->fe_mbg != NULL) {
844 ASSERT(flent->fe_tx_srs == NULL);
845 /* This is a multicast or broadcast flow entry */
846 mac_bcast_grp_free(flent->fe_mbg);
847 flent->fe_mbg = NULL;
848 }
849
850 if (flent->fe_tx_srs != NULL) {
851 ASSERT(flent->fe_mbg == NULL);
852 mac_srs_free(flent->fe_tx_srs);
853 flent->fe_tx_srs = NULL;
854 }
855
856 /*
857 * In the normal case fe_rx_srs_cnt is 1. However in the error case
858 * when mac_unicast_add fails we may not have set up any SRS
859 * in which case fe_rx_srs_cnt will be zero.
860 */
861 if (flent->fe_rx_srs_cnt != 0) {
862 ASSERT(flent->fe_rx_srs_cnt == 1);
863 mac_srs_free(flent->fe_rx_srs[0]);
864 flent->fe_rx_srs[0] = NULL;
865 flent->fe_rx_srs_cnt = 0;
866 }
867 ASSERT(flent->fe_rx_srs[0] == NULL);
868 }
869
870 void
mac_flow_get_desc(flow_entry_t * flent,flow_desc_t * fd)871 mac_flow_get_desc(flow_entry_t *flent, flow_desc_t *fd)
872 {
873 /*
874 * Grab the fe_lock to see a self-consistent fe_flow_desc.
875 * Updates to the fe_flow_desc happen under the fe_lock
876 * after removing the flent from the flow table
877 */
878 mutex_enter(&flent->fe_lock);
879 bcopy(&flent->fe_flow_desc, fd, sizeof (*fd));
880 mutex_exit(&flent->fe_lock);
881 }
882
883 /*
884 * Update a field of a flow entry. The mac perimeter ensures that
885 * this is the only thread doing a modify operation on this mac end point.
886 * So the flow table can't change or disappear. The ft_lock protects access
887 * to the flow entry, and holding the lock ensures that there isn't any thread
888 * accessing the flow entry or attempting a flow table lookup. However
889 * data threads that are using the flow entry based on the old descriptor
890 * will continue to use the flow entry. If strong coherence is required
891 * then the flow will have to be quiesced before the descriptor can be
892 * changed.
893 */
894 void
mac_flow_set_desc(flow_entry_t * flent,flow_desc_t * fd)895 mac_flow_set_desc(flow_entry_t *flent, flow_desc_t *fd)
896 {
897 flow_tab_t *ft = flent->fe_flow_tab;
898 flow_desc_t old_desc;
899 int err;
900
901 if (ft == NULL) {
902 /*
903 * The flow hasn't yet been inserted into the table,
904 * so only the caller knows about this flow, however for
905 * uniformity we grab the fe_lock here.
906 */
907 mutex_enter(&flent->fe_lock);
908 bcopy(fd, &flent->fe_flow_desc, sizeof (*fd));
909 mutex_exit(&flent->fe_lock);
910 }
911
912 ASSERT(MAC_PERIM_HELD((mac_handle_t)ft->ft_mip));
913
914 /*
915 * Need to remove the flow entry from the table and reinsert it,
916 * into a potentially diference hash line. The hash depends on
917 * the new descriptor fields. However access to fe_desc itself
918 * is always under the fe_lock. This helps log and stat functions
919 * see a self-consistent fe_flow_desc.
920 */
921 mac_flow_remove(ft, flent, B_TRUE);
922 old_desc = flent->fe_flow_desc;
923
924 mutex_enter(&flent->fe_lock);
925 bcopy(fd, &flent->fe_flow_desc, sizeof (*fd));
926 mutex_exit(&flent->fe_lock);
927
928 if (mac_flow_add(ft, flent) != 0) {
929 /*
930 * The add failed say due to an invalid flow descriptor.
931 * Undo the update
932 */
933 flent->fe_flow_desc = old_desc;
934 err = mac_flow_add(ft, flent);
935 ASSERT(err == 0);
936 }
937 }
938
939 void
mac_flow_set_name(flow_entry_t * flent,const char * name)940 mac_flow_set_name(flow_entry_t *flent, const char *name)
941 {
942 flow_tab_t *ft = flent->fe_flow_tab;
943
944 if (ft == NULL) {
945 /*
946 * The flow hasn't yet been inserted into the table,
947 * so only the caller knows about this flow
948 */
949 (void) strlcpy(flent->fe_flow_name, name, MAXFLOWNAMELEN);
950 } else {
951 ASSERT(MAC_PERIM_HELD((mac_handle_t)ft->ft_mip));
952 }
953
954 mutex_enter(&flent->fe_lock);
955 (void) strlcpy(flent->fe_flow_name, name, MAXFLOWNAMELEN);
956 mutex_exit(&flent->fe_lock);
957 }
958
959 /*
960 * Return the client-private cookie that was associated with
961 * the flow when it was created.
962 */
963 void *
mac_flow_get_client_cookie(flow_entry_t * flent)964 mac_flow_get_client_cookie(flow_entry_t *flent)
965 {
966 return (flent->fe_client_cookie);
967 }
968
969 /*
970 * Forward declarations.
971 */
972 static uint32_t flow_l2_hash(flow_tab_t *, flow_state_t *);
973 static uint32_t flow_l2_hash_fe(flow_tab_t *, flow_entry_t *);
974 static int flow_l2_accept(flow_tab_t *, flow_state_t *);
975 static uint32_t flow_ether_hash(flow_tab_t *, flow_state_t *);
976 static uint32_t flow_ether_hash_fe(flow_tab_t *, flow_entry_t *);
977 static int flow_ether_accept(flow_tab_t *, flow_state_t *);
978
979 /*
980 * Create flow table.
981 */
982 void
mac_flow_tab_create(flow_ops_t * ops,flow_mask_t mask,uint_t size,mac_impl_t * mip,flow_tab_t ** ftp)983 mac_flow_tab_create(flow_ops_t *ops, flow_mask_t mask, uint_t size,
984 mac_impl_t *mip, flow_tab_t **ftp)
985 {
986 flow_tab_t *ft;
987 flow_ops_t *new_ops;
988
989 ft = kmem_cache_alloc(flow_tab_cache, KM_SLEEP);
990 bzero(ft, sizeof (*ft));
991
992 ft->ft_table = kmem_zalloc(size * sizeof (flow_entry_t *), KM_SLEEP);
993
994 /*
995 * We make a copy of the ops vector instead of just pointing to it
996 * because we might want to customize the ops vector on a per table
997 * basis (e.g. for optimization).
998 */
999 new_ops = &ft->ft_ops;
1000 bcopy(ops, new_ops, sizeof (*ops));
1001 ft->ft_mask = mask;
1002 ft->ft_size = size;
1003 ft->ft_mip = mip;
1004
1005 /*
1006 * Optimizations for DL_ETHER media.
1007 */
1008 if (mip->mi_info.mi_nativemedia == DL_ETHER) {
1009 if (new_ops->fo_hash == flow_l2_hash)
1010 new_ops->fo_hash = flow_ether_hash;
1011 if (new_ops->fo_hash_fe == flow_l2_hash_fe)
1012 new_ops->fo_hash_fe = flow_ether_hash_fe;
1013 if (new_ops->fo_accept[0] == flow_l2_accept)
1014 new_ops->fo_accept[0] = flow_ether_accept;
1015 }
1016 *ftp = ft;
1017 }
1018
1019 void
mac_flow_l2tab_create(mac_impl_t * mip,flow_tab_t ** ftp)1020 mac_flow_l2tab_create(mac_impl_t *mip, flow_tab_t **ftp)
1021 {
1022 mac_flow_tab_create(&flow_l2_ops, FLOW_LINK_DST | FLOW_LINK_VID,
1023 1024, mip, ftp);
1024 }
1025
1026 /*
1027 * Destroy flow table.
1028 */
1029 void
mac_flow_tab_destroy(flow_tab_t * ft)1030 mac_flow_tab_destroy(flow_tab_t *ft)
1031 {
1032 if (ft == NULL)
1033 return;
1034
1035 ASSERT(ft->ft_flow_count == 0);
1036 kmem_free(ft->ft_table, ft->ft_size * sizeof (flow_entry_t *));
1037 bzero(ft, sizeof (*ft));
1038 kmem_cache_free(flow_tab_cache, ft);
1039 }
1040
1041 /*
1042 * Add a new flow entry to the global flow hash table
1043 */
1044 int
mac_flow_hash_add(flow_entry_t * flent)1045 mac_flow_hash_add(flow_entry_t *flent)
1046 {
1047 int err;
1048
1049 rw_enter(&flow_tab_lock, RW_WRITER);
1050 err = mod_hash_insert(flow_hash,
1051 (mod_hash_key_t)flent->fe_flow_name, (mod_hash_val_t)flent);
1052 if (err != 0) {
1053 rw_exit(&flow_tab_lock);
1054 return (EEXIST);
1055 }
1056 /* Mark as inserted into the global flow hash table */
1057 FLOW_MARK(flent, FE_G_FLOW_HASH);
1058 rw_exit(&flow_tab_lock);
1059 return (err);
1060 }
1061
1062 /*
1063 * Remove a flow entry from the global flow hash table
1064 */
1065 void
mac_flow_hash_remove(flow_entry_t * flent)1066 mac_flow_hash_remove(flow_entry_t *flent)
1067 {
1068 mod_hash_val_t val;
1069
1070 rw_enter(&flow_tab_lock, RW_WRITER);
1071 VERIFY(mod_hash_remove(flow_hash,
1072 (mod_hash_key_t)flent->fe_flow_name, &val) == 0);
1073
1074 /* Clear the mark that says inserted into the global flow hash table */
1075 FLOW_UNMARK(flent, FE_G_FLOW_HASH);
1076 rw_exit(&flow_tab_lock);
1077 }
1078
1079 /*
1080 * Retrieve a flow entry from the global flow hash table.
1081 */
1082 int
mac_flow_lookup_byname(char * name,flow_entry_t ** flentp)1083 mac_flow_lookup_byname(char *name, flow_entry_t **flentp)
1084 {
1085 int err;
1086 flow_entry_t *flent;
1087
1088 rw_enter(&flow_tab_lock, RW_READER);
1089 err = mod_hash_find(flow_hash, (mod_hash_key_t)name,
1090 (mod_hash_val_t *)&flent);
1091 if (err != 0) {
1092 rw_exit(&flow_tab_lock);
1093 return (ENOENT);
1094 }
1095 ASSERT(flent != NULL);
1096 FLOW_USER_REFHOLD(flent);
1097 rw_exit(&flow_tab_lock);
1098
1099 *flentp = flent;
1100 return (0);
1101 }
1102
1103 /*
1104 * Initialize or release mac client flows by walking the subflow table.
1105 * These are typically invoked during plumb/unplumb of links.
1106 */
1107
1108 static int
mac_link_init_flows_cb(flow_entry_t * flent,void * arg)1109 mac_link_init_flows_cb(flow_entry_t *flent, void *arg)
1110 {
1111 mac_client_impl_t *mcip = arg;
1112
1113 if (mac_link_flow_init(arg, flent) != 0) {
1114 cmn_err(CE_WARN, "Failed to initialize flow '%s' on link '%s'",
1115 flent->fe_flow_name, mcip->mci_name);
1116 } else {
1117 FLOW_UNMARK(flent, FE_UF_NO_DATAPATH);
1118 }
1119 return (0);
1120 }
1121
1122 void
mac_link_init_flows(mac_client_handle_t mch)1123 mac_link_init_flows(mac_client_handle_t mch)
1124 {
1125 mac_client_impl_t *mcip = (mac_client_impl_t *)mch;
1126
1127 (void) mac_flow_walk_nolock(mcip->mci_subflow_tab,
1128 mac_link_init_flows_cb, mcip);
1129 /*
1130 * If mac client had subflow(s) configured before plumb, change
1131 * function to mac_rx_srs_subflow_process and in case of hardware
1132 * classification, disable polling.
1133 */
1134 mac_client_update_classifier(mcip, B_TRUE);
1135
1136 }
1137
1138 boolean_t
mac_link_has_flows(mac_client_handle_t mch)1139 mac_link_has_flows(mac_client_handle_t mch)
1140 {
1141 mac_client_impl_t *mcip = (mac_client_impl_t *)mch;
1142
1143 if (!FLOW_TAB_EMPTY(mcip->mci_subflow_tab))
1144 return (B_TRUE);
1145
1146 return (B_FALSE);
1147 }
1148
1149 static int
mac_link_release_flows_cb(flow_entry_t * flent,void * arg)1150 mac_link_release_flows_cb(flow_entry_t *flent, void *arg)
1151 {
1152 FLOW_MARK(flent, FE_UF_NO_DATAPATH);
1153 mac_flow_wait(flent, FLOW_DRIVER_UPCALL);
1154 mac_link_flow_clean(arg, flent);
1155 return (0);
1156 }
1157
1158 void
mac_link_release_flows(mac_client_handle_t mch)1159 mac_link_release_flows(mac_client_handle_t mch)
1160 {
1161 mac_client_impl_t *mcip = (mac_client_impl_t *)mch;
1162
1163 /*
1164 * Change the mci_flent callback back to mac_rx_srs_process()
1165 * because flows are about to be deactivated.
1166 */
1167 mac_client_update_classifier(mcip, B_FALSE);
1168 (void) mac_flow_walk_nolock(mcip->mci_subflow_tab,
1169 mac_link_release_flows_cb, mcip);
1170 }
1171
1172 void
mac_rename_flow(flow_entry_t * fep,const char * new_name)1173 mac_rename_flow(flow_entry_t *fep, const char *new_name)
1174 {
1175 mac_flow_set_name(fep, new_name);
1176 if (fep->fe_ksp != NULL) {
1177 flow_stat_destroy(fep);
1178 flow_stat_create(fep);
1179 }
1180 }
1181
1182 /*
1183 * mac_link_flow_init()
1184 * Internal flow interface used for allocating SRSs and related
1185 * data structures. Not meant to be used by mac clients.
1186 */
1187 int
mac_link_flow_init(mac_client_handle_t mch,flow_entry_t * sub_flow)1188 mac_link_flow_init(mac_client_handle_t mch, flow_entry_t *sub_flow)
1189 {
1190 mac_client_impl_t *mcip = (mac_client_impl_t *)mch;
1191 mac_impl_t *mip = mcip->mci_mip;
1192 int err;
1193
1194 ASSERT(mch != NULL);
1195 ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
1196
1197 if ((err = mac_datapath_setup(mcip, sub_flow, SRST_FLOW)) != 0)
1198 return (err);
1199
1200 sub_flow->fe_mcip = mcip;
1201
1202 return (0);
1203 }
1204
1205 /*
1206 * mac_link_flow_add()
1207 * Used by flowadm(8) or kernel mac clients for creating flows.
1208 */
1209 int
mac_link_flow_add(datalink_id_t linkid,char * flow_name,flow_desc_t * flow_desc,mac_resource_props_t * mrp)1210 mac_link_flow_add(datalink_id_t linkid, char *flow_name,
1211 flow_desc_t *flow_desc, mac_resource_props_t *mrp)
1212 {
1213 flow_entry_t *flent = NULL;
1214 int err;
1215 dls_dl_handle_t dlh;
1216 dls_link_t *dlp;
1217 boolean_t link_held = B_FALSE;
1218 boolean_t hash_added = B_FALSE;
1219 mac_perim_handle_t mph;
1220
1221 err = mac_flow_lookup_byname(flow_name, &flent);
1222 if (err == 0) {
1223 FLOW_USER_REFRELE(flent);
1224 return (EEXIST);
1225 }
1226
1227 /*
1228 * First create a flow entry given the description provided
1229 * by the caller.
1230 */
1231 err = mac_flow_create(flow_desc, mrp, flow_name, NULL,
1232 FLOW_USER | FLOW_OTHER, &flent);
1233
1234 if (err != 0)
1235 return (err);
1236
1237 /*
1238 * We've got a local variable referencing this flow now, so we need
1239 * to hold it. We'll release this flow before returning.
1240 * All failures until we return will undo any action that may internally
1241 * held the flow, so the last REFRELE will assure a clean freeing
1242 * of resources.
1243 */
1244 FLOW_REFHOLD(flent);
1245
1246 flent->fe_link_id = linkid;
1247 FLOW_MARK(flent, FE_INCIPIENT);
1248
1249 err = mac_perim_enter_by_linkid(linkid, &mph);
1250 if (err != 0) {
1251 FLOW_FINAL_REFRELE(flent);
1252 return (err);
1253 }
1254
1255 /*
1256 * dls will eventually be merged with mac so it's ok
1257 * to call dls' internal functions.
1258 */
1259 err = dls_devnet_hold_link(linkid, &dlh, &dlp);
1260 if (err != 0)
1261 goto bail;
1262
1263 link_held = B_TRUE;
1264
1265 /*
1266 * Add the flow to the global flow table, this table will be per
1267 * exclusive zone so each zone can have its own flow namespace.
1268 * RFE 6625651 will fix this.
1269 *
1270 */
1271 if ((err = mac_flow_hash_add(flent)) != 0)
1272 goto bail;
1273
1274 hash_added = B_TRUE;
1275
1276 /*
1277 * do not allow flows to be configured on an anchor VNIC
1278 */
1279 if (mac_capab_get(dlp->dl_mh, MAC_CAPAB_ANCHOR_VNIC, NULL)) {
1280 err = ENOTSUP;
1281 goto bail;
1282 }
1283
1284 /*
1285 * Add the subflow to the subflow table. Also instantiate the flow
1286 * in the mac if there is an active user (we check if the MAC client's
1287 * datapath has been setup).
1288 */
1289 err = mac_flow_add_subflow(dlp->dl_mch, flent,
1290 MCIP_DATAPATH_SETUP((mac_client_impl_t *)dlp->dl_mch));
1291 if (err != 0)
1292 goto bail;
1293
1294 FLOW_UNMARK(flent, FE_INCIPIENT);
1295 dls_devnet_rele_link(dlh, dlp);
1296 mac_perim_exit(mph);
1297 return (0);
1298
1299 bail:
1300 if (hash_added)
1301 mac_flow_hash_remove(flent);
1302
1303 if (link_held)
1304 dls_devnet_rele_link(dlh, dlp);
1305
1306 /*
1307 * Wait for any transient global flow hash refs to clear
1308 * and then release the creation reference on the flow
1309 */
1310 mac_flow_wait(flent, FLOW_USER_REF);
1311 FLOW_FINAL_REFRELE(flent);
1312 mac_perim_exit(mph);
1313 return (err);
1314 }
1315
1316 /*
1317 * mac_link_flow_clean()
1318 * Internal flow interface used for freeing SRSs and related
1319 * data structures. Not meant to be used by mac clients.
1320 */
1321 void
mac_link_flow_clean(mac_client_handle_t mch,flow_entry_t * sub_flow)1322 mac_link_flow_clean(mac_client_handle_t mch, flow_entry_t *sub_flow)
1323 {
1324 mac_client_impl_t *mcip = (mac_client_impl_t *)mch;
1325 mac_impl_t *mip = mcip->mci_mip;
1326 boolean_t last_subflow;
1327
1328 ASSERT(mch != NULL);
1329 ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
1330
1331 /*
1332 * This sub flow entry may fail to be fully initialized by
1333 * mac_link_flow_init(). If so, simply return.
1334 */
1335 if (sub_flow->fe_mcip == NULL)
1336 return;
1337
1338 last_subflow = FLOW_TAB_EMPTY(mcip->mci_subflow_tab);
1339 /*
1340 * Tear down the data path
1341 */
1342 mac_datapath_teardown(mcip, sub_flow, SRST_FLOW);
1343 sub_flow->fe_mcip = NULL;
1344
1345 /*
1346 * Delete the SRSs associated with this subflow. If this is being
1347 * driven by flowadm(8) then the subflow will be deleted by
1348 * dls_rem_flow. However if this is a result of the interface being
1349 * unplumbed then the subflow itself won't be deleted.
1350 */
1351 mac_flow_cleanup(sub_flow);
1352
1353 /*
1354 * If all the subflows are gone, renable some of the stuff
1355 * we disabled when adding a subflow, polling etc.
1356 */
1357 if (last_subflow) {
1358 /*
1359 * The subflow table itself is not protected by any locks or
1360 * refcnts. Hence quiesce the client upfront before clearing
1361 * mci_subflow_tab.
1362 */
1363 mac_client_quiesce(mcip);
1364 mac_client_update_classifier(mcip, B_FALSE);
1365 mac_flow_tab_destroy(mcip->mci_subflow_tab);
1366 mcip->mci_subflow_tab = NULL;
1367 mac_client_restart(mcip);
1368 }
1369 }
1370
1371 /*
1372 * mac_link_flow_remove()
1373 * Used by flowadm(8) or kernel mac clients for removing flows.
1374 */
1375 int
mac_link_flow_remove(char * flow_name)1376 mac_link_flow_remove(char *flow_name)
1377 {
1378 flow_entry_t *flent;
1379 mac_perim_handle_t mph;
1380 int err;
1381 datalink_id_t linkid;
1382
1383 err = mac_flow_lookup_byname(flow_name, &flent);
1384 if (err != 0)
1385 return (err);
1386
1387 linkid = flent->fe_link_id;
1388 FLOW_USER_REFRELE(flent);
1389
1390 /*
1391 * The perim must be acquired before acquiring any other references
1392 * to maintain the lock and perimeter hierarchy. Please note the
1393 * FLOW_REFRELE above.
1394 */
1395 err = mac_perim_enter_by_linkid(linkid, &mph);
1396 if (err != 0)
1397 return (err);
1398
1399 /*
1400 * Note the second lookup of the flow, because a concurrent thread
1401 * may have removed it already while we were waiting to enter the
1402 * link's perimeter.
1403 */
1404 err = mac_flow_lookup_byname(flow_name, &flent);
1405 if (err != 0) {
1406 mac_perim_exit(mph);
1407 return (err);
1408 }
1409 FLOW_USER_REFRELE(flent);
1410
1411 /*
1412 * Remove the flow from the subflow table and deactivate the flow
1413 * by quiescing and removings its SRSs
1414 */
1415 mac_flow_rem_subflow(flent);
1416
1417 /*
1418 * Finally, remove the flow from the global table.
1419 */
1420 mac_flow_hash_remove(flent);
1421
1422 /*
1423 * Wait for any transient global flow hash refs to clear
1424 * and then release the creation reference on the flow
1425 */
1426 mac_flow_wait(flent, FLOW_USER_REF);
1427 FLOW_FINAL_REFRELE(flent);
1428
1429 mac_perim_exit(mph);
1430
1431 return (0);
1432 }
1433
1434 /*
1435 * mac_link_flow_modify()
1436 * Modifies the properties of a flow identified by its name.
1437 */
1438 int
mac_link_flow_modify(char * flow_name,mac_resource_props_t * mrp)1439 mac_link_flow_modify(char *flow_name, mac_resource_props_t *mrp)
1440 {
1441 flow_entry_t *flent;
1442 mac_client_impl_t *mcip;
1443 int err = 0;
1444 mac_perim_handle_t mph;
1445 datalink_id_t linkid;
1446 flow_tab_t *flow_tab;
1447
1448 err = mac_validate_props(NULL, mrp);
1449 if (err != 0)
1450 return (err);
1451
1452 err = mac_flow_lookup_byname(flow_name, &flent);
1453 if (err != 0)
1454 return (err);
1455
1456 linkid = flent->fe_link_id;
1457 FLOW_USER_REFRELE(flent);
1458
1459 /*
1460 * The perim must be acquired before acquiring any other references
1461 * to maintain the lock and perimeter hierarchy. Please note the
1462 * FLOW_REFRELE above.
1463 */
1464 err = mac_perim_enter_by_linkid(linkid, &mph);
1465 if (err != 0)
1466 return (err);
1467
1468 /*
1469 * Note the second lookup of the flow, because a concurrent thread
1470 * may have removed it already while we were waiting to enter the
1471 * link's perimeter.
1472 */
1473 err = mac_flow_lookup_byname(flow_name, &flent);
1474 if (err != 0) {
1475 mac_perim_exit(mph);
1476 return (err);
1477 }
1478 FLOW_USER_REFRELE(flent);
1479
1480 /*
1481 * If this flow is attached to a MAC client, then pass the request
1482 * along to the client.
1483 * Otherwise, just update the cached values.
1484 */
1485 mcip = flent->fe_mcip;
1486 mac_update_resources(mrp, &flent->fe_resource_props, B_TRUE);
1487 if (mcip != NULL) {
1488 if ((flow_tab = mcip->mci_subflow_tab) == NULL) {
1489 err = ENOENT;
1490 } else {
1491 mac_flow_modify(flow_tab, flent, mrp);
1492 }
1493 } else {
1494 (void) mac_flow_modify_props(flent, mrp);
1495 }
1496
1497 done:
1498 mac_perim_exit(mph);
1499 return (err);
1500 }
1501
1502
/*
 * State structure and misc functions used by mac_link_flow_walk().
 */
typedef struct {
	/* Caller's callback; mac_link_flow_walk_cb() calls it per flow. */
	int (*ws_func)(mac_flowinfo_t *, void *);
	/* Passed to ws_func as its second argument. */
	void *ws_arg;
} flow_walk_state_t;
1510
1511 static void
mac_link_flowinfo_copy(mac_flowinfo_t * finfop,flow_entry_t * flent)1512 mac_link_flowinfo_copy(mac_flowinfo_t *finfop, flow_entry_t *flent)
1513 {
1514 (void) strlcpy(finfop->fi_flow_name, flent->fe_flow_name,
1515 MAXFLOWNAMELEN);
1516 finfop->fi_link_id = flent->fe_link_id;
1517 finfop->fi_flow_desc = flent->fe_flow_desc;
1518 finfop->fi_resource_props = flent->fe_resource_props;
1519 }
1520
1521 static int
mac_link_flow_walk_cb(flow_entry_t * flent,void * arg)1522 mac_link_flow_walk_cb(flow_entry_t *flent, void *arg)
1523 {
1524 flow_walk_state_t *statep = arg;
1525 mac_flowinfo_t *finfo;
1526 int err;
1527
1528 finfo = kmem_zalloc(sizeof (*finfo), KM_SLEEP);
1529 mac_link_flowinfo_copy(finfo, flent);
1530 err = statep->ws_func(finfo, statep->ws_arg);
1531 kmem_free(finfo, sizeof (*finfo));
1532 return (err);
1533 }
1534
1535 /*
1536 * mac_link_flow_walk()
1537 * Invokes callback 'func' for all flows belonging to the specified link.
1538 */
1539 int
mac_link_flow_walk(datalink_id_t linkid,int (* func)(mac_flowinfo_t *,void *),void * arg)1540 mac_link_flow_walk(datalink_id_t linkid,
1541 int (*func)(mac_flowinfo_t *, void *), void *arg)
1542 {
1543 mac_client_impl_t *mcip;
1544 mac_perim_handle_t mph;
1545 flow_walk_state_t state;
1546 dls_dl_handle_t dlh;
1547 dls_link_t *dlp;
1548 int err;
1549
1550 err = mac_perim_enter_by_linkid(linkid, &mph);
1551 if (err != 0)
1552 return (err);
1553
1554 err = dls_devnet_hold_link(linkid, &dlh, &dlp);
1555 if (err != 0) {
1556 mac_perim_exit(mph);
1557 return (err);
1558 }
1559
1560 mcip = (mac_client_impl_t *)dlp->dl_mch;
1561 state.ws_func = func;
1562 state.ws_arg = arg;
1563
1564 err = mac_flow_walk_nolock(mcip->mci_subflow_tab,
1565 mac_link_flow_walk_cb, &state);
1566
1567 dls_devnet_rele_link(dlh, dlp);
1568 mac_perim_exit(mph);
1569 return (err);
1570 }
1571
1572 /*
1573 * mac_link_flow_info()
1574 * Retrieves information about a specific flow.
1575 */
1576 int
mac_link_flow_info(char * flow_name,mac_flowinfo_t * finfo)1577 mac_link_flow_info(char *flow_name, mac_flowinfo_t *finfo)
1578 {
1579 flow_entry_t *flent;
1580 int err;
1581
1582 err = mac_flow_lookup_byname(flow_name, &flent);
1583 if (err != 0)
1584 return (err);
1585
1586 mac_link_flowinfo_copy(finfo, flent);
1587 FLOW_USER_REFRELE(flent);
1588 return (0);
1589 }
1590
/*
 * Hash function macro that takes an Ethernet address and VLAN id as input.
 * 'a' is indexed as an array: elements 3, 4 and 5 are summed, the sum is
 * XORed with 'v' and the result is reduced modulo 's'.
 * NOTE(review): nothing here guards against 's' being zero.
 */
#define	HASH_ETHER_VID(a, v, s)	\
	((((uint32_t)(a)[3] + (a)[4] + (a)[5]) ^ (v)) % (s))
1596
/*
 * Generic layer-2 address hash (the DJB hash): starting from 5381, each of
 * the 'addrlen' bytes at 'addr' is folded in as hash = hash * 33 + byte,
 * using 32-bit unsigned arithmetic. The result is reduced modulo 'htsize'.
 */
static uint32_t
flow_l2_addrhash(uint8_t *addr, size_t addrlen, size_t htsize)
{
	const uint8_t	*cur = addr;
	const uint8_t	*end = addr + addrlen;
	uint32_t	acc = 5381;

	while (cur != end) {
		acc = acc * 33 + *cur;
		cur++;
	}
	return (acc % htsize);
}
1611
/*
 * True when the write pointer (b_wptr) of the current mblk of flow state
 * 's' lies before 'end', i.e. the mblk's data stops short of 'end'.
 */
#define	PKT_TOO_SMALL(s, end) ((s)->fs_mp->b_wptr < (end))
1613
/*
 * If 'start' is exactly the write pointer of the current mblk of flow
 * state 's', move on to the next mblk of the b_cont chain and repoint
 * 'start' at that mblk's read pointer.
 *
 * NOTE: this macro contains a hidden 'return (EINVAL)', taken when there
 * is no next mblk, so it can only be used inside functions returning int.
 * The body is wrapped in do/while (0) so that the macro expands to a single
 * statement and stays correct inside an unbraced if/else.
 */
#define	CHECK_AND_ADJUST_START_PTR(s, start) do {		\
	if ((s)->fs_mp->b_wptr == (start)) {			\
		mblk_t	*next = (s)->fs_mp->b_cont;		\
		if (next == NULL)				\
			return (EINVAL);			\
								\
		(s)->fs_mp = next;				\
		(start) = next->b_rptr;				\
	}							\
} while (0)
1624
1625 /* ARGSUSED */
1626 static boolean_t
flow_l2_match(flow_tab_t * ft,flow_entry_t * flent,flow_state_t * s)1627 flow_l2_match(flow_tab_t *ft, flow_entry_t *flent, flow_state_t *s)
1628 {
1629 flow_l2info_t *l2 = &s->fs_l2info;
1630 flow_desc_t *fd = &flent->fe_flow_desc;
1631
1632 return (l2->l2_vid == fd->fd_vid &&
1633 bcmp(l2->l2_daddr, fd->fd_dst_mac, fd->fd_mac_len) == 0);
1634 }
1635
1636 /*
1637 * Layer 2 hash function.
1638 * Must be paired with flow_l2_accept() within a set of flow_ops
1639 * because it assumes the dest address is already extracted.
1640 */
1641 static uint32_t
flow_l2_hash(flow_tab_t * ft,flow_state_t * s)1642 flow_l2_hash(flow_tab_t *ft, flow_state_t *s)
1643 {
1644 return (flow_l2_addrhash(s->fs_l2info.l2_daddr,
1645 ft->ft_mip->mi_type->mt_addr_length, ft->ft_size));
1646 }
1647
1648 /*
1649 * This is the generic layer 2 accept function.
1650 * It makes use of mac_header_info() to extract the header length,
1651 * sap, vlan ID and destination address.
1652 */
1653 static int
flow_l2_accept(flow_tab_t * ft,flow_state_t * s)1654 flow_l2_accept(flow_tab_t *ft, flow_state_t *s)
1655 {
1656 boolean_t is_ether;
1657 flow_l2info_t *l2 = &s->fs_l2info;
1658 mac_header_info_t mhi;
1659 int err;
1660
1661 is_ether = (ft->ft_mip->mi_info.mi_nativemedia == DL_ETHER);
1662 if ((err = mac_header_info((mac_handle_t)ft->ft_mip,
1663 s->fs_mp, &mhi)) != 0) {
1664 if (err == EINVAL)
1665 err = ENOBUFS;
1666
1667 return (err);
1668 }
1669
1670 l2->l2_start = s->fs_mp->b_rptr;
1671 l2->l2_daddr = (uint8_t *)mhi.mhi_daddr;
1672
1673 if (is_ether && mhi.mhi_bindsap == ETHERTYPE_VLAN &&
1674 ((s->fs_flags & FLOW_IGNORE_VLAN) == 0)) {
1675 struct ether_vlan_header *evhp =
1676 (struct ether_vlan_header *)l2->l2_start;
1677
1678 if (PKT_TOO_SMALL(s, l2->l2_start + sizeof (*evhp)))
1679 return (ENOBUFS);
1680
1681 l2->l2_sap = ntohs(evhp->ether_type);
1682 l2->l2_vid = VLAN_ID(ntohs(evhp->ether_tci));
1683 l2->l2_hdrsize = sizeof (*evhp);
1684 } else {
1685 l2->l2_sap = mhi.mhi_bindsap;
1686 l2->l2_vid = 0;
1687 l2->l2_hdrsize = (uint32_t)mhi.mhi_hdrsize;
1688 }
1689 return (0);
1690 }
1691
1692 /*
1693 * flow_ether_hash()/accept() are optimized versions of flow_l2_hash()/
1694 * accept(). The notable difference is that dest address is now extracted
1695 * by hash() rather than by accept(). This saves a few memory references
1696 * for flow tables that do not care about mac addresses.
1697 */
1698 static uint32_t
flow_ether_hash(flow_tab_t * ft,flow_state_t * s)1699 flow_ether_hash(flow_tab_t *ft, flow_state_t *s)
1700 {
1701 flow_l2info_t *l2 = &s->fs_l2info;
1702 struct ether_vlan_header *evhp;
1703
1704 evhp = (struct ether_vlan_header *)l2->l2_start;
1705 l2->l2_daddr = evhp->ether_dhost.ether_addr_octet;
1706 return (HASH_ETHER_VID(l2->l2_daddr, l2->l2_vid, ft->ft_size));
1707 }
1708
1709 static uint32_t
flow_ether_hash_fe(flow_tab_t * ft,flow_entry_t * flent)1710 flow_ether_hash_fe(flow_tab_t *ft, flow_entry_t *flent)
1711 {
1712 flow_desc_t *fd = &flent->fe_flow_desc;
1713
1714 ASSERT((fd->fd_mask & FLOW_LINK_VID) != 0 || fd->fd_vid == 0);
1715 return (HASH_ETHER_VID(fd->fd_dst_mac, fd->fd_vid, ft->ft_size));
1716 }
1717
1718 /* ARGSUSED */
1719 static int
flow_ether_accept(flow_tab_t * ft,flow_state_t * s)1720 flow_ether_accept(flow_tab_t *ft, flow_state_t *s)
1721 {
1722 flow_l2info_t *l2 = &s->fs_l2info;
1723 struct ether_vlan_header *evhp;
1724 uint16_t sap;
1725
1726 evhp = (struct ether_vlan_header *)s->fs_mp->b_rptr;
1727 l2->l2_start = (uchar_t *)evhp;
1728
1729 if (PKT_TOO_SMALL(s, l2->l2_start + sizeof (struct ether_header)))
1730 return (ENOBUFS);
1731
1732 if ((sap = ntohs(evhp->ether_tpid)) == ETHERTYPE_VLAN &&
1733 ((s->fs_flags & FLOW_IGNORE_VLAN) == 0)) {
1734 if (PKT_TOO_SMALL(s, l2->l2_start + sizeof (*evhp)))
1735 return (ENOBUFS);
1736
1737 l2->l2_sap = ntohs(evhp->ether_type);
1738 l2->l2_vid = VLAN_ID(ntohs(evhp->ether_tci));
1739 l2->l2_hdrsize = sizeof (struct ether_vlan_header);
1740 } else {
1741 l2->l2_sap = sap;
1742 l2->l2_vid = 0;
1743 l2->l2_hdrsize = sizeof (struct ether_header);
1744 }
1745 return (0);
1746 }
1747
1748 /*
1749 * Validates a layer 2 flow entry.
1750 */
1751 static int
flow_l2_accept_fe(flow_tab_t * ft,flow_entry_t * flent)1752 flow_l2_accept_fe(flow_tab_t *ft, flow_entry_t *flent)
1753 {
1754 flow_desc_t *fd = &flent->fe_flow_desc;
1755
1756 /*
1757 * Dest address is mandatory, and 0 length addresses are not yet
1758 * supported.
1759 */
1760 if ((fd->fd_mask & FLOW_LINK_DST) == 0 || fd->fd_mac_len == 0)
1761 return (EINVAL);
1762
1763 if ((fd->fd_mask & FLOW_LINK_VID) != 0) {
1764 /*
1765 * VLAN flows are only supported over ethernet macs.
1766 */
1767 if (ft->ft_mip->mi_info.mi_nativemedia != DL_ETHER)
1768 return (EINVAL);
1769
1770 if (fd->fd_vid == 0)
1771 return (EINVAL);
1772
1773 }
1774 flent->fe_match = flow_l2_match;
1775 return (0);
1776 }
1777
1778 /*
1779 * Calculates hash index of flow entry.
1780 */
1781 static uint32_t
flow_l2_hash_fe(flow_tab_t * ft,flow_entry_t * flent)1782 flow_l2_hash_fe(flow_tab_t *ft, flow_entry_t *flent)
1783 {
1784 flow_desc_t *fd = &flent->fe_flow_desc;
1785
1786 ASSERT((fd->fd_mask & FLOW_LINK_VID) == 0 && fd->fd_vid == 0);
1787 return (flow_l2_addrhash(fd->fd_dst_mac,
1788 ft->ft_mip->mi_type->mt_addr_length, ft->ft_size));
1789 }
1790
1791 /*
1792 * This is used for duplicate flow checking.
1793 */
1794 /* ARGSUSED */
1795 static boolean_t
flow_l2_match_fe(flow_tab_t * ft,flow_entry_t * f1,flow_entry_t * f2)1796 flow_l2_match_fe(flow_tab_t *ft, flow_entry_t *f1, flow_entry_t *f2)
1797 {
1798 flow_desc_t *fd1 = &f1->fe_flow_desc, *fd2 = &f2->fe_flow_desc;
1799
1800 ASSERT(fd1->fd_mac_len == fd2->fd_mac_len && fd1->fd_mac_len != 0);
1801 return (bcmp(&fd1->fd_dst_mac, &fd2->fd_dst_mac,
1802 fd1->fd_mac_len) == 0 && fd1->fd_vid == fd2->fd_vid);
1803 }
1804
1805 /*
1806 * Generic flow entry insertion function.
1807 * Used by flow tables that do not have ordering requirements.
1808 */
1809 /* ARGSUSED */
1810 static int
flow_generic_insert_fe(flow_tab_t * ft,flow_entry_t ** headp,flow_entry_t * flent)1811 flow_generic_insert_fe(flow_tab_t *ft, flow_entry_t **headp,
1812 flow_entry_t *flent)
1813 {
1814 ASSERT(MAC_PERIM_HELD((mac_handle_t)ft->ft_mip));
1815
1816 if (*headp != NULL) {
1817 ASSERT(flent->fe_next == NULL);
1818 flent->fe_next = *headp;
1819 }
1820 *headp = flent;
1821 return (0);
1822 }
1823
1824 /*
1825 * IP version independent DSField matching function.
1826 */
1827 /* ARGSUSED */
1828 static boolean_t
flow_ip_dsfield_match(flow_tab_t * ft,flow_entry_t * flent,flow_state_t * s)1829 flow_ip_dsfield_match(flow_tab_t *ft, flow_entry_t *flent, flow_state_t *s)
1830 {
1831 flow_l3info_t *l3info = &s->fs_l3info;
1832 flow_desc_t *fd = &flent->fe_flow_desc;
1833
1834 switch (l3info->l3_version) {
1835 case IPV4_VERSION: {
1836 ipha_t *ipha = (ipha_t *)l3info->l3_start;
1837
1838 return ((ipha->ipha_type_of_service &
1839 fd->fd_dsfield_mask) == fd->fd_dsfield);
1840 }
1841 case IPV6_VERSION: {
1842 ip6_t *ip6h = (ip6_t *)l3info->l3_start;
1843
1844 return ((IPV6_FLOW_TCLASS(ip6h->ip6_vcf) &
1845 fd->fd_dsfield_mask) == fd->fd_dsfield);
1846 }
1847 default:
1848 return (B_FALSE);
1849 }
1850 }
1851
1852 /*
1853 * IP v4 and v6 address matching.
1854 * The netmask only needs to be applied on the packet but not on the
1855 * flow_desc since fd_local_addr/fd_remote_addr are premasked subnets.
1856 */
1857
1858 /* ARGSUSED */
1859 static boolean_t
flow_ip_v4_match(flow_tab_t * ft,flow_entry_t * flent,flow_state_t * s)1860 flow_ip_v4_match(flow_tab_t *ft, flow_entry_t *flent, flow_state_t *s)
1861 {
1862 flow_l3info_t *l3info = &s->fs_l3info;
1863 flow_desc_t *fd = &flent->fe_flow_desc;
1864 ipha_t *ipha = (ipha_t *)l3info->l3_start;
1865 in_addr_t addr;
1866
1867 addr = (l3info->l3_dst_or_src ? ipha->ipha_dst : ipha->ipha_src);
1868 if ((fd->fd_mask & FLOW_IP_LOCAL) != 0) {
1869 return ((addr & V4_PART_OF_V6(fd->fd_local_netmask)) ==
1870 V4_PART_OF_V6(fd->fd_local_addr));
1871 }
1872 return ((addr & V4_PART_OF_V6(fd->fd_remote_netmask)) ==
1873 V4_PART_OF_V6(fd->fd_remote_addr));
1874 }
1875
1876 /* ARGSUSED */
1877 static boolean_t
flow_ip_v6_match(flow_tab_t * ft,flow_entry_t * flent,flow_state_t * s)1878 flow_ip_v6_match(flow_tab_t *ft, flow_entry_t *flent, flow_state_t *s)
1879 {
1880 flow_l3info_t *l3info = &s->fs_l3info;
1881 flow_desc_t *fd = &flent->fe_flow_desc;
1882 ip6_t *ip6h = (ip6_t *)l3info->l3_start;
1883 in6_addr_t *addrp;
1884
1885 addrp = (l3info->l3_dst_or_src ? &ip6h->ip6_dst : &ip6h->ip6_src);
1886 if ((fd->fd_mask & FLOW_IP_LOCAL) != 0) {
1887 return (V6_MASK_EQ(*addrp, fd->fd_local_netmask,
1888 fd->fd_local_addr));
1889 }
1890 return (V6_MASK_EQ(*addrp, fd->fd_remote_netmask, fd->fd_remote_addr));
1891 }
1892
1893 /* ARGSUSED */
1894 static boolean_t
flow_ip_proto_match(flow_tab_t * ft,flow_entry_t * flent,flow_state_t * s)1895 flow_ip_proto_match(flow_tab_t *ft, flow_entry_t *flent, flow_state_t *s)
1896 {
1897 flow_l3info_t *l3info = &s->fs_l3info;
1898 flow_desc_t *fd = &flent->fe_flow_desc;
1899
1900 return (l3info->l3_protocol == fd->fd_protocol);
1901 }
1902
1903 static uint32_t
flow_ip_hash(flow_tab_t * ft,flow_state_t * s)1904 flow_ip_hash(flow_tab_t *ft, flow_state_t *s)
1905 {
1906 flow_l3info_t *l3info = &s->fs_l3info;
1907 flow_mask_t mask = ft->ft_mask;
1908
1909 if ((mask & FLOW_IP_LOCAL) != 0) {
1910 l3info->l3_dst_or_src = ((s->fs_flags & FLOW_INBOUND) != 0);
1911 } else if ((mask & FLOW_IP_REMOTE) != 0) {
1912 l3info->l3_dst_or_src = ((s->fs_flags & FLOW_OUTBOUND) != 0);
1913 } else if ((mask & FLOW_IP_DSFIELD) != 0) {
1914 /*
1915 * DSField flents are arranged as a single list.
1916 */
1917 return (0);
1918 }
1919 /*
1920 * IP addr flents are hashed into two lists, v4 or v6.
1921 */
1922 ASSERT(ft->ft_size >= 2);
1923 return ((l3info->l3_version == IPV4_VERSION) ? 0 : 1);
1924 }
1925
1926 static uint32_t
flow_ip_proto_hash(flow_tab_t * ft,flow_state_t * s)1927 flow_ip_proto_hash(flow_tab_t *ft, flow_state_t *s)
1928 {
1929 flow_l3info_t *l3info = &s->fs_l3info;
1930
1931 return (l3info->l3_protocol % ft->ft_size);
1932 }
1933
1934 /* ARGSUSED */
1935 static int
flow_ip_accept(flow_tab_t * ft,flow_state_t * s)1936 flow_ip_accept(flow_tab_t *ft, flow_state_t *s)
1937 {
1938 flow_l2info_t *l2info = &s->fs_l2info;
1939 flow_l3info_t *l3info = &s->fs_l3info;
1940 uint16_t sap = l2info->l2_sap;
1941 uchar_t *l3_start;
1942
1943 l3_start = l2info->l2_start + l2info->l2_hdrsize;
1944
1945 /*
1946 * Adjust start pointer if we're at the end of an mblk.
1947 */
1948 CHECK_AND_ADJUST_START_PTR(s, l3_start);
1949
1950 l3info->l3_start = l3_start;
1951 if (!OK_32PTR(l3_start))
1952 return (EINVAL);
1953
1954 switch (sap) {
1955 case ETHERTYPE_IP: {
1956 ipha_t *ipha = (ipha_t *)l3_start;
1957
1958 if (PKT_TOO_SMALL(s, l3_start + IP_SIMPLE_HDR_LENGTH))
1959 return (ENOBUFS);
1960
1961 l3info->l3_hdrsize = IPH_HDR_LENGTH(ipha);
1962 l3info->l3_protocol = ipha->ipha_protocol;
1963 l3info->l3_version = IPV4_VERSION;
1964 l3info->l3_fragmented =
1965 IS_V4_FRAGMENT(ipha->ipha_fragment_offset_and_flags);
1966 break;
1967 }
1968 case ETHERTYPE_IPV6: {
1969 ip6_t *ip6h = (ip6_t *)l3_start;
1970 ip6_frag_t *frag = NULL;
1971 uint16_t ip6_hdrlen;
1972 uint8_t nexthdr;
1973
1974 if (!mac_ip_hdr_length_v6(ip6h, s->fs_mp->b_wptr, &ip6_hdrlen,
1975 &nexthdr, &frag)) {
1976 return (ENOBUFS);
1977 }
1978 l3info->l3_hdrsize = ip6_hdrlen;
1979 l3info->l3_protocol = nexthdr;
1980 l3info->l3_version = IPV6_VERSION;
1981 l3info->l3_fragmented = (frag != NULL);
1982 break;
1983 }
1984 default:
1985 return (EINVAL);
1986 }
1987 return (0);
1988 }
1989
1990 /* ARGSUSED */
1991 static int
flow_ip_proto_accept_fe(flow_tab_t * ft,flow_entry_t * flent)1992 flow_ip_proto_accept_fe(flow_tab_t *ft, flow_entry_t *flent)
1993 {
1994 flow_desc_t *fd = &flent->fe_flow_desc;
1995
1996 switch (fd->fd_protocol) {
1997 case IPPROTO_TCP:
1998 case IPPROTO_UDP:
1999 case IPPROTO_SCTP:
2000 case IPPROTO_ICMP:
2001 case IPPROTO_ICMPV6:
2002 flent->fe_match = flow_ip_proto_match;
2003 return (0);
2004 default:
2005 return (EINVAL);
2006 }
2007 }
2008
2009 /* ARGSUSED */
2010 static int
flow_ip_accept_fe(flow_tab_t * ft,flow_entry_t * flent)2011 flow_ip_accept_fe(flow_tab_t *ft, flow_entry_t *flent)
2012 {
2013 flow_desc_t *fd = &flent->fe_flow_desc;
2014 flow_mask_t mask;
2015 uint8_t version;
2016 in6_addr_t *addr, *netmask;
2017
2018 /*
2019 * DSField does not require a IP version.
2020 */
2021 if (fd->fd_mask == FLOW_IP_DSFIELD) {
2022 if (fd->fd_dsfield_mask == 0)
2023 return (EINVAL);
2024
2025 flent->fe_match = flow_ip_dsfield_match;
2026 return (0);
2027 }
2028
2029 /*
2030 * IP addresses must come with a version to avoid ambiguity.
2031 */
2032 if ((fd->fd_mask & FLOW_IP_VERSION) == 0)
2033 return (EINVAL);
2034
2035 version = fd->fd_ipversion;
2036 if (version != IPV4_VERSION && version != IPV6_VERSION)
2037 return (EINVAL);
2038
2039 mask = fd->fd_mask & ~FLOW_IP_VERSION;
2040 switch (mask) {
2041 case FLOW_IP_LOCAL:
2042 addr = &fd->fd_local_addr;
2043 netmask = &fd->fd_local_netmask;
2044 break;
2045 case FLOW_IP_REMOTE:
2046 addr = &fd->fd_remote_addr;
2047 netmask = &fd->fd_remote_netmask;
2048 break;
2049 default:
2050 return (EINVAL);
2051 }
2052
2053 /*
2054 * Apply netmask onto specified address.
2055 */
2056 V6_MASK_COPY(*addr, *netmask, *addr);
2057 if (version == IPV4_VERSION) {
2058 ipaddr_t v4addr = V4_PART_OF_V6((*addr));
2059 ipaddr_t v4mask = V4_PART_OF_V6((*netmask));
2060
2061 if (v4addr == 0 || v4mask == 0)
2062 return (EINVAL);
2063 flent->fe_match = flow_ip_v4_match;
2064 } else {
2065 if (IN6_IS_ADDR_UNSPECIFIED(addr) ||
2066 IN6_IS_ADDR_UNSPECIFIED(netmask))
2067 return (EINVAL);
2068 flent->fe_match = flow_ip_v6_match;
2069 }
2070 return (0);
2071 }
2072
2073 static uint32_t
flow_ip_proto_hash_fe(flow_tab_t * ft,flow_entry_t * flent)2074 flow_ip_proto_hash_fe(flow_tab_t *ft, flow_entry_t *flent)
2075 {
2076 flow_desc_t *fd = &flent->fe_flow_desc;
2077
2078 return (fd->fd_protocol % ft->ft_size);
2079 }
2080
2081 static uint32_t
flow_ip_hash_fe(flow_tab_t * ft,flow_entry_t * flent)2082 flow_ip_hash_fe(flow_tab_t *ft, flow_entry_t *flent)
2083 {
2084 flow_desc_t *fd = &flent->fe_flow_desc;
2085
2086 /*
2087 * DSField flents are arranged as a single list.
2088 */
2089 if ((fd->fd_mask & FLOW_IP_DSFIELD) != 0)
2090 return (0);
2091
2092 /*
2093 * IP addr flents are hashed into two lists, v4 or v6.
2094 */
2095 ASSERT(ft->ft_size >= 2);
2096 return ((fd->fd_ipversion == IPV4_VERSION) ? 0 : 1);
2097 }
2098
2099 /* ARGSUSED */
2100 static boolean_t
flow_ip_proto_match_fe(flow_tab_t * ft,flow_entry_t * f1,flow_entry_t * f2)2101 flow_ip_proto_match_fe(flow_tab_t *ft, flow_entry_t *f1, flow_entry_t *f2)
2102 {
2103 flow_desc_t *fd1 = &f1->fe_flow_desc, *fd2 = &f2->fe_flow_desc;
2104
2105 return (fd1->fd_protocol == fd2->fd_protocol);
2106 }
2107
2108 /* ARGSUSED */
2109 static boolean_t
flow_ip_match_fe(flow_tab_t * ft,flow_entry_t * f1,flow_entry_t * f2)2110 flow_ip_match_fe(flow_tab_t *ft, flow_entry_t *f1, flow_entry_t *f2)
2111 {
2112 flow_desc_t *fd1 = &f1->fe_flow_desc, *fd2 = &f2->fe_flow_desc;
2113 in6_addr_t *a1, *m1, *a2, *m2;
2114
2115 ASSERT(fd1->fd_mask == fd2->fd_mask);
2116 if (fd1->fd_mask == FLOW_IP_DSFIELD) {
2117 return (fd1->fd_dsfield == fd2->fd_dsfield &&
2118 fd1->fd_dsfield_mask == fd2->fd_dsfield_mask);
2119 }
2120
2121 /*
2122 * flow_ip_accept_fe() already validated the version.
2123 */
2124 ASSERT((fd1->fd_mask & FLOW_IP_VERSION) != 0);
2125 if (fd1->fd_ipversion != fd2->fd_ipversion)
2126 return (B_FALSE);
2127
2128 switch (fd1->fd_mask & ~FLOW_IP_VERSION) {
2129 case FLOW_IP_LOCAL:
2130 a1 = &fd1->fd_local_addr;
2131 m1 = &fd1->fd_local_netmask;
2132 a2 = &fd2->fd_local_addr;
2133 m2 = &fd2->fd_local_netmask;
2134 break;
2135 case FLOW_IP_REMOTE:
2136 a1 = &fd1->fd_remote_addr;
2137 m1 = &fd1->fd_remote_netmask;
2138 a2 = &fd2->fd_remote_addr;
2139 m2 = &fd2->fd_remote_netmask;
2140 break;
2141 default:
2142 /*
2143 * This is unreachable given the checks in
2144 * flow_ip_accept_fe().
2145 */
2146 return (B_FALSE);
2147 }
2148
2149 if (fd1->fd_ipversion == IPV4_VERSION) {
2150 return (V4_PART_OF_V6((*a1)) == V4_PART_OF_V6((*a2)) &&
2151 V4_PART_OF_V6((*m1)) == V4_PART_OF_V6((*m2)));
2152
2153 } else {
2154 return (IN6_ARE_ADDR_EQUAL(a1, a2) &&
2155 IN6_ARE_ADDR_EQUAL(m1, m2));
2156 }
2157 }
2158
2159 static int
flow_ip_mask2plen(in6_addr_t * v6mask)2160 flow_ip_mask2plen(in6_addr_t *v6mask)
2161 {
2162 int bits;
2163 int plen = IPV6_ABITS;
2164 int i;
2165
2166 for (i = 3; i >= 0; i--) {
2167 if (v6mask->s6_addr32[i] == 0) {
2168 plen -= 32;
2169 continue;
2170 }
2171 bits = ffs(ntohl(v6mask->s6_addr32[i])) - 1;
2172 if (bits == 0)
2173 break;
2174 plen -= bits;
2175 }
2176 return (plen);
2177 }
2178
2179 /* ARGSUSED */
2180 static int
flow_ip_insert_fe(flow_tab_t * ft,flow_entry_t ** headp,flow_entry_t * flent)2181 flow_ip_insert_fe(flow_tab_t *ft, flow_entry_t **headp,
2182 flow_entry_t *flent)
2183 {
2184 flow_entry_t **p = headp;
2185 flow_desc_t *fd0, *fd;
2186 in6_addr_t *m0, *m;
2187 int plen0, plen;
2188
2189 ASSERT(MAC_PERIM_HELD((mac_handle_t)ft->ft_mip));
2190
2191 /*
2192 * No special ordering needed for dsfield.
2193 */
2194 fd0 = &flent->fe_flow_desc;
2195 if ((fd0->fd_mask & FLOW_IP_DSFIELD) != 0) {
2196 if (*p != NULL) {
2197 ASSERT(flent->fe_next == NULL);
2198 flent->fe_next = *p;
2199 }
2200 *p = flent;
2201 return (0);
2202 }
2203
2204 /*
2205 * IP address flows are arranged in descending prefix length order.
2206 */
2207 m0 = ((fd0->fd_mask & FLOW_IP_LOCAL) != 0) ?
2208 &fd0->fd_local_netmask : &fd0->fd_remote_netmask;
2209 plen0 = flow_ip_mask2plen(m0);
2210 ASSERT(plen0 != 0);
2211
2212 for (; *p != NULL; p = &(*p)->fe_next) {
2213 fd = &(*p)->fe_flow_desc;
2214
2215 /*
2216 * Normally a dsfield flent shouldn't end up on the same
2217 * list as an IP address because flow tables are (for now)
2218 * disjoint. If we decide to support both IP and dsfield
2219 * in the same table in the future, this check will allow
2220 * for that.
2221 */
2222 if ((fd->fd_mask & FLOW_IP_DSFIELD) != 0)
2223 continue;
2224
2225 /*
2226 * We also allow for the mixing of local and remote address
2227 * flents within one list.
2228 */
2229 m = ((fd->fd_mask & FLOW_IP_LOCAL) != 0) ?
2230 &fd->fd_local_netmask : &fd->fd_remote_netmask;
2231 plen = flow_ip_mask2plen(m);
2232
2233 if (plen <= plen0)
2234 break;
2235 }
2236 if (*p != NULL) {
2237 ASSERT(flent->fe_next == NULL);
2238 flent->fe_next = *p;
2239 }
2240 *p = flent;
2241 return (0);
2242 }
2243
2244 /*
2245 * Transport layer protocol and port matching functions.
2246 */
2247
2248 /* ARGSUSED */
2249 static boolean_t
flow_transport_lport_match(flow_tab_t * ft,flow_entry_t * flent,flow_state_t * s)2250 flow_transport_lport_match(flow_tab_t *ft, flow_entry_t *flent, flow_state_t *s)
2251 {
2252 flow_l3info_t *l3info = &s->fs_l3info;
2253 flow_l4info_t *l4info = &s->fs_l4info;
2254 flow_desc_t *fd = &flent->fe_flow_desc;
2255
2256 return (fd->fd_protocol == l3info->l3_protocol &&
2257 fd->fd_local_port == l4info->l4_hash_port);
2258 }
2259
2260 /* ARGSUSED */
2261 static boolean_t
flow_transport_rport_match(flow_tab_t * ft,flow_entry_t * flent,flow_state_t * s)2262 flow_transport_rport_match(flow_tab_t *ft, flow_entry_t *flent, flow_state_t *s)
2263 {
2264 flow_l3info_t *l3info = &s->fs_l3info;
2265 flow_l4info_t *l4info = &s->fs_l4info;
2266 flow_desc_t *fd = &flent->fe_flow_desc;
2267
2268 return (fd->fd_protocol == l3info->l3_protocol &&
2269 fd->fd_remote_port == l4info->l4_hash_port);
2270 }
2271
2272 /*
2273 * Transport hash function.
2274 * Since we only support either local or remote port flows,
2275 * we only need to extract one of the ports to be used for
2276 * matching.
2277 */
2278 static uint32_t
flow_transport_hash(flow_tab_t * ft,flow_state_t * s)2279 flow_transport_hash(flow_tab_t *ft, flow_state_t *s)
2280 {
2281 flow_l3info_t *l3info = &s->fs_l3info;
2282 flow_l4info_t *l4info = &s->fs_l4info;
2283 uint8_t proto = l3info->l3_protocol;
2284 boolean_t dst_or_src;
2285
2286 if ((ft->ft_mask & FLOW_ULP_PORT_LOCAL) != 0) {
2287 dst_or_src = ((s->fs_flags & FLOW_INBOUND) != 0);
2288 } else {
2289 dst_or_src = ((s->fs_flags & FLOW_OUTBOUND) != 0);
2290 }
2291
2292 l4info->l4_hash_port = dst_or_src ? l4info->l4_dst_port :
2293 l4info->l4_src_port;
2294
2295 return ((l4info->l4_hash_port ^ (proto << 4)) % ft->ft_size);
2296 }
2297
2298 /*
2299 * Unlike other accept() functions above, we do not need to get the header
2300 * size because this is our highest layer so far. If we want to do support
2301 * other higher layer protocols, we would need to save the l4_hdrsize
2302 * in the code below.
2303 */
2304
2305 /* ARGSUSED */
2306 static int
flow_transport_accept(flow_tab_t * ft,flow_state_t * s)2307 flow_transport_accept(flow_tab_t *ft, flow_state_t *s)
2308 {
2309 flow_l3info_t *l3info = &s->fs_l3info;
2310 flow_l4info_t *l4info = &s->fs_l4info;
2311 uint8_t proto = l3info->l3_protocol;
2312 uchar_t *l4_start;
2313
2314 l4_start = l3info->l3_start + l3info->l3_hdrsize;
2315
2316 /*
2317 * Adjust start pointer if we're at the end of an mblk.
2318 */
2319 CHECK_AND_ADJUST_START_PTR(s, l4_start);
2320
2321 l4info->l4_start = l4_start;
2322 if (!OK_32PTR(l4_start))
2323 return (EINVAL);
2324
2325 if (l3info->l3_fragmented == B_TRUE)
2326 return (EINVAL);
2327
2328 switch (proto) {
2329 case IPPROTO_TCP: {
2330 struct tcphdr *tcph = (struct tcphdr *)l4_start;
2331
2332 if (PKT_TOO_SMALL(s, l4_start + sizeof (*tcph)))
2333 return (ENOBUFS);
2334
2335 l4info->l4_src_port = tcph->th_sport;
2336 l4info->l4_dst_port = tcph->th_dport;
2337 break;
2338 }
2339 case IPPROTO_UDP: {
2340 struct udphdr *udph = (struct udphdr *)l4_start;
2341
2342 if (PKT_TOO_SMALL(s, l4_start + sizeof (*udph)))
2343 return (ENOBUFS);
2344
2345 l4info->l4_src_port = udph->uh_sport;
2346 l4info->l4_dst_port = udph->uh_dport;
2347 break;
2348 }
2349 case IPPROTO_SCTP: {
2350 sctp_hdr_t *sctph = (sctp_hdr_t *)l4_start;
2351
2352 if (PKT_TOO_SMALL(s, l4_start + sizeof (*sctph)))
2353 return (ENOBUFS);
2354
2355 l4info->l4_src_port = sctph->sh_sport;
2356 l4info->l4_dst_port = sctph->sh_dport;
2357 break;
2358 }
2359 default:
2360 return (EINVAL);
2361 }
2362
2363 return (0);
2364 }
2365
2366 /*
2367 * Validates transport flow entry.
2368 * The protocol field must be present.
2369 */
2370
2371 /* ARGSUSED */
2372 static int
flow_transport_accept_fe(flow_tab_t * ft,flow_entry_t * flent)2373 flow_transport_accept_fe(flow_tab_t *ft, flow_entry_t *flent)
2374 {
2375 flow_desc_t *fd = &flent->fe_flow_desc;
2376 flow_mask_t mask = fd->fd_mask;
2377
2378 if ((mask & FLOW_IP_PROTOCOL) == 0)
2379 return (EINVAL);
2380
2381 switch (fd->fd_protocol) {
2382 case IPPROTO_TCP:
2383 case IPPROTO_UDP:
2384 case IPPROTO_SCTP:
2385 break;
2386 default:
2387 return (EINVAL);
2388 }
2389
2390 switch (mask & ~FLOW_IP_PROTOCOL) {
2391 case FLOW_ULP_PORT_LOCAL:
2392 if (fd->fd_local_port == 0)
2393 return (EINVAL);
2394
2395 flent->fe_match = flow_transport_lport_match;
2396 break;
2397 case FLOW_ULP_PORT_REMOTE:
2398 if (fd->fd_remote_port == 0)
2399 return (EINVAL);
2400
2401 flent->fe_match = flow_transport_rport_match;
2402 break;
2403 case 0:
2404 /*
2405 * transport-only flows conflicts with our table type.
2406 */
2407 return (EOPNOTSUPP);
2408 default:
2409 return (EINVAL);
2410 }
2411
2412 return (0);
2413 }
2414
2415 static uint32_t
flow_transport_hash_fe(flow_tab_t * ft,flow_entry_t * flent)2416 flow_transport_hash_fe(flow_tab_t *ft, flow_entry_t *flent)
2417 {
2418 flow_desc_t *fd = &flent->fe_flow_desc;
2419 uint16_t port = 0;
2420
2421 port = ((fd->fd_mask & FLOW_ULP_PORT_LOCAL) != 0) ?
2422 fd->fd_local_port : fd->fd_remote_port;
2423
2424 return ((port ^ (fd->fd_protocol << 4)) % ft->ft_size);
2425 }
2426
2427 /* ARGSUSED */
2428 static boolean_t
flow_transport_match_fe(flow_tab_t * ft,flow_entry_t * f1,flow_entry_t * f2)2429 flow_transport_match_fe(flow_tab_t *ft, flow_entry_t *f1, flow_entry_t *f2)
2430 {
2431 flow_desc_t *fd1 = &f1->fe_flow_desc, *fd2 = &f2->fe_flow_desc;
2432
2433 if (fd1->fd_protocol != fd2->fd_protocol)
2434 return (B_FALSE);
2435
2436 if ((fd1->fd_mask & FLOW_ULP_PORT_LOCAL) != 0)
2437 return (fd1->fd_local_port == fd2->fd_local_port);
2438
2439 if ((fd1->fd_mask & FLOW_ULP_PORT_REMOTE) != 0)
2440 return (fd1->fd_remote_port == fd2->fd_remote_port);
2441
2442 return (B_TRUE);
2443 }
2444
2445 static flow_ops_t flow_l2_ops = {
2446 flow_l2_accept_fe,
2447 flow_l2_hash_fe,
2448 flow_l2_match_fe,
2449 flow_generic_insert_fe,
2450 flow_l2_hash,
2451 {flow_l2_accept}
2452 };
2453
2454 static flow_ops_t flow_ip_ops = {
2455 flow_ip_accept_fe,
2456 flow_ip_hash_fe,
2457 flow_ip_match_fe,
2458 flow_ip_insert_fe,
2459 flow_ip_hash,
2460 {flow_l2_accept, flow_ip_accept}
2461 };
2462
2463 static flow_ops_t flow_ip_proto_ops = {
2464 flow_ip_proto_accept_fe,
2465 flow_ip_proto_hash_fe,
2466 flow_ip_proto_match_fe,
2467 flow_generic_insert_fe,
2468 flow_ip_proto_hash,
2469 {flow_l2_accept, flow_ip_accept}
2470 };
2471
2472 static flow_ops_t flow_transport_ops = {
2473 flow_transport_accept_fe,
2474 flow_transport_hash_fe,
2475 flow_transport_match_fe,
2476 flow_generic_insert_fe,
2477 flow_transport_hash,
2478 {flow_l2_accept, flow_ip_accept, flow_transport_accept}
2479 };
2480
2481 static flow_tab_info_t flow_tab_info_list[] = {
2482 {&flow_ip_ops, FLOW_IP_VERSION | FLOW_IP_LOCAL, 2},
2483 {&flow_ip_ops, FLOW_IP_VERSION | FLOW_IP_REMOTE, 2},
2484 {&flow_ip_ops, FLOW_IP_DSFIELD, 1},
2485 {&flow_ip_proto_ops, FLOW_IP_PROTOCOL, 256},
2486 {&flow_transport_ops, FLOW_IP_PROTOCOL | FLOW_ULP_PORT_LOCAL, 1024},
2487 {&flow_transport_ops, FLOW_IP_PROTOCOL | FLOW_ULP_PORT_REMOTE, 1024}
2488 };
2489
/*
 * Number of entries in flow_tab_info_list.
 */
#define	FLOW_MAX_TAB_INFO	\
	(sizeof (flow_tab_info_list) / sizeof (flow_tab_info_list[0]))
2492
2493 static flow_tab_info_t *
mac_flow_tab_info_get(flow_mask_t mask)2494 mac_flow_tab_info_get(flow_mask_t mask)
2495 {
2496 int i;
2497
2498 for (i = 0; i < FLOW_MAX_TAB_INFO; i++) {
2499 if (mask == flow_tab_info_list[i].fti_mask)
2500 return (&flow_tab_info_list[i]);
2501 }
2502 return (NULL);
2503 }
2504