1 /*
2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
5 * 1.0 of the CDDL.
6 *
7 * A full copy of the text of the CDDL should have accompanied this
8 * source. A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
10 */
11
12 /*
13 * Copyright 2016 Joyent, Inc.
14 * Copyright 2022 MNX Cloud, Inc.
15 */
16
17 /*
18 * Overlay device target cache management
19 *
20 * For more information, see the big theory statement in
21 * uts/common/io/overlay/overlay.c
22 */
23
24 #include <sys/types.h>
25 #include <sys/ethernet.h>
26 #include <sys/kmem.h>
27 #include <sys/policy.h>
28 #include <sys/sysmacros.h>
29 #include <sys/stream.h>
30 #include <sys/strsun.h>
31 #include <sys/strsubr.h>
32 #include <sys/mac_provider.h>
33 #include <sys/mac_client.h>
34 #include <sys/mac_client_priv.h>
35 #include <sys/vlan.h>
36 #include <sys/crc32.h>
37 #include <sys/cred.h>
38 #include <sys/file.h>
39 #include <sys/errno.h>
40 #include <sys/ddi.h>
41 #include <sys/sunddi.h>
42
43 #include <sys/overlay_impl.h>
44 #include <sys/sdt.h>
45
46 /*
47 * This is a total straw man, but at least it's a prime number. We will
48 * eventually have to evaluate and understand how these target caches should
49 * grow and shrink, as well as how to handle memory pressure and evictions.
50 * This just gives us a starting point that'll be 'good enough', until it's
51 * not.
52 */
53 #define OVERLAY_HSIZE 823
54
55 /*
56 * We use this data structure to keep track of what requests have been actively
57 * allocated to a given instance so we know what to put back on the pending
58 * list.
59 */
60 typedef struct overlay_target_hdl {
61 minor_t oth_minor; /* RO */
62 zoneid_t oth_zoneid; /* RO */
63 int oth_oflags; /* RO */
64 list_node_t oth_link; /* overlay_target_lock */
65 kmutex_t oth_lock;
66 list_t oth_outstanding; /* oth_lock */
67 } overlay_target_hdl_t;
68
69 typedef int (*overlay_target_copyin_f)(const void *, void **, size_t *, int);
70 typedef int (*overlay_target_ioctl_f)(overlay_target_hdl_t *, void *);
71 typedef int (*overlay_target_copyout_f)(void *, void *, size_t, int);
72
73 typedef struct overlay_target_ioctl {
74 int oti_cmd; /* ioctl id */
75 boolean_t oti_write; /* ioctl requires FWRITE */
76 boolean_t oti_ncopyout; /* copyout data? */
77 overlay_target_copyin_f oti_copyin; /* copyin func */
78 overlay_target_ioctl_f oti_func; /* function to call */
79 overlay_target_copyout_f oti_copyout; /* copyout func */
80 size_t oti_size; /* size of user level structure */
81 } overlay_target_ioctl_t;
82
83 static kmem_cache_t *overlay_target_cache;
84 static kmem_cache_t *overlay_entry_cache;
85 static id_space_t *overlay_thdl_idspace;
86 static void *overlay_thdl_state;
87
88 /*
89 * When we support overlay devices in non-global zones, all of these will need
90 * to become zone aware by plugging into the netstack engine and becoming
91 * per-netstack data.
92 */
93 static list_t overlay_thdl_list;
94 static kmutex_t overlay_target_lock;
95 static kcondvar_t overlay_target_condvar;
96 static list_t overlay_target_list;
97 static boolean_t overlay_target_excl;
98
99 /*
100 * Maximum outstanding queued packet data, in bytes, per hash table entry.
101 */
102 static int overlay_ent_size = 128 * 1024;
103
104 /* ARGSUSED */
105 static int
106 overlay_target_cache_constructor(void *buf, void *arg, int kmflgs)
107 {
108 overlay_target_t *ott = buf;
109
110 mutex_init(&ott->ott_lock, NULL, MUTEX_DRIVER, NULL);
111 cv_init(&ott->ott_cond, NULL, CV_DRIVER, NULL);
112 return (0);
113 }
114
115 /* ARGSUSED */
116 static void
117 overlay_target_cache_destructor(void *buf, void *arg)
118 {
119 overlay_target_t *ott = buf;
120
121 cv_destroy(&ott->ott_cond);
122 mutex_destroy(&ott->ott_lock);
123 }
124
125 /* ARGSUSED */
126 static int
127 overlay_entry_cache_constructor(void *buf, void *arg, int kmflgs)
128 {
129 overlay_target_entry_t *ote = buf;
130
131 bzero(ote, sizeof (overlay_target_entry_t));
132 mutex_init(&ote->ote_lock, NULL, MUTEX_DRIVER, NULL);
133 return (0);
134 }
135
136 /* ARGSUSED */
137 static void
138 overlay_entry_cache_destructor(void *buf, void *arg)
139 {
140 overlay_target_entry_t *ote = buf;
141
142 mutex_destroy(&ote->ote_lock);
143 }
144
145 static uint64_t
146 overlay_mac_hash(const void *v)
147 {
148 uint32_t crc;
149 CRC32(crc, v, ETHERADDRL, -1U, crc32_table);
150 return (crc);
151 }
152
153 static int
154 overlay_mac_cmp(const void *a, const void *b)
155 {
156 return (bcmp(a, b, ETHERADDRL));
157 }
158
159 /* ARGSUSED */
160 static void
161 overlay_target_entry_dtor(void *arg)
162 {
163 overlay_target_entry_t *ote = arg;
164
165 ote->ote_flags = 0;
166 bzero(ote->ote_addr, ETHERADDRL);
167 ote->ote_ott = NULL;
168 ote->ote_odd = NULL;
169 freemsgchain(ote->ote_chead);
170 ote->ote_chead = ote->ote_ctail = NULL;
171 ote->ote_mbsize = 0;
172 ote->ote_vtime = 0;
173 kmem_cache_free(overlay_entry_cache, ote);
174 }
175
176 static int
177 overlay_mac_avl(const void *a, const void *b)
178 {
179 int i;
180 const overlay_target_entry_t *l, *r;
181 l = a;
182 r = b;
183
184 for (i = 0; i < ETHERADDRL; i++) {
185 if (l->ote_addr[i] > r->ote_addr[i])
186 return (1);
187 else if (l->ote_addr[i] < r->ote_addr[i])
188 return (-1);
189 }
190
191 return (0);
192 }
193
194 void
195 overlay_target_init(void)
196 {
197 int ret;
198 ret = ddi_soft_state_init(&overlay_thdl_state,
199 sizeof (overlay_target_hdl_t), 1);
200 VERIFY(ret == 0);
201 overlay_target_cache = kmem_cache_create("overlay_target",
202 sizeof (overlay_target_t), 0, overlay_target_cache_constructor,
203 overlay_target_cache_destructor, NULL, NULL, NULL, 0);
204 overlay_entry_cache = kmem_cache_create("overlay_entry",
205 sizeof (overlay_target_entry_t), 0, overlay_entry_cache_constructor,
206 overlay_entry_cache_destructor, NULL, NULL, NULL, 0);
207 mutex_init(&overlay_target_lock, NULL, MUTEX_DRIVER, NULL);
208 cv_init(&overlay_target_condvar, NULL, CV_DRIVER, NULL);
209 list_create(&overlay_target_list, sizeof (overlay_target_entry_t),
210 offsetof(overlay_target_entry_t, ote_qlink));
211 list_create(&overlay_thdl_list, sizeof (overlay_target_hdl_t),
212 offsetof(overlay_target_hdl_t, oth_link));
213 overlay_thdl_idspace = id_space_create("overlay_target_minors",
214 1, INT32_MAX);
215 }
216
217 void
218 overlay_target_fini(void)
219 {
220 id_space_destroy(overlay_thdl_idspace);
221 list_destroy(&overlay_thdl_list);
222 list_destroy(&overlay_target_list);
223 cv_destroy(&overlay_target_condvar);
224 mutex_destroy(&overlay_target_lock);
225 kmem_cache_destroy(overlay_entry_cache);
226 kmem_cache_destroy(overlay_target_cache);
227 ddi_soft_state_fini(&overlay_thdl_state);
228 }
229
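/*
 * Free the target information hanging off of an overlay device. For a
 * dynamic target, we first empty the AVL tree and then the refhash; the
 * refhash destructor (overlay_target_entry_dtor) frees any queued mblk
 * chains and returns each entry to the entry cache.
 */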
230 void
231 overlay_target_free(overlay_dev_t *odd)
232 {
233 if (odd->odd_target == NULL)
234 return;
235
236 if (odd->odd_target->ott_mode == OVERLAY_TARGET_DYNAMIC) {
237 refhash_t *rp = odd->odd_target->ott_u.ott_dyn.ott_dhash;
238 avl_tree_t *ap = &odd->odd_target->ott_u.ott_dyn.ott_tree;
239 overlay_target_entry_t *ote;
240
241 /*
242 * Our AVL tree and hashtable contain the same elements, therefore we
243 * just remove each entry from the tree, and then delete the entries
244 * when we remove them from the hash table (which happens through the
245 * refhash dtor).
246 */
247 while ((ote = avl_first(ap)) != NULL)
248 avl_remove(ap, ote);
249
250 avl_destroy(ap);
251 for (ote = refhash_first(rp); ote != NULL;
252 ote = refhash_next(rp, ote)) {
253 refhash_remove(rp, ote);
254 }
255 refhash_destroy(rp);
256 }
257
258 ASSERT(odd->odd_target->ott_ocount == 0);
259 kmem_cache_free(overlay_target_cache, odd->odd_target);
260 }
261
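/*
 * Report whether any target handles (that is, open instances of the varpd
 * control device) currently exist.
 */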
262 int
263 overlay_target_busy()
264 {
265 int ret;
266
267 mutex_enter(&overlay_target_lock);
268 ret = !list_is_empty(&overlay_thdl_list);
269 mutex_exit(&overlay_target_lock);
270
271 return (ret);
272 }
273
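/*
 * Queue an entry for a userland (varpd) lookup. The entry is placed on the
 * global overlay_target_list and a waiter in overlay_target_lookup_request()
 * is woken. If the target is being torn down, the request is silently
 * dropped; otherwise the target's outstanding count is bumped.
 */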
274 static void
275 overlay_target_queue(overlay_target_entry_t *entry)
276 {
277 mutex_enter(&overlay_target_lock);
278 mutex_enter(&entry->ote_ott->ott_lock);
279 if (entry->ote_ott->ott_flags & OVERLAY_T_TEARDOWN) {
280 mutex_exit(&entry->ote_ott->ott_lock);
281 mutex_exit(&overlay_target_lock);
282 return;
283 }
284 entry->ote_ott->ott_ocount++;
285 mutex_exit(&entry->ote_ott->ott_lock);
286 list_insert_tail(&overlay_target_list, entry);
287 cv_signal(&overlay_target_condvar);
288 mutex_exit(&overlay_target_lock);
289 }
290
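/*
 * Mark the target as being torn down and wait for all of its outstanding
 * lookup requests to be completed or dropped.
 */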
291 void
292 overlay_target_quiesce(overlay_target_t *ott)
293 {
294 if (ott == NULL)
295 return;
296 mutex_enter(&ott->ott_lock);
297 ott->ott_flags |= OVERLAY_T_TEARDOWN;
298 while (ott->ott_ocount != 0)
299 cv_wait(&ott->ott_cond, &ott->ott_lock);
300 mutex_exit(&ott->ott_lock);
301 }
302
303 /*
304 * This function assumes that the destination mode is OVERLAY_PLUGIN_D_IP |
305 * OVERLAY_PLUGIN_D_PORT. As we don't have an implementation of anything else
306 * at this time, say for NVGRE, we drop all packets that don't match this.
307 */
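/*
 * The result of a lookup is communicated through the return value:
 *
 *   OVERLAY_TARGET_OK	  sock/slenp have been filled in and the packet may
 *			  be transmitted.
 *   OVERLAY_TARGET_DROP  the packet should be dropped.
 *   OVERLAY_TARGET_ASYNC the packet has been queued on the target entry
 *			  pending a response from varpd.
 */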
308 int
309 overlay_target_lookup(overlay_dev_t *odd, mblk_t *mp, struct sockaddr *sock,
310 socklen_t *slenp)
311 {
312 int ret;
313 struct sockaddr_in6 *v6;
314 overlay_target_t *ott;
315 mac_header_info_t mhi;
316 overlay_target_entry_t *entry;
317
318 ASSERT(odd->odd_target != NULL);
319
320 /*
321 * At this point, the overlay device is in a mux, which means that it has
322 * been activated. Parts of the target, such as the mode and the
323 * destination, are now read-only and we don't have to worry about
324 * synchronization for them.
325 */
326 ott = odd->odd_target;
327 if (ott->ott_dest != (OVERLAY_PLUGIN_D_IP | OVERLAY_PLUGIN_D_PORT))
328 return (OVERLAY_TARGET_DROP);
329
330 v6 = (struct sockaddr_in6 *)sock;
331 bzero(v6, sizeof (struct sockaddr_in6));
332 v6->sin6_family = AF_INET6;
333
334 if (ott->ott_mode == OVERLAY_TARGET_POINT) {
335 mutex_enter(&ott->ott_lock);
336 bcopy(&ott->ott_u.ott_point.otp_ip, &v6->sin6_addr,
337 sizeof (struct in6_addr));
338 v6->sin6_port = htons(ott->ott_u.ott_point.otp_port);
339 mutex_exit(&ott->ott_lock);
340 *slenp = sizeof (struct sockaddr_in6);
341
342 return (OVERLAY_TARGET_OK);
343 }
344
345 ASSERT(ott->ott_mode == OVERLAY_TARGET_DYNAMIC);
346
347 /*
348 * Note we only want the MAC address here, therefore we won't bother
349 * using mac_vlan_header_info(). If any caller needs the vlan info at
350 * this point, this should change to a call to mac_vlan_header_info().
351 */
352 if (mac_header_info(odd->odd_mh, mp, &mhi) != 0)
353 return (OVERLAY_TARGET_DROP);
354 mutex_enter(&ott->ott_lock);
355 entry = refhash_lookup(ott->ott_u.ott_dyn.ott_dhash,
356 mhi.mhi_daddr);
357 if (entry == NULL) {
358 entry = kmem_cache_alloc(overlay_entry_cache, KM_NOSLEEP_LAZY);
359 if (entry == NULL) {
360 mutex_exit(&ott->ott_lock);
361 return (OVERLAY_TARGET_DROP);
362 }
363 bcopy(mhi.mhi_daddr, entry->ote_addr, ETHERADDRL);
364 entry->ote_chead = entry->ote_ctail = mp;
365 entry->ote_mbsize = msgsize(mp);
366 entry->ote_flags |= OVERLAY_ENTRY_F_PENDING;
367 entry->ote_ott = ott;
368 entry->ote_odd = odd;
369 refhash_insert(ott->ott_u.ott_dyn.ott_dhash, entry);
370 avl_add(&ott->ott_u.ott_dyn.ott_tree, entry);
371 mutex_exit(&ott->ott_lock);
372 overlay_target_queue(entry);
373 return (OVERLAY_TARGET_ASYNC);
374 }
375 refhash_hold(ott->ott_u.ott_dyn.ott_dhash, entry);
376 mutex_exit(&ott->ott_lock);
377
378 mutex_enter(&entry->ote_lock);
379 if (entry->ote_flags & OVERLAY_ENTRY_F_DROP) {
380 ret = OVERLAY_TARGET_DROP;
381 } else if (entry->ote_flags & OVERLAY_ENTRY_F_VALID) {
382 bcopy(&entry->ote_dest.otp_ip, &v6->sin6_addr,
383 sizeof (struct in6_addr));
384 v6->sin6_port = htons(entry->ote_dest.otp_port);
385 *slenp = sizeof (struct sockaddr_in6);
386 ret = OVERLAY_TARGET_OK;
387 } else {
388 size_t mlen = msgsize(mp);
389
390 if (mlen + entry->ote_mbsize > overlay_ent_size) {
391 ret = OVERLAY_TARGET_DROP;
392 } else {
393 if (entry->ote_ctail != NULL) {
394 ASSERT(entry->ote_ctail->b_next ==
395 NULL);
396 entry->ote_ctail->b_next = mp;
397 entry->ote_ctail = mp;
398 } else {
399 entry->ote_chead = mp;
400 entry->ote_ctail = mp;
401 }
402 entry->ote_mbsize += mlen;
403 if ((entry->ote_flags &
404 OVERLAY_ENTRY_F_PENDING) == 0) {
405 entry->ote_flags |=
406 OVERLAY_ENTRY_F_PENDING;
407 overlay_target_queue(entry);
408 }
409 ret = OVERLAY_TARGET_ASYNC;
410 }
411 }
412 mutex_exit(&entry->ote_lock);
413
414 mutex_enter(&ott->ott_lock);
415 refhash_rele(ott->ott_u.ott_dyn.ott_dhash, entry);
416 mutex_exit(&ott->ott_lock);
417
418 return (ret);
419 }
420
421 /* ARGSUSED */
422 static int
423 overlay_target_info(overlay_target_hdl_t *thdl, void *arg)
424 {
425 overlay_dev_t *odd;
426 overlay_targ_info_t *oti = arg;
427
428 odd = overlay_hold_by_dlid(oti->oti_linkid);
429 if (odd == NULL)
430 return (ENOENT);
431
432 mutex_enter(&odd->odd_lock);
433 oti->oti_flags = 0;
434 oti->oti_needs = odd->odd_plugin->ovp_dest;
435 if (odd->odd_flags & OVERLAY_F_DEGRADED)
436 oti->oti_flags |= OVERLAY_TARG_INFO_F_DEGRADED;
437 if (odd->odd_flags & OVERLAY_F_ACTIVATED)
438 oti->oti_flags |= OVERLAY_TARG_INFO_F_ACTIVE;
439 oti->oti_vnetid = odd->odd_vid;
440 mutex_exit(&odd->odd_lock);
441 overlay_hold_rele(odd);
442 return (0);
443 }
444
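/*
 * OVERLAY_TARG_ASSOCIATE: validate the association request, construct the
 * target (point-to-point or dynamic), attach it to the overlay device and
 * mark the device as having a varpd instance.
 */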
445 /* ARGSUSED */
446 static int
447 overlay_target_associate(overlay_target_hdl_t *thdl, void *arg)
448 {
449 overlay_dev_t *odd;
450 overlay_target_t *ott;
451 overlay_targ_associate_t *ota = arg;
452
453 odd = overlay_hold_by_dlid(ota->ota_linkid);
454 if (odd == NULL)
455 return (ENOENT);
456
457 if (ota->ota_id == 0) {
458 overlay_hold_rele(odd);
459 return (EINVAL);
460 }
461
462 if (ota->ota_mode != OVERLAY_TARGET_POINT &&
463 ota->ota_mode != OVERLAY_TARGET_DYNAMIC) {
464 overlay_hold_rele(odd);
465 return (EINVAL);
466 }
467
468 if (ota->ota_provides != odd->odd_plugin->ovp_dest) {
469 overlay_hold_rele(odd);
470 return (EINVAL);
471 }
472
473 if (ota->ota_mode == OVERLAY_TARGET_POINT) {
474 if (ota->ota_provides & OVERLAY_PLUGIN_D_IP) {
475 if (IN6_IS_ADDR_UNSPECIFIED(&ota->ota_point.otp_ip) ||
476 IN6_IS_ADDR_V4COMPAT(&ota->ota_point.otp_ip) ||
477 IN6_IS_ADDR_V4MAPPED_ANY(&ota->ota_point.otp_ip)) {
478 overlay_hold_rele(odd);
479 return (EINVAL);
480 }
481 }
482
483 if (ota->ota_provides & OVERLAY_PLUGIN_D_PORT) {
484 if (ota->ota_point.otp_port == 0) {
485 overlay_hold_rele(odd);
486 return (EINVAL);
487 }
488 }
489 }
490
491 ott = kmem_cache_alloc(overlay_target_cache, KM_SLEEP);
492 ott->ott_flags = 0;
493 ott->ott_ocount = 0;
494 ott->ott_mode = ota->ota_mode;
495 ott->ott_dest = ota->ota_provides;
496 ott->ott_id = ota->ota_id;
497
498 if (ott->ott_mode == OVERLAY_TARGET_POINT) {
499 bcopy(&ota->ota_point, &ott->ott_u.ott_point,
500 sizeof (overlay_target_point_t));
501 } else {
502 ott->ott_u.ott_dyn.ott_dhash = refhash_create(OVERLAY_HSIZE,
503 overlay_mac_hash, overlay_mac_cmp,
504 overlay_target_entry_dtor, sizeof (overlay_target_entry_t),
505 offsetof(overlay_target_entry_t, ote_reflink),
506 offsetof(overlay_target_entry_t, ote_addr), KM_SLEEP);
507 avl_create(&ott->ott_u.ott_dyn.ott_tree, overlay_mac_avl,
508 sizeof (overlay_target_entry_t),
509 offsetof(overlay_target_entry_t, ote_avllink));
510 }
511 mutex_enter(&odd->odd_lock);
512 if (odd->odd_flags & OVERLAY_F_VARPD) {
513 mutex_exit(&odd->odd_lock);
514 kmem_cache_free(overlay_target_cache, ott);
515 overlay_hold_rele(odd);
516 return (EEXIST);
517 }
518
519 odd->odd_flags |= OVERLAY_F_VARPD;
520 odd->odd_target = ott;
521 mutex_exit(&odd->odd_lock);
522
523 overlay_hold_rele(odd);
524
525
526 return (0);
527 }
528
529
530 /* ARGSUSED */
531 static int
532 overlay_target_degrade(overlay_target_hdl_t *thdl, void *arg)
533 {
534 overlay_dev_t *odd;
535 overlay_targ_degrade_t *otd = arg;
536
537 odd = overlay_hold_by_dlid(otd->otd_linkid);
538 if (odd == NULL)
539 return (ENOENT);
540
541 overlay_fm_degrade(odd, otd->otd_buf);
542 overlay_hold_rele(odd);
543 return (0);
544 }
545
546 /* ARGSUSED */
547 static int
548 overlay_target_restore(overlay_target_hdl_t *thdl, void *arg)
549 {
550 overlay_dev_t *odd;
551 overlay_targ_id_t *otid = arg;
552
553 odd = overlay_hold_by_dlid(otid->otid_linkid);
554 if (odd == NULL)
555 return (ENOENT);
556
557 overlay_fm_restore(odd);
558 overlay_hold_rele(odd);
559 return (0);
560 }
561
562 /* ARGSUSED */
563 static int
564 overlay_target_disassociate(overlay_target_hdl_t *thdl, void *arg)
565 {
566 overlay_dev_t *odd;
567 overlay_targ_id_t *otid = arg;
568
569 odd = overlay_hold_by_dlid(otid->otid_linkid);
570 if (odd == NULL)
571 return (ENOENT);
572
573 mutex_enter(&odd->odd_lock);
574 odd->odd_flags &= ~OVERLAY_F_VARPD;
575 mutex_exit(&odd->odd_lock);
576
577 overlay_hold_rele(odd);
578 return (0);
579
580 }
581
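/*
 * OVERLAY_TARG_LOOKUP: wait (up to roughly one second) for a queued lookup to
 * show up, translate it into an overlay_targ_lookup_t for varpd, and track
 * the entry on this handle's outstanding list until varpd responds to or
 * drops the request.
 */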
582 static int
583 overlay_target_lookup_request(overlay_target_hdl_t *thdl, void *arg)
584 {
585 overlay_targ_lookup_t *otl = arg;
586 overlay_target_entry_t *entry;
587 clock_t ret, timeout;
588 mac_header_info_t mhi;
589
590 timeout = ddi_get_lbolt() + drv_usectohz(MICROSEC);
591 again:
592 mutex_enter(&overlay_target_lock);
593 while (list_is_empty(&overlay_target_list)) {
594 ret = cv_timedwait(&overlay_target_condvar,
595 &overlay_target_lock, timeout);
596 if (ret == -1) {
597 mutex_exit(&overlay_target_lock);
598 return (ETIME);
599 }
600 }
601 entry = list_remove_head(&overlay_target_list);
602 mutex_exit(&overlay_target_lock);
603 mutex_enter(&entry->ote_lock);
604 if (entry->ote_flags & OVERLAY_ENTRY_F_VALID) {
605 ASSERT(entry->ote_chead == NULL);
606 mutex_exit(&entry->ote_lock);
607 goto again;
608 }
609 ASSERT(entry->ote_chead != NULL);
610
611 /*
612 * If we have a bogon that doesn't have a valid mac header, drop it and
613 * try again.
614 */
615 if (mac_vlan_header_info(entry->ote_odd->odd_mh, entry->ote_chead,
616 &mhi) != 0) {
617 boolean_t queue = B_FALSE;
618 mblk_t *mp = entry->ote_chead;
619 entry->ote_chead = mp->b_next;
620 mp->b_next = NULL;
621 if (entry->ote_ctail == mp)
622 entry->ote_ctail = entry->ote_chead;
623 entry->ote_mbsize -= msgsize(mp);
624 if (entry->ote_chead != NULL)
625 queue = B_TRUE;
626 mutex_exit(&entry->ote_lock);
627 if (queue == B_TRUE)
628 overlay_target_queue(entry);
629 freemsg(mp);
630 goto again;
631 }
632
633 otl->otl_dlid = entry->ote_odd->odd_linkid;
634 otl->otl_reqid = (uintptr_t)entry;
635 otl->otl_varpdid = entry->ote_ott->ott_id;
636 otl->otl_vnetid = entry->ote_odd->odd_vid;
637
638 otl->otl_hdrsize = mhi.mhi_hdrsize;
639 otl->otl_pktsize = msgsize(entry->ote_chead) - otl->otl_hdrsize;
640 bcopy(mhi.mhi_daddr, otl->otl_dstaddr, ETHERADDRL);
641 bcopy(mhi.mhi_saddr, otl->otl_srcaddr, ETHERADDRL);
642 otl->otl_dsttype = mhi.mhi_dsttype;
643 otl->otl_sap = mhi.mhi_bindsap;
644 otl->otl_vlan = VLAN_ID(mhi.mhi_tci);
645 mutex_exit(&entry->ote_lock);
646
647 mutex_enter(&thdl->oth_lock);
648 list_insert_tail(&thdl->oth_outstanding, entry);
649 mutex_exit(&thdl->oth_lock);
650
651 return (0);
652 }
653
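/*
 * OVERLAY_TARG_RESPOND: varpd has answered a lookup. Record the destination,
 * mark the entry valid and transmit any packets that were queued while the
 * lookup was outstanding.
 */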
654 static int
655 overlay_target_lookup_respond(overlay_target_hdl_t *thdl, void *arg)
656 {
657 const overlay_targ_resp_t *otr = arg;
658 overlay_target_entry_t *entry;
659 mblk_t *mp;
660
661 mutex_enter(&thdl->oth_lock);
662 for (entry = list_head(&thdl->oth_outstanding); entry != NULL;
663 entry = list_next(&thdl->oth_outstanding, entry)) {
664 if ((uintptr_t)entry == otr->otr_reqid)
665 break;
666 }
667
668 if (entry == NULL) {
669 mutex_exit(&thdl->oth_lock);
670 return (EINVAL);
671 }
672 list_remove(&thdl->oth_outstanding, entry);
673 mutex_exit(&thdl->oth_lock);
674
675 mutex_enter(&entry->ote_lock);
676 bcopy(&otr->otr_answer, &entry->ote_dest,
677 sizeof (overlay_target_point_t));
678 entry->ote_flags &= ~OVERLAY_ENTRY_F_PENDING;
679 entry->ote_flags |= OVERLAY_ENTRY_F_VALID;
680 mp = entry->ote_chead;
681 entry->ote_chead = NULL;
682 entry->ote_ctail = NULL;
683 entry->ote_mbsize = 0;
684 entry->ote_vtime = gethrtime();
685 mutex_exit(&entry->ote_lock);
686
687 /*
688 * For now do an in-situ drain.
689 */
690 mp = overlay_m_tx(entry->ote_odd, mp);
691 freemsgchain(mp);
692
693 mutex_enter(&entry->ote_ott->ott_lock);
694 entry->ote_ott->ott_ocount--;
695 cv_signal(&entry->ote_ott->ott_cond);
696 mutex_exit(&entry->ote_ott->ott_lock);
697
698 return (0);
699 }
700
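/*
 * OVERLAY_TARG_DROP: varpd has asked us to drop a lookup. Free the packet at
 * the head of the entry's queue and requeue the entry if more packets remain.
 */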
701 static int
702 overlay_target_lookup_drop(overlay_target_hdl_t *thdl, void *arg)
703 {
704 const overlay_targ_resp_t *otr = arg;
705 overlay_target_entry_t *entry;
706 mblk_t *mp;
707 boolean_t queue = B_FALSE;
708
709 mutex_enter(&thdl->oth_lock);
710 for (entry = list_head(&thdl->oth_outstanding); entry != NULL;
711 entry = list_next(&thdl->oth_outstanding, entry)) {
712 if ((uintptr_t)entry == otr->otr_reqid)
713 break;
714 }
715
716 if (entry == NULL) {
717 mutex_exit(&thdl->oth_lock);
718 return (EINVAL);
719 }
720 list_remove(&thdl->oth_outstanding, entry);
721 mutex_exit(&thdl->oth_lock);
722
723 mutex_enter(&entry->ote_lock);
724
725 /* Safeguard against a confused varpd */
726 if (entry->ote_flags & OVERLAY_ENTRY_F_VALID) {
727 entry->ote_flags &= ~OVERLAY_ENTRY_F_PENDING;
728 DTRACE_PROBE1(overlay__target__valid__drop,
729 overlay_target_entry_t *, entry);
730 mutex_exit(&entry->ote_lock);
731 goto done;
732 }
733
734 mp = entry->ote_chead;
735 if (mp != NULL) {
736 entry->ote_chead = mp->b_next;
737 mp->b_next = NULL;
738 if (entry->ote_ctail == mp)
739 entry->ote_ctail = entry->ote_chead;
740 entry->ote_mbsize -= msgsize(mp);
741 }
742 if (entry->ote_chead != NULL) {
743 queue = B_TRUE;
744 entry->ote_flags |= OVERLAY_ENTRY_F_PENDING;
745 } else {
746 entry->ote_flags &= ~OVERLAY_ENTRY_F_PENDING;
747 }
748 mutex_exit(&entry->ote_lock);
749
750 if (queue == B_TRUE)
751 overlay_target_queue(entry);
752 freemsg(mp);
753
754 done:
755 mutex_enter(&entry->ote_ott->ott_lock);
756 entry->ote_ott->ott_ocount--;
757 cv_signal(&entry->ote_ott->ott_cond);
758 mutex_exit(&entry->ote_ott->ott_lock);
759
760 return (0);
761 }
762
763 /* ARGSUSED */
764 static int
765 overlay_target_pkt_copyin(const void *ubuf, void **outp, size_t *bsize,
766 int flags)
767 {
768 overlay_targ_pkt_t *pkt;
769 overlay_targ_pkt32_t *pkt32;
770
771 pkt = kmem_alloc(sizeof (overlay_targ_pkt_t), KM_SLEEP);
772 *outp = pkt;
773 *bsize = sizeof (overlay_targ_pkt_t);
774 if (ddi_model_convert_from(flags & FMODELS) == DDI_MODEL_ILP32) {
775 uintptr_t addr;
776
777 if (ddi_copyin(ubuf, pkt, sizeof (overlay_targ_pkt32_t),
778 flags & FKIOCTL) != 0) {
779 kmem_free(pkt, *bsize);
780 return (EFAULT);
781 }
782 pkt32 = (overlay_targ_pkt32_t *)pkt;
783 addr = pkt32->otp_buf;
784 pkt->otp_buf = (void *)addr;
785 } else {
786 if (ddi_copyin(ubuf, pkt, *bsize, flags & FKIOCTL) != 0) {
787 kmem_free(pkt, *bsize);
788 return (EFAULT);
789 }
790 }
791 return (0);
792 }
793
794 static int
795 overlay_target_pkt_copyout(void *ubuf, void *buf, size_t bufsize,
796 int flags)
797 {
798 if (ddi_model_convert_from(flags & FMODELS) == DDI_MODEL_ILP32) {
799 overlay_targ_pkt_t *pkt = buf;
800 overlay_targ_pkt32_t *pkt32 = buf;
801 uintptr_t addr = (uintptr_t)pkt->otp_buf;
802 pkt32->otp_buf = (caddr32_t)addr;
803 if (ddi_copyout(buf, ubuf, sizeof (overlay_targ_pkt32_t),
804 flags & FKIOCTL) != 0)
805 return (EFAULT);
806 } else {
807 if (ddi_copyout(buf, ubuf, bufsize, flags & FKIOCTL) != 0)
808 return (EFAULT);
809 }
810 return (0);
811 }
812
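/*
 * OVERLAY_TARG_PKT: copy the first queued packet of an outstanding lookup out
 * to varpd so that it can inspect its contents.
 */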
813 static int
814 overlay_target_packet(overlay_target_hdl_t *thdl, void *arg)
815 {
816 overlay_targ_pkt_t *pkt = arg;
817 overlay_target_entry_t *entry;
818 mblk_t *mp;
819 size_t mlen;
820 size_t boff;
821
822 mutex_enter(&thdl->oth_lock);
823 for (entry = list_head(&thdl->oth_outstanding); entry != NULL;
824 entry = list_next(&thdl->oth_outstanding, entry)) {
825 if ((uintptr_t)entry == pkt->otp_reqid)
826 break;
827 }
828
829 if (entry == NULL) {
830 mutex_exit(&thdl->oth_lock);
831 return (EINVAL);
832 }
833 mutex_enter(&entry->ote_lock);
834 mutex_exit(&thdl->oth_lock);
835 mp = entry->ote_chead;
836 /* Protect against a rogue varpd */
837 if (mp == NULL) {
838 mutex_exit(&entry->ote_lock);
839 return (EINVAL);
840 }
841 mlen = MIN(msgsize(mp), pkt->otp_size);
842 pkt->otp_size = mlen;
843 boff = 0;
844 while (mlen > 0) {
845 size_t wlen = MIN(MBLKL(mp), mlen);
846 if (ddi_copyout(mp->b_rptr,
847 (void *)((uintptr_t)pkt->otp_buf + boff),
848 wlen, 0) != 0) {
849 mutex_exit(&entry->ote_lock);
850 return (EFAULT);
851 }
852 mlen -= wlen;
853 boff += wlen;
854 mp = mp->b_cont;
855 }
856 mutex_exit(&entry->ote_lock);
857 return (0);
858 }
859
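/*
 * OVERLAY_TARG_INJECT: copy a packet in from varpd and deliver it up the MAC
 * layer on the specified (or implied) overlay device as though it had been
 * received from the underlying network.
 */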
860 static int
861 overlay_target_inject(overlay_target_hdl_t *thdl, void *arg)
862 {
863 overlay_targ_pkt_t *pkt = arg;
864 overlay_target_entry_t *entry;
865 overlay_dev_t *odd;
866 mblk_t *mp;
867
868 if (pkt->otp_size > ETHERMAX + VLAN_TAGSZ)
869 return (EINVAL);
870
871 mp = allocb(pkt->otp_size, 0);
872 if (mp == NULL)
873 return (ENOMEM);
874
875 if (ddi_copyin(pkt->otp_buf, mp->b_rptr, pkt->otp_size, 0) != 0) {
876 freeb(mp);
877 return (EFAULT);
878 }
879 mp->b_wptr += pkt->otp_size;
880
881 if (pkt->otp_linkid != UINT64_MAX) {
882 odd = overlay_hold_by_dlid(pkt->otp_linkid);
883 if (odd == NULL) {
884 freeb(mp);
885 return (ENOENT);
886 }
887 } else {
888 mutex_enter(&thdl->oth_lock);
889 for (entry = list_head(&thdl->oth_outstanding); entry != NULL;
890 entry = list_next(&thdl->oth_outstanding, entry)) {
891 if ((uintptr_t)entry == pkt->otp_reqid)
892 break;
893 }
894
895 if (entry == NULL) {
896 mutex_exit(&thdl->oth_lock);
897 freeb(mp);
898 return (ENOENT);
899 }
900 odd = entry->ote_odd;
901 mutex_exit(&thdl->oth_lock);
902 }
903
904 mutex_enter(&odd->odd_lock);
905 if ((odd->odd_flags & OVERLAY_F_MDDROP) ||
906 !(odd->odd_flags & OVERLAY_F_IN_MUX)) {
907 /* Can't do receive... */
908 mutex_exit(&odd->odd_lock);
909 OVERLAY_FREEMSG(mp, "dev dropped");
910 freeb(mp);
911 return (EBUSY);
912 }
913 overlay_io_start(odd, OVERLAY_F_IN_RX);
914 mutex_exit(&odd->odd_lock);
915
916 mac_rx(odd->odd_mh, NULL, mp);
917
918 mutex_enter(&odd->odd_lock);
919 overlay_io_done(odd, OVERLAY_F_IN_RX);
920 mutex_exit(&odd->odd_lock);
921
922 return (0);
923 }
924
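/*
 * OVERLAY_TARG_RESEND: copy a packet in from varpd and transmit it out of the
 * specified (or implied) overlay device.
 */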
925 static int
926 overlay_target_resend(overlay_target_hdl_t *thdl, void *arg)
927 {
928 overlay_targ_pkt_t *pkt = arg;
929 overlay_target_entry_t *entry;
930 overlay_dev_t *odd;
931 mblk_t *mp;
932
933 if (pkt->otp_size > ETHERMAX + VLAN_TAGSZ)
934 return (EINVAL);
935
936 mp = allocb(pkt->otp_size, 0);
937 if (mp == NULL)
938 return (ENOMEM);
939
940 if (ddi_copyin(pkt->otp_buf, mp->b_rptr, pkt->otp_size, 0) != 0) {
941 freeb(mp);
942 return (EFAULT);
943 }
944 mp->b_wptr += pkt->otp_size;
945
946 if (pkt->otp_linkid != UINT64_MAX) {
947 odd = overlay_hold_by_dlid(pkt->otp_linkid);
948 if (odd == NULL) {
949 freeb(mp);
950 return (ENOENT);
951 }
952 } else {
953 mutex_enter(&thdl->oth_lock);
954 for (entry = list_head(&thdl->oth_outstanding); entry != NULL;
955 entry = list_next(&thdl->oth_outstanding, entry)) {
956 if ((uintptr_t)entry == pkt->otp_reqid)
957 break;
958 }
959
960 if (entry == NULL) {
961 mutex_exit(&thdl->oth_lock);
962 freeb(mp);
963 return (ENOENT);
964 }
965 odd = entry->ote_odd;
966 mutex_exit(&thdl->oth_lock);
967 }
968
969 mp = overlay_m_tx(odd, mp);
970 freemsgchain(mp);
971
972 return (0);
973 }
974
975 typedef struct overlay_targ_list_int {
976 boolean_t otli_count;
977 uint32_t otli_cur;
978 uint32_t otli_nents;
979 uint32_t otli_ents[];
980 } overlay_targ_list_int_t;
981
982 static int
983 overlay_target_list_copyin(const void *ubuf, void **outp, size_t *bsize,
984 int flags)
985 {
986 overlay_targ_list_t n;
987 overlay_targ_list_int_t *otl;
988
989 if (ddi_copyin(ubuf, &n, sizeof (overlay_targ_list_t),
990 flags & FKIOCTL) != 0)
991 return (EFAULT);
992
993 /* Sanity check otl_nents so the size calculation below cannot overflow. */
995 if (n.otl_nents >= INT32_MAX / sizeof (uint32_t))
996 return (EINVAL);
997 *bsize = sizeof (overlay_targ_list_int_t) +
998 sizeof (uint32_t) * n.otl_nents;
999 otl = kmem_zalloc(*bsize, KM_SLEEP);
1000 otl->otli_cur = 0;
1001 otl->otli_nents = n.otl_nents;
1002 if (otl->otli_nents != 0) {
1003 otl->otli_count = B_FALSE;
1004 if (ddi_copyin((void *)((uintptr_t)ubuf +
1005 offsetof(overlay_targ_list_t, otl_ents)),
1006 otl->otli_ents, n.otl_nents * sizeof (uint32_t),
1007 flags & FKIOCTL) != 0) {
1008 kmem_free(otl, *bsize);
1009 return (EFAULT);
1010 }
1011 } else {
1012 otl->otli_count = B_TRUE;
1013 }
1014
1015 *outp = otl;
1016 return (0);
1017 }
1018
1019 static int
1020 overlay_target_ioctl_list_cb(overlay_dev_t *odd, void *arg)
1021 {
1022 overlay_targ_list_int_t *otl = arg;
1023
1024 if (otl->otli_cur < otl->otli_nents)
1025 otl->otli_ents[otl->otli_cur] = odd->odd_linkid;
1026 otl->otli_cur++;
1027 return (0);
1028 }
1029
1030 /* ARGSUSED */
1031 static int
1032 overlay_target_ioctl_list(overlay_target_hdl_t *thdl, void *arg)
1033 {
1034 overlay_dev_iter(overlay_target_ioctl_list_cb, arg);
1035 return (0);
1036 }
1037
1038 /* ARGSUSED */
1039 static int
1040 overlay_target_list_copyout(void *ubuf, void *buf, size_t bufsize, int flags)
1041 {
1042 overlay_targ_list_int_t *otl = buf;
1043
1044 if (ddi_copyout(&otl->otli_cur, ubuf, sizeof (uint32_t),
1045 flags & FKIOCTL) != 0)
1046 return (EFAULT);
1047
1048 if (otl->otli_count == B_FALSE) {
1049 if (ddi_copyout(otl->otli_ents,
1050 (void *)((uintptr_t)ubuf +
1051 offsetof(overlay_targ_list_t, otl_ents)),
1052 sizeof (uint32_t) * otl->otli_nents,
1053 flags & FKIOCTL) != 0)
1054 return (EFAULT);
1055 }
1056 return (0);
1057 }
1058
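/*
 * OVERLAY_TARG_CACHE_GET: look up a single target cache entry by MAC address
 * (or return the point-to-point destination) and copy it back to userland.
 */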
1059 /* ARGSUSED */
1060 static int
1061 overlay_target_cache_get(overlay_target_hdl_t *thdl, void *arg)
1062 {
1063 int ret = 0;
1064 overlay_dev_t *odd;
1065 overlay_target_t *ott;
1066 overlay_targ_cache_t *otc = arg;
1067
1068 odd = overlay_hold_by_dlid(otc->otc_linkid);
1069 if (odd == NULL)
1070 return (ENOENT);
1071
1072 mutex_enter(&odd->odd_lock);
1073 if (!(odd->odd_flags & OVERLAY_F_VARPD)) {
1074 mutex_exit(&odd->odd_lock);
1075 overlay_hold_rele(odd);
1076 return (ENXIO);
1077 }
1078 ott = odd->odd_target;
1079 if (ott->ott_mode != OVERLAY_TARGET_POINT &&
1080 ott->ott_mode != OVERLAY_TARGET_DYNAMIC) {
1081 mutex_exit(&odd->odd_lock);
1082 overlay_hold_rele(odd);
1083 return (ENOTSUP);
1084 }
1085 mutex_enter(&ott->ott_lock);
1086 mutex_exit(&odd->odd_lock);
1087
1088 if (ott->ott_mode == OVERLAY_TARGET_POINT) {
1089 otc->otc_entry.otce_flags = 0;
1090 bcopy(&ott->ott_u.ott_point, &otc->otc_entry.otce_dest,
1091 sizeof (overlay_target_point_t));
1092 } else {
1093 overlay_target_entry_t *ote;
1094 ote = refhash_lookup(ott->ott_u.ott_dyn.ott_dhash,
1095 otc->otc_entry.otce_mac);
1096 if (ote != NULL) {
1097 mutex_enter(&ote->ote_lock);
1098 if ((ote->ote_flags &
1099 OVERLAY_ENTRY_F_VALID_MASK) != 0) {
1100 if (ote->ote_flags & OVERLAY_ENTRY_F_DROP) {
1101 otc->otc_entry.otce_flags =
1102 OVERLAY_TARGET_CACHE_DROP;
1103 } else {
1104 otc->otc_entry.otce_flags = 0;
1105 bcopy(&ote->ote_dest,
1106 &otc->otc_entry.otce_dest,
1107 sizeof (overlay_target_point_t));
1108 }
1109 ret = 0;
1110 } else {
1111 ret = ENOENT;
1112 }
1113 mutex_exit(&ote->ote_lock);
1114 } else {
1115 ret = ENOENT;
1116 }
1117 }
1118
1119 mutex_exit(&ott->ott_lock);
1120 overlay_hold_rele(odd);
1121
1122 return (ret);
1123 }
1124
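/*
 * OVERLAY_TARG_CACHE_SET: create or update a target cache entry for a dynamic
 * device. If the entry becomes valid, any packets queued against it are
 * transmitted.
 */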
1125 /* ARGSUSED */
1126 static int
1127 overlay_target_cache_set(overlay_target_hdl_t *thdl, void *arg)
1128 {
1129 overlay_dev_t *odd;
1130 overlay_target_t *ott;
1131 overlay_target_entry_t *ote;
1132 overlay_targ_cache_t *otc = arg;
1133 mblk_t *mp = NULL;
1134
1135 if (otc->otc_entry.otce_flags & ~OVERLAY_TARGET_CACHE_DROP)
1136 return (EINVAL);
1137
1138 odd = overlay_hold_by_dlid(otc->otc_linkid);
1139 if (odd == NULL)
1140 return (ENOENT);
1141
1142 mutex_enter(&odd->odd_lock);
1143 if (!(odd->odd_flags & OVERLAY_F_VARPD)) {
1144 mutex_exit(&odd->odd_lock);
1145 overlay_hold_rele(odd);
1146 return (ENXIO);
1147 }
1148 ott = odd->odd_target;
1149 if (ott->ott_mode != OVERLAY_TARGET_DYNAMIC) {
1150 mutex_exit(&odd->odd_lock);
1151 overlay_hold_rele(odd);
1152 return (ENOTSUP);
1153 }
1154 mutex_enter(&ott->ott_lock);
1155 mutex_exit(&odd->odd_lock);
1156
1157 ote = refhash_lookup(ott->ott_u.ott_dyn.ott_dhash,
1158 otc->otc_entry.otce_mac);
1159 if (ote == NULL) {
1160 ote = kmem_cache_alloc(overlay_entry_cache, KM_SLEEP);
1161 bcopy(otc->otc_entry.otce_mac, ote->ote_addr, ETHERADDRL);
1162 ote->ote_chead = ote->ote_ctail = NULL;
1163 ote->ote_mbsize = 0;
1164 ote->ote_ott = ott;
1165 ote->ote_odd = odd;
1166 mutex_enter(&ote->ote_lock);
1167 refhash_insert(ott->ott_u.ott_dyn.ott_dhash, ote);
1168 avl_add(&ott->ott_u.ott_dyn.ott_tree, ote);
1169 } else {
1170 mutex_enter(&ote->ote_lock);
1171 }
1172
1173 if (otc->otc_entry.otce_flags & OVERLAY_TARGET_CACHE_DROP) {
1174 ote->ote_flags |= OVERLAY_ENTRY_F_DROP;
1175 } else {
1176 ote->ote_flags |= OVERLAY_ENTRY_F_VALID;
1177 bcopy(&otc->otc_entry.otce_dest, &ote->ote_dest,
1178 sizeof (overlay_target_point_t));
1179 mp = ote->ote_chead;
1180 ote->ote_chead = NULL;
1181 ote->ote_ctail = NULL;
1182 ote->ote_mbsize = 0;
1183 ote->ote_vtime = gethrtime();
1184 }
1185
1186 mutex_exit(&ote->ote_lock);
1187 mutex_exit(&ott->ott_lock);
1188
1189 if (mp != NULL) {
1190 mp = overlay_m_tx(ote->ote_odd, mp);
1191 freemsgchain(mp);
1192 }
1193
1194 overlay_hold_rele(odd);
1195
1196 return (0);
1197 }
1198
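/*
 * OVERLAY_TARG_CACHE_REMOVE: invalidate a single target cache entry. The
 * entry itself remains in the cache; only its valid/drop state is cleared.
 */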
1199 /* ARGSUSED */
1200 static int
1201 overlay_target_cache_remove(overlay_target_hdl_t *thdl, void *arg)
1202 {
1203 int ret = 0;
1204 overlay_dev_t *odd;
1205 overlay_target_t *ott;
1206 overlay_target_entry_t *ote;
1207 overlay_targ_cache_t *otc = arg;
1208
1209 odd = overlay_hold_by_dlid(otc->otc_linkid);
1210 if (odd == NULL)
1211 return (ENOENT);
1212
1213 mutex_enter(&odd->odd_lock);
1214 if (!(odd->odd_flags & OVERLAY_F_VARPD)) {
1215 mutex_exit(&odd->odd_lock);
1216 overlay_hold_rele(odd);
1217 return (ENXIO);
1218 }
1219 ott = odd->odd_target;
1220 if (ott->ott_mode != OVERLAY_TARGET_DYNAMIC) {
1221 mutex_exit(&odd->odd_lock);
1222 overlay_hold_rele(odd);
1223 return (ENOTSUP);
1224 }
1225 mutex_enter(&ott->ott_lock);
1226 mutex_exit(&odd->odd_lock);
1227
1228 ote = refhash_lookup(ott->ott_u.ott_dyn.ott_dhash,
1229 otc->otc_entry.otce_mac);
1230 if (ote != NULL) {
1231 mutex_enter(&ote->ote_lock);
1232 ote->ote_flags &= ~OVERLAY_ENTRY_F_VALID_MASK;
1233 mutex_exit(&ote->ote_lock);
1234 ret = 0;
1235 } else {
1236 ret = ENOENT;
1237 }
1238
1239 mutex_exit(&ott->ott_lock);
1240 overlay_hold_rele(odd);
1241
1242 return (ret);
1243 }
1244
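/*
 * OVERLAY_TARG_CACHE_FLUSH: invalidate every entry in a dynamic device's
 * target cache.
 */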
1245 /* ARGSUSED */
1246 static int
1247 overlay_target_cache_flush(overlay_target_hdl_t *thdl, void *arg)
1248 {
1249 avl_tree_t *avl;
1250 overlay_dev_t *odd;
1251 overlay_target_t *ott;
1252 overlay_target_entry_t *ote;
1253 overlay_targ_cache_t *otc = arg;
1254
1255 odd = overlay_hold_by_dlid(otc->otc_linkid);
1256 if (odd == NULL)
1257 return (ENOENT);
1258
1259 mutex_enter(&odd->odd_lock);
1260 if (!(odd->odd_flags & OVERLAY_F_VARPD)) {
1261 mutex_exit(&odd->odd_lock);
1262 overlay_hold_rele(odd);
1263 return (ENXIO);
1264 }
1265 ott = odd->odd_target;
1266 if (ott->ott_mode != OVERLAY_TARGET_DYNAMIC) {
1267 mutex_exit(&odd->odd_lock);
1268 overlay_hold_rele(odd);
1269 return (ENOTSUP);
1270 }
1271 mutex_enter(&ott->ott_lock);
1272 mutex_exit(&odd->odd_lock);
1273 avl = &ott->ott_u.ott_dyn.ott_tree;
1274
1275 for (ote = avl_first(avl); ote != NULL; ote = AVL_NEXT(avl, ote)) {
1276 mutex_enter(&ote->ote_lock);
1277 ote->ote_flags &= ~OVERLAY_ENTRY_F_VALID_MASK;
1278 mutex_exit(&ote->ote_lock);
1279 }
1280 ote = refhash_lookup(ott->ott_u.ott_dyn.ott_dhash,
1281 otc->otc_entry.otce_mac);
1282
1283 mutex_exit(&ott->ott_lock);
1284 overlay_hold_rele(odd);
1285
1286 return (0);
1287 }
1288
1289 static int
1290 overlay_target_cache_iter_copyin(const void *ubuf, void **outp, size_t *bsize,
1291 int flags)
1292 {
1293 overlay_targ_cache_iter_t base, *iter;
1294
1295 if (ddi_copyin(ubuf, &base, sizeof (overlay_targ_cache_iter_t),
1296 flags & FKIOCTL) != 0)
1297 return (EFAULT);
1298
1299 if (base.otci_count > OVERLAY_TARGET_ITER_MAX)
1300 return (E2BIG);
1301
1302 if (base.otci_count == 0)
1303 return (EINVAL);
1304
1305 *bsize = sizeof (overlay_targ_cache_iter_t) +
1306 base.otci_count * sizeof (overlay_targ_cache_entry_t);
1307 iter = kmem_alloc(*bsize, KM_SLEEP);
1308 bcopy(&base, iter, sizeof (overlay_targ_cache_iter_t));
1309 *outp = iter;
1310
1311 return (0);
1312 }
1313
1314 typedef struct overlay_targ_cache_marker {
1315 uint8_t otcm_mac[ETHERADDRL];
1316 uint16_t otcm_done;
1317 } overlay_targ_cache_marker_t;
1318
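/*
 * OVERLAY_TARG_CACHE_ITER: copy out up to otci_count cache entries, walking
 * the AVL tree in MAC address order. The last MAC address visited is stashed
 * in the opaque marker so that a subsequent call resumes where this one left
 * off; once the walk completes, the marker is flagged as done.
 */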
1319 /* ARGSUSED */
1320 static int
1321 overlay_target_cache_iter(overlay_target_hdl_t *thdl, void *arg)
1322 {
1323 overlay_dev_t *odd;
1324 overlay_target_t *ott;
1325 overlay_target_entry_t lookup, *ent;
1326 overlay_targ_cache_marker_t *mark;
1327 avl_index_t where;
1328 avl_tree_t *avl;
1329 uint16_t written = 0;
1330
1331 overlay_targ_cache_iter_t *iter = arg;
1332 mark = (void *)&iter->otci_marker;
1333
1334 if (mark->otcm_done != 0) {
1335 iter->otci_count = 0;
1336 return (0);
1337 }
1338
1339 odd = overlay_hold_by_dlid(iter->otci_linkid);
1340 if (odd == NULL)
1341 return (ENOENT);
1342
1343 mutex_enter(&odd->odd_lock);
1344 if (!(odd->odd_flags & OVERLAY_F_VARPD)) {
1345 mutex_exit(&odd->odd_lock);
1346 overlay_hold_rele(odd);
1347 return (ENXIO);
1348 }
1349 ott = odd->odd_target;
1350 if (ott->ott_mode != OVERLAY_TARGET_DYNAMIC &&
1351 ott->ott_mode != OVERLAY_TARGET_POINT) {
1352 mutex_exit(&odd->odd_lock);
1353 overlay_hold_rele(odd);
1354 return (ENOTSUP);
1355 }
1356
1357 /*
1358 * Holding this lock across the entire iteration probably isn't very
1359 * good. We should perhaps add an r/w lock for the avl tree. But we'll
1360 * wait until we know it's necessary before we do more.
1361 */
1362 mutex_enter(&ott->ott_lock);
1363 mutex_exit(&odd->odd_lock);
1364
1365 if (ott->ott_mode == OVERLAY_TARGET_POINT) {
1366 overlay_targ_cache_entry_t *out = &iter->otci_ents[0];
1367 bzero(out->otce_mac, ETHERADDRL);
1368 out->otce_flags = 0;
1369 bcopy(&ott->ott_u.ott_point, &out->otce_dest,
1370 sizeof (overlay_target_point_t));
1371 written++;
1372 mark->otcm_done = 1;
1373 }
1374
1375 avl = &ott->ott_u.ott_dyn.ott_tree;
1376 bcopy(mark->otcm_mac, lookup.ote_addr, ETHERADDRL);
1377 ent = avl_find(avl, &lookup, &where);
1378
1379 /*
1380 * NULL ent means that the entry does not exist, so we want to start
1381 * with the closest node in the tree. This means that we implicitly rely
1382 * on the tree's order and the first node will be the mac
1383 * 00:00:00:00:00:00 and the last will be ff:ff:ff:ff:ff:ff.
1384 */
1385 if (ent == NULL) {
1386 ent = avl_nearest(avl, where, AVL_AFTER);
1387 if (ent == NULL) {
1388 mark->otcm_done = 1;
1389 goto done;
1390 }
1391 }
1392
1393 for (; ent != NULL && written < iter->otci_count;
1394 ent = AVL_NEXT(avl, ent)) {
1395 overlay_targ_cache_entry_t *out = &iter->otci_ents[written];
1396 mutex_enter(&ent->ote_lock);
1397 if ((ent->ote_flags & OVERLAY_ENTRY_F_VALID_MASK) == 0) {
1398 mutex_exit(&ent->ote_lock);
1399 continue;
1400 }
1401 bcopy(ent->ote_addr, out->otce_mac, ETHERADDRL);
1402 out->otce_flags = 0;
1403 if (ent->ote_flags & OVERLAY_ENTRY_F_DROP)
1404 out->otce_flags |= OVERLAY_TARGET_CACHE_DROP;
1405 if (ent->ote_flags & OVERLAY_ENTRY_F_VALID)
1406 bcopy(&ent->ote_dest, &out->otce_dest,
1407 sizeof (overlay_target_point_t));
1408 written++;
1409 mutex_exit(&ent->ote_lock);
1410 }
1411
1412 if (ent != NULL) {
1413 bcopy(ent->ote_addr, mark->otcm_mac, ETHERADDRL);
1414 } else {
1415 mark->otcm_done = 1;
1416 }
1417
1418 done:
1419 iter->otci_count = written;
1420 mutex_exit(&ott->ott_lock);
1421 overlay_hold_rele(odd);
1422
1423 return (0);
1424 }
1425
1426 /* ARGSUSED */
1427 static int
1428 overlay_target_cache_iter_copyout(void *ubuf, void *buf, size_t bufsize,
1429 int flags)
1430 {
1431 size_t outsize;
1432 const overlay_targ_cache_iter_t *iter = buf;
1433
1434 outsize = sizeof (overlay_targ_cache_iter_t) +
1435 iter->otci_count * sizeof (overlay_targ_cache_entry_t);
1436
1437 if (ddi_copyout(buf, ubuf, outsize, flags & FKIOCTL) != 0)
1438 return (EFAULT);
1439
1440 return (0);
1441 }
1442
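/*
 * Table of target ioctls. Each entry describes the command, whether it
 * requires FWRITE, whether results are copied back out, optional custom
 * copyin/copyout routines and the size of the user level structure.
 */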
1443 static overlay_target_ioctl_t overlay_target_ioctab[] = {
1444 { OVERLAY_TARG_INFO, B_TRUE, B_TRUE,
1445 NULL, overlay_target_info,
1446 NULL, sizeof (overlay_targ_info_t) },
1447 { OVERLAY_TARG_ASSOCIATE, B_TRUE, B_FALSE,
1448 NULL, overlay_target_associate,
1449 NULL, sizeof (overlay_targ_associate_t) },
1450 { OVERLAY_TARG_DISASSOCIATE, B_TRUE, B_FALSE,
1451 NULL, overlay_target_disassociate,
1452 NULL, sizeof (overlay_targ_id_t) },
1453 { OVERLAY_TARG_DEGRADE, B_TRUE, B_FALSE,
1454 NULL, overlay_target_degrade,
1455 NULL, sizeof (overlay_targ_degrade_t) },
1456 { OVERLAY_TARG_RESTORE, B_TRUE, B_FALSE,
1457 NULL, overlay_target_restore,
1458 NULL, sizeof (overlay_targ_id_t) },
1459 { OVERLAY_TARG_LOOKUP, B_FALSE, B_TRUE,
1460 NULL, overlay_target_lookup_request,
1461 NULL, sizeof (overlay_targ_lookup_t) },
1462 { OVERLAY_TARG_RESPOND, B_TRUE, B_FALSE,
1463 NULL, overlay_target_lookup_respond,
1464 NULL, sizeof (overlay_targ_resp_t) },
1465 { OVERLAY_TARG_DROP, B_TRUE, B_FALSE,
1466 NULL, overlay_target_lookup_drop,
1467 NULL, sizeof (overlay_targ_resp_t) },
1468 { OVERLAY_TARG_PKT, B_TRUE, B_TRUE,
1469 overlay_target_pkt_copyin,
1470 overlay_target_packet,
1471 overlay_target_pkt_copyout,
1472 sizeof (overlay_targ_pkt_t) },
1473 { OVERLAY_TARG_INJECT, B_TRUE, B_FALSE,
1474 overlay_target_pkt_copyin,
1475 overlay_target_inject,
1476 NULL, sizeof (overlay_targ_pkt_t) },
1477 { OVERLAY_TARG_RESEND, B_TRUE, B_FALSE,
1478 overlay_target_pkt_copyin,
1479 overlay_target_resend,
1480 NULL, sizeof (overlay_targ_pkt_t) },
1481 { OVERLAY_TARG_LIST, B_FALSE, B_TRUE,
1482 overlay_target_list_copyin,
1483 overlay_target_ioctl_list,
1484 overlay_target_list_copyout,
1485 sizeof (overlay_targ_list_t) },
1486 { OVERLAY_TARG_CACHE_GET, B_FALSE, B_TRUE,
1487 NULL, overlay_target_cache_get,
1488 NULL, sizeof (overlay_targ_cache_t) },
1489 { OVERLAY_TARG_CACHE_SET, B_TRUE, B_TRUE,
1490 NULL, overlay_target_cache_set,
1491 NULL, sizeof (overlay_targ_cache_t) },
1492 { OVERLAY_TARG_CACHE_REMOVE, B_TRUE, B_TRUE,
1493 NULL, overlay_target_cache_remove,
1494 NULL, sizeof (overlay_targ_cache_t) },
1495 { OVERLAY_TARG_CACHE_FLUSH, B_TRUE, B_TRUE,
1496 NULL, overlay_target_cache_flush,
1497 NULL, sizeof (overlay_targ_cache_t) },
1498 { OVERLAY_TARG_CACHE_ITER, B_FALSE, B_TRUE,
1499 overlay_target_cache_iter_copyin,
1500 overlay_target_cache_iter,
1501 overlay_target_cache_iter_copyout,
1502 sizeof (overlay_targ_cache_iter_t) },
1503 { 0 }
1504 };
1505
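/*
 * Open a new target handle (used by varpd). Each open clones a new minor and
 * handle; writers must also open with FEXCL and only one exclusive opener is
 * allowed at a time.
 */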
1506 int
1507 overlay_target_open(dev_t *devp, int flags, int otype, cred_t *credp)
1508 {
1509 minor_t mid;
1510 overlay_target_hdl_t *thdl;
1511
1512 if (secpolicy_dl_config(credp) != 0)
1513 return (EPERM);
1514
1515 if (getminor(*devp) != 0)
1516 return (ENXIO);
1517
1518 if (otype & OTYP_BLK)
1519 return (EINVAL);
1520
1521 if (flags & ~(FREAD | FWRITE | FEXCL))
1522 return (EINVAL);
1523
1524 if ((flags & FWRITE) &&
1525 !(flags & FEXCL))
1526 return (EINVAL);
1527
1528 if (!(flags & FREAD) && !(flags & FWRITE))
1529 return (EINVAL);
1530
1531 if (crgetzoneid(credp) != GLOBAL_ZONEID)
1532 return (EPERM);
1533
1534 mid = id_alloc(overlay_thdl_idspace);
1535 if (ddi_soft_state_zalloc(overlay_thdl_state, mid) != 0) {
1536 id_free(overlay_thdl_idspace, mid);
1537 return (ENXIO);
1538 }
1539
1540 thdl = ddi_get_soft_state(overlay_thdl_state, mid);
1541 VERIFY(thdl != NULL);
1542 thdl->oth_minor = mid;
1543 thdl->oth_zoneid = crgetzoneid(credp);
1544 thdl->oth_oflags = flags;
1545 mutex_init(&thdl->oth_lock, NULL, MUTEX_DRIVER, NULL);
1546 list_create(&thdl->oth_outstanding, sizeof (overlay_target_entry_t),
1547 offsetof(overlay_target_entry_t, ote_qlink));
1548 *devp = makedevice(getmajor(*devp), mid);
1549
1550 mutex_enter(&overlay_target_lock);
1551 if ((flags & FEXCL) && overlay_target_excl == B_TRUE) {
1552 mutex_exit(&overlay_target_lock);
1553 list_destroy(&thdl->oth_outstanding);
1554 mutex_destroy(&thdl->oth_lock);
1555 ddi_soft_state_free(overlay_thdl_state, mid);
1556 id_free(overlay_thdl_idspace, mid);
1557 return (EEXIST);
1558 } else if ((flags & FEXCL) != 0) {
1559 VERIFY(overlay_target_excl == B_FALSE);
1560 overlay_target_excl = B_TRUE;
1561 }
1562 list_insert_tail(&overlay_thdl_list, thdl);
1563 mutex_exit(&overlay_target_lock);
1564
1565 return (0);
1566 }
1567
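/*
 * Dispatch a target ioctl through overlay_target_ioctab: copy the request in
 * (using a custom copyin routine if one is provided), invoke the handler and
 * copy the result back out when required.
 */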
1568 /* ARGSUSED */
1569 int
1570 overlay_target_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
1571 int *rvalp)
1572 {
1573 overlay_target_ioctl_t *ioc;
1574 overlay_target_hdl_t *thdl;
1575
1576 if (secpolicy_dl_config(credp) != 0)
1577 return (EPERM);
1578
1579 if ((thdl = ddi_get_soft_state(overlay_thdl_state,
1580 getminor(dev))) == NULL)
1581 return (ENXIO);
1582
1583 for (ioc = &overlay_target_ioctab[0]; ioc->oti_cmd != 0; ioc++) {
1584 int ret;
1585 caddr_t buf;
1586 size_t bufsize;
1587
1588 if (ioc->oti_cmd != cmd)
1589 continue;
1590
1591 if (ioc->oti_write == B_TRUE && !(mode & FWRITE))
1592 return (EBADF);
1593
1594 if (ioc->oti_copyin == NULL) {
1595 bufsize = ioc->oti_size;
1596 buf = kmem_alloc(bufsize, KM_SLEEP);
1597 if (ddi_copyin((void *)(uintptr_t)arg, buf, bufsize,
1598 mode & FKIOCTL) != 0) {
1599 kmem_free(buf, bufsize);
1600 return (EFAULT);
1601 }
1602 } else {
1603 if ((ret = ioc->oti_copyin((void *)(uintptr_t)arg,
1604 (void **)&buf, &bufsize, mode)) != 0)
1605 return (ret);
1606 }
1607
1608 ret = ioc->oti_func(thdl, buf);
1609 if (ret == 0 && ioc->oti_size != 0 &&
1610 ioc->oti_ncopyout == B_TRUE) {
1611 if (ioc->oti_copyout == NULL) {
1612 if (ddi_copyout(buf, (void *)(uintptr_t)arg,
1613 bufsize, mode & FKIOCTL) != 0)
1614 ret = EFAULT;
1615 } else {
1616 ret = ioc->oti_copyout((void *)(uintptr_t)arg,
1617 buf, bufsize, mode);
1618 }
1619 }
1620
1621 kmem_free(buf, bufsize);
1622 return (ret);
1623 }
1624
1625 return (ENOTTY);
1626 }
1627
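/*
 * Close a target handle. Any lookups still outstanding on this handle are
 * returned to the global pending list so that another handle can service
 * them.
 */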
1628 /* ARGSUSED */
1629 int
1630 overlay_target_close(dev_t dev, int flags, int otype, cred_t *credp)
1631 {
1632 overlay_target_hdl_t *thdl;
1633 overlay_target_entry_t *entry;
1634 minor_t mid = getminor(dev);
1635
1636 if ((thdl = ddi_get_soft_state(overlay_thdl_state, mid)) == NULL)
1637 return (ENXIO);
1638
1639 mutex_enter(&overlay_target_lock);
1640 list_remove(&overlay_thdl_list, thdl);
1641 mutex_enter(&thdl->oth_lock);
1642 while ((entry = list_remove_head(&thdl->oth_outstanding)) != NULL)
1643 list_insert_tail(&overlay_target_list, entry);
1644 cv_signal(&overlay_target_condvar);
1645 mutex_exit(&thdl->oth_lock);
1646 if ((thdl->oth_oflags & FEXCL) != 0) {
1647 VERIFY(overlay_target_excl == B_TRUE);
1648 overlay_target_excl = B_FALSE;
1649 }
1650 mutex_exit(&overlay_target_lock);
1651
1652 list_destroy(&thdl->oth_outstanding);
1653 mutex_destroy(&thdl->oth_lock);
1654 mid = thdl->oth_minor;
1655 ddi_soft_state_free(overlay_thdl_state, mid);
1656 id_free(overlay_thdl_idspace, mid);
1657
1658 return (0);
1659 }
1660