1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24 #include <sys/types.h>
25 #include <sys/stream.h>
26 #include <sys/dlpi.h>
27 #include <sys/stropts.h>
28 #include <sys/strsun.h>
29 #include <sys/sysmacros.h>
30 #include <sys/strlog.h>
31 #include <sys/ddi.h>
32 #include <sys/cmn_err.h>
33 #include <sys/socket.h>
34 #include <net/if.h>
35 #include <net/if_types.h>
36 #include <netinet/in.h>
37 #include <sys/ethernet.h>
38 #include <inet/arp.h>
39 #include <inet/ip.h>
40 #include <inet/ip6.h>
41 #include <inet/ip_ire.h>
42 #include <inet/ip_if.h>
43 #include <inet/ip_ftable.h>
44
45 #include <sys/sunddi.h>
46 #include <sys/ksynch.h>
47
48 #include <sys/rds.h>
49 #include <sys/socket.h>
50 #include <sys/socketvar.h>
51 #include <sys/sockio.h>
52 #include <sys/sysmacros.h>
53 #include <inet/common.h>
54 #include <inet/ip.h>
55 #include <net/if_types.h>
56
57 #include <sys/ib/clients/rdsv3/rdsv3.h>
58 #include <sys/ib/clients/rdsv3/rdma.h>
59 #include <sys/ib/clients/rdsv3/ib.h>
60 #include <sys/ib/clients/rdsv3/rdsv3_impl.h>
61 #include <sys/ib/clients/rdsv3/rdsv3_debug.h>
62
63 #include <sys/dls.h>
64 #include <sys/mac.h>
65 #include <sys/mac_client.h>
66 #include <sys/mac_provider.h>
67 #include <sys/mac_client_priv.h>
68
69 ddi_taskq_t *rdsv3_taskq = NULL;
70 extern kmem_cache_t *rdsv3_alloc_cache;
71
72 extern unsigned int ip_ocsum(ushort_t *address, int halfword_count,
73 unsigned int sum);
74
75 /*
76 * Check if the IP interface named by `lifrp' is RDS-capable.
77 */
78 boolean_t
79 rdsv3_capable_interface(struct lifreq *lifrp)
80 {
81 char ifname[LIFNAMSIZ];
82 char drv[MAXLINKNAMELEN];
83 uint_t ppa;
84 char *cp;
85
86 RDSV3_DPRINTF4("rdsv3_capable_interface", "Enter");
87
88 if (lifrp->lifr_type == IFT_IB)
89 return (B_TRUE);
90
91 /*
92 * Strip off the logical interface portion before getting
93 * intimate with the name.
94 */
95 (void) strlcpy(ifname, lifrp->lifr_name, LIFNAMSIZ);
96 if ((cp = strchr(ifname, ':')) != NULL)
97 *cp = '\0';
98
99 if (strcmp("lo0", ifname) == 0) {
100 /*
101 * loopback is considered RDS-capable
102 */
103 return (B_TRUE);
104 }
105
106 return (
107 ddi_parse_dlen(ifname, drv, MAXLINKNAMELEN, &ppa) == DDI_SUCCESS &&
108 rdsv3_if_lookup_by_name(drv));
109 }
110
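/*
 * Snapshot the IP interface list via SIOCGLIFNUM/SIOCGLIFCONF on `so4',
 * filter it down to RDS-capable interfaces that are up and not
 * anycast, nolocal or deprecated, and return the surviving lifreq
 * records in a kmem_alloc'd buffer.  The caller owns `*ipaddrs' and
 * must kmem_free() it using `*size'; `*nifs' is the record count.
 * Each returned record has its address family rewritten to
 * AF_INET_OFFLOAD.
 */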
111 int
112 rdsv3_do_ip_ioctl(ksocket_t so4, void **ipaddrs, int *size, int *nifs)
113 {
114 struct lifnum lifn;
115 struct lifconf lifc;
116 struct lifreq *lp, *rlp, lifr;
117 int rval = 0;
118 int numifs;
119 int bufsize, rbufsize;
120 void *buf, *rbuf;
121 int i, j, n, rc;
122
123 *ipaddrs = NULL;
124 *size = 0;
125 *nifs = 0;
126
127 RDSV3_DPRINTF4("rdsv3_do_ip_ioctl", "Enter");
128
129 retry_count:
130 /* snapshot the current number of interfaces */
131 lifn.lifn_family = PF_UNSPEC;
132 lifn.lifn_flags = LIFC_NOXMIT | LIFC_TEMPORARY | LIFC_ALLZONES;
133 lifn.lifn_count = 0;
134 rval = ksocket_ioctl(so4, SIOCGLIFNUM, (intptr_t)&lifn, &rval,
135 CRED());
136 if (rval != 0) {
137 RDSV3_DPRINTF2("rdsv3_do_ip_ioctl",
138 "ksocket_ioctl returned: %d", rval);
139 return (rval);
140 }
141
142 numifs = lifn.lifn_count;
143 if (numifs <= 0) {
144 RDSV3_DPRINTF2("rdsv3_do_ip_ioctl", "No interfaces found");
145 return (0);
146 }
147
148 /* allocate extra room in case more interfaces appear */
149 numifs += 10;
150
151 /* get the interface names and ip addresses */
152 bufsize = numifs * sizeof (struct lifreq);
153 buf = kmem_alloc(bufsize, KM_SLEEP);
154
155 lifc.lifc_family = AF_UNSPEC;
156 lifc.lifc_flags = LIFC_NOXMIT | LIFC_TEMPORARY | LIFC_ALLZONES;
157 lifc.lifc_len = bufsize;
158 lifc.lifc_buf = buf;
159 rc = ksocket_ioctl(so4, SIOCGLIFCONF, (intptr_t)&lifc, &rval, CRED());
160 if (rc != 0) {
161 RDSV3_DPRINTF2("rdsv3_do_ip_ioctl", "SIOCGLIFCONF failed");
162 kmem_free(buf, bufsize);
163 return (rc);
164 }
165 /* if our extra room is used up, try again */
166 if (bufsize <= lifc.lifc_len) {
167 kmem_free(buf, bufsize);
168 buf = NULL;
169 goto retry_count;
170 }
171 /* calculate the actual number of lifreq entries returned */
172 n = lifc.lifc_len / sizeof (struct lifreq);
173
174 /*
175 * Count the RDS interfaces
176 */
177 for (i = 0, j = 0, lp = lifc.lifc_req; i < n; i++, lp++) {
178
179 /*
180 * Copy as the SIOCGLIFFLAGS ioctl is destructive
181 */
182 bcopy(lp, &lifr, sizeof (struct lifreq));
183 /*
184 * fetch the flags using the socket of the correct family
185 */
186 switch (lifr.lifr_addr.ss_family) {
187 case AF_INET:
188 rc = ksocket_ioctl(so4, SIOCGLIFFLAGS, (intptr_t)&lifr,
189 &rval, CRED());
190 break;
191 default:
192 continue;
193 }
194
195 if (rc != 0) continue;
196
197 /*
198 * If we got the flags, skip uninteresting
199 * interfaces based on flags
200 */
201 if ((lifr.lifr_flags & IFF_UP) != IFF_UP)
202 continue;
203 if (lifr.lifr_flags &
204 (IFF_ANYCAST|IFF_NOLOCAL|IFF_DEPRECATED))
205 continue;
206 if (!rdsv3_capable_interface(&lifr))
207 continue;
208 j++;
209 }
210
211 if (j <= 0) {
212 RDSV3_DPRINTF2("rdsv3_do_ip_ioctl", "No RDS interfaces");
213 kmem_free(buf, bufsize);
214 return (rval);
215 }
216
217 numifs = j;
218
219 /* This is the buffer we pass back */
220 rbufsize = numifs * sizeof (struct lifreq);
221 rbuf = kmem_alloc(rbufsize, KM_SLEEP);
222 rlp = (struct lifreq *)rbuf;
223
224 /*
225 * Examine the array of interfaces and filter uninteresting ones
226 */
227 for (i = 0, lp = lifc.lifc_req; i < n; i++, lp++) {
228
229 /*
230 * Copy the address as the SIOCGLIFFLAGS ioctl is destructive
231 */
232 bcopy(lp, &lifr, sizeof (struct lifreq));
233 /*
234 * fetch the flags using the socket of the correct family
235 */
236 switch (lifr.lifr_addr.ss_family) {
237 case AF_INET:
238 rc = ksocket_ioctl(so4, SIOCGLIFFLAGS, (intptr_t)&lifr,
239 &rval, CRED());
240 break;
241 default:
242 continue;
243 }
244
245
246 if (rc != 0) {
247 RDSV3_DPRINTF2("rdsv3_do_ip_ioctl",
248 "ksocket_ioctl failed" " for %s", lifr.lifr_name);
249 continue;
250 }
251
252 /*
253 * If we got the flags, skip uninteresting
254 * interfaces based on flags
255 */
256 if ((lifr.lifr_flags & IFF_UP) != IFF_UP)
257 continue;
258 if (lifr.lifr_flags &
259 (IFF_ANYCAST|IFF_NOLOCAL|IFF_DEPRECATED))
260 continue;
261 if (!rdsv3_capable_interface(&lifr))
262 continue;
263
264 /* save the record */
265 bcopy(lp, rlp, sizeof (struct lifreq));
266 rlp->lifr_addr.ss_family = AF_INET_OFFLOAD;
267 rlp++;
268 }
269
270 kmem_free(buf, bufsize);
271
272 *ipaddrs = rbuf;
273 *size = rbufsize;
274 *nifs = numifs;
275
276 RDSV3_DPRINTF4("rdsv3_do_ip_ioctl", "Return");
277
278 return (rval);
279 }
280
281 /*
282 * Check if the IP interface named by `ifrp' is RDS-capable.
283 */
284 boolean_t
285 rdsv3_capable_interface_old(struct ifreq *ifrp)
286 {
287 char ifname[IFNAMSIZ];
288 char drv[MAXLINKNAMELEN];
289 uint_t ppa;
290 char *cp;
291
292 RDSV3_DPRINTF4("rdsv3_capable_interface_old", "Enter");
293
294 /*
295 * Strip off the logical interface portion before getting
296 * intimate with the name.
297 */
298 (void) strlcpy(ifname, ifrp->ifr_name, IFNAMSIZ);
299 if ((cp = strchr(ifname, ':')) != NULL)
300 *cp = '\0';
301
302 RDSV3_DPRINTF4("rdsv3_capable_interface_old", "ifname: %s", ifname);
303
304 if ((strcmp("lo0", ifname) == 0) ||
305 (strncmp("ibd", ifname, 3) == 0)) {
306 /*
307 * loopback and IB are considered RDS-capable
308 */
309 return (B_TRUE);
310 }
311
312 return (
313 ddi_parse_dlen(ifname, drv, MAXLINKNAMELEN, &ppa) == DDI_SUCCESS &&
314 rdsv3_if_lookup_by_name(drv));
315 }
316
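/*
 * Variant of rdsv3_do_ip_ioctl() that uses the older SIOCGIFNUM/
 * SIOCGIFCONF ioctls and struct ifreq records; the filtering logic is
 * otherwise the same.
 */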
317 int
318 rdsv3_do_ip_ioctl_old(ksocket_t so4, void **ipaddrs, int *size, int *nifs)
319 {
320 uint_t ifn;
321 struct ifconf ifc;
322 struct ifreq *lp, *rlp, ifr;
323 int rval = 0;
324 int numifs;
325 int bufsize, rbufsize;
326 void *buf, *rbuf;
327 int i, j, n, rc;
328
329 *ipaddrs = NULL;
330 *size = 0;
331 *nifs = 0;
332
333 RDSV3_DPRINTF4("rdsv3_do_ip_ioctl_old", "Enter");
334
335 retry_count:
336 rval = ksocket_ioctl(so4, SIOCGIFNUM, (intptr_t)&ifn, &rval,
337 CRED());
338 if (rval != 0) {
339 RDSV3_DPRINTF2("rdsv3_do_ip_ioctl_old",
340 "ksocket_ioctl(SIOCGIFNUM) returned: %d", rval);
341 return (rval);
342 }
343
344 numifs = ifn;
345 if (numifs <= 0) {
346 RDSV3_DPRINTF2("rdsv3_do_ip_ioctl_old", "No interfaces found");
347 return (0);
348 }
349
350 /* allocate extra room in case more interfaces appear */
351 numifs += 10;
352
353 /* get the interface names and ip addresses */
354 bufsize = numifs * sizeof (struct ifreq);
355 buf = kmem_alloc(bufsize, KM_SLEEP);
356
357 ifc.ifc_len = bufsize;
358 ifc.ifc_buf = buf;
359 rc = ksocket_ioctl(so4, SIOCGIFCONF, (intptr_t)&ifc, &rval, CRED());
360 if (rc != 0) {
361 RDSV3_DPRINTF2("rdsv3_do_ip_ioctl_old",
362 "SIOCGLIFCONF failed: %d", rc);
363 kmem_free(buf, bufsize);
364 return (rc);
365 }
366 /* if our extra room is used up, try again */
367 if (bufsize <= ifc.ifc_len) {
368 kmem_free(buf, bufsize);
369 buf = NULL;
370 goto retry_count;
371 }
372 /* calculate the actual number of ifreq entries returned */
373 n = ifc.ifc_len / sizeof (struct ifreq);
374
375 /*
376 * Count the RDS interfaces
377 */
378 for (i = 0, j = 0, lp = ifc.ifc_req; i < n; i++, lp++) {
379
380 /*
381 * Copy as the SIOCGIFFLAGS ioctl is destructive
382 */
383 bcopy(lp, &ifr, sizeof (struct ifreq));
384 /*
385 * fetch the flags using the socket of the correct family
386 */
387 switch (ifr.ifr_addr.sa_family) {
388 case AF_INET:
389 rc = ksocket_ioctl(so4, SIOCGIFFLAGS, (intptr_t)&ifr,
390 &rval, CRED());
391 break;
392 default:
393 continue;
394 }
395
396 if (rc != 0) continue;
397
398 RDSV3_DPRINTF2("rdsv3_do_ip_ioctl_old",
399 "1. ifr_name: %s, flags: %d", ifr.ifr_name,
400 (ushort_t)ifr.ifr_flags);
401
402 /*
403 * If we got the flags, skip uninteresting
404 * interfaces based on flags
405 */
406 if ((((ushort_t)ifr.ifr_flags) & IFF_UP) != IFF_UP)
407 continue;
408 RDSV3_DPRINTF2("rdsv3_do_ip_ioctl_old",
409 "2. ifr_name: %s, flags: %d", ifr.ifr_name,
410 (ushort_t)ifr.ifr_flags);
411 if (((ushort_t)ifr.ifr_flags) &
412 (IFF_ANYCAST|IFF_NOLOCAL|IFF_DEPRECATED))
413 continue;
414 RDSV3_DPRINTF2("rdsv3_do_ip_ioctl_old",
415 "3. ifr_name: %s, flags: %d", ifr.ifr_name,
416 (ushort_t)ifr.ifr_flags);
417 if (!rdsv3_capable_interface_old(&ifr))
418 continue;
419 RDSV3_DPRINTF2("rdsv3_do_ip_ioctl_old",
420 "4. ifr_name: %s, flags: %d", ifr.ifr_name,
421 (ushort_t)ifr.ifr_flags);
422 j++;
423 }
424
425 if (j <= 0) {
426 RDSV3_DPRINTF2("rdsv3_do_ip_ioctl_old", "No RDS interfaces");
427 kmem_free(buf, bufsize);
428 return (rval);
429 }
430
431 numifs = j;
432
433 /* This is the buffer we pass back */
434 rbufsize = numifs * sizeof (struct ifreq);
435 rbuf = kmem_alloc(rbufsize, KM_SLEEP);
436 rlp = (struct ifreq *)rbuf;
437
438 /*
439 * Examine the array of interfaces and filter uninteresting ones
440 */
441 for (i = 0, lp = ifc.ifc_req; i < n; i++, lp++) {
442
443 /*
444 * Copy the address as the SIOCGIFFLAGS ioctl is destructive
445 */
446 bcopy(lp, &ifr, sizeof (struct ifreq));
447 /*
448 * fetch the flags using the socket of the correct family
449 */
450 switch (ifr.ifr_addr.sa_family) {
451 case AF_INET:
452 rc = ksocket_ioctl(so4, SIOCGIFFLAGS, (intptr_t)&ifr,
453 &rval, CRED());
454 break;
455 default:
456 continue;
457 }
458
459
460 if (rc != 0) {
461 RDSV3_DPRINTF2("rdsv3_do_ip_ioctl_old",
462 "ksocket_ioctl failed: %d for %s",
463 rc, ifr.ifr_name);
464 continue;
465 }
466
467 /*
468 * If we got the flags, skip uninteresting
469 * interfaces based on flags
470 */
471 if ((((ushort_t)ifr.ifr_flags) & IFF_UP) != IFF_UP)
472 continue;
473 if (((ushort_t)ifr.ifr_flags) &
474 (IFF_ANYCAST|IFF_NOLOCAL|IFF_DEPRECATED))
475 continue;
476 if (!rdsv3_capable_interface_old(&ifr))
477 continue;
478
479 /* save the record */
480 bcopy(lp, rlp, sizeof (struct ifreq));
481 rlp->ifr_addr.sa_family = AF_INET_OFFLOAD;
482 rlp++;
483 }
484
485 kmem_free(buf, bufsize);
486
487 *ipaddrs = rbuf;
488 *size = rbufsize;
489 *nifs = numifs;
490
491 RDSV3_DPRINTF4("rdsv3_do_ip_ioctl_old", "Return");
492
493 return (rval);
494 }
495
496 boolean_t
497 rdsv3_isloopback(ipaddr_t addr)
498 {
499 ip_stack_t *ipst;
500
501 ipst = netstack_find_by_zoneid(GLOBAL_ZONEID)->netstack_ip;
502 ASSERT(ipst != NULL);
503 if (ip_type_v4(addr, ipst) != IRE_LOOPBACK) {
504 netstack_rele(ipst->ips_netstack);
505 return (B_FALSE);
506 }
507 netstack_rele(ipst->ips_netstack);
508 return (B_TRUE);
509 }
510
511 /*
512 * Work Queue Implementation
513 */
514
515 #define RDSV3_WQ_THREAD_IDLE 0
516 #define RDSV3_WQ_THREAD_RUNNING 1
517 #define RDSV3_WQ_THREAD_FLUSHING 2
518 #define RDSV3_WQ_THREAD_EXITING 3
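/*
 * A workqueue is serviced by at most one taskq dispatch at a time and
 * moves between these states under wq_lock:
 *
 *	IDLE     -> RUNNING	rdsv3_queue_work() dispatches a worker
 *	RUNNING  -> FLUSHING	rdsv3_flush_workqueue()
 *	RUNNING/
 *	FLUSHING -> IDLE	rdsv3_worker_thread(), once the queue drains
 *	any      -> EXITING	rdsv3_destroy_task_workqueue()
 */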
519
520 /* worker thread */
521 void
522 rdsv3_worker_thread(void *arg)
523 {
524 rdsv3_workqueue_struct_t *wq = arg;
525 rdsv3_work_t *work;
526
527 RDSV3_DPRINTF4("rdsv3_worker_thread", "Enter(wq: 0x%p)", wq);
528
529 mutex_enter(&wq->wq_lock);
530 work = list_remove_head(&wq->wq_queue);
531 while (work) {
532 mutex_exit(&wq->wq_lock);
533
534 /* process work */
535 work->func(work);
536
537 mutex_enter(&wq->wq_lock);
538 work = list_remove_head(&wq->wq_queue);
539 }
540
541 /* No more work, go home, until called again */
542 if (wq->wq_state != RDSV3_WQ_THREAD_EXITING) {
543 wq->wq_state = RDSV3_WQ_THREAD_IDLE;
544 }
545 mutex_exit(&wq->wq_lock);
546
547 RDSV3_DPRINTF4("rdsv3_worker_thread", "Return(wq: 0x%p)", wq);
548 }
549
550 /* XXX */
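/*
 * Wait for all queued work on `wq' to complete.  A running queue is
 * marked FLUSHING and polled once a second until the worker drains it;
 * an EXITING queue is drained synchronously in the caller's context.
 */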
551 void
552 rdsv3_flush_workqueue(rdsv3_workqueue_struct_t *wq)
553 {
554 RDSV3_DPRINTF4("rdsv3_flush_workqueue", "Enter(wq: %p)", wq);
555
556 mutex_enter(&wq->wq_lock);
557 switch (wq->wq_state) {
558 case RDSV3_WQ_THREAD_IDLE:
559 /* nothing to do */
560 ASSERT(list_is_empty(&wq->wq_queue));
561 break;
562
563 case RDSV3_WQ_THREAD_RUNNING:
564 wq->wq_state = RDSV3_WQ_THREAD_FLUSHING;
565 /* FALLTHRU */
566 case RDSV3_WQ_THREAD_FLUSHING:
567 /* already flushing, wait until the flushing is complete */
568 do {
569 mutex_exit(&wq->wq_lock);
570 delay(drv_usectohz(1000000));
571 mutex_enter(&wq->wq_lock);
572 } while (wq->wq_state == RDSV3_WQ_THREAD_FLUSHING);
573 break;
574 case RDSV3_WQ_THREAD_EXITING:
575 mutex_exit(&wq->wq_lock);
576 rdsv3_worker_thread(wq);
577 return;
578 }
579 mutex_exit(&wq->wq_lock);
580
581 RDSV3_DPRINTF4("rdsv3_flush_workqueue", "Return(wq: %p)", wq);
582 }
583
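/*
 * Queue `wp' on `wq' and make sure a worker is running.  A work item
 * that is already linked on a queue is ignored; if the queue is being
 * flushed, the insertion waits until the flush completes.
 */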
584 void
585 rdsv3_queue_work(rdsv3_workqueue_struct_t *wq, rdsv3_work_t *wp)
586 {
587 RDSV3_DPRINTF4("rdsv3_queue_work", "Enter(wq: %p, wp: %p)", wq, wp);
588
589 mutex_enter(&wq->wq_lock);
590
591 if (list_link_active(&wp->work_item)) {
592 /* This is already in the queue, ignore this call */
593 mutex_exit(&wq->wq_lock);
594 RDSV3_DPRINTF3("rdsv3_queue_work", "already queued: %p", wp);
595 return;
596 }
597
598 switch (wq->wq_state) {
599 case RDSV3_WQ_THREAD_RUNNING:
600 list_insert_tail(&wq->wq_queue, wp);
601 mutex_exit(&wq->wq_lock);
602 break;
603
604 case RDSV3_WQ_THREAD_FLUSHING:
605 do {
606 mutex_exit(&wq->wq_lock);
607 delay(drv_usectohz(1000000));
608 mutex_enter(&wq->wq_lock);
609 } while (wq->wq_state == RDSV3_WQ_THREAD_FLUSHING);
610
611 if (wq->wq_state == RDSV3_WQ_THREAD_RUNNING) {
612 list_insert_tail(&wq->wq_queue, wp);
613 mutex_exit(&wq->wq_lock);
614 break;
615 }
616 /* FALLTHRU */
617
618 case RDSV3_WQ_THREAD_IDLE:
619 list_insert_tail(&wq->wq_queue, wp);
620 wq->wq_state = RDSV3_WQ_THREAD_RUNNING;
621 mutex_exit(&wq->wq_lock);
622
623 (void) ddi_taskq_dispatch(rdsv3_taskq, rdsv3_worker_thread, wq,
624 DDI_SLEEP);
625 break;
626
627 case RDSV3_WQ_THREAD_EXITING:
628 mutex_exit(&wq->wq_lock);
629 break;
630 }
631
632 RDSV3_DPRINTF4("rdsv3_queue_work", "Return(wq: %p, wp: %p)", wq, wp);
633 }
634
635 /* timeout handler for delayed work queuing */
636 void
637 rdsv3_work_timeout_handler(void *arg)
638 {
639 rdsv3_delayed_work_t *dwp = (rdsv3_delayed_work_t *)arg;
640
641 RDSV3_DPRINTF4("rdsv3_work_timeout_handler",
642 "Enter(wq: %p, wp: %p)", dwp->wq, &dwp->work);
643
644 mutex_enter(&dwp->lock);
645 dwp->timeid = 0;
646 mutex_exit(&dwp->lock);
647
648 mutex_enter(&dwp->wq->wq_lock);
649 dwp->wq->wq_pending--;
650 if (dwp->wq->wq_state == RDSV3_WQ_THREAD_EXITING) {
651 mutex_exit(&dwp->wq->wq_lock);
652 return;
653 }
654 mutex_exit(&dwp->wq->wq_lock);
655
656 rdsv3_queue_work(dwp->wq, &dwp->work);
657
658 RDSV3_DPRINTF4("rdsv3_work_timeout_handler",
659 "Return(wq: %p, wp: %p)", dwp->wq, &dwp->work);
660 }
661
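/*
 * Queue `dwp->work' on `wq' after `delay' seconds (converted to ticks
 * via rdsv3_one_sec_in_hz).  wq_pending counts outstanding timeouts so
 * that rdsv3_destroy_task_workqueue() can wait for them; a delayed
 * work item whose timeout is already pending is left alone.
 */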
662 void
663 rdsv3_queue_delayed_work(rdsv3_workqueue_struct_t *wq,
664 rdsv3_delayed_work_t *dwp, uint_t delay)
665 {
666 RDSV3_DPRINTF4("rdsv3_queue_delayed_work",
667 "Enter(wq: %p, wp: %p)", wq, dwp);
668
669 if (delay == 0) {
670 rdsv3_queue_work(wq, &dwp->work);
671 return;
672 }
673
674 mutex_enter(&wq->wq_lock);
675 if (wq->wq_state == RDSV3_WQ_THREAD_EXITING) {
676 mutex_exit(&wq->wq_lock);
677 RDSV3_DPRINTF4("rdsv3_queue_delayed_work",
678 "WQ exiting - don't queue (wq: %p, wp: %p)", wq, dwp);
679 return;
680 }
681 wq->wq_pending++;
682 mutex_exit(&wq->wq_lock);
683
684 mutex_enter(&dwp->lock);
685 if (dwp->timeid == 0) {
686 dwp->wq = wq;
687 dwp->timeid = timeout(rdsv3_work_timeout_handler, dwp,
688 jiffies + (delay * rdsv3_one_sec_in_hz));
689 mutex_exit(&dwp->lock);
690 } else {
691 mutex_exit(&dwp->lock);
692 RDSV3_DPRINTF4("rdsv3_queue_delayed_work", "Already queued: %p",
693 dwp);
694 mutex_enter(&wq->wq_lock);
695 wq->wq_pending--;
696 mutex_exit(&wq->wq_lock);
697 }
698
699 RDSV3_DPRINTF4("rdsv3_queue_delayed_work",
700 "Return(wq: %p, wp: %p)", wq, dwp);
701 }
702
703 void
704 rdsv3_cancel_delayed_work(rdsv3_delayed_work_t *dwp)
705 {
706 RDSV3_DPRINTF4("rdsv3_cancel_delayed_work",
707 "Enter(wq: %p, dwp: %p)", dwp->wq, dwp);
708
709 mutex_enter(&dwp->lock);
710 if (dwp->timeid != 0) {
711 (void) untimeout(dwp->timeid);
712 dwp->timeid = 0;
713 } else {
714 RDSV3_DPRINTF4("rdsv3_cancel_delayed_work",
715 "Nothing to cancel (wq: %p, dwp: %p)", dwp->wq, dwp);
716 mutex_exit(&dwp->lock);
717 return;
718 }
719 mutex_exit(&dwp->lock);
720
721 mutex_enter(&dwp->wq->wq_lock);
722 dwp->wq->wq_pending--;
723 mutex_exit(&dwp->wq->wq_lock);
724
725 RDSV3_DPRINTF4("rdsv3_cancel_delayed_work",
726 "Return(wq: %p, dwp: %p)", dwp->wq, dwp);
727 }
728
729 void
730 rdsv3_destroy_task_workqueue(rdsv3_workqueue_struct_t *wq)
731 {
732 RDSV3_DPRINTF2("rdsv3_destroy_workqueue", "Enter");
733
734 ASSERT(wq);
735
736 mutex_enter(&wq->wq_lock);
737 wq->wq_state = RDSV3_WQ_THREAD_EXITING;
738
739 while (wq->wq_pending > 0) {
740 mutex_exit(&wq->wq_lock);
741 delay(drv_usectohz(1000000));
742 mutex_enter(&wq->wq_lock);
743 }
744 mutex_exit(&wq->wq_lock);
745
746 rdsv3_flush_workqueue(wq);
747
748 list_destroy(&wq->wq_queue);
749 mutex_destroy(&wq->wq_lock);
750 kmem_free(wq, sizeof (rdsv3_workqueue_struct_t));
751
752 ASSERT(rdsv3_taskq);
753 ddi_taskq_destroy(rdsv3_taskq);
754
755 wq = NULL;
756 rdsv3_taskq = NULL;
757
758 RDSV3_DPRINTF2("rdsv3_destroy_workqueue", "Return");
759 }
760
761 /* ARGSUSED */
762 void
763 rdsv3_rdma_init_worker(struct rdsv3_work_s *work)
764 {
765 rdsv3_rdma_init();
766 }
767
768 #define RDSV3_NUM_TASKQ_THREADS 1
769 rdsv3_workqueue_struct_t *
770 rdsv3_create_task_workqueue(char *name)
771 {
772 rdsv3_workqueue_struct_t *wq;
773
774 RDSV3_DPRINTF2("create_singlethread_workqueue", "Enter (dip: %p)",
775 rdsv3_dev_info);
776
777 rdsv3_taskq = ddi_taskq_create(rdsv3_dev_info, name,
778 RDSV3_NUM_TASKQ_THREADS, TASKQ_DEFAULTPRI, 0);
779 if (rdsv3_taskq == NULL) {
780 RDSV3_DPRINTF2(__FILE__,
781 "ddi_taskq_create failed for rdsv3_taskq");
782 return (NULL);
783 }
784
785 wq = kmem_zalloc(sizeof (rdsv3_workqueue_struct_t), KM_NOSLEEP);
786 if (wq == NULL) {
787 RDSV3_DPRINTF2(__FILE__, "kmem_zalloc failed for wq");
788 ddi_taskq_destroy(rdsv3_taskq);
789 return (NULL);
790 }
791
792 list_create(&wq->wq_queue, sizeof (struct rdsv3_work_s),
793 offsetof(struct rdsv3_work_s, work_item));
794 mutex_init(&wq->wq_lock, NULL, MUTEX_DRIVER, NULL);
795 wq->wq_state = RDSV3_WQ_THREAD_IDLE;
796 wq->wq_pending = 0;
797 rdsv3_one_sec_in_hz = drv_usectohz(1000000);
798
799 RDSV3_DPRINTF2("create_singlethread_workqueue", "Return");
800
801 return (wq);
802 }
803
804 /*
805 * Implementation for struct sock
806 */
807
808 void
809 rdsv3_sock_exit_data(struct rsock *sk)
810 {
811 struct rdsv3_sock *rs = sk->sk_protinfo;
812
813 RDSV3_DPRINTF4("rdsv3_sock_exit_data", "rs: %p sk: %p", rs, sk);
814
815 ASSERT(rs != NULL);
816 ASSERT(rdsv3_sk_sock_flag(sk, SOCK_DEAD));
817
818 rs->rs_sk = NULL;
819
820 list_destroy(&rs->rs_send_queue);
821 list_destroy(&rs->rs_notify_queue);
822 list_destroy(&rs->rs_recv_queue);
823
824 rw_destroy(&rs->rs_recv_lock);
825 mutex_destroy(&rs->rs_lock);
826
827 mutex_destroy(&rs->rs_rdma_lock);
828 avl_destroy(&rs->rs_rdma_keys);
829
830 mutex_destroy(&rs->rs_conn_lock);
831 mutex_destroy(&rs->rs_congested_lock);
832 cv_destroy(&rs->rs_congested_cv);
833
834 rdsv3_exit_waitqueue(sk->sk_sleep);
835 kmem_free(sk->sk_sleep, sizeof (rdsv3_wait_queue_t));
836 mutex_destroy(&sk->sk_lock);
837
838 kmem_cache_free(rdsv3_alloc_cache, sk);
839 RDSV3_DPRINTF4("rdsv3_sock_exit_data", "rs: %p sk: %p", rs, sk);
840 }
841
842 /* XXX - figure out right values */
843 #define RDSV3_RECV_HIWATER (256 * 1024)
844 #define RDSV3_RECV_LOWATER 128
845 #define RDSV3_XMIT_HIWATER (256 * 1024)
846 #define RDSV3_XMIT_LOWATER 1024
847
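/*
 * A single cache object holds a struct rsock immediately followed by
 * its struct rdsv3_sock; rdsv3_sock_init_data() points sk_protinfo at
 * the trailing rdsv3_sock.
 */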
848 struct rsock *
849 rdsv3_sk_alloc()
850 {
851 struct rsock *sk;
852
853 sk = kmem_cache_alloc(rdsv3_alloc_cache, KM_SLEEP);
854 if (sk == NULL) {
855 RDSV3_DPRINTF2("rdsv3_create", "kmem_cache_alloc failed");
856 return (NULL);
857 }
858
859 bzero(sk, sizeof (struct rsock) + sizeof (struct rdsv3_sock));
860 return (sk);
861 }
862
863 void
864 rdsv3_sock_init_data(struct rsock *sk)
865 {
866 sk->sk_sleep = kmem_zalloc(sizeof (rdsv3_wait_queue_t), KM_SLEEP);
867 rdsv3_init_waitqueue(sk->sk_sleep);
868
869 mutex_init(&sk->sk_lock, NULL, MUTEX_DRIVER, NULL);
870 sk->sk_refcount = 1;
871 sk->sk_protinfo = (struct rdsv3_sock *)(sk + 1);
872 sk->sk_sndbuf = RDSV3_XMIT_HIWATER;
873 sk->sk_rcvbuf = RDSV3_RECV_HIWATER;
874 }
875
876 /*
877 * Connection cache
878 */
879 /* ARGSUSED */
880 int
881 rdsv3_conn_constructor(void *buf, void *arg, int kmflags)
882 {
883 struct rdsv3_connection *conn = buf;
884
885 bzero(conn, sizeof (struct rdsv3_connection));
886
887 conn->c_next_tx_seq = 1;
888 mutex_init(&conn->c_lock, NULL, MUTEX_DRIVER, NULL);
889 mutex_init(&conn->c_send_lock, NULL, MUTEX_DRIVER, NULL);
890 conn->c_send_generation = 1;
891 conn->c_senders = 0;
892
893 list_create(&conn->c_send_queue, sizeof (struct rdsv3_message),
894 offsetof(struct rdsv3_message, m_conn_item));
895 list_create(&conn->c_retrans, sizeof (struct rdsv3_message),
896 offsetof(struct rdsv3_message, m_conn_item));
897 return (0);
898 }
899
900 /* ARGSUSED */
901 void
902 rdsv3_conn_destructor(void *buf, void *arg)
903 {
904 struct rdsv3_connection *conn = buf;
905
906 ASSERT(list_is_empty(&conn->c_send_queue));
907 ASSERT(list_is_empty(&conn->c_retrans));
908 list_destroy(&conn->c_send_queue);
909 list_destroy(&conn->c_retrans);
910 mutex_destroy(&conn->c_send_lock);
911 mutex_destroy(&conn->c_lock);
912 }
913
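/*
 * Three-way comparator used to order connections: `conn1' is a lookup
 * key (rdsv3_conn_info_t) and `conn2' is an existing
 * struct rdsv3_connection; connections sort by local address first,
 * then by remote address.
 */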
914 int
915 rdsv3_conn_compare(const void *conn1, const void *conn2)
916 {
917 uint32_be_t laddr1, faddr1, laddr2, faddr2;
918
919 laddr1 = ((rdsv3_conn_info_t *)conn1)->c_laddr;
920 laddr2 = ((struct rdsv3_connection *)conn2)->c_laddr;
921
922 if (laddr1 == laddr2) {
923 faddr1 = ((rdsv3_conn_info_t *)conn1)->c_faddr;
924 faddr2 = ((struct rdsv3_connection *)conn2)->c_faddr;
925 if (faddr1 == faddr2)
926 return (0);
927 if (faddr1 < faddr2)
928 return (-1);
929 return (1);
930 }
931
932 if (laddr1 < laddr2)
933 return (-1);
934
935 return (1);
936 }
937
938 /* rdsv3_ib_incoming cache */
939 /* ARGSUSED */
940 int
941 rdsv3_ib_inc_constructor(void *buf, void *arg, int kmflags)
942 {
943 list_create(&((struct rdsv3_ib_incoming *)buf)->ii_frags,
944 sizeof (struct rdsv3_page_frag),
945 offsetof(struct rdsv3_page_frag, f_item));
946
947 return (0);
948 }
949
950 /* ARGSUSED */
951 void
952 rdsv3_ib_inc_destructor(void *buf, void *arg)
953 {
954 list_destroy(&((struct rdsv3_ib_incoming *)buf)->ii_frags);
955 }
956
957 /* ib_frag_slab cache */
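/*
 * Constructor for receive fragments: each object gets a PAGE_SIZE
 * buffer that is pre-mapped for IBT receive work requests, so the
 * receive path does not map memory per packet; f_sge holds the
 * resulting scatter/gather entry and f_mapped the IOV mapping handle.
 */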
958 /* ARGSUSED */
959 int
960 rdsv3_ib_frag_constructor(void *buf, void *arg, int kmflags)
961 {
962 struct rdsv3_page_frag *frag = (struct rdsv3_page_frag *)buf;
963 struct rdsv3_ib_device *rds_ibdev = (struct rdsv3_ib_device *)arg;
964 ibt_iov_attr_t iov_attr;
965 ibt_iov_t iov_arr[1];
966 ibt_all_wr_t wr;
967
968 bzero(frag, sizeof (struct rdsv3_page_frag));
969 list_link_init(&frag->f_item);
970
971 frag->f_page = kmem_alloc(PAGE_SIZE, kmflags);
972 if (frag->f_page == NULL) {
973 RDSV3_DPRINTF2("rdsv3_ib_frag_constructor",
974 "kmem_alloc for %d failed", PAGE_SIZE);
975 return (-1);
976 }
977 frag->f_offset = 0;
978
979 iov_attr.iov_as = NULL;
980 iov_attr.iov = &iov_arr[0];
981 iov_attr.iov_buf = NULL;
982 iov_attr.iov_list_len = 1;
983 iov_attr.iov_wr_nds = 1;
984 iov_attr.iov_lso_hdr_sz = 0;
985 iov_attr.iov_flags = IBT_IOV_SLEEP | IBT_IOV_RECV;
986
987 iov_arr[0].iov_addr = frag->f_page;
988 iov_arr[0].iov_len = PAGE_SIZE;
989
990 wr.recv.wr_nds = 1;
991 wr.recv.wr_sgl = &frag->f_sge;
992
993 if (ibt_map_mem_iov(ib_get_ibt_hca_hdl(rds_ibdev->dev),
994 &iov_attr, &wr, &frag->f_mapped) != IBT_SUCCESS) {
995 RDSV3_DPRINTF2("rdsv3_ib_frag_constructor",
996 "ibt_map_mem_iov failed");
997 kmem_free(frag->f_page, PAGE_SIZE);
998 return (-1);
999 }
1000
1001 return (0);
1002 }
1003
1004 /* ARGSUSED */
1005 void
1006 rdsv3_ib_frag_destructor(void *buf, void *arg)
1007 {
1008 struct rdsv3_page_frag *frag = (struct rdsv3_page_frag *)buf;
1009 struct rdsv3_ib_device *rds_ibdev = (struct rdsv3_ib_device *)arg;
1010
1011 /* unmap the page */
1012 if (ibt_unmap_mem_iov(ib_get_ibt_hca_hdl(rds_ibdev->dev),
1013 frag->f_mapped) != IBT_SUCCESS)
1014 RDSV3_DPRINTF2("rdsv3_ib_frag_destructor",
1015 "ibt_unmap_mem_iov failed");
1016
1017 /* free the page */
1018 kmem_free(frag->f_page, PAGE_SIZE);
1019 }
1020
1021 /* loop.c */
1022 extern kmutex_t loop_conns_lock;
1023 extern list_t loop_conns;
1024
1025 struct rdsv3_loop_connection
1026 {
1027 struct list_node loop_node;
1028 struct rdsv3_connection *conn;
1029 };
1030
1031 void
1032 rdsv3_loop_init(void)
1033 {
1034 list_create(&loop_conns, sizeof (struct rdsv3_loop_connection),
1035 offsetof(struct rdsv3_loop_connection, loop_node));
1036 mutex_init(&loop_conns_lock, NULL, MUTEX_DRIVER, NULL);
1037 }
1038
1039 /* rdma.c */
1040 /* IB Rkey is used here for comparison */
1041 int
1042 rdsv3_mr_compare(const void *mr1, const void *mr2)
1043 {
1044 uint32_t key1 = *(uint32_t *)mr1;
1045 uint32_t key2 = ((struct rdsv3_mr *)mr2)->r_key;
1046
1047 if (key1 < key2)
1048 return (-1);
1049 if (key1 > key2)
1050 return (1);
1051 return (0);
1052 }
1053
1054 /* transport.c */
1055 extern struct rdsv3_transport *transports[];
1056 extern krwlock_t trans_sem;
1057
1058 void
1059 rdsv3_trans_exit(void)
1060 {
1061 struct rdsv3_transport *trans;
1062 int i;
1063
1064 RDSV3_DPRINTF2("rdsv3_trans_exit", "Enter");
1065
1066 /* currently, only IB transport */
1067 rw_enter(&trans_sem, RW_READER);
1068 trans = NULL;
1069 for (i = 0; i < RDS_TRANS_COUNT; i++) {
1070 if (transports[i]) {
1071 trans = transports[i];
1072 break;
1073 }
1074 }
1075 rw_exit(&trans_sem);
1076
1077 /* trans->exit() will remove the trans from the list */
1078 if (trans)
1079 trans->exit();
1080
1081 rw_destroy(&trans_sem);
1082
1083 RDSV3_DPRINTF2("rdsv3_trans_exit", "Return");
1084 }
1085
1086 void
1087 rdsv3_trans_init()
1088 {
1089 RDSV3_DPRINTF2("rdsv3_trans_init", "Enter");
1090
1091 rw_init(&trans_sem, NULL, RW_DRIVER, NULL);
1092
1093 RDSV3_DPRINTF2("rdsv3_trans_init", "Return");
1094 }
1095
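/*
 * Append a control message of (level, type, size) carrying `payload'
 * to `msg'.  Any existing msg_control buffer is copied into a larger
 * kmem_alloc'd buffer, freed and replaced, so repeated calls build up
 * a chain of cmsgs; msg_controllen tracks the CMSG_SPACE()-aligned
 * total.  A sketch of the calling pattern (the cmsg type and `notify'
 * payload below are illustrative, not taken from this file):
 *
 *	(void) rdsv3_put_cmsg(msg, SOL_RDS, RDS_CMSG_RDMA_STATUS,
 *	    sizeof (notify), &notify);
 */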
1096 int
1097 rdsv3_put_cmsg(struct nmsghdr *msg, int level, int type, size_t size,
1098 void *payload)
1099 {
1100 struct cmsghdr *cp;
1101 char *bp;
1102 size_t cmlen;
1103 size_t cmspace;
1104 size_t bufsz;
1105
1106 RDSV3_DPRINTF4("rdsv3_put_cmsg",
1107 "Enter(msg: %p level: %d type: %d sz: %d)",
1108 msg, level, type, size);
1109
1110 if (msg == NULL || msg->msg_controllen == 0) {
1111 return (0);
1112 }
1113 /* check for first cmsg or this is another cmsg to be appended */
1114 if (msg->msg_control == NULL)
1115 msg->msg_controllen = 0;
1116
1117 cmlen = CMSG_LEN(size);
1118 cmspace = CMSG_SPACE(size);
1119 bufsz = msg->msg_controllen + cmspace;
1120
1121 /* extend the existing cmsg to append the next cmsg */
1122 bp = kmem_alloc(bufsz, KM_SLEEP);
1123 if (msg->msg_control) {
1124 bcopy(msg->msg_control, bp, msg->msg_controllen);
1125 kmem_free(msg->msg_control, (size_t)msg->msg_controllen);
1126 }
1127
1128 /* assign payload the proper cmsg location */
1129 cp = (struct cmsghdr *)(bp + msg->msg_controllen);
1130 cp->cmsg_len = cmlen;
1131 cp->cmsg_level = level;
1132 cp->cmsg_type = type;
1133
1134 bcopy(payload, CMSG_DATA(cp), cmlen -
1135 (unsigned int)_CMSG_DATA_ALIGN(sizeof (struct cmsghdr)));
1136
1137 msg->msg_control = bp;
1138 msg->msg_controllen = bufsz;
1139
1140 RDSV3_DPRINTF4("rdsv3_put_cmsg", "Return(cmsg_len: %d)", cp->cmsg_len);
1141
1142 return (0);
1143 }
1144
1145 /* ARGSUSED */
1146 int
1147 rdsv3_verify_bind_address(ipaddr_t addr)
1148 {
1149 return (1);
1150 }
1151
1152 /* checksum */
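/*
 * `length' is the IP header length in 32-bit words (the ihl
 * convention), so it is doubled to get the halfword count that
 * ip_ocsum() expects.
 */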
1153 uint16_t
1154 rdsv3_ip_fast_csum(void *hdr, size_t length)
1155 {
1156 return (0xffff &
1157 (uint16_t)(~ip_ocsum((ushort_t *)hdr, (int)length << 1, 0)));
1158 }
1159
1160 /* scatterlist implementation */
1161 /* ARGSUSED */
1162 caddr_t
1163 rdsv3_ib_sg_dma_address(ib_device_t *dev, struct rdsv3_scatterlist *scat,
1164 uint_t offset)
1165 {
1166 return (0);
1167 }
1168
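/*
 * Map the `num'-entry scatterlist with a single ibt_map_mem_iov()
 * call.  The IOV mapping handle is kept only in the first entry
 * (scat[0].mihdl); the SGL array is sized for up to num * 2
 * ibt_wr_ds_t entries and each scatterlist entry points at its slot
 * in that array.  Returns `num' on success and 0 on failure.
 */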
1169 uint_t
1170 rdsv3_ib_dma_map_sg(struct ib_device *dev, struct rdsv3_scatterlist *scat,
1171 uint_t num)
1172 {
1173 struct rdsv3_scatterlist *s, *first;
1174 ibt_iov_t *iov;
1175 ibt_wr_ds_t *sgl;
1176 ibt_iov_attr_t iov_attr;
1177 ibt_send_wr_t swr;
1178 uint_t i;
1179
1180 RDSV3_DPRINTF4("rdsv3_ib_dma_map_sg", "scat %p, num: %d", scat, num);
1181
1182 s = first = &scat[0];
1183 ASSERT(first->mihdl == NULL);
1184
1185 iov = kmem_alloc(num * sizeof (ibt_iov_t), KM_SLEEP);
1186 sgl = kmem_zalloc((num * 2) * sizeof (ibt_wr_ds_t), KM_SLEEP);
1187
1188 for (i = 0; i < num; i++, s++) {
1189 iov[i].iov_addr = s->vaddr;
1190 iov[i].iov_len = s->length;
1191 }
1192
1193 iov_attr.iov_as = NULL;
1194 iov_attr.iov = iov;
1195 iov_attr.iov_buf = NULL;
1196 iov_attr.iov_list_len = num;
1197 iov_attr.iov_wr_nds = num * 2;
1198 iov_attr.iov_lso_hdr_sz = 0;
1199 iov_attr.iov_flags = IBT_IOV_SLEEP;
1200
1201 swr.wr_sgl = sgl;
1202
1203 i = ibt_map_mem_iov(ib_get_ibt_hca_hdl(dev),
1204 &iov_attr, (ibt_all_wr_t *)&swr, &first->mihdl);
1205 kmem_free(iov, num * sizeof (ibt_iov_t));
1206 if (i != IBT_SUCCESS) {
1207 RDSV3_DPRINTF2("rdsv3_ib_dma_map_sg",
1208 "ibt_map_mem_iov returned: %d", i);
1209 return (0);
1210 }
1211
1212 s = first;
1213 for (i = 0; i < num; i++, s++, sgl++) {
1214 s->sgl = sgl;
1215 }
1216
1217 return (num);
1218 }
1219
1220 void
1221 rdsv3_ib_dma_unmap_sg(ib_device_t *dev, struct rdsv3_scatterlist *scat,
1222 uint_t num)
1223 {
1224 /* Zero length messages have no scatter gather entries */
1225 if (num != 0) {
1226 ASSERT(scat->mihdl != NULL);
1227 ASSERT(scat->sgl != NULL);
1228
1229 (void) ibt_unmap_mem_iov(ib_get_ibt_hca_hdl(dev), scat->mihdl);
1230
1231 kmem_free(scat->sgl, (num * 2) * sizeof (ibt_wr_ds_t));
1232 scat->sgl = NULL;
1233 scat->mihdl = NULL;
1234 }
1235 }
1236
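/*
 * Allocate and register one contiguous region holding every RDS header
 * the connection needs: w_nr send headers, then w_nr receive headers,
 * then a single ACK header.  The region's kernel virtual address is
 * also used as the DMA address, with ic->i_mr tracking the registered
 * memory region and its lkey.
 */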
1237 int
1238 rdsv3_ib_alloc_hdrs(ib_device_t *dev, struct rdsv3_ib_connection *ic)
1239 {
1240 caddr_t addr;
1241 size_t size;
1242 ibt_mr_attr_t mr_attr;
1243 ibt_mr_desc_t mr_desc;
1244 ibt_mr_hdl_t mr_hdl;
1245 int ret;
1246
1247 RDSV3_DPRINTF4("rdsv3_ib_alloc_hdrs", "Enter(dev: %p)", dev);
1248
1249 ASSERT(ic->i_mr == NULL);
1250
1251 size = (ic->i_send_ring.w_nr + ic->i_recv_ring.w_nr + 1) *
1252 sizeof (struct rdsv3_header);
1253
1254 addr = kmem_zalloc(size, KM_NOSLEEP);
1255 if (addr == NULL)
1256 return (-1);
1257
1258 mr_attr.mr_vaddr = (ib_vaddr_t)(uintptr_t)addr;
1259 mr_attr.mr_len = size;
1260 mr_attr.mr_as = NULL;
1261 mr_attr.mr_flags = IBT_MR_ENABLE_LOCAL_WRITE;
1262 ret = ibt_register_mr(ib_get_ibt_hca_hdl(dev), RDSV3_PD2PDHDL(ic->i_pd),
1263 &mr_attr, &mr_hdl, &mr_desc);
1264 if (ret != IBT_SUCCESS) {
1265 RDSV3_DPRINTF2("rdsv3_ib_alloc_hdrs",
1266 "ibt_register_mr returned: %d", ret);
kmem_free(addr, size);	/* don't leak the header region on failure */
1267 return (-1);
1268 }
1269
1270 ic->i_mr =
1271 (struct rdsv3_hdrs_mr *)kmem_alloc(sizeof (struct rdsv3_hdrs_mr),
1272 KM_SLEEP);
1273 ic->i_mr->addr = addr;
1274 ic->i_mr->size = size;
1275 ic->i_mr->hdl = mr_hdl;
1276 ic->i_mr->lkey = mr_desc.md_lkey;
1277
1278 ic->i_send_hdrs = (struct rdsv3_header *)addr;
1279 ic->i_send_hdrs_dma = (uint64_t)(uintptr_t)addr;
1280
1281 ic->i_recv_hdrs = (struct rdsv3_header *)(addr +
1282 (ic->i_send_ring.w_nr * sizeof (struct rdsv3_header)));
1283 ic->i_recv_hdrs_dma = (uint64_t)(uintptr_t)(addr +
1284 (ic->i_send_ring.w_nr * sizeof (struct rdsv3_header)));
1285
1286 ic->i_ack = (struct rdsv3_header *)(addr +
1287 ((ic->i_send_ring.w_nr + ic->i_recv_ring.w_nr) *
1288 sizeof (struct rdsv3_header)));
1289 ic->i_ack_dma = (uint64_t)(uintptr_t)(addr +
1290 ((ic->i_send_ring.w_nr + ic->i_recv_ring.w_nr) *
1291 sizeof (struct rdsv3_header)));
1292
1293 RDSV3_DPRINTF4("rdsv3_ib_alloc_hdrs", "Return(dev: %p)", dev);
1294
1295 return (0);
1296 }
1297
1298 void
1299 rdsv3_ib_free_hdrs(ib_device_t *dev, struct rdsv3_ib_connection *ic)
1300 {
1301 RDSV3_DPRINTF4("rdsv3_ib_free_hdrs", "Enter(dev: %p)", dev);
1302 ASSERT(ic->i_mr != NULL);
1303
1304 ic->i_send_hdrs = NULL;
1305 ic->i_send_hdrs_dma = NULL;
1306
1307 ic->i_recv_hdrs = NULL;
1308 ic->i_recv_hdrs_dma = NULL;
1309
1310 ic->i_ack = NULL;
1311 ic->i_ack_dma = NULL;
1312
1313 (void) ibt_deregister_mr(ib_get_ibt_hca_hdl(dev), ic->i_mr->hdl);
1314
1315 kmem_free(ic->i_mr->addr, ic->i_mr->size);
1316 kmem_free(ic->i_mr, sizeof (struct rdsv3_hdrs_mr));
1317
1318 ic->i_mr = NULL;
1319 RDSV3_DPRINTF4("rdsv3_ib_free_hdrs", "Return(dev: %p)", dev);
1320 }
1321
1322 /*
1323 * atomic_add_unless - add unless the number is a given value
1324 * @v: pointer of type atomic_t
1325 * @a: the amount to add to v...
1326 * @u: ...unless v is equal to u.
1327 *
1328 * Atomically adds @a to @v, so long as it was not @u.
1329 * Returns non-zero if @v was not @u, and zero otherwise.
1330 */
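/*
 * Typical use is reference counting that must not resurrect an object
 * whose count has already dropped to zero; a minimal sketch (`foo' and
 * f_refcount are hypothetical, not from this file):
 *
 *	if (!atomic_add_unless(&foo->f_refcount, 1, 0))
 *		return (NULL);	object is already being torn down
 */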
1331 int
1332 atomic_add_unless(atomic_t *v, uint_t a, ulong_t u)
1333 {
1334 uint_t c, old;
1335
1336 c = *v;
1337 while (c != u && (old = atomic_cas_uint(v, c, c + a)) != c) {
1338 c = old;
1339 }
1340 return ((ulong_t)c != u);
1341 }
1342