1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24 #include <sys/types.h>
25 #include <sys/stream.h>
26 #include <sys/dlpi.h>
27 #include <sys/stropts.h>
28 #include <sys/strsun.h>
29 #include <sys/sysmacros.h>
30 #include <sys/strlog.h>
31 #include <sys/ddi.h>
32 #include <sys/cmn_err.h>
33 #include <sys/socket.h>
34 #include <net/if.h>
35 #include <net/if_types.h>
36 #include <netinet/in.h>
37 #include <sys/ethernet.h>
38 #include <inet/arp.h>
39 #include <inet/ip.h>
40 #include <inet/ip6.h>
41 #include <inet/ip_ire.h>
42 #include <inet/ip_if.h>
43 #include <inet/ip_ftable.h>
44
45 #include <sys/sunddi.h>
46 #include <sys/ksynch.h>
47
48 #include <sys/rds.h>
49 #include <sys/socket.h>
50 #include <sys/socketvar.h>
51 #include <sys/sockio.h>
52 #include <sys/sysmacros.h>
53 #include <inet/common.h>
54 #include <inet/ip.h>
55 #include <net/if_types.h>
56
57 #include <sys/ib/clients/rdsv3/rdsv3.h>
58 #include <sys/ib/clients/rdsv3/rdma.h>
59 #include <sys/ib/clients/rdsv3/ib.h>
60 #include <sys/ib/clients/rdsv3/rdsv3_impl.h>
61 #include <sys/ib/clients/rdsv3/rdsv3_debug.h>
62
63 #include <sys/dls.h>
64 #include <sys/mac.h>
65 #include <sys/mac_client.h>
66 #include <sys/mac_provider.h>
67 #include <sys/mac_client_priv.h>
68
69 uint_t rdsv3_one_sec_in_hz;
70 ddi_taskq_t *rdsv3_taskq = NULL;
71 extern kmem_cache_t *rdsv3_alloc_cache;
72
73 extern unsigned int ip_ocsum(ushort_t *address, int halfword_count,
74 unsigned int sum);
75
76 /*
77 * Check if the IP interface named by `lifrp' is RDS-capable.
78 */
79 boolean_t
80 rdsv3_capable_interface(struct lifreq *lifrp)
81 {
82 char ifname[LIFNAMSIZ];
83 char drv[MAXLINKNAMELEN];
84 uint_t ppa;
85 char *cp;
86
87 RDSV3_DPRINTF4("rdsv3_capable_interface", "Enter");
88
89 if (lifrp->lifr_type == IFT_IB)
90 return (B_TRUE);
91
92 /*
93 * Strip off the logical interface portion before getting
94 * intimate with the name.
95 */
96 (void) strlcpy(ifname, lifrp->lifr_name, LIFNAMSIZ);
97 if ((cp = strchr(ifname, ':')) != NULL)
98 *cp = '\0';
99
100 if (strcmp("lo0", ifname) == 0) {
101 /*
102 * loopback is considered RDS-capable
103 */
104 return (B_TRUE);
105 }
106
107 return (
108 ddi_parse_dlen(ifname, drv, MAXLINKNAMELEN, &ppa) == DDI_SUCCESS &&
109 rdsv3_if_lookup_by_name(drv));
110 }
111
112 int
113 rdsv3_do_ip_ioctl(ksocket_t so4, void **ipaddrs, int *size, int *nifs)
114 {
115 struct lifnum lifn;
116 struct lifconf lifc;
117 struct lifreq *lp, *rlp, lifr;
118 int rval = 0;
119 int numifs;
120 int bufsize, rbufsize;
121 void *buf, *rbuf;
122 int i, j, n, rc;
123
124 *ipaddrs = NULL;
125 *size = 0;
126 *nifs = 0;
127
128 RDSV3_DPRINTF4("rdsv3_do_ip_ioctl", "Enter");
129
130 retry_count:
131 /* snapshot the current number of interfaces */
132 lifn.lifn_family = PF_UNSPEC;
133 lifn.lifn_flags = LIFC_NOXMIT | LIFC_TEMPORARY | LIFC_ALLZONES;
134 lifn.lifn_count = 0;
135 rval = ksocket_ioctl(so4, SIOCGLIFNUM, (intptr_t)&lifn, &rval,
136 CRED());
137 if (rval != 0) {
138 RDSV3_DPRINTF2("rdsv3_do_ip_ioctl",
139 "ksocket_ioctl returned: %d", rval);
140 return (rval);
141 }
142
143 numifs = lifn.lifn_count;
144 if (numifs <= 0) {
145 RDSV3_DPRINTF2("rdsv3_do_ip_ioctl", "No interfaces found");
146 return (0);
147 }
148
149 /* allocate extra room in case more interfaces appear */
150 numifs += 10;
151
152 /* get the interface names and ip addresses */
153 bufsize = numifs * sizeof (struct lifreq);
154 buf = kmem_alloc(bufsize, KM_SLEEP);
155
156 lifc.lifc_family = AF_UNSPEC;
157 lifc.lifc_flags = LIFC_NOXMIT | LIFC_TEMPORARY | LIFC_ALLZONES;
158 lifc.lifc_len = bufsize;
159 lifc.lifc_buf = buf;
160 rc = ksocket_ioctl(so4, SIOCGLIFCONF, (intptr_t)&lifc, &rval, CRED());
161 if (rc != 0) {
162 RDSV3_DPRINTF2("rdsv3_do_ip_ioctl", "SIOCGLIFCONF failed");
163 kmem_free(buf, bufsize);
164 return (rc);
165 }
166 /* if our extra room is used up, try again */
167 if (bufsize <= lifc.lifc_len) {
168 kmem_free(buf, bufsize);
169 buf = NULL;
170 goto retry_count;
171 }
172 /* calc actual number of ifconfs */
173 n = lifc.lifc_len / sizeof (struct lifreq);
174
175 /*
176 * Count the RDS interfaces
177 */
178 for (i = 0, j = 0, lp = lifc.lifc_req; i < n; i++, lp++) {
179
180 /*
181 * Copy as the SIOCGLIFFLAGS ioctl is destructive
182 */
183 bcopy(lp, &lifr, sizeof (struct lifreq));
184 /*
185 * fetch the flags using the socket of the correct family
186 */
187 switch (lifr.lifr_addr.ss_family) {
188 case AF_INET:
189 rc = ksocket_ioctl(so4, SIOCGLIFFLAGS, (intptr_t)&lifr,
190 &rval, CRED());
191 break;
192 default:
193 continue;
194 }
195
196 if (rc != 0) continue;
197
198 /*
199 * If we got the flags, skip uninteresting
200 * interfaces based on flags
201 */
202 if ((lifr.lifr_flags & IFF_UP) != IFF_UP)
203 continue;
204 if (lifr.lifr_flags &
205 (IFF_ANYCAST|IFF_NOLOCAL|IFF_DEPRECATED))
206 continue;
207 if (!rdsv3_capable_interface(&lifr))
208 continue;
209 j++;
210 }
211
212 if (j <= 0) {
213 RDSV3_DPRINTF2("rdsv3_do_ip_ioctl", "No RDS interfaces");
214 kmem_free(buf, bufsize);
215 return (rval);
216 }
217
218 numifs = j;
219
220 /* This is the buffer we pass back */
221 rbufsize = numifs * sizeof (struct lifreq);
222 rbuf = kmem_alloc(rbufsize, KM_SLEEP);
223 rlp = (struct lifreq *)rbuf;
224
225 /*
226 * Examine the array of interfaces and filter uninteresting ones
227 */
228 for (i = 0, lp = lifc.lifc_req; i < n; i++, lp++) {
229
230 /*
231 * Copy the address as the SIOCGLIFFLAGS ioctl is destructive
232 */
233 bcopy(lp, &lifr, sizeof (struct lifreq));
234 /*
235 * fetch the flags using the socket of the correct family
236 */
237 switch (lifr.lifr_addr.ss_family) {
238 case AF_INET:
239 rc = ksocket_ioctl(so4, SIOCGLIFFLAGS, (intptr_t)&lifr,
240 &rval, CRED());
241 break;
242 default:
243 continue;
244 }
245
246
247 if (rc != 0) {
248 RDSV3_DPRINTF2("rdsv3_do_ip_ioctl",
249 "ksocket_ioctl failed" " for %s", lifr.lifr_name);
250 continue;
251 }
252
253 /*
254 * If we got the flags, skip uninteresting
255 * interfaces based on flags
256 */
257 if ((lifr.lifr_flags & IFF_UP) != IFF_UP)
258 continue;
259 if (lifr.lifr_flags &
260 (IFF_ANYCAST|IFF_NOLOCAL|IFF_DEPRECATED))
261 continue;
262 if (!rdsv3_capable_interface(&lifr))
263 continue;
264
265 /* save the record */
266 bcopy(lp, rlp, sizeof (struct lifreq));
267 rlp->lifr_addr.ss_family = AF_INET_OFFLOAD;
268 rlp++;
269 }
270
271 kmem_free(buf, bufsize);
272
273 *ipaddrs = rbuf;
274 *size = rbufsize;
275 *nifs = numifs;
276
277 RDSV3_DPRINTF4("rdsv3_do_ip_ioctl", "Return");
278
279 return (rval);
280 }
281
282 /*
283 * Check if the IP interface named by `ifrp' is RDS-capable.
284 */
285 boolean_t
286 rdsv3_capable_interface_old(struct ifreq *ifrp)
287 {
288 char ifname[IFNAMSIZ];
289 char drv[MAXLINKNAMELEN];
290 uint_t ppa;
291 char *cp;
292
293 RDSV3_DPRINTF4("rdsv3_capable_interface_old", "Enter");
294
295 /*
296 * Strip off the logical interface portion before getting
297 * intimate with the name.
298 */
299 (void) strlcpy(ifname, ifrp->ifr_name, IFNAMSIZ);
300 if ((cp = strchr(ifname, ':')) != NULL)
301 *cp = '\0';
302
303 RDSV3_DPRINTF4("rdsv3_capable_interface_old", "ifname: %s", ifname);
304
305 if ((strcmp("lo0", ifname) == 0) ||
306 (strncmp("ibd", ifname, 3) == 0)) {
307 /*
308 * loopback and IB are considered RDS-capable
309 */
310 return (B_TRUE);
311 }
312
313 return (
314 ddi_parse_dlen(ifname, drv, MAXLINKNAMELEN, &ppa) == DDI_SUCCESS &&
315 rdsv3_if_lookup_by_name(drv));
316 }
317
318 int
319 rdsv3_do_ip_ioctl_old(ksocket_t so4, void **ipaddrs, int *size, int *nifs)
320 {
321 uint_t ifn;
322 struct ifconf ifc;
323 struct ifreq *lp, *rlp, ifr;
324 int rval = 0;
325 int numifs;
326 int bufsize, rbufsize;
327 void *buf, *rbuf;
328 int i, j, n, rc;
329
330 *ipaddrs = NULL;
331 *size = 0;
332 *nifs = 0;
333
334 RDSV3_DPRINTF4("rdsv3_do_ip_ioctl_old", "Enter");
335
336 retry_count:
337 rval = ksocket_ioctl(so4, SIOCGIFNUM, (intptr_t)&ifn, &rval,
338 CRED());
339 if (rval != 0) {
340 RDSV3_DPRINTF2("rdsv3_do_ip_ioctl_old",
341 "ksocket_ioctl(SIOCGIFNUM) returned: %d", rval);
342 return (rval);
343 }
344
345 numifs = ifn;
346 if (numifs <= 0) {
347 RDSV3_DPRINTF2("rdsv3_do_ip_ioctl_old", "No interfaces found");
348 return (0);
349 }
350
351 /* allocate extra room in case more interfaces appear */
352 numifs += 10;
353
354 /* get the interface names and ip addresses */
355 bufsize = numifs * sizeof (struct ifreq);
356 buf = kmem_alloc(bufsize, KM_SLEEP);
357
358 ifc.ifc_len = bufsize;
359 ifc.ifc_buf = buf;
360 rc = ksocket_ioctl(so4, SIOCGIFCONF, (intptr_t)&ifc, &rval, CRED());
361 if (rc != 0) {
362 RDSV3_DPRINTF2("rdsv3_do_ip_ioctl_old",
363 "SIOCGLIFCONF failed: %d", rc);
364 kmem_free(buf, bufsize);
365 return (rc);
366 }
367 /* if our extra room is used up, try again */
368 if (bufsize <= ifc.ifc_len) {
369 kmem_free(buf, bufsize);
370 buf = NULL;
371 goto retry_count;
372 }
373 /* calc actual number of ifconfs */
374 n = ifc.ifc_len / sizeof (struct ifreq);
375
376 /*
377 * Count the RDS interfaces
378 */
379 for (i = 0, j = 0, lp = ifc.ifc_req; i < n; i++, lp++) {
380
381 /*
382 * Copy as the SIOCGIFFLAGS ioctl is destructive
383 */
384 bcopy(lp, &ifr, sizeof (struct ifreq));
385 /*
386 * fetch the flags using the socket of the correct family
387 */
388 switch (ifr.ifr_addr.sa_family) {
389 case AF_INET:
390 rc = ksocket_ioctl(so4, SIOCGIFFLAGS, (intptr_t)&ifr,
391 &rval, CRED());
392 break;
393 default:
394 continue;
395 }
396
397 if (rc != 0) continue;
398
399 RDSV3_DPRINTF2("rdsv3_do_ip_ioctl_old",
400 "1. ifr_name: %s, flags: %d", ifr.ifr_name,
401 (ushort_t)ifr.ifr_flags);
402
403 /*
404 * If we got the flags, skip uninteresting
405 * interfaces based on flags
406 */
407 if ((((ushort_t)ifr.ifr_flags) & IFF_UP) != IFF_UP)
408 continue;
409 RDSV3_DPRINTF2("rdsv3_do_ip_ioctl_old",
410 "2. ifr_name: %s, flags: %d", ifr.ifr_name,
411 (ushort_t)ifr.ifr_flags);
412 if (((ushort_t)ifr.ifr_flags) &
413 (IFF_ANYCAST|IFF_NOLOCAL|IFF_DEPRECATED))
414 continue;
415 RDSV3_DPRINTF2("rdsv3_do_ip_ioctl_old",
416 "3. ifr_name: %s, flags: %d", ifr.ifr_name,
417 (ushort_t)ifr.ifr_flags);
418 if (!rdsv3_capable_interface_old(&ifr))
419 continue;
420 RDSV3_DPRINTF2("rdsv3_do_ip_ioctl_old",
421 "4. ifr_name: %s, flags: %d", ifr.ifr_name,
422 (ushort_t)ifr.ifr_flags);
423 j++;
424 }
425
426 if (j <= 0) {
427 RDSV3_DPRINTF2("rdsv3_do_ip_ioctl_old", "No RDS interfaces");
428 kmem_free(buf, bufsize);
429 return (rval);
430 }
431
432 numifs = j;
433
434 /* This is the buffer we pass back */
435 rbufsize = numifs * sizeof (struct ifreq);
436 rbuf = kmem_alloc(rbufsize, KM_SLEEP);
437 rlp = (struct ifreq *)rbuf;
438
439 /*
440 * Examine the array of interfaces and filter uninteresting ones
441 */
442 for (i = 0, lp = ifc.ifc_req; i < n; i++, lp++) {
443
444 /*
445 * Copy the address as the SIOCGIFFLAGS ioctl is destructive
446 */
447 bcopy(lp, &ifr, sizeof (struct ifreq));
448 /*
449 * fetch the flags using the socket of the correct family
450 */
451 switch (ifr.ifr_addr.sa_family) {
452 case AF_INET:
453 rc = ksocket_ioctl(so4, SIOCGIFFLAGS, (intptr_t)&ifr,
454 &rval, CRED());
455 break;
456 default:
457 continue;
458 }
459
460
461 if (rc != 0) {
462 RDSV3_DPRINTF2("rdsv3_do_ip_ioctl_old",
463 "ksocket_ioctl failed: %d for %s",
464 rc, ifr.ifr_name);
465 continue;
466 }
467
468 /*
469 * If we got the flags, skip uninteresting
470 * interfaces based on flags
471 */
472 if ((((ushort_t)ifr.ifr_flags) & IFF_UP) != IFF_UP)
473 continue;
474 if (((ushort_t)ifr.ifr_flags) &
475 (IFF_ANYCAST|IFF_NOLOCAL|IFF_DEPRECATED))
476 continue;
477 if (!rdsv3_capable_interface_old(&ifr))
478 continue;
479
480 /* save the record */
481 bcopy(lp, rlp, sizeof (struct ifreq));
482 rlp->ifr_addr.sa_family = AF_INET_OFFLOAD;
483 rlp++;
484 }
485
486 kmem_free(buf, bufsize);
487
488 *ipaddrs = rbuf;
489 *size = rbufsize;
490 *nifs = numifs;
491
492 RDSV3_DPRINTF4("rdsv3_do_ip_ioctl_old", "Return");
493
494 return (rval);
495 }
496
497 boolean_t
498 rdsv3_isloopback(ipaddr_t addr)
499 {
500 ip_stack_t *ipst;
501
502 ipst = netstack_find_by_zoneid(GLOBAL_ZONEID)->netstack_ip;
503 ASSERT(ipst != NULL);
504 if (ip_type_v4(addr, ipst) != IRE_LOOPBACK) {
505 netstack_rele(ipst->ips_netstack);
506 return (B_FALSE);
507 }
508 netstack_rele(ipst->ips_netstack);
509 return (B_TRUE);
510 }
511
512 /*
513 * Work Queue Implementation
514 */
515
516 #define RDSV3_WQ_THREAD_IDLE 0
517 #define RDSV3_WQ_THREAD_RUNNING 1
518 #define RDSV3_WQ_THREAD_FLUSHING 2
519 #define RDSV3_WQ_THREAD_EXITING 3
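/*
 * Worker/queue state transitions, as implemented by the routines below:
 *   IDLE -> RUNNING       rdsv3_queue_work() dispatches rdsv3_worker_thread()
 *   RUNNING -> FLUSHING   rdsv3_flush_workqueue() waits for the drain
 *   RUNNING/FLUSHING -> IDLE  the worker thread finds the queue empty
 *   any -> EXITING        rdsv3_destroy_task_workqueue(); no new work accepted
 */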
520
521 /* worker thread */
522 void
523 rdsv3_worker_thread(void *arg)
524 {
525 rdsv3_workqueue_struct_t *wq = arg;
526 rdsv3_work_t *work;
527
528 RDSV3_DPRINTF4("rdsv3_worker_thread", "Enter(wq: 0x%p)", wq);
529
530 mutex_enter(&wq->wq_lock);
531 work = list_remove_head(&wq->wq_queue);
532 while (work) {
533 mutex_exit(&wq->wq_lock);
534
535 /* process work */
536 work->func(work);
537
538 mutex_enter(&wq->wq_lock);
539 work = list_remove_head(&wq->wq_queue);
540 }
541
542 /* No more work, go home, until called again */
543 if (wq->wq_state != RDSV3_WQ_THREAD_EXITING) {
544 wq->wq_state = RDSV3_WQ_THREAD_IDLE;
545 }
546 mutex_exit(&wq->wq_lock);
547
548 RDSV3_DPRINTF4("rdsv3_worker_thread", "Return(wq: 0x%p)", wq);
549 }
550
551 /* XXX - drain the workqueue; poll until the worker empties it (run it inline if exiting) */
552 void
553 rdsv3_flush_workqueue(rdsv3_workqueue_struct_t *wq)
554 {
555 RDSV3_DPRINTF4("rdsv3_flush_workqueue", "Enter(wq: %p)", wq);
556
557 mutex_enter(&wq->wq_lock);
558 switch (wq->wq_state) {
559 case RDSV3_WQ_THREAD_IDLE:
560 /* nothing to do */
561 ASSERT(list_is_empty(&wq->wq_queue));
562 break;
563
564 case RDSV3_WQ_THREAD_RUNNING:
565 wq->wq_state = RDSV3_WQ_THREAD_FLUSHING;
566 /* FALLTHRU */
567 case RDSV3_WQ_THREAD_FLUSHING:
568 /* already flushing, wait until the flushing is complete */
569 do {
570 mutex_exit(&wq->wq_lock);
571 delay(drv_usectohz(1000000));
572 mutex_enter(&wq->wq_lock);
573 } while (wq->wq_state == RDSV3_WQ_THREAD_FLUSHING);
574 break;
575 case RDSV3_WQ_THREAD_EXITING:
576 mutex_exit(&wq->wq_lock);
577 rdsv3_worker_thread(wq);
578 return;
579 }
580 mutex_exit(&wq->wq_lock);
581
582 RDSV3_DPRINTF4("rdsv3_flush_workqueue", "Return(wq: %p)", wq);
583 }
584
585 void
586 rdsv3_queue_work(rdsv3_workqueue_struct_t *wq, rdsv3_work_t *wp)
587 {
588 RDSV3_DPRINTF4("rdsv3_queue_work", "Enter(wq: %p, wp: %p)", wq, wp);
589
590 mutex_enter(&wq->wq_lock);
591
592 if (list_link_active(&wp->work_item)) {
593 /* This is already in the queue, ignore this call */
594 mutex_exit(&wq->wq_lock);
595 RDSV3_DPRINTF3("rdsv3_queue_work", "already queued: %p", wp);
596 return;
597 }
598
599 switch (wq->wq_state) {
600 case RDSV3_WQ_THREAD_RUNNING:
601 list_insert_tail(&wq->wq_queue, wp);
602 mutex_exit(&wq->wq_lock);
603 break;
604
605 case RDSV3_WQ_THREAD_FLUSHING:
606 do {
607 mutex_exit(&wq->wq_lock);
608 delay(drv_usectohz(1000000));
609 mutex_enter(&wq->wq_lock);
610 } while (wq->wq_state == RDSV3_WQ_THREAD_FLUSHING);
611
612 if (wq->wq_state == RDSV3_WQ_THREAD_RUNNING) {
613 list_insert_tail(&wq->wq_queue, wp);
614 mutex_exit(&wq->wq_lock);
615 break;
616 }
617 /* FALLTHRU */
618
619 case RDSV3_WQ_THREAD_IDLE:
620 list_insert_tail(&wq->wq_queue, wp);
621 wq->wq_state = RDSV3_WQ_THREAD_RUNNING;
622 mutex_exit(&wq->wq_lock);
623
624 (void) ddi_taskq_dispatch(rdsv3_taskq, rdsv3_worker_thread, wq,
625 DDI_SLEEP);
626 break;
627
628 case RDSV3_WQ_THREAD_EXITING:
629 mutex_exit(&wq->wq_lock);
630 break;
631 }
632
633 RDSV3_DPRINTF4("rdsv3_queue_work", "Return(wq: %p, wp: %p)", wq, wp);
634 }
635
636 /* timeout handler for delayed work queuing */
637 void
638 rdsv3_work_timeout_handler(void *arg)
639 {
640 rdsv3_delayed_work_t *dwp = (rdsv3_delayed_work_t *)arg;
641
642 RDSV3_DPRINTF4("rdsv3_work_timeout_handler",
643 "Enter(wq: %p, wp: %p)", dwp->wq, &dwp->work);
644
645 mutex_enter(&dwp->lock);
646 dwp->timeid = 0;
647 mutex_exit(&dwp->lock);
648
649 mutex_enter(&dwp->wq->wq_lock);
650 dwp->wq->wq_pending--;
651 if (dwp->wq->wq_state == RDSV3_WQ_THREAD_EXITING) {
652 mutex_exit(&dwp->wq->wq_lock);
653 return;
654 }
655 mutex_exit(&dwp->wq->wq_lock);
656
657 rdsv3_queue_work(dwp->wq, &dwp->work);
658
659 RDSV3_DPRINTF4("rdsv3_work_timeout_handler",
660 "Return(wq: %p, wp: %p)", dwp->wq, &dwp->work);
661 }
662
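/*
 * Queue `dwp' after roughly `delay' seconds.  A zero delay queues the work
 * immediately; otherwise a timeout() fires rdsv3_work_timeout_handler(),
 * which does the queuing.  wq_pending counts timeouts that have not yet
 * fired so that rdsv3_destroy_task_workqueue() can wait for them.
 */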
663 void
664 rdsv3_queue_delayed_work(rdsv3_workqueue_struct_t *wq,
665 rdsv3_delayed_work_t *dwp, uint_t delay)
666 {
667 RDSV3_DPRINTF4("rdsv3_queue_delayed_work",
668 "Enter(wq: %p, wp: %p)", wq, dwp);
669
670 if (delay == 0) {
671 rdsv3_queue_work(wq, &dwp->work);
672 return;
673 }
674
675 mutex_enter(&wq->wq_lock);
676 if (wq->wq_state == RDSV3_WQ_THREAD_EXITING) {
677 mutex_exit(&wq->wq_lock);
678 RDSV3_DPRINTF4("rdsv3_queue_delayed_work",
679 "WQ exiting - don't queue (wq: %p, wp: %p)", wq, dwp);
680 return;
681 }
682 wq->wq_pending++;
683 mutex_exit(&wq->wq_lock);
684
685 mutex_enter(&dwp->lock);
686 if (dwp->timeid == 0) {
687 dwp->wq = wq;
688 dwp->timeid = timeout(rdsv3_work_timeout_handler, dwp,
689 jiffies + (delay * rdsv3_one_sec_in_hz));
690 mutex_exit(&dwp->lock);
691 } else {
692 mutex_exit(&dwp->lock);
693 RDSV3_DPRINTF4("rdsv3_queue_delayed_work", "Already queued: %p",
694 dwp);
695 mutex_enter(&wq->wq_lock);
696 wq->wq_pending--;
697 mutex_exit(&wq->wq_lock);
698 }
699
700 RDSV3_DPRINTF4("rdsv3_queue_delayed_work",
701 "Return(wq: %p, wp: %p)", wq, dwp);
702 }
703
704 void
705 rdsv3_cancel_delayed_work(rdsv3_delayed_work_t *dwp)
706 {
707 RDSV3_DPRINTF4("rdsv3_cancel_delayed_work",
708 "Enter(wq: %p, dwp: %p)", dwp->wq, dwp);
709
710 mutex_enter(&dwp->lock);
711 if (dwp->timeid != 0) {
712 (void) untimeout(dwp->timeid);
713 dwp->timeid = 0;
714 } else {
715 RDSV3_DPRINTF4("rdsv3_cancel_delayed_work",
716 "Nothing to cancel (wq: %p, dwp: %p)", dwp->wq, dwp);
717 mutex_exit(&dwp->lock);
718 return;
719 }
720 mutex_exit(&dwp->lock);
721
722 mutex_enter(&dwp->wq->wq_lock);
723 dwp->wq->wq_pending--;
724 mutex_exit(&dwp->wq->wq_lock);
725
726 RDSV3_DPRINTF4("rdsv3_cancel_delayed_work",
727 "Return(wq: %p, dwp: %p)", dwp->wq, dwp);
728 }
729
730 void
731 rdsv3_destroy_task_workqueue(rdsv3_workqueue_struct_t *wq)
732 {
733 RDSV3_DPRINTF2("rdsv3_destroy_workqueue", "Enter");
734
735 ASSERT(wq);
736
737 mutex_enter(&wq->wq_lock);
738 wq->wq_state = RDSV3_WQ_THREAD_EXITING;
739
740 while (wq->wq_pending > 0) {
741 mutex_exit(&wq->wq_lock);
742 delay(drv_usectohz(1000000));
743 mutex_enter(&wq->wq_lock);
744 }
745 mutex_exit(&wq->wq_lock);
746
747 rdsv3_flush_workqueue(wq);
748
749 list_destroy(&wq->wq_queue);
750 mutex_destroy(&wq->wq_lock);
751 kmem_free(wq, sizeof (rdsv3_workqueue_struct_t));
752
753 ASSERT(rdsv3_taskq);
754 ddi_taskq_destroy(rdsv3_taskq);
755
756 wq = NULL;
757 rdsv3_taskq = NULL;
758
759 RDSV3_DPRINTF2("rdsv3_destroy_workqueue", "Return");
760 }
761
762 /* ARGSUSED */
763 void
764 rdsv3_rdma_init_worker(struct rdsv3_work_s *work)
765 {
766 rdsv3_rdma_init();
767 }
768
769 #define RDSV3_NUM_TASKQ_THREADS 1
770 rdsv3_workqueue_struct_t *
771 rdsv3_create_task_workqueue(char *name)
772 {
773 rdsv3_workqueue_struct_t *wq;
774
775 RDSV3_DPRINTF2("create_singlethread_workqueue", "Enter (dip: %p)",
776 rdsv3_dev_info);
777
778 rdsv3_taskq = ddi_taskq_create(rdsv3_dev_info, name,
779 RDSV3_NUM_TASKQ_THREADS, TASKQ_DEFAULTPRI, 0);
780 if (rdsv3_taskq == NULL) {
781 RDSV3_DPRINTF2(__FILE__,
782 "ddi_taskq_create failed for rdsv3_taskq");
783 return (NULL);
784 }
785
786 wq = kmem_zalloc(sizeof (rdsv3_workqueue_struct_t), KM_NOSLEEP);
787 if (wq == NULL) {
788 RDSV3_DPRINTF2(__FILE__, "kmem_zalloc failed for wq");
789 ddi_taskq_destroy(rdsv3_taskq);
790 return (NULL);
791 }
792
793 list_create(&wq->wq_queue, sizeof (struct rdsv3_work_s),
794 offsetof(struct rdsv3_work_s, work_item));
795 mutex_init(&wq->wq_lock, NULL, MUTEX_DRIVER, NULL);
796 wq->wq_state = RDSV3_WQ_THREAD_IDLE;
797 wq->wq_pending = 0;
798 rdsv3_one_sec_in_hz = drv_usectohz(1000000);
799
800 RDSV3_DPRINTF2("create_singlethread_workqueue", "Return");
801
802 return (wq);
803 }
804
805 /*
806 * Implementation for struct sock
807 */
808
809 void
810 rdsv3_sock_exit_data(struct rsock *sk)
811 {
812 struct rdsv3_sock *rs = sk->sk_protinfo;
813
814 RDSV3_DPRINTF4("rdsv3_sock_exit_data", "rs: %p sk: %p", rs, sk);
815
816 ASSERT(rs != NULL);
817 ASSERT(rdsv3_sk_sock_flag(sk, SOCK_DEAD));
818
819 rs->rs_sk = NULL;
820
821 list_destroy(&rs->rs_send_queue);
822 list_destroy(&rs->rs_notify_queue);
823 list_destroy(&rs->rs_recv_queue);
824
825 rw_destroy(&rs->rs_recv_lock);
826 mutex_destroy(&rs->rs_lock);
827
828 mutex_destroy(&rs->rs_rdma_lock);
829 avl_destroy(&rs->rs_rdma_keys);
830
831 mutex_destroy(&rs->rs_conn_lock);
832 mutex_destroy(&rs->rs_congested_lock);
833 cv_destroy(&rs->rs_congested_cv);
834
835 rdsv3_exit_waitqueue(sk->sk_sleep);
836 kmem_free(sk->sk_sleep, sizeof (rdsv3_wait_queue_t));
837 mutex_destroy(&sk->sk_lock);
838
839 kmem_cache_free(rdsv3_alloc_cache, sk);
840 RDSV3_DPRINTF4("rdsv3_sock_exit_data", "rs: %p sk: %p", rs, sk);
841 }
842
843 /* XXX - figure out right values */
844 #define RDSV3_RECV_HIWATER (256 * 1024)
845 #define RDSV3_RECV_LOWATER 128
846 #define RDSV3_XMIT_HIWATER (256 * 1024)
847 #define RDSV3_XMIT_LOWATER 1024
848
849 struct rsock *
850 rdsv3_sk_alloc()
851 {
852 struct rsock *sk;
853
854 sk = kmem_cache_alloc(rdsv3_alloc_cache, KM_SLEEP);
855 if (sk == NULL) {
856 RDSV3_DPRINTF2("rdsv3_create", "kmem_cache_alloc failed");
857 return (NULL);
858 }
859
860 bzero(sk, sizeof (struct rsock) + sizeof (struct rdsv3_sock));
861 return (sk);
862 }
863
864 void
865 rdsv3_sock_init_data(struct rsock *sk)
866 {
867 sk->sk_sleep = kmem_zalloc(sizeof (rdsv3_wait_queue_t), KM_SLEEP);
868 rdsv3_init_waitqueue(sk->sk_sleep);
869
870 mutex_init(&sk->sk_lock, NULL, MUTEX_DRIVER, NULL);
871 sk->sk_refcount = 1;
872 sk->sk_protinfo = (struct rdsv3_sock *)(sk + 1);
873 sk->sk_sndbuf = RDSV3_XMIT_HIWATER;
874 sk->sk_rcvbuf = RDSV3_RECV_HIWATER;
875 }
876
877 /*
878 * Connection cache
879 */
880 /* ARGSUSED */
881 int
882 rdsv3_conn_constructor(void *buf, void *arg, int kmflags)
883 {
884 struct rdsv3_connection *conn = buf;
885
886 bzero(conn, sizeof (struct rdsv3_connection));
887
888 conn->c_next_tx_seq = 1;
889 mutex_init(&conn->c_lock, NULL, MUTEX_DRIVER, NULL);
890 mutex_init(&conn->c_send_lock, NULL, MUTEX_DRIVER, NULL);
891 conn->c_send_generation = 1;
892 conn->c_senders = 0;
893
894 list_create(&conn->c_send_queue, sizeof (struct rdsv3_message),
895 offsetof(struct rdsv3_message, m_conn_item));
896 list_create(&conn->c_retrans, sizeof (struct rdsv3_message),
897 offsetof(struct rdsv3_message, m_conn_item));
898 return (0);
899 }
900
901 /* ARGSUSED */
902 void
903 rdsv3_conn_destructor(void *buf, void *arg)
904 {
905 struct rdsv3_connection *conn = buf;
906
907 ASSERT(list_is_empty(&conn->c_send_queue));
908 ASSERT(list_is_empty(&conn->c_retrans));
909 list_destroy(&conn->c_send_queue);
910 list_destroy(&conn->c_retrans);
911 mutex_destroy(&conn->c_send_lock);
912 mutex_destroy(&conn->c_lock);
913 }
914
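/*
 * AVL comparator for the connection cache: order entries by local address,
 * then by foreign address, so each (laddr, faddr) pair maps to one node.
 */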
915 int
916 rdsv3_conn_compare(const void *conn1, const void *conn2)
917 {
918 uint32_be_t laddr1, faddr1, laddr2, faddr2;
919
920 laddr1 = ((rdsv3_conn_info_t *)conn1)->c_laddr;
921 laddr2 = ((struct rdsv3_connection *)conn2)->c_laddr;
922
923 if (laddr1 == laddr2) {
924 faddr1 = ((rdsv3_conn_info_t *)conn1)->c_faddr;
925 faddr2 = ((struct rdsv3_connection *)conn2)->c_faddr;
926 if (faddr1 == faddr2)
927 return (0);
928 if (faddr1 < faddr2)
929 return (-1);
930 return (1);
931 }
932
933 if (laddr1 < laddr2)
934 return (-1);
935
936 return (1);
937 }
938
939 /* rdsv3_ib_incoming cache */
940 /* ARGSUSED */
941 int
942 rdsv3_ib_inc_constructor(void *buf, void *arg, int kmflags)
943 {
944 list_create(&((struct rdsv3_ib_incoming *)buf)->ii_frags,
945 sizeof (struct rdsv3_page_frag),
946 offsetof(struct rdsv3_page_frag, f_item));
947
948 return (0);
949 }
950
951 /* ARGSUSED */
952 void
953 rdsv3_ib_inc_destructor(void *buf, void *arg)
954 {
955 list_destroy(&((struct rdsv3_ib_incoming *)buf)->ii_frags);
956 }
957
958 /* ib_frag_slab cache */
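/*
 * The constructor allocates a page and pre-maps it with ibt_map_mem_iov()
 * so frags taken from this slab already carry a receive SGE; the
 * destructor unmaps and frees the page.
 */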
959 /* ARGSUSED */
960 int
961 rdsv3_ib_frag_constructor(void *buf, void *arg, int kmflags)
962 {
963 struct rdsv3_page_frag *frag = (struct rdsv3_page_frag *)buf;
964 struct rdsv3_ib_device *rds_ibdev = (struct rdsv3_ib_device *)arg;
965 ibt_iov_attr_t iov_attr;
966 ibt_iov_t iov_arr[1];
967 ibt_all_wr_t wr;
968
969 bzero(frag, sizeof (struct rdsv3_page_frag));
970 list_link_init(&frag->f_item);
971
972 frag->f_page = kmem_alloc(PAGE_SIZE, kmflags);
973 if (frag->f_page == NULL) {
974 RDSV3_DPRINTF2("rdsv3_ib_frag_constructor",
975 "kmem_alloc for %d failed", PAGE_SIZE);
976 return (-1);
977 }
978 frag->f_offset = 0;
979
980 iov_attr.iov_as = NULL;
981 iov_attr.iov = &iov_arr[0];
982 iov_attr.iov_buf = NULL;
983 iov_attr.iov_list_len = 1;
984 iov_attr.iov_wr_nds = 1;
985 iov_attr.iov_lso_hdr_sz = 0;
986 iov_attr.iov_flags = IBT_IOV_SLEEP | IBT_IOV_RECV;
987
988 iov_arr[0].iov_addr = frag->f_page;
989 iov_arr[0].iov_len = PAGE_SIZE;
990
991 wr.recv.wr_nds = 1;
992 wr.recv.wr_sgl = &frag->f_sge;
993
994 if (ibt_map_mem_iov(ib_get_ibt_hca_hdl(rds_ibdev->dev),
995 &iov_attr, &wr, &frag->f_mapped) != IBT_SUCCESS) {
996 RDSV3_DPRINTF2("rdsv3_ib_frag_constructor",
997 "ibt_map_mem_iov failed");
998 kmem_free(frag->f_page, PAGE_SIZE);
999 return (-1);
1000 }
1001
1002 return (0);
1003 }
1004
1005 /* ARGSUSED */
1006 void
1007 rdsv3_ib_frag_destructor(void *buf, void *arg)
1008 {
1009 struct rdsv3_page_frag *frag = (struct rdsv3_page_frag *)buf;
1010 struct rdsv3_ib_device *rds_ibdev = (struct rdsv3_ib_device *)arg;
1011
1012 /* unmap the page */
1013 if (ibt_unmap_mem_iov(ib_get_ibt_hca_hdl(rds_ibdev->dev),
1014 frag->f_mapped) != IBT_SUCCESS)
1015 RDSV3_DPRINTF2("rdsv3_ib_frag_destructor",
1016 "ibt_unmap_mem_iov failed");
1017
1018 /* free the page */
1019 kmem_free(frag->f_page, PAGE_SIZE);
1020 }
1021
1022 /* loop.c */
1023 extern kmutex_t loop_conns_lock;
1024 extern list_t loop_conns;
1025
1026 struct rdsv3_loop_connection
1027 {
1028 struct list_node loop_node;
1029 struct rdsv3_connection *conn;
1030 };
1031
1032 void
1033 rdsv3_loop_init(void)
1034 {
1035 list_create(&loop_conns, sizeof (struct rdsv3_loop_connection),
1036 offsetof(struct rdsv3_loop_connection, loop_node));
1037 mutex_init(&loop_conns_lock, NULL, MUTEX_DRIVER, NULL);
1038 }
1039
1040 /* rdma.c */
1041 /* IB Rkey is used here for comparison */
1042 int
1043 rdsv3_mr_compare(const void *mr1, const void *mr2)
1044 {
1045 uint32_t key1 = *(uint32_t *)mr1;
1046 uint32_t key2 = ((struct rdsv3_mr *)mr2)->r_key;
1047
1048 if (key1 < key2)
1049 return (-1);
1050 if (key1 > key2)
1051 return (1);
1052 return (0);
1053 }
1054
1055 /* transport.c */
1056 extern struct rdsv3_transport *transports[];
1057 extern krwlock_t trans_sem;
1058
1059 void
1060 rdsv3_trans_exit(void)
1061 {
1062 struct rdsv3_transport *trans;
1063 int i;
1064
1065 RDSV3_DPRINTF2("rdsv3_trans_exit", "Enter");
1066
1067 /* currently, only IB transport */
1068 rw_enter(&trans_sem, RW_READER);
1069 trans = NULL;
1070 for (i = 0; i < RDS_TRANS_COUNT; i++) {
1071 if (transports[i]) {
1072 trans = transports[i];
1073 break;
1074 }
1075 }
1076 rw_exit(&trans_sem);
1077
1078 /* trans->exit() will remove the trans from the list */
1079 if (trans)
1080 trans->exit();
1081
1082 rw_destroy(&trans_sem);
1083
1084 RDSV3_DPRINTF2("rdsv3_trans_exit", "Return");
1085 }
1086
1087 void
1088 rdsv3_trans_init()
1089 {
1090 RDSV3_DPRINTF2("rdsv3_trans_init", "Enter");
1091
1092 rw_init(&trans_sem, NULL, RW_DRIVER, NULL);
1093
1094 RDSV3_DPRINTF2("rdsv3_trans_init", "Return");
1095 }
1096
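/*
 * Append a control message to `msg': copy the existing msg_control buffer
 * (if any) into a larger allocation, fill in the new cmsghdr at the old
 * end, and grow msg_controllen by CMSG_SPACE(size).
 */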
1097 int
1098 rdsv3_put_cmsg(struct nmsghdr *msg, int level, int type, size_t size,
1099 void *payload)
1100 {
1101 struct cmsghdr *cp;
1102 char *bp;
1103 size_t cmlen;
1104 size_t cmspace;
1105 size_t bufsz;
1106
1107 RDSV3_DPRINTF4("rdsv3_put_cmsg",
1108 "Enter(msg: %p level: %d type: %d sz: %d)",
1109 msg, level, type, size);
1110
1111 if (msg == NULL || msg->msg_controllen == 0) {
1112 return (0);
1113 }
1114 /* check for first cmsg or this is another cmsg to be appended */
1115 if (msg->msg_control == NULL)
1116 msg->msg_controllen = 0;
1117
1118 cmlen = CMSG_LEN(size);
1119 cmspace = CMSG_SPACE(size);
1120 bufsz = msg->msg_controllen + cmspace;
1121
1122 /* extend the existing cmsg to append the next cmsg */
1123 bp = kmem_alloc(bufsz, KM_SLEEP);
1124 if (msg->msg_control) {
1125 bcopy(msg->msg_control, bp, msg->msg_controllen);
1126 kmem_free(msg->msg_control, (size_t)msg->msg_controllen);
1127 }
1128
1129 /* assign payload the proper cmsg location */
1130 cp = (struct cmsghdr *)(bp + msg->msg_controllen);
1131 cp->cmsg_len = cmlen;
1132 cp->cmsg_level = level;
1133 cp->cmsg_type = type;
1134
1135 bcopy(payload, CMSG_DATA(cp), cmlen -
1136 (unsigned int)_CMSG_DATA_ALIGN(sizeof (struct cmsghdr)));
1137
1138 msg->msg_control = bp;
1139 msg->msg_controllen = bufsz;
1140
1141 RDSV3_DPRINTF4("rdsv3_put_cmsg", "Return(cmsg_len: %d)", cp->cmsg_len);
1142
1143 return (0);
1144 }
1145
1146 /* ARGSUSED */
1147 int
1148 rdsv3_verify_bind_address(ipaddr_t addr)
1149 {
1150 return (1);
1151 }
1152
1153 /* checksum */
1154 uint16_t
1155 rdsv3_ip_fast_csum(void *hdr, size_t length)
1156 {
1157 return (0xffff &
1158 (uint16_t)(~ip_ocsum((ushort_t *)hdr, (int)length << 1, 0)));
1159 }
1160
1161 /* scatterlist implementation */
1162 /* ARGSUSED */
1163 caddr_t
1164 rdsv3_ib_sg_dma_address(ib_device_t *dev, struct rdsv3_scatterlist *scat,
1165 uint_t offset)
1166 {
1167 return (0);
1168 }
1169
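/*
 * Map a scatterlist for the HCA: build one ibt_iov_t per element, map the
 * whole array with a single ibt_map_mem_iov() call (the handle is kept in
 * the first element), and point each element at consecutive entries of the
 * returned SGL.  Two SGL slots per element are allocated, presumably to
 * leave room for entries the mapping may split.
 */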
1170 uint_t
1171 rdsv3_ib_dma_map_sg(struct ib_device *dev, struct rdsv3_scatterlist *scat,
1172 uint_t num)
1173 {
1174 struct rdsv3_scatterlist *s, *first;
1175 ibt_iov_t *iov;
1176 ibt_wr_ds_t *sgl;
1177 ibt_iov_attr_t iov_attr;
1178 ibt_send_wr_t swr;
1179 uint_t i;
1180
1181 RDSV3_DPRINTF4("rdsv3_ib_dma_map_sg", "scat %p, num: %d", scat, num);
1182
1183 s = first = &scat[0];
1184 ASSERT(first->mihdl == NULL);
1185
1186 iov = kmem_alloc(num * sizeof (ibt_iov_t), KM_SLEEP);
1187 sgl = kmem_zalloc((num * 2) * sizeof (ibt_wr_ds_t), KM_SLEEP);
1188
1189 for (i = 0; i < num; i++, s++) {
1190 iov[i].iov_addr = s->vaddr;
1191 iov[i].iov_len = s->length;
1192 }
1193
1194 iov_attr.iov_as = NULL;
1195 iov_attr.iov = iov;
1196 iov_attr.iov_buf = NULL;
1197 iov_attr.iov_list_len = num;
1198 iov_attr.iov_wr_nds = num * 2;
1199 iov_attr.iov_lso_hdr_sz = 0;
1200 iov_attr.iov_flags = IBT_IOV_SLEEP;
1201
1202 swr.wr_sgl = sgl;
1203
1204 i = ibt_map_mem_iov(ib_get_ibt_hca_hdl(dev),
1205 &iov_attr, (ibt_all_wr_t *)&swr, &first->mihdl);
1206 kmem_free(iov, num * sizeof (ibt_iov_t));
1207 if (i != IBT_SUCCESS) {
1208 RDSV3_DPRINTF2("rdsv3_ib_dma_map_sg",
1209 "ibt_map_mem_iov returned: %d", i);
1210 return (0);
1211 }
1212
1213 s = first;
1214 for (i = 0; i < num; i++, s++, sgl++) {
1215 s->sgl = sgl;
1216 }
1217
1218 return (num);
1219 }
1220
1221 void
1222 rdsv3_ib_dma_unmap_sg(ib_device_t *dev, struct rdsv3_scatterlist *scat,
1223 uint_t num)
1224 {
1225 /* Zero length messages have no scatter gather entries */
1226 if (num != 0) {
1227 ASSERT(scat->mihdl != NULL);
1228 ASSERT(scat->sgl != NULL);
1229
1230 (void) ibt_unmap_mem_iov(ib_get_ibt_hca_hdl(dev), scat->mihdl);
1231
1232 kmem_free(scat->sgl, (num * 2) * sizeof (ibt_wr_ds_t));
1233 scat->sgl = NULL;
1234 scat->mihdl = NULL;
1235 }
1236 }
1237
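/*
 * Allocate and register one contiguous region for all per-connection RDS
 * headers.  Layout: w_nr send headers, then w_nr recv headers, then a
 * single header used for ACKs; the pointer arithmetic below carves the
 * region accordingly.
 */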
1238 int
1239 rdsv3_ib_alloc_hdrs(ib_device_t *dev, struct rdsv3_ib_connection *ic)
1240 {
1241 caddr_t addr;
1242 size_t size;
1243 ibt_mr_attr_t mr_attr;
1244 ibt_mr_desc_t mr_desc;
1245 ibt_mr_hdl_t mr_hdl;
1246 int ret;
1247
1248 RDSV3_DPRINTF4("rdsv3_ib_alloc_hdrs", "Enter(dev: %p)", dev);
1249
1250 ASSERT(ic->i_mr == NULL);
1251
1252 size = (ic->i_send_ring.w_nr + ic->i_recv_ring.w_nr + 1) *
1253 sizeof (struct rdsv3_header);
1254
1255 addr = kmem_zalloc(size, KM_NOSLEEP);
1256 if (addr == NULL)
1257 return (-1);
1258
1259 mr_attr.mr_vaddr = (ib_vaddr_t)(uintptr_t)addr;
1260 mr_attr.mr_len = size;
1261 mr_attr.mr_as = NULL;
1262 mr_attr.mr_flags = IBT_MR_ENABLE_LOCAL_WRITE;
1263 ret = ibt_register_mr(ib_get_ibt_hca_hdl(dev), RDSV3_PD2PDHDL(ic->i_pd),
1264 &mr_attr, &mr_hdl, &mr_desc);
1265 if (ret != IBT_SUCCESS) {
1266 RDSV3_DPRINTF2("rdsv3_ib_alloc_hdrs",
1267 "ibt_register_mr returned: " "%d", ret);
1268 return (-1);
1269 }
1270
1271 ic->i_mr =
1272 (struct rdsv3_hdrs_mr *)kmem_alloc(sizeof (struct rdsv3_hdrs_mr),
1273 KM_SLEEP);
1274 ic->i_mr->addr = addr;
1275 ic->i_mr->size = size;
1276 ic->i_mr->hdl = mr_hdl;
1277 ic->i_mr->lkey = mr_desc.md_lkey;
1278
1279 ic->i_send_hdrs = (struct rdsv3_header *)addr;
1280 ic->i_send_hdrs_dma = (uint64_t)(uintptr_t)addr;
1281
1282 ic->i_recv_hdrs = (struct rdsv3_header *)(addr +
1283 (ic->i_send_ring.w_nr * sizeof (struct rdsv3_header)));
1284 ic->i_recv_hdrs_dma = (uint64_t)(uintptr_t)(addr +
1285 (ic->i_send_ring.w_nr * sizeof (struct rdsv3_header)));
1286
1287 ic->i_ack = (struct rdsv3_header *)(addr +
1288 ((ic->i_send_ring.w_nr + ic->i_recv_ring.w_nr) *
1289 sizeof (struct rdsv3_header)));
1290 ic->i_ack_dma = (uint64_t)(uintptr_t)(addr +
1291 ((ic->i_send_ring.w_nr + ic->i_recv_ring.w_nr) *
1292 sizeof (struct rdsv3_header)));
1293
1294 RDSV3_DPRINTF4("rdsv3_ib_alloc_hdrs", "Return(dev: %p)", dev);
1295
1296 return (0);
1297 }
1298
1299 void
1300 rdsv3_ib_free_hdrs(ib_device_t *dev, struct rdsv3_ib_connection *ic)
1301 {
1302 RDSV3_DPRINTF4("rdsv3_ib_free_hdrs", "Enter(dev: %p)", dev);
1303 ASSERT(ic->i_mr != NULL);
1304
1305 ic->i_send_hdrs = NULL;
1306 ic->i_send_hdrs_dma = 0;
1307
1308 ic->i_recv_hdrs = NULL;
1309 ic->i_recv_hdrs_dma = 0;
1310
1311 ic->i_ack = NULL;
1312 ic->i_ack_dma = 0;
1313
1314 (void) ibt_deregister_mr(ib_get_ibt_hca_hdl(dev), ic->i_mr->hdl);
1315
1316 kmem_free(ic->i_mr->addr, ic->i_mr->size);
1317 kmem_free(ic->i_mr, sizeof (struct rdsv3_hdrs_mr));
1318
1319 ic->i_mr = NULL;
1320 RDSV3_DPRINTF4("rdsv3_ib_free_hdrs", "Return(dev: %p)", dev);
1321 }
1322
1323 /*
1324 * atomic_add_unless - add unless the number is a given value
1325 * @v: pointer of type atomic_t
1326 * @a: the amount to add to v...
1327 * @u: ...unless v is equal to u.
1328 *
1329 * Atomically adds @a to @v, so long as it was not @u.
1330 * Returns non-zero if @v was not @u, and zero otherwise.
1331 */
1332 int
1333 atomic_add_unless(atomic_t *v, uint_t a, ulong_t u)
1334 {
1335 uint_t c, old;
1336
1337 c = *v;
1338 while (c != u && (old = atomic_cas_uint(v, c, c + a)) != c) {
1339 c = old;
1340 }
1341 return ((ulong_t)c != u);
1342 }
1343