xref: /titanic_52/usr/src/uts/common/io/ib/clients/rdsv3/rdsv3_impl.c (revision ccdeb6b6d71f3c9aa7e78b688f7b34fff109a817)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
23  */
24 #include <sys/types.h>
25 #include <sys/stream.h>
26 #include <sys/dlpi.h>
27 #include <sys/stropts.h>
28 #include <sys/strsun.h>
29 #include <sys/sysmacros.h>
30 #include <sys/strlog.h>
31 #include <sys/ddi.h>
32 #include <sys/cmn_err.h>
33 #include <sys/socket.h>
34 #include <net/if.h>
35 #include <net/if_types.h>
36 #include <netinet/in.h>
37 #include <sys/ethernet.h>
38 #include <inet/arp.h>
39 #include <inet/ip.h>
40 #include <inet/ip6.h>
41 #include <inet/ip_ire.h>
42 #include <inet/ip_if.h>
43 #include <inet/ip_ftable.h>
44 
45 #include <sys/sunddi.h>
46 #include <sys/ksynch.h>
47 
48 #include <sys/rds.h>
49 #include <sys/socket.h>
50 #include <sys/socketvar.h>
51 #include <sys/sockio.h>
52 #include <sys/sysmacros.h>
53 #include <inet/common.h>
54 #include <inet/ip.h>
55 #include <net/if_types.h>
56 
57 #include <sys/ib/clients/rdsv3/rdsv3.h>
58 #include <sys/ib/clients/rdsv3/rdma.h>
59 #include <sys/ib/clients/rdsv3/ib.h>
60 #include <sys/ib/clients/rdsv3/rdsv3_impl.h>
61 #include <sys/ib/clients/rdsv3/rdsv3_debug.h>
62 
63 #include <sys/dls.h>
64 #include <sys/mac.h>
65 #include <sys/mac_client.h>
66 #include <sys/mac_provider.h>
67 #include <sys/mac_client_priv.h>
68 
69 ddi_taskq_t		*rdsv3_taskq = NULL;
70 extern kmem_cache_t	*rdsv3_alloc_cache;
71 
72 extern unsigned int 	ip_ocsum(ushort_t *address, int halfword_count,
73     unsigned int sum);
74 
75 /*
76  * Check if the IP interface named by `lifrp' is RDS-capable.
77  */
78 boolean_t
79 rdsv3_capable_interface(struct lifreq *lifrp)
80 {
81 	char	ifname[LIFNAMSIZ];
82 	char	drv[MAXLINKNAMELEN];
83 	uint_t	ppa;
84 	char 	*cp;
85 
86 	RDSV3_DPRINTF4("rdsv3_capable_interface", "Enter");
87 
88 	if (lifrp->lifr_type == IFT_IB)
89 		return (B_TRUE);
90 
91 	/*
92 	 * Strip off the logical interface portion before getting
93 	 * intimate with the name.
94 	 */
95 	(void) strlcpy(ifname, lifrp->lifr_name, LIFNAMSIZ);
96 	if ((cp = strchr(ifname, ':')) != NULL)
97 		*cp = '\0';
98 
99 	if (strcmp("lo0", ifname) == 0) {
100 		/*
101 		 * loopback is considered RDS-capable
102 		 */
103 		return (B_TRUE);
104 	}
105 
106 	return (
107 	    ddi_parse_dlen(ifname, drv, MAXLINKNAMELEN, &ppa) == DDI_SUCCESS &&
108 	    rdsv3_if_lookup_by_name(drv));
109 }
110 
111 int
112 rdsv3_do_ip_ioctl(ksocket_t so4, void **ipaddrs, int *size, int *nifs)
113 {
114 	struct lifnum		lifn;
115 	struct lifconf		lifc;
116 	struct lifreq		*lp, *rlp, lifr;
117 	int			rval = 0;
118 	int			numifs;
119 	int			bufsize, rbufsize;
120 	void			*buf, *rbuf;
121 	int			i, j, n, rc;
122 
123 	*ipaddrs = NULL;
124 	*size = 0;
125 	*nifs = 0;
126 
127 	RDSV3_DPRINTF4("rdsv3_do_ip_ioctl", "Enter");
128 
129 retry_count:
130 	/* snapshot the current number of interfaces */
131 	lifn.lifn_family = PF_UNSPEC;
132 	lifn.lifn_flags = LIFC_NOXMIT | LIFC_TEMPORARY | LIFC_ALLZONES;
133 	lifn.lifn_count = 0;
134 	rval = ksocket_ioctl(so4, SIOCGLIFNUM, (intptr_t)&lifn, &rval,
135 	    CRED());
136 	if (rval != 0) {
137 		RDSV3_DPRINTF2("rdsv3_do_ip_ioctl",
138 		    "ksocket_ioctl returned: %d", rval);
139 		return (rval);
140 	}
141 
142 	numifs = lifn.lifn_count;
143 	if (numifs <= 0) {
144 		RDSV3_DPRINTF2("rdsv3_do_ip_ioctl", "No interfaces found");
145 		return (0);
146 	}
147 
148 	/* allocate extra room in case more interfaces appear */
149 	numifs += 10;
150 
151 	/* get the interface names and ip addresses */
152 	bufsize = numifs * sizeof (struct lifreq);
153 	buf = kmem_alloc(bufsize, KM_SLEEP);
154 
155 	lifc.lifc_family = AF_UNSPEC;
156 	lifc.lifc_flags = LIFC_NOXMIT | LIFC_TEMPORARY | LIFC_ALLZONES;
157 	lifc.lifc_len = bufsize;
158 	lifc.lifc_buf = buf;
159 	rc = ksocket_ioctl(so4, SIOCGLIFCONF, (intptr_t)&lifc, &rval, CRED());
160 	if (rc != 0) {
161 		RDSV3_DPRINTF2("rdsv3_do_ip_ioctl", "SIOCGLIFCONF failed");
162 		kmem_free(buf, bufsize);
163 		return (rc);
164 	}
165 	/* if our extra room is used up, try again */
166 	if (bufsize <= lifc.lifc_len) {
167 		kmem_free(buf, bufsize);
168 		buf = NULL;
169 		goto retry_count;
170 	}
171 	/* calc actual number of ifconfs */
172 	n = lifc.lifc_len / sizeof (struct lifreq);
173 
174 	/*
175 	 * Count the RDS interfaces
176 	 */
177 	for (i = 0, j = 0, lp = lifc.lifc_req; i < n; i++, lp++) {
178 
179 		/*
180 		 * Copy as the SIOCGLIFFLAGS ioctl is destructive
181 		 */
182 		bcopy(lp, &lifr, sizeof (struct lifreq));
183 		/*
184 		 * fetch the flags using the socket of the correct family
185 		 */
186 		switch (lifr.lifr_addr.ss_family) {
187 		case AF_INET:
188 			rc = ksocket_ioctl(so4, SIOCGLIFFLAGS, (intptr_t)&lifr,
189 			    &rval, CRED());
190 			break;
191 		default:
192 			continue;
193 		}
194 
195 		if (rc != 0) continue;
196 
197 		/*
198 		 * If we got the flags, skip uninteresting
199 		 * interfaces based on flags
200 		 */
201 		if ((lifr.lifr_flags & IFF_UP) != IFF_UP)
202 			continue;
203 		if (lifr.lifr_flags &
204 		    (IFF_ANYCAST|IFF_NOLOCAL|IFF_DEPRECATED))
205 			continue;
206 		if (!rdsv3_capable_interface(&lifr))
207 			continue;
208 		j++;
209 	}
210 
211 	if (j <= 0) {
212 		RDSV3_DPRINTF2("rdsv3_do_ip_ioctl", "No RDS interfaces");
213 		kmem_free(buf, bufsize);
214 		return (rval);
215 	}
216 
217 	numifs = j;
218 
219 	/* This is the buffer we pass back */
220 	rbufsize = numifs * sizeof (struct lifreq);
221 	rbuf = kmem_alloc(rbufsize, KM_SLEEP);
222 	rlp = (struct lifreq *)rbuf;
223 
224 	/*
225 	 * Examine the array of interfaces and filter uninteresting ones
226 	 */
227 	for (i = 0, lp = lifc.lifc_req; i < n; i++, lp++) {
228 
229 		/*
230 		 * Copy the address as the SIOCGLIFFLAGS ioctl is destructive
231 		 */
232 		bcopy(lp, &lifr, sizeof (struct lifreq));
233 		/*
234 		 * fetch the flags using the socket of the correct family
235 		 */
236 		switch (lifr.lifr_addr.ss_family) {
237 		case AF_INET:
238 			rc = ksocket_ioctl(so4, SIOCGLIFFLAGS, (intptr_t)&lifr,
239 			    &rval, CRED());
240 			break;
241 		default:
242 			continue;
243 		}
244 
245 
246 		if (rc != 0) {
247 			RDSV3_DPRINTF2("rdsv3_do_ip_ioctl",
248 			    "ksocket_ioctl failed" " for %s", lifr.lifr_name);
249 			continue;
250 		}
251 
252 		/*
253 		 * If we got the flags, skip uninteresting
254 		 * interfaces based on flags
255 		 */
256 		if ((lifr.lifr_flags & IFF_UP) != IFF_UP)
257 			continue;
258 		if (lifr.lifr_flags &
259 		    (IFF_ANYCAST|IFF_NOLOCAL|IFF_DEPRECATED))
260 			continue;
261 		if (!rdsv3_capable_interface(&lifr))
262 			continue;
263 
264 		/* save the record */
265 		bcopy(lp, rlp, sizeof (struct lifreq));
266 		rlp->lifr_addr.ss_family = AF_INET_OFFLOAD;
267 		rlp++;
268 	}
269 
270 	kmem_free(buf, bufsize);
271 
272 	*ipaddrs = rbuf;
273 	*size = rbufsize;
274 	*nifs = numifs;
275 
276 	RDSV3_DPRINTF4("rdsv3_do_ip_ioctl", "Return");
277 
278 	return (rval);
279 }
280 
281 /*
282  * Check if the IP interface named by `ifrp' is RDS-capable.
283  */
284 boolean_t
285 rdsv3_capable_interface_old(struct ifreq *ifrp)
286 {
287 	char	ifname[IFNAMSIZ];
288 	char	drv[MAXLINKNAMELEN];
289 	uint_t	ppa;
290 	char 	*cp;
291 
292 	RDSV3_DPRINTF4("rdsv3_capable_interface_old", "Enter");
293 
294 	/*
295 	 * Strip off the logical interface portion before getting
296 	 * intimate with the name.
297 	 */
298 	(void) strlcpy(ifname, ifrp->ifr_name, IFNAMSIZ);
299 	if ((cp = strchr(ifname, ':')) != NULL)
300 		*cp = '\0';
301 
302 	RDSV3_DPRINTF4("rdsv3_capable_interface_old", "ifname: %s", ifname);
303 
304 	if ((strcmp("lo0", ifname) == 0) ||
305 	    (strncmp("ibd", ifname, 3) == 0)) {
306 		/*
307 		 * loopback and IB are considered RDS-capable
308 		 */
309 		return (B_TRUE);
310 	}
311 
312 	return (
313 	    ddi_parse_dlen(ifname, drv, MAXLINKNAMELEN, &ppa) == DDI_SUCCESS &&
314 	    rdsv3_if_lookup_by_name(drv));
315 }
316 
317 int
318 rdsv3_do_ip_ioctl_old(ksocket_t so4, void **ipaddrs, int *size, int *nifs)
319 {
320 	uint_t			ifn;
321 	struct ifconf		ifc;
322 	struct ifreq		*lp, *rlp, ifr;
323 	int			rval = 0;
324 	int			numifs;
325 	int			bufsize, rbufsize;
326 	void			*buf, *rbuf;
327 	int			i, j, n, rc;
328 
329 	*ipaddrs = NULL;
330 	*size = 0;
331 	*nifs = 0;
332 
333 	RDSV3_DPRINTF4("rdsv3_do_ip_ioctl_old", "Enter");
334 
335 retry_count:
336 	rval = ksocket_ioctl(so4, SIOCGIFNUM, (intptr_t)&ifn, &rval,
337 	    CRED());
338 	if (rval != 0) {
339 		RDSV3_DPRINTF2("rdsv3_do_ip_ioctl_old",
340 		    "ksocket_ioctl(SIOCGIFNUM) returned: %d", rval);
341 		return (rval);
342 	}
343 
344 	numifs = ifn;
345 	if (numifs <= 0) {
346 		RDSV3_DPRINTF2("rdsv3_do_ip_ioctl_old", "No interfaces found");
347 		return (0);
348 	}
349 
350 	/* allocate extra room in case more interfaces appear */
351 	numifs += 10;
352 
353 	/* get the interface names and ip addresses */
354 	bufsize = numifs * sizeof (struct ifreq);
355 	buf = kmem_alloc(bufsize, KM_SLEEP);
356 
357 	ifc.ifc_len = bufsize;
358 	ifc.ifc_buf = buf;
359 	rc = ksocket_ioctl(so4, SIOCGIFCONF, (intptr_t)&ifc, &rval, CRED());
360 	if (rc != 0) {
361 		RDSV3_DPRINTF2("rdsv3_do_ip_ioctl_old",
362 		    "SIOCGLIFCONF failed: %d", rc);
363 		kmem_free(buf, bufsize);
364 		return (rc);
365 	}
366 	/* if our extra room is used up, try again */
367 	if (bufsize <= ifc.ifc_len) {
368 		kmem_free(buf, bufsize);
369 		buf = NULL;
370 		goto retry_count;
371 	}
372 	/* calc actual number of ifconfs */
373 	n = ifc.ifc_len / sizeof (struct ifreq);
374 
375 	/*
376 	 * Count the RDS interfaces
377 	 */
378 	for (i = 0, j = 0, lp = ifc.ifc_req; i < n; i++, lp++) {
379 
380 		/*
381 		 * Copy as the SIOCGIFFLAGS ioctl is destructive
382 		 */
383 		bcopy(lp, &ifr, sizeof (struct ifreq));
384 		/*
385 		 * fetch the flags using the socket of the correct family
386 		 */
387 		switch (ifr.ifr_addr.sa_family) {
388 		case AF_INET:
389 			rc = ksocket_ioctl(so4, SIOCGIFFLAGS, (intptr_t)&ifr,
390 			    &rval, CRED());
391 			break;
392 		default:
393 			continue;
394 		}
395 
396 		if (rc != 0) continue;
397 
398 		RDSV3_DPRINTF2("rdsv3_do_ip_ioctl_old",
399 		    "1. ifr_name: %s, flags: %d", ifr.ifr_name,
400 		    (ushort_t)ifr.ifr_flags);
401 
402 		/*
403 		 * If we got the flags, skip uninteresting
404 		 * interfaces based on flags
405 		 */
406 		if ((((ushort_t)ifr.ifr_flags) & IFF_UP) != IFF_UP)
407 			continue;
408 		RDSV3_DPRINTF2("rdsv3_do_ip_ioctl_old",
409 		    "2. ifr_name: %s, flags: %d", ifr.ifr_name,
410 		    (ushort_t)ifr.ifr_flags);
411 		if (((ushort_t)ifr.ifr_flags) &
412 		    (IFF_ANYCAST|IFF_NOLOCAL|IFF_DEPRECATED))
413 			continue;
414 		RDSV3_DPRINTF2("rdsv3_do_ip_ioctl_old",
415 		    "3. ifr_name: %s, flags: %d", ifr.ifr_name,
416 		    (ushort_t)ifr.ifr_flags);
417 		if (!rdsv3_capable_interface_old(&ifr))
418 			continue;
419 		RDSV3_DPRINTF2("rdsv3_do_ip_ioctl_old",
420 		    "4. ifr_name: %s, flags: %d", ifr.ifr_name,
421 		    (ushort_t)ifr.ifr_flags);
422 		j++;
423 	}
424 
425 	if (j <= 0) {
426 		RDSV3_DPRINTF2("rdsv3_do_ip_ioctl_old", "No RDS interfaces");
427 		kmem_free(buf, bufsize);
428 		return (rval);
429 	}
430 
431 	numifs = j;
432 
433 	/* This is the buffer we pass back */
434 	rbufsize = numifs * sizeof (struct ifreq);
435 	rbuf = kmem_alloc(rbufsize, KM_SLEEP);
436 	rlp = (struct ifreq *)rbuf;
437 
438 	/*
439 	 * Examine the array of interfaces and filter uninteresting ones
440 	 */
441 	for (i = 0, lp = ifc.ifc_req; i < n; i++, lp++) {
442 
443 		/*
444 		 * Copy the address as the SIOCGIFFLAGS ioctl is destructive
445 		 */
446 		bcopy(lp, &ifr, sizeof (struct ifreq));
447 		/*
448 		 * fetch the flags using the socket of the correct family
449 		 */
450 		switch (ifr.ifr_addr.sa_family) {
451 		case AF_INET:
452 			rc = ksocket_ioctl(so4, SIOCGIFFLAGS, (intptr_t)&ifr,
453 			    &rval, CRED());
454 			break;
455 		default:
456 			continue;
457 		}
458 
459 
460 		if (rc != 0) {
461 			RDSV3_DPRINTF2("rdsv3_do_ip_ioctl_old",
462 			    "ksocket_ioctl failed: %d for %s",
463 			    rc, ifr.ifr_name);
464 			continue;
465 		}
466 
467 		/*
468 		 * If we got the flags, skip uninteresting
469 		 * interfaces based on flags
470 		 */
471 		if ((((ushort_t)ifr.ifr_flags) & IFF_UP) != IFF_UP)
472 			continue;
473 		if (((ushort_t)ifr.ifr_flags) &
474 		    (IFF_ANYCAST|IFF_NOLOCAL|IFF_DEPRECATED))
475 			continue;
476 		if (!rdsv3_capable_interface_old(&ifr))
477 			continue;
478 
479 		/* save the record */
480 		bcopy(lp, rlp, sizeof (struct ifreq));
481 		rlp->ifr_addr.sa_family = AF_INET_OFFLOAD;
482 		rlp++;
483 	}
484 
485 	kmem_free(buf, bufsize);
486 
487 	*ipaddrs = rbuf;
488 	*size = rbufsize;
489 	*nifs = numifs;
490 
491 	RDSV3_DPRINTF4("rdsv3_do_ip_ioctl_old", "Return");
492 
493 	return (rval);
494 }
495 
496 boolean_t
497 rdsv3_isloopback(ipaddr_t addr)
498 {
499 	ip_stack_t *ipst;
500 
501 	ipst = netstack_find_by_zoneid(GLOBAL_ZONEID)->netstack_ip;
502 	ASSERT(ipst != NULL);
503 	if (ip_type_v4(addr, ipst) != IRE_LOOPBACK) {
504 		netstack_rele(ipst->ips_netstack);
505 		return (B_FALSE);
506 	}
507 	netstack_rele(ipst->ips_netstack);
508 	return (B_TRUE);
509 }
510 
511 /*
512  * Work Queue Implementation
513  */
514 
515 #define	RDSV3_WQ_THREAD_IDLE		0
516 #define	RDSV3_WQ_THREAD_RUNNING		1
517 #define	RDSV3_WQ_THREAD_FLUSHING	2
518 #define	RDSV3_WQ_THREAD_EXITING		3
519 
520 /* worker thread */
521 void
522 rdsv3_worker_thread(void *arg)
523 {
524 	rdsv3_workqueue_struct_t *wq = arg;
525 	rdsv3_work_t *work;
526 
527 	RDSV3_DPRINTF4("rdsv3_worker_thread", "Enter(wq: 0x%p)", wq);
528 
529 	mutex_enter(&wq->wq_lock);
530 	work = list_remove_head(&wq->wq_queue);
531 	while (work) {
532 		mutex_exit(&wq->wq_lock);
533 
534 		/* process work */
535 		work->func(work);
536 
537 		mutex_enter(&wq->wq_lock);
538 		work = list_remove_head(&wq->wq_queue);
539 	}
540 
541 	/* No more work, go home, until called again */
542 	if (wq->wq_state != RDSV3_WQ_THREAD_EXITING) {
543 		wq->wq_state = RDSV3_WQ_THREAD_IDLE;
544 	}
545 	mutex_exit(&wq->wq_lock);
546 
547 	RDSV3_DPRINTF4("rdsv3_worker_thread", "Return(wq: 0x%p)", wq);
548 }
549 
550 /* XXX */
551 void
552 rdsv3_flush_workqueue(rdsv3_workqueue_struct_t *wq)
553 {
554 	RDSV3_DPRINTF4("rdsv3_flush_workqueue", "Enter(wq: %p)", wq);
555 
556 	mutex_enter(&wq->wq_lock);
557 	switch (wq->wq_state) {
558 	case RDSV3_WQ_THREAD_IDLE:
559 		/* nothing to do */
560 		ASSERT(list_is_empty(&wq->wq_queue));
561 		break;
562 
563 	case RDSV3_WQ_THREAD_RUNNING:
564 		wq->wq_state = RDSV3_WQ_THREAD_FLUSHING;
565 		/* FALLTHRU */
566 	case RDSV3_WQ_THREAD_FLUSHING:
567 		/* already flushing, wait until the flushing is complete */
568 		do {
569 			mutex_exit(&wq->wq_lock);
570 			delay(drv_usectohz(1000000));
571 			mutex_enter(&wq->wq_lock);
572 		} while (wq->wq_state == RDSV3_WQ_THREAD_FLUSHING);
573 		break;
574 	case RDSV3_WQ_THREAD_EXITING:
575 		mutex_exit(&wq->wq_lock);
576 		rdsv3_worker_thread(wq);
577 		return;
578 	}
579 	mutex_exit(&wq->wq_lock);
580 
581 	RDSV3_DPRINTF4("rdsv3_flush_workqueue", "Return(wq: %p)", wq);
582 }
583 
584 void
585 rdsv3_queue_work(rdsv3_workqueue_struct_t *wq, rdsv3_work_t *wp)
586 {
587 	RDSV3_DPRINTF4("rdsv3_queue_work", "Enter(wq: %p, wp: %p)", wq, wp);
588 
589 	mutex_enter(&wq->wq_lock);
590 
591 	if (list_link_active(&wp->work_item)) {
592 		/* This is already in the queue, ignore this call */
593 		mutex_exit(&wq->wq_lock);
594 		RDSV3_DPRINTF3("rdsv3_queue_work", "already queued: %p", wp);
595 		return;
596 	}
597 
598 	switch (wq->wq_state) {
599 	case RDSV3_WQ_THREAD_RUNNING:
600 		list_insert_tail(&wq->wq_queue, wp);
601 		mutex_exit(&wq->wq_lock);
602 		break;
603 
604 	case RDSV3_WQ_THREAD_FLUSHING:
605 		do {
606 			mutex_exit(&wq->wq_lock);
607 			delay(drv_usectohz(1000000));
608 			mutex_enter(&wq->wq_lock);
609 		} while (wq->wq_state == RDSV3_WQ_THREAD_FLUSHING);
610 
611 		if (wq->wq_state == RDSV3_WQ_THREAD_RUNNING) {
612 			list_insert_tail(&wq->wq_queue, wp);
613 			mutex_exit(&wq->wq_lock);
614 			break;
615 		}
616 		/* FALLTHRU */
617 
618 	case RDSV3_WQ_THREAD_IDLE:
619 		list_insert_tail(&wq->wq_queue, wp);
620 		wq->wq_state = RDSV3_WQ_THREAD_RUNNING;
621 		mutex_exit(&wq->wq_lock);
622 
623 		(void) ddi_taskq_dispatch(rdsv3_taskq, rdsv3_worker_thread, wq,
624 		    DDI_SLEEP);
625 		break;
626 
627 	case RDSV3_WQ_THREAD_EXITING:
628 		mutex_exit(&wq->wq_lock);
629 		break;
630 	}
631 
632 	RDSV3_DPRINTF4("rdsv3_queue_work", "Return(wq: %p, wp: %p)", wq, wp);
633 }
634 
635 /* timeout handler for delayed work queuing */
636 void
637 rdsv3_work_timeout_handler(void *arg)
638 {
639 	rdsv3_delayed_work_t *dwp = (rdsv3_delayed_work_t *)arg;
640 
641 	RDSV3_DPRINTF4("rdsv3_work_timeout_handler",
642 	    "Enter(wq: %p, wp: %p)", dwp->wq, &dwp->work);
643 
644 	mutex_enter(&dwp->lock);
645 	dwp->timeid = 0;
646 	mutex_exit(&dwp->lock);
647 
648 	mutex_enter(&dwp->wq->wq_lock);
649 	dwp->wq->wq_pending--;
650 	if (dwp->wq->wq_state == RDSV3_WQ_THREAD_EXITING) {
651 		mutex_exit(&dwp->wq->wq_lock);
652 		return;
653 	}
654 	mutex_exit(&dwp->wq->wq_lock);
655 
656 	rdsv3_queue_work(dwp->wq, &dwp->work);
657 
658 	RDSV3_DPRINTF4("rdsv3_work_timeout_handler",
659 	    "Return(wq: %p, wp: %p)", dwp->wq, &dwp->work);
660 }
661 
662 void
663 rdsv3_queue_delayed_work(rdsv3_workqueue_struct_t *wq,
664     rdsv3_delayed_work_t *dwp, uint_t delay)
665 {
666 	RDSV3_DPRINTF4("rdsv3_queue_delayed_work",
667 	    "Enter(wq: %p, wp: %p)", wq, dwp);
668 
669 	if (delay == 0) {
670 		rdsv3_queue_work(wq, &dwp->work);
671 		return;
672 	}
673 
674 	mutex_enter(&wq->wq_lock);
675 	if (wq->wq_state == RDSV3_WQ_THREAD_EXITING) {
676 		mutex_exit(&wq->wq_lock);
677 		RDSV3_DPRINTF4("rdsv3_queue_delayed_work",
678 		    "WQ exiting - don't queue (wq: %p, wp: %p)", wq, dwp);
679 		return;
680 	}
681 	wq->wq_pending++;
682 	mutex_exit(&wq->wq_lock);
683 
684 	mutex_enter(&dwp->lock);
685 	if (dwp->timeid == 0) {
686 		dwp->wq = wq;
687 		dwp->timeid = timeout(rdsv3_work_timeout_handler, dwp,
688 		    jiffies + (delay * rdsv3_one_sec_in_hz));
689 		mutex_exit(&dwp->lock);
690 	} else {
691 		mutex_exit(&dwp->lock);
692 		RDSV3_DPRINTF4("rdsv3_queue_delayed_work", "Already queued: %p",
693 		    dwp);
694 		mutex_enter(&wq->wq_lock);
695 		wq->wq_pending--;
696 		mutex_exit(&wq->wq_lock);
697 	}
698 
699 	RDSV3_DPRINTF4("rdsv3_queue_delayed_work",
700 	    "Return(wq: %p, wp: %p)", wq, dwp);
701 }
702 
703 void
704 rdsv3_cancel_delayed_work(rdsv3_delayed_work_t *dwp)
705 {
706 	RDSV3_DPRINTF4("rdsv3_cancel_delayed_work",
707 	    "Enter(wq: %p, dwp: %p)", dwp->wq, dwp);
708 
709 	mutex_enter(&dwp->lock);
710 	if (dwp->timeid != 0) {
711 		(void) untimeout(dwp->timeid);
712 		dwp->timeid = 0;
713 	} else {
714 		RDSV3_DPRINTF4("rdsv3_cancel_delayed_work",
715 		    "Nothing to cancel (wq: %p, dwp: %p)", dwp->wq, dwp);
716 		mutex_exit(&dwp->lock);
717 		return;
718 	}
719 	mutex_exit(&dwp->lock);
720 
721 	mutex_enter(&dwp->wq->wq_lock);
722 	dwp->wq->wq_pending--;
723 	mutex_exit(&dwp->wq->wq_lock);
724 
725 	RDSV3_DPRINTF4("rdsv3_cancel_delayed_work",
726 	    "Return(wq: %p, dwp: %p)", dwp->wq, dwp);
727 }
728 
729 void
730 rdsv3_destroy_task_workqueue(rdsv3_workqueue_struct_t *wq)
731 {
732 	RDSV3_DPRINTF2("rdsv3_destroy_workqueue", "Enter");
733 
734 	ASSERT(wq);
735 
736 	mutex_enter(&wq->wq_lock);
737 	wq->wq_state = RDSV3_WQ_THREAD_EXITING;
738 
739 	while (wq->wq_pending > 0) {
740 		mutex_exit(&wq->wq_lock);
741 		delay(drv_usectohz(1000000));
742 		mutex_enter(&wq->wq_lock);
743 	};
744 	mutex_exit(&wq->wq_lock);
745 
746 	rdsv3_flush_workqueue(wq);
747 
748 	list_destroy(&wq->wq_queue);
749 	mutex_destroy(&wq->wq_lock);
750 	kmem_free(wq, sizeof (rdsv3_workqueue_struct_t));
751 
752 	ASSERT(rdsv3_taskq);
753 	ddi_taskq_destroy(rdsv3_taskq);
754 
755 	wq = NULL;
756 	rdsv3_taskq = NULL;
757 
758 	RDSV3_DPRINTF2("rdsv3_destroy_workqueue", "Return");
759 }
760 
/*
 * Adapter with the signature of a work item handler that simply calls
 * rdsv3_rdma_init(); the `work' argument is not used.
 */
/* ARGSUSED */
void
rdsv3_rdma_init_worker(struct rdsv3_work_s *work)
{
	rdsv3_rdma_init();
}
767 
768 #define	RDSV3_NUM_TASKQ_THREADS	1
769 rdsv3_workqueue_struct_t *
770 rdsv3_create_task_workqueue(char *name)
771 {
772 	rdsv3_workqueue_struct_t	*wq;
773 
774 	RDSV3_DPRINTF2("create_singlethread_workqueue", "Enter (dip: %p)",
775 	    rdsv3_dev_info);
776 
777 	rdsv3_taskq = ddi_taskq_create(rdsv3_dev_info, name,
778 	    RDSV3_NUM_TASKQ_THREADS, TASKQ_DEFAULTPRI, 0);
779 	if (rdsv3_taskq == NULL) {
780 		RDSV3_DPRINTF2(__FILE__,
781 		    "ddi_taskq_create failed for rdsv3_taskq");
782 		return (NULL);
783 	}
784 
785 	wq = kmem_zalloc(sizeof (rdsv3_workqueue_struct_t), KM_NOSLEEP);
786 	if (wq == NULL) {
787 		RDSV3_DPRINTF2(__FILE__, "kmem_zalloc failed for wq");
788 		ddi_taskq_destroy(rdsv3_taskq);
789 		return (NULL);
790 	}
791 
792 	list_create(&wq->wq_queue, sizeof (struct rdsv3_work_s),
793 	    offsetof(struct rdsv3_work_s, work_item));
794 	mutex_init(&wq->wq_lock, NULL, MUTEX_DRIVER, NULL);
795 	wq->wq_state = RDSV3_WQ_THREAD_IDLE;
796 	wq->wq_pending = 0;
797 	rdsv3_one_sec_in_hz = drv_usectohz(1000000);
798 
799 	RDSV3_DPRINTF2("create_singlethread_workqueue", "Return");
800 
801 	return (wq);
802 }
803 
804 /*
805  * Implementation for struct sock
806  */
807 
808 void
809 rdsv3_sock_exit_data(struct rsock *sk)
810 {
811 	struct rdsv3_sock *rs = sk->sk_protinfo;
812 
813 	RDSV3_DPRINTF4("rdsv3_sock_exit_data", "rs: %p sk: %p", rs, sk);
814 
815 	ASSERT(rs != NULL);
816 	ASSERT(rdsv3_sk_sock_flag(sk, SOCK_DEAD));
817 
818 	rs->rs_sk = NULL;
819 
820 	list_destroy(&rs->rs_send_queue);
821 	list_destroy(&rs->rs_notify_queue);
822 	list_destroy(&rs->rs_recv_queue);
823 
824 	rw_destroy(&rs->rs_recv_lock);
825 	mutex_destroy(&rs->rs_lock);
826 
827 	mutex_destroy(&rs->rs_rdma_lock);
828 	avl_destroy(&rs->rs_rdma_keys);
829 
830 	mutex_destroy(&rs->rs_conn_lock);
831 	mutex_destroy(&rs->rs_congested_lock);
832 	cv_destroy(&rs->rs_congested_cv);
833 
834 	rdsv3_exit_waitqueue(sk->sk_sleep);
835 	kmem_free(sk->sk_sleep, sizeof (rdsv3_wait_queue_t));
836 	mutex_destroy(&sk->sk_lock);
837 
838 	kmem_cache_free(rdsv3_alloc_cache, sk);
839 	RDSV3_DPRINTF4("rdsv3_sock_exit_data", "rs: %p sk: %p", rs, sk);
840 }
841 
842 /* XXX - figure out right values */
843 #define	RDSV3_RECV_HIWATER	(256 * 1024)
844 #define	RDSV3_RECV_LOWATER	128
845 #define	RDSV3_XMIT_HIWATER	(256 * 1024)
846 #define	RDSV3_XMIT_LOWATER	1024
847 
848 struct rsock *
849 rdsv3_sk_alloc()
850 {
851 	struct rsock *sk;
852 
853 	sk = kmem_cache_alloc(rdsv3_alloc_cache, KM_SLEEP);
854 	if (sk == NULL) {
855 		RDSV3_DPRINTF2("rdsv3_create", "kmem_cache_alloc failed");
856 		return (NULL);
857 	}
858 
859 	bzero(sk, sizeof (struct rsock) + sizeof (struct rdsv3_sock));
860 	return (sk);
861 }
862 
863 void
864 rdsv3_sock_init_data(struct rsock *sk)
865 {
866 	sk->sk_sleep = kmem_zalloc(sizeof (rdsv3_wait_queue_t), KM_SLEEP);
867 	rdsv3_init_waitqueue(sk->sk_sleep);
868 
869 	mutex_init(&sk->sk_lock, NULL, MUTEX_DRIVER, NULL);
870 	sk->sk_refcount = 1;
871 	sk->sk_protinfo = (struct rdsv3_sock *)(sk + 1);
872 	sk->sk_sndbuf = RDSV3_XMIT_HIWATER;
873 	sk->sk_rcvbuf = RDSV3_RECV_HIWATER;
874 }
875 
876 /*
877  * Connection cache
878  */
879 /* ARGSUSED */
880 int
881 rdsv3_conn_constructor(void *buf, void *arg, int kmflags)
882 {
883 	struct rdsv3_connection *conn = buf;
884 
885 	bzero(conn, sizeof (struct rdsv3_connection));
886 
887 	conn->c_next_tx_seq = 1;
888 	mutex_init(&conn->c_lock, NULL, MUTEX_DRIVER, NULL);
889 	mutex_init(&conn->c_send_lock, NULL, MUTEX_DRIVER, NULL);
890 	conn->c_send_generation = 1;
891 	conn->c_senders = 0;
892 
893 	list_create(&conn->c_send_queue, sizeof (struct rdsv3_message),
894 	    offsetof(struct rdsv3_message, m_conn_item));
895 	list_create(&conn->c_retrans, sizeof (struct rdsv3_message),
896 	    offsetof(struct rdsv3_message, m_conn_item));
897 	return (0);
898 }
899 
900 /* ARGSUSED */
901 void
902 rdsv3_conn_destructor(void *buf, void *arg)
903 {
904 	struct rdsv3_connection *conn = buf;
905 
906 	ASSERT(list_is_empty(&conn->c_send_queue));
907 	ASSERT(list_is_empty(&conn->c_retrans));
908 	list_destroy(&conn->c_send_queue);
909 	list_destroy(&conn->c_retrans);
910 	mutex_destroy(&conn->c_send_lock);
911 	mutex_destroy(&conn->c_lock);
912 }
913 
914 int
915 rdsv3_conn_compare(const void *conn1, const void *conn2)
916 {
917 	uint32_be_t	laddr1, faddr1, laddr2, faddr2;
918 
919 	laddr1 = ((rdsv3_conn_info_t *)conn1)->c_laddr;
920 	laddr2 = ((struct rdsv3_connection *)conn2)->c_laddr;
921 
922 	if (laddr1 == laddr2) {
923 		faddr1 = ((rdsv3_conn_info_t *)conn1)->c_faddr;
924 		faddr2 = ((struct rdsv3_connection *)conn2)->c_faddr;
925 		if (faddr1 == faddr2)
926 			return (0);
927 		if (faddr1 < faddr2)
928 			return (-1);
929 		return (1);
930 	}
931 
932 	if (laddr1 < laddr2)
933 		return (-1);
934 
935 	return (1);
936 }
937 
938 /* rdsv3_ib_incoming cache */
939 /* ARGSUSED */
940 int
941 rdsv3_ib_inc_constructor(void *buf, void *arg, int kmflags)
942 {
943 	list_create(&((struct rdsv3_ib_incoming *)buf)->ii_frags,
944 	    sizeof (struct rdsv3_page_frag),
945 	    offsetof(struct rdsv3_page_frag, f_item));
946 
947 	return (0);
948 }
949 
950 /* ARGSUSED */
951 void
952 rdsv3_ib_inc_destructor(void *buf, void *arg)
953 {
954 	list_destroy(&((struct rdsv3_ib_incoming *)buf)->ii_frags);
955 }
956 
957 /* ib_frag_slab cache */
958 /* ARGSUSED */
959 int
960 rdsv3_ib_frag_constructor(void *buf, void *arg, int kmflags)
961 {
962 	struct rdsv3_page_frag *frag = (struct rdsv3_page_frag *)buf;
963 	struct rdsv3_ib_device *rds_ibdev = (struct rdsv3_ib_device *)arg;
964 	ibt_iov_attr_t iov_attr;
965 	ibt_iov_t iov_arr[1];
966 	ibt_all_wr_t wr;
967 
968 	bzero(frag, sizeof (struct rdsv3_page_frag));
969 	list_link_init(&frag->f_item);
970 
971 	frag->f_page = kmem_alloc(PAGE_SIZE, kmflags);
972 	if (frag->f_page == NULL) {
973 		RDSV3_DPRINTF2("rdsv3_ib_frag_constructor",
974 		    "kmem_alloc for %d failed", PAGE_SIZE);
975 		return (-1);
976 	}
977 	frag->f_offset = 0;
978 
979 	iov_attr.iov_as = NULL;
980 	iov_attr.iov = &iov_arr[0];
981 	iov_attr.iov_buf = NULL;
982 	iov_attr.iov_list_len = 1;
983 	iov_attr.iov_wr_nds = 1;
984 	iov_attr.iov_lso_hdr_sz = 0;
985 	iov_attr.iov_flags = IBT_IOV_SLEEP | IBT_IOV_RECV;
986 
987 	iov_arr[0].iov_addr = frag->f_page;
988 	iov_arr[0].iov_len = PAGE_SIZE;
989 
990 	wr.recv.wr_nds = 1;
991 	wr.recv.wr_sgl = &frag->f_sge;
992 
993 	if (ibt_map_mem_iov(ib_get_ibt_hca_hdl(rds_ibdev->dev),
994 	    &iov_attr, &wr, &frag->f_mapped) != IBT_SUCCESS) {
995 		RDSV3_DPRINTF2("rdsv3_ib_frag_constructor",
996 		    "ibt_map_mem_iov failed");
997 		kmem_free(frag->f_page, PAGE_SIZE);
998 		return (-1);
999 	}
1000 
1001 	return (0);
1002 }
1003 
1004 /* ARGSUSED */
1005 void
1006 rdsv3_ib_frag_destructor(void *buf, void *arg)
1007 {
1008 	struct rdsv3_page_frag *frag = (struct rdsv3_page_frag *)buf;
1009 	struct rdsv3_ib_device *rds_ibdev = (struct rdsv3_ib_device *)arg;
1010 
1011 	/* unmap the page */
1012 	if (ibt_unmap_mem_iov(ib_get_ibt_hca_hdl(rds_ibdev->dev),
1013 	    frag->f_mapped) != IBT_SUCCESS)
1014 		RDSV3_DPRINTF2("rdsv3_ib_frag_destructor",
1015 		    "ibt_unmap_mem_iov failed");
1016 
1017 	/* free the page */
1018 	kmem_free(frag->f_page, PAGE_SIZE);
1019 }
1020 
1021 /* loop.c */
1022 extern kmutex_t loop_conns_lock;
1023 extern list_t loop_conns;
1024 
1025 struct rdsv3_loop_connection
1026 {
1027 	struct list_node loop_node;
1028 	struct rdsv3_connection *conn;
1029 };
1030 
1031 void
1032 rdsv3_loop_init(void)
1033 {
1034 	list_create(&loop_conns, sizeof (struct rdsv3_loop_connection),
1035 	    offsetof(struct rdsv3_loop_connection, loop_node));
1036 	mutex_init(&loop_conns_lock, NULL, MUTEX_DRIVER, NULL);
1037 }
1038 
1039 /* rdma.c */
1040 /* IB Rkey is used here for comparison */
1041 int
1042 rdsv3_mr_compare(const void *mr1, const void *mr2)
1043 {
1044 	uint32_t key1 = *(uint32_t *)mr1;
1045 	uint32_t key2 = ((struct rdsv3_mr *)mr2)->r_key;
1046 
1047 	if (key1 < key2)
1048 		return (-1);
1049 	if (key1 > key2)
1050 		return (1);
1051 	return (0);
1052 }
1053 
1054 /* transport.c */
1055 extern struct rdsv3_transport *transports[];
1056 extern krwlock_t		trans_sem;
1057 
1058 void
1059 rdsv3_trans_exit(void)
1060 {
1061 	struct rdsv3_transport *trans;
1062 	int i;
1063 
1064 	RDSV3_DPRINTF2("rdsv3_trans_exit", "Enter");
1065 
1066 	/* currently, only IB transport */
1067 	rw_enter(&trans_sem, RW_READER);
1068 	trans = NULL;
1069 	for (i = 0; i < RDS_TRANS_COUNT; i++) {
1070 		if (transports[i]) {
1071 			trans = transports[i];
1072 			break;
1073 		}
1074 	}
1075 	rw_exit(&trans_sem);
1076 
1077 	/* trans->exit() will remove the trans from the list */
1078 	if (trans)
1079 		trans->exit();
1080 
1081 	rw_destroy(&trans_sem);
1082 
1083 	RDSV3_DPRINTF2("rdsv3_trans_exit", "Return");
1084 }
1085 
1086 void
1087 rdsv3_trans_init()
1088 {
1089 	RDSV3_DPRINTF2("rdsv3_trans_init", "Enter");
1090 
1091 	rw_init(&trans_sem, NULL, RW_DRIVER, NULL);
1092 
1093 	RDSV3_DPRINTF2("rdsv3_trans_init", "Return");
1094 }
1095 
1096 int
1097 rdsv3_put_cmsg(struct nmsghdr *msg, int level, int type, size_t size,
1098 	void *payload)
1099 {
1100 	struct cmsghdr *cp;
1101 	char *bp;
1102 	size_t cmlen;
1103 	size_t cmspace;
1104 	size_t bufsz;
1105 
1106 	RDSV3_DPRINTF4("rdsv3_put_cmsg",
1107 	    "Enter(msg: %p level: %d type: %d sz: %d)",
1108 	    msg, level, type, size);
1109 
1110 	if (msg == NULL || msg->msg_controllen == 0) {
1111 		return (0);
1112 	}
1113 	/* check for first cmsg or this is another cmsg to be appended */
1114 	if (msg->msg_control == NULL)
1115 		msg->msg_controllen = 0;
1116 
1117 	cmlen = CMSG_LEN(size);
1118 	cmspace = CMSG_SPACE(size);
1119 	bufsz = msg->msg_controllen + cmspace;
1120 
1121 	/* extend the existing cmsg to append the next cmsg */
1122 	bp = kmem_alloc(bufsz, KM_SLEEP);
1123 	if (msg->msg_control) {
1124 		bcopy(msg->msg_control, bp, msg->msg_controllen);
1125 		kmem_free(msg->msg_control, (size_t)msg->msg_controllen);
1126 	}
1127 
1128 	/* assign payload the proper cmsg location */
1129 	cp = (struct cmsghdr *)(bp + msg->msg_controllen);
1130 	cp->cmsg_len = cmlen;
1131 	cp->cmsg_level = level;
1132 	cp->cmsg_type = type;
1133 
1134 	bcopy(payload, CMSG_DATA(cp), cmlen -
1135 	    (unsigned int)_CMSG_DATA_ALIGN(sizeof (struct cmsghdr)));
1136 
1137 	msg->msg_control = bp;
1138 	msg->msg_controllen = bufsz;
1139 
1140 	RDSV3_DPRINTF4("rdsv3_put_cmsg", "Return(cmsg_len: %d)", cp->cmsg_len);
1141 
1142 	return (0);
1143 }
1144 
1145 /* ARGSUSED */
1146 int
1147 rdsv3_verify_bind_address(ipaddr_t addr)
1148 {
1149 	return (1);
1150 }
1151 
1152 /* checksum */
1153 uint16_t
1154 rdsv3_ip_fast_csum(void *hdr, size_t length)
1155 {
1156 	return (0xffff &
1157 	    (uint16_t)(~ip_ocsum((ushort_t *)hdr, (int)length <<1, 0)));
1158 }
1159 
1160 /* scatterlist implementation */
1161 /* ARGSUSED */
1162 caddr_t
1163 rdsv3_ib_sg_dma_address(ib_device_t *dev, struct rdsv3_scatterlist *scat,
1164     uint_t offset)
1165 {
1166 	return (0);
1167 }
1168 
1169 uint_t
1170 rdsv3_ib_dma_map_sg(struct ib_device *dev, struct rdsv3_scatterlist *scat,
1171     uint_t num)
1172 {
1173 	struct rdsv3_scatterlist *s, *first;
1174 	ibt_iov_t *iov;
1175 	ibt_wr_ds_t *sgl;
1176 	ibt_iov_attr_t iov_attr;
1177 	ibt_send_wr_t swr;
1178 	uint_t i;
1179 
1180 	RDSV3_DPRINTF4("rdsv3_ib_dma_map_sg", "scat %p, num: %d", scat, num);
1181 
1182 	s = first = &scat[0];
1183 	ASSERT(first->mihdl == NULL);
1184 
1185 	iov = kmem_alloc(num * sizeof (ibt_iov_t), KM_SLEEP);
1186 	sgl = kmem_zalloc((num * 2) *  sizeof (ibt_wr_ds_t), KM_SLEEP);
1187 
1188 	for (i = 0; i < num; i++, s++) {
1189 		iov[i].iov_addr = s->vaddr;
1190 		iov[i].iov_len = s->length;
1191 	}
1192 
1193 	iov_attr.iov_as = NULL;
1194 	iov_attr.iov = iov;
1195 	iov_attr.iov_buf = NULL;
1196 	iov_attr.iov_list_len = num;
1197 	iov_attr.iov_wr_nds = num * 2;
1198 	iov_attr.iov_lso_hdr_sz = 0;
1199 	iov_attr.iov_flags = IBT_IOV_SLEEP;
1200 
1201 	swr.wr_sgl = sgl;
1202 
1203 	i = ibt_map_mem_iov(ib_get_ibt_hca_hdl(dev),
1204 	    &iov_attr, (ibt_all_wr_t *)&swr, &first->mihdl);
1205 	kmem_free(iov, num * sizeof (ibt_iov_t));
1206 	if (i != IBT_SUCCESS) {
1207 		RDSV3_DPRINTF2("rdsv3_ib_dma_map_sg",
1208 		    "ibt_map_mem_iov returned: %d", i);
1209 		return (0);
1210 	}
1211 
1212 	s = first;
1213 	for (i = 0; i < num; i++, s++, sgl++) {
1214 		s->sgl = sgl;
1215 	}
1216 
1217 	return (num);
1218 }
1219 
1220 void
1221 rdsv3_ib_dma_unmap_sg(ib_device_t *dev, struct rdsv3_scatterlist *scat,
1222     uint_t num)
1223 {
1224 	/* Zero length messages have no scatter gather entries */
1225 	if (num != 0) {
1226 		ASSERT(scat->mihdl != NULL);
1227 		ASSERT(scat->sgl != NULL);
1228 
1229 		(void) ibt_unmap_mem_iov(ib_get_ibt_hca_hdl(dev), scat->mihdl);
1230 
1231 		kmem_free(scat->sgl, (num * 2)  * sizeof (ibt_wr_ds_t));
1232 		scat->sgl = NULL;
1233 		scat->mihdl = NULL;
1234 	}
1235 }
1236 
1237 int
1238 rdsv3_ib_alloc_hdrs(ib_device_t *dev, struct rdsv3_ib_connection *ic)
1239 {
1240 	caddr_t addr;
1241 	size_t size;
1242 	ibt_mr_attr_t mr_attr;
1243 	ibt_mr_desc_t mr_desc;
1244 	ibt_mr_hdl_t mr_hdl;
1245 	int ret;
1246 
1247 	RDSV3_DPRINTF4("rdsv3_ib_alloc_hdrs", "Enter(dev: %p)", dev);
1248 
1249 	ASSERT(ic->i_mr == NULL);
1250 
1251 	size = (ic->i_send_ring.w_nr + ic->i_recv_ring.w_nr + 1) *
1252 	    sizeof (struct rdsv3_header);
1253 
1254 	addr = kmem_zalloc(size, KM_NOSLEEP);
1255 	if (addr == NULL)
1256 		return (-1);
1257 
1258 	mr_attr.mr_vaddr = (ib_vaddr_t)(uintptr_t)addr;
1259 	mr_attr.mr_len = size;
1260 	mr_attr.mr_as = NULL;
1261 	mr_attr.mr_flags = IBT_MR_ENABLE_LOCAL_WRITE;
1262 	ret = ibt_register_mr(ib_get_ibt_hca_hdl(dev), RDSV3_PD2PDHDL(ic->i_pd),
1263 	    &mr_attr, &mr_hdl, &mr_desc);
1264 	if (ret != IBT_SUCCESS) {
1265 		RDSV3_DPRINTF2("rdsv3_ib_alloc_hdrs",
1266 		    "ibt_register_mr returned: " "%d", ret);
1267 		return (-1);
1268 	}
1269 
1270 	ic->i_mr =
1271 	    (struct rdsv3_hdrs_mr *)kmem_alloc(sizeof (struct rdsv3_hdrs_mr),
1272 	    KM_SLEEP);
1273 	ic->i_mr->addr = addr;
1274 	ic->i_mr->size = size;
1275 	ic->i_mr->hdl =	mr_hdl;
1276 	ic->i_mr->lkey = mr_desc.md_lkey;
1277 
1278 	ic->i_send_hdrs = (struct rdsv3_header *)addr;
1279 	ic->i_send_hdrs_dma = (uint64_t)(uintptr_t)addr;
1280 
1281 	ic->i_recv_hdrs = (struct rdsv3_header *)(addr +
1282 	    (ic->i_send_ring.w_nr * sizeof (struct rdsv3_header)));
1283 	ic->i_recv_hdrs_dma = (uint64_t)(uintptr_t)(addr +
1284 	    (ic->i_send_ring.w_nr * sizeof (struct rdsv3_header)));
1285 
1286 	ic->i_ack = (struct rdsv3_header *)(addr +
1287 	    ((ic->i_send_ring.w_nr + ic->i_recv_ring.w_nr) *
1288 	    sizeof (struct rdsv3_header)));
1289 	ic->i_ack_dma = (uint64_t)(uintptr_t)(addr +
1290 	    ((ic->i_send_ring.w_nr + ic->i_recv_ring.w_nr) *
1291 	    sizeof (struct rdsv3_header)));
1292 
1293 	RDSV3_DPRINTF4("rdsv3_ib_alloc_hdrs", "Return(dev: %p)", dev);
1294 
1295 	return (0);
1296 }
1297 
1298 void
1299 rdsv3_ib_free_hdrs(ib_device_t *dev, struct rdsv3_ib_connection *ic)
1300 {
1301 	RDSV3_DPRINTF4("rdsv3_ib_free_hdrs", "Enter(dev: %p)", dev);
1302 	ASSERT(ic->i_mr != NULL);
1303 
1304 	ic->i_send_hdrs = NULL;
1305 	ic->i_send_hdrs_dma = NULL;
1306 
1307 	ic->i_recv_hdrs = NULL;
1308 	ic->i_recv_hdrs_dma = NULL;
1309 
1310 	ic->i_ack = NULL;
1311 	ic->i_ack_dma = NULL;
1312 
1313 	(void) ibt_deregister_mr(ib_get_ibt_hca_hdl(dev), ic->i_mr->hdl);
1314 
1315 	kmem_free(ic->i_mr->addr, ic->i_mr->size);
1316 	kmem_free(ic->i_mr, sizeof (struct rdsv3_hdrs_mr));
1317 
1318 	ic->i_mr = NULL;
1319 	RDSV3_DPRINTF4("rdsv3_ib_free_hdrs", "Return(dev: %p)", dev);
1320 }
1321 
1322 /*
1323  * atomic_add_unless - add unless the number is a given value
1324  * @v: pointer of type atomic_t
1325  * @a: the amount to add to v...
1326  * @u: ...unless v is equal to u.
1327  *
1328  * Atomically adds @a to @v, so long as it was not @u.
1329  * Returns non-zero if @v was not @u, and zero otherwise.
1330  */
1331 int
1332 atomic_add_unless(atomic_t *v, uint_t a, ulong_t u)
1333 {
1334 	uint_t c, old;
1335 
1336 	c = *v;
1337 	while (c != u && (old = atomic_cas_uint(v, c, c + a)) != c) {
1338 		c = old;
1339 	}
1340 	return ((ulong_t)c != u);
1341 }
1342