xref: /illumos-gate/usr/src/uts/common/io/ib/clients/rds/rdssubr.c (revision 94cad3fef5d199c4897e68d856995cdb44e20dbb)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
#include <sys/byteorder.h>
#include <sys/ib/clients/rds/rds.h>
#include <sys/ib/clients/rds/rds_kstat.h>

#include <inet/ipclassifier.h>
32 
/*
 * Named kstat counters for the RDS module.  Every entry is an unsigned
 * long (KSTAT_DATA_ULONG).  rds_init() installs this structure as the
 * ks_data of the "rds_kstat" kstat and derives the number of entries from
 * sizeof (rds_kstat) / sizeof (kstat_named_t), so the structure must
 * consist solely of kstat_named_t members.
 */
struct rds_kstat_s rds_kstat = {
	{"rds_nports",			KSTAT_DATA_ULONG},
	{"rds_nsessions",		KSTAT_DATA_ULONG},
	{"rds_tx_bytes",		KSTAT_DATA_ULONG},
	{"rds_tx_pkts",			KSTAT_DATA_ULONG},
	{"rds_tx_errors",		KSTAT_DATA_ULONG},
	{"rds_rx_bytes",		KSTAT_DATA_ULONG},
	{"rds_rx_pkts",			KSTAT_DATA_ULONG},
	{"rds_rx_pkts_pending",		KSTAT_DATA_ULONG},
	{"rds_rx_errors",		KSTAT_DATA_ULONG},
	{"rds_tx_acks",			KSTAT_DATA_ULONG},
	{"rds_post_recv_buf_called",	KSTAT_DATA_ULONG},
	{"rds_stalls_triggered",	KSTAT_DATA_ULONG},
	{"rds_stalls_sent",		KSTAT_DATA_ULONG},
	{"rds_unstalls_triggered",	KSTAT_DATA_ULONG},
	{"rds_unstalls_sent",		KSTAT_DATA_ULONG},
	{"rds_stalls_recvd",		KSTAT_DATA_ULONG},
	{"rds_unstalls_recvd",		KSTAT_DATA_ULONG},
	{"rds_stalls_ignored",		KSTAT_DATA_ULONG},
	{"rds_enobufs",			KSTAT_DATA_ULONG},
	{"rds_ewouldblocks",		KSTAT_DATA_ULONG},
	{"rds_failovers",		KSTAT_DATA_ULONG},
	{"rds_port_quota",		KSTAT_DATA_ULONG},
	{"rds_port_quota_adjusted",	KSTAT_DATA_ULONG},
};
58 
/* Handle returned by kstat_create() in rds_init(); NULL if creation failed. */
kstat_t *rds_kstatsp;
/*
 * Installed as the kstat's ks_lock in rds_init() and optionally taken by
 * the rds_*_kstat() accessors below.
 */
static kmutex_t rds_kstat_mutex;


/* kmem cache from which rds_t structures are allocated (see rds_create()). */
struct	kmem_cache	*rds_alloc_cache;

/*
 * Number of buckets in the bind fanout table.  rds_hash_init() rounds a
 * non-power-of-two value up to a power of two before allocating the table.
 */
uint_t	rds_bind_fanout_size = RDS_BIND_FANOUT_SIZE;
/* Bucket array allocated by rds_hash_init() and released by rds_fini(). */
rds_bf_t *rds_bind_fanout;
67 
68 void
69 rds_increment_kstat(kstat_named_t *ksnp, boolean_t lock, uint_t num)
70 {
71 	if (lock)
72 		mutex_enter(&rds_kstat_mutex);
73 	ksnp->value.ul += num;
74 	if (lock)
75 		mutex_exit(&rds_kstat_mutex);
76 }
77 
78 void
79 rds_decrement_kstat(kstat_named_t *ksnp, boolean_t lock, uint_t num)
80 {
81 	if (lock)
82 		mutex_enter(&rds_kstat_mutex);
83 	ksnp->value.ul -= num;
84 	if (lock)
85 		mutex_exit(&rds_kstat_mutex);
86 }
87 
88 void
89 rds_set_kstat(kstat_named_t *ksnp, boolean_t lock, ulong_t num)
90 {
91 	if (lock)
92 		mutex_enter(&rds_kstat_mutex);
93 	ksnp->value.ul = num;
94 	if (lock)
95 		mutex_exit(&rds_kstat_mutex);
96 }
97 
98 ulong_t
99 rds_get_kstat(kstat_named_t *ksnp, boolean_t lock)
100 {
101 	ulong_t	value;
102 
103 	if (lock)
104 		mutex_enter(&rds_kstat_mutex);
105 	value = ksnp->value.ul;
106 	if (lock)
107 		mutex_exit(&rds_kstat_mutex);
108 
109 	return (value);
110 }
111 
112 
113 void
114 rds_fini()
115 {
116 	int	i;
117 
118 	for (i = 0; i < rds_bind_fanout_size; i++) {
119 		mutex_destroy(&rds_bind_fanout[i].rds_bf_lock);
120 	}
121 	kmem_free(rds_bind_fanout, rds_bind_fanout_size * sizeof (rds_bf_t));
122 
123 	kmem_cache_destroy(rds_alloc_cache);
124 	kstat_delete(rds_kstatsp);
125 }
126 
127 
128 void
129 rds_init()
130 {
131 	rds_alloc_cache = kmem_cache_create("rds_alloc_cache",
132 	    sizeof (rds_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
133 	rds_hash_init();
134 	/*
135 	 * kstats
136 	 */
137 	rds_kstatsp = kstat_create("rds", 0,
138 		"rds_kstat", "misc", KSTAT_TYPE_NAMED,
139 		sizeof (rds_kstat) / sizeof (kstat_named_t),
140 		KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE);
141 	if (rds_kstatsp != NULL) {
142 		rds_kstatsp->ks_lock = &rds_kstat_mutex;
143 		rds_kstatsp->ks_data = (void *)&rds_kstat;
144 		kstat_install(rds_kstatsp);
145 	}
146 }
147 
148 #define	UINT_32_BITS 31
149 void
150 rds_hash_init()
151 {
152 	int i;
153 
154 	if (rds_bind_fanout_size & (rds_bind_fanout_size - 1)) {
155 		/* Not a power of two. Round up to nearest power of two */
156 		for (i = 0; i < UINT_32_BITS; i++) {
157 			if (rds_bind_fanout_size < (1 << i))
158 				break;
159 		}
160 		rds_bind_fanout_size = 1 << i;
161 	}
162 	rds_bind_fanout = kmem_zalloc(rds_bind_fanout_size *
163 	    sizeof (rds_bf_t), KM_SLEEP);
164 	for (i = 0; i < rds_bind_fanout_size; i++) {
165 		mutex_init(&rds_bind_fanout[i].rds_bf_lock, NULL, MUTEX_DEFAULT,
166 		    NULL);
167 	}
168 }
169 
170 void
171 rds_free(rds_t *rds)
172 {
173 	ASSERT(rds->rds_refcnt == 0);
174 	ASSERT(MUTEX_HELD(&rds->rds_lock));
175 	crfree(rds->rds_cred);
176 	kmem_cache_free(rds_alloc_cache, rds);
177 }
178 
179 rds_t *
180 rds_create(void *rds_ulpd, cred_t *credp)
181 {
182 	rds_t	*rds;
183 
184 	/* User must supply a credential. */
185 	if (credp == NULL)
186 		return (NULL);
187 	rds = kmem_cache_alloc(rds_alloc_cache, KM_SLEEP);
188 	if (rds == NULL) {
189 		return (NULL);
190 	}
191 
192 	bzero(rds, sizeof (rds_t));
193 	mutex_init(&rds->rds_lock, NULL, MUTEX_DEFAULT, NULL);
194 	cv_init(&rds->rds_refcv, NULL, CV_DEFAULT, NULL);
195 	rds->rds_cred = credp;
196 	rds->rds_ulpd = rds_ulpd;
197 	rds->rds_zoneid = getzoneid();
198 	crhold(credp);
199 	rds->rds_refcnt++;
200 	return (rds);
201 }
202 
203 
204 /*
205  * Hash list removal routine for rds_t structures.
206  */
207 void
208 rds_bind_hash_remove(rds_t *rds, boolean_t caller_holds_lock)
209 {
210 	rds_t   *rdsnext;
211 	kmutex_t *lockp;
212 
213 	if (rds->rds_ptpbhn == NULL)
214 		return;
215 
216 	/*
217 	 * Extract the lock pointer in case there are concurrent
218 	 * hash_remove's for this instance.
219 	 */
220 	ASSERT(rds->rds_port != 0);
221 	if (!caller_holds_lock) {
222 		lockp = &rds_bind_fanout[RDS_BIND_HASH(rds->rds_port)].
223 		    rds_bf_lock;
224 		ASSERT(lockp != NULL);
225 		mutex_enter(lockp);
226 	}
227 
228 	if (rds->rds_ptpbhn != NULL) {
229 		rdsnext = rds->rds_bind_hash;
230 		if (rdsnext != NULL) {
231 			rdsnext->rds_ptpbhn = rds->rds_ptpbhn;
232 			rds->rds_bind_hash = NULL;
233 		}
234 		*rds->rds_ptpbhn = rdsnext;
235 		rds->rds_ptpbhn = NULL;
236 	}
237 
238 	RDS_DEC_REF_CNT(rds);
239 
240 	if (!caller_holds_lock) {
241 		mutex_exit(lockp);
242 	}
243 }
244 
245 void
246 rds_bind_hash_insert(rds_bf_t *rdsbf, rds_t *rds)
247 {
248 	rds_t   **rdsp;
249 	rds_t   *rdsnext;
250 
251 	ASSERT(MUTEX_HELD(&rdsbf->rds_bf_lock));
252 	if (rds->rds_ptpbhn != NULL) {
253 		rds_bind_hash_remove(rds, B_TRUE);
254 	}
255 
256 	rdsp = &rdsbf->rds_bf_rds;
257 	rdsnext = rdsp[0];
258 
259 	if (rdsnext != NULL) {
260 		rdsnext->rds_ptpbhn = &rds->rds_bind_hash;
261 	}
262 	rds->rds_bind_hash = rdsnext;
263 	rds->rds_ptpbhn = rdsp;
264 	rdsp[0] = rds;
265 	RDS_INCR_REF_CNT(rds);
266 
267 }
268 
269 /*
270  * Everything is in network byte order
271  */
272 /* ARGSUSED */
273 rds_t *
274 rds_fanout(ipaddr_t local_addr, ipaddr_t rem_addr,
275     in_port_t local_port, in_port_t rem_port, zoneid_t zoneid)
276 {
277 	rds_t	*rds;
278 	rds_bf_t *rdsbf;
279 
280 	rdsbf = &rds_bind_fanout[RDS_BIND_HASH(local_port)];
281 	mutex_enter(&rdsbf->rds_bf_lock);
282 	rds = rdsbf->rds_bf_rds;
283 	while (rds != NULL) {
284 		if (!(rds->rds_flags & RDS_CLOSING)) {
285 			if ((RDS_MATCH(rds, local_port, local_addr)) &&
286 			    ((local_addr != INADDR_LOOPBACK) ||
287 			    (rds->rds_zoneid == zoneid))) {
288 				RDS_INCR_REF_CNT(rds);
289 				break;
290 			}
291 		}
292 		rds = rds->rds_bind_hash;
293 	}
294 	mutex_exit(&rdsbf->rds_bf_lock);
295 	return (rds);
296 }
297 
298 boolean_t
299 rds_islocal(ipaddr_t addr)
300 {
301 	ire_t *ire;
302 	ip_stack_t *ipst;
303 
304 	ipst = netstack_find_by_zoneid(GLOBAL_ZONEID)->netstack_ip;
305 	ASSERT(ipst != NULL);
306 
307 	ire = ire_ctable_lookup(addr, NULL, IRE_LOCAL | IRE_LOOPBACK |
308 	    IRE_BROADCAST, NULL, ALL_ZONES, NULL, MATCH_IRE_TYPE, ipst);
309 	netstack_rele(ipst->ips_netstack);
310 	if (ire == NULL)
311 		return (B_FALSE);
312 	ire_refrele(ire);
313 	return (B_TRUE);
314 }
315