1 /*
2 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
3 */
4
5 /*
6 * This file contains code imported from the OFED rds source file bind.c
7 * Oracle elects to have and use the contents of bind.c under and governed
8 * by the OpenIB.org BSD license (see below for full license text). However,
9 * the following notice accompanied the original version of this file:
10 */
11
12 /*
13 * Copyright (c) 2006 Oracle. All rights reserved.
14 *
15 * This software is available to you under a choice of one of two
16 * licenses. You may choose to be licensed under the terms of the GNU
17 * General Public License (GPL) Version 2, available from the file
18 * COPYING in the main directory of this source tree, or the
19 * OpenIB.org BSD license below:
20 *
21 * Redistribution and use in source and binary forms, with or
22 * without modification, are permitted provided that the following
23 * conditions are met:
24 *
25 * - Redistributions of source code must retain the above
26 * copyright notice, this list of conditions and the following
27 * disclaimer.
28 *
29 * - Redistributions in binary form must reproduce the above
30 * copyright notice, this list of conditions and the following
31 * disclaimer in the documentation and/or other materials
32 * provided with the distribution.
33 *
34 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
35 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
36 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
37 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
38 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
39 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
40 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
41 * SOFTWARE.
42 *
43 */
44 #include <sys/types.h>
45 #include <sys/sysmacros.h>
46 #include <sys/random.h>
47 #include <sys/rds.h>
48
49 #include <sys/ib/clients/rdsv3/rdsv3.h>
50 #include <sys/ib/clients/rdsv3/rdsv3_debug.h>
51
/*
 * rdsv3_bind_lock is held by rdsv3_find_ip_bucket() across the lookup in,
 * and any insertion into, rdsv3_bind_tree.
 */
kmutex_t rdsv3_bind_lock;
/* AVL tree of struct rdsv3_ip_bucket nodes, keyed by (ip << 32) | zone. */
avl_tree_t rdsv3_bind_tree;
54
/*
 * Each node in the rdsv3_bind_tree is of this type: one bucket per
 * (IP address, zone) pair, holding the table of sockets bound to that pair.
 */
struct rdsv3_ip_bucket {
	/* IP address in host byte order (callers pass ntohl(addr)) */
	ipaddr_t ip;
	/* zone id; combined with ip to form the AVL search key */
	zoneid_t zone;
	/* linkage into rdsv3_bind_tree */
	avl_node_t ip_avl_node;
	/* taken as reader for port lookups, as writer to bind/unbind */
	krwlock_t rwlock;
	/* count of non-NULL port[] slots, adjusted on bind/unbind */
	uint_t nsockets;
	/* indexed by host-order port number; NULL means the port is free */
	struct rdsv3_sock *port[65536];
};
66
67 static int
rdsv3_bind_node_compare(const void * a,const void * b)68 rdsv3_bind_node_compare(const void *a, const void *b)
69 {
70 struct rdsv3_ip_bucket *bp = (struct rdsv3_ip_bucket *)b;
71
72 if (*(uint64_t *)a > (((uint64_t)bp->ip << 32) | bp->zone))
73 return (+1);
74 else if (*(uint64_t *)a < (((uint64_t)bp->ip << 32) | bp->zone))
75 return (-1);
76
77 return (0);
78 }
79
80 void
rdsv3_bind_init()81 rdsv3_bind_init()
82 {
83 RDSV3_DPRINTF4("rdsv3_bind_tree_init", "Enter");
84
85 mutex_init(&rdsv3_bind_lock, NULL, MUTEX_DRIVER, NULL);
86 avl_create(&rdsv3_bind_tree, rdsv3_bind_node_compare,
87 sizeof (struct rdsv3_ip_bucket),
88 offsetof(struct rdsv3_ip_bucket, ip_avl_node));
89
90 RDSV3_DPRINTF4("rdsv3_bind_tree_init", "Return");
91 }
92
93 /* called on detach */
94 void
rdsv3_bind_exit()95 rdsv3_bind_exit()
96 {
97 struct rdsv3_ip_bucket *bucketp;
98 void *cookie = NULL;
99
100 RDSV3_DPRINTF2("rdsv3_bind_tree_exit", "Enter");
101
102 while ((bucketp =
103 avl_destroy_nodes(&rdsv3_bind_tree, &cookie)) != NULL) {
104 rw_destroy(&bucketp->rwlock);
105 kmem_free(bucketp, sizeof (struct rdsv3_ip_bucket));
106 }
107
108 avl_destroy(&rdsv3_bind_tree);
109 mutex_destroy(&rdsv3_bind_lock);
110
111 RDSV3_DPRINTF2("rdsv3_bind_tree_exit", "Return");
112 }
113
114 struct rdsv3_ip_bucket *
rdsv3_find_ip_bucket(ipaddr_t ipaddr,zoneid_t zoneid)115 rdsv3_find_ip_bucket(ipaddr_t ipaddr, zoneid_t zoneid)
116 {
117 struct rdsv3_ip_bucket *bucketp;
118 avl_index_t where;
119 uint64_t needle = ((uint64_t)ipaddr << 32) | zoneid;
120
121 mutex_enter(&rdsv3_bind_lock);
122 bucketp = avl_find(&rdsv3_bind_tree, &needle, &where);
123 if (bucketp == NULL) {
124 /* allocate a new bucket for this IP & zone */
125 bucketp =
126 kmem_zalloc(sizeof (struct rdsv3_ip_bucket), KM_SLEEP);
127 rw_init(&bucketp->rwlock, NULL, RW_DRIVER, NULL);
128 bucketp->ip = ipaddr;
129 bucketp->zone = zoneid;
130 avl_insert(&rdsv3_bind_tree, bucketp, where);
131 }
132 mutex_exit(&rdsv3_bind_lock);
133
134 return (bucketp);
135 }
136
137 /*
138 * Return the rdsv3_sock bound at the given local address.
139 *
140 * The rx path can race with rdsv3_release. We notice if rdsv3_release() has
141 * marked this socket and don't return a rs ref to the rx path.
142 */
143 struct rdsv3_sock *
rdsv3_find_bound(struct rdsv3_connection * conn,uint16_be_t port)144 rdsv3_find_bound(struct rdsv3_connection *conn, uint16_be_t port)
145 {
146 struct rdsv3_sock *rs;
147
148 RDSV3_DPRINTF4("rdsv3_find_bound", "Enter(ip:port: %u.%u.%u.%u:%d)",
149 NIPQUAD(conn->c_laddr), ntohs(port));
150
151 rw_enter(&conn->c_bucketp->rwlock, RW_READER);
152 ASSERT(ntohl(conn->c_laddr) == conn->c_bucketp->ip);
153 rs = conn->c_bucketp->port[ntohs(port)];
154 if (rs && !rdsv3_sk_sock_flag(rdsv3_rs_to_sk(rs), SOCK_DEAD))
155 rdsv3_sk_sock_hold(rdsv3_rs_to_sk(rs));
156 else
157 rs = NULL;
158 rw_exit(&conn->c_bucketp->rwlock);
159
160 RDSV3_DPRINTF5("rdsv3_find_bound", "returning rs %p for %u.%u.%u.%u:%d",
161 rs, NIPQUAD(conn->c_laddr), ntohs(port));
162
163 return (rs);
164 }
165
166 /* returns -ve errno or +ve port */
167 static int
rdsv3_add_bound(struct rdsv3_sock * rs,uint32_be_t addr,uint16_be_t * port)168 rdsv3_add_bound(struct rdsv3_sock *rs, uint32_be_t addr, uint16_be_t *port)
169 {
170 int ret = -EADDRINUSE;
171 uint16_t rover, last;
172 struct rdsv3_ip_bucket *bucketp;
173
174 RDSV3_DPRINTF4("rdsv3_add_bound", "Enter(addr:port: %x:%x)",
175 ntohl(addr), ntohs(*port));
176
177 if (*port != 0) {
178 rover = ntohs(*port);
179 last = rover;
180 } else {
181 (void) random_get_pseudo_bytes((uint8_t *)&rover,
182 sizeof (uint16_t));
183 rover = MAX(rover, 2);
184 last = rover - 1;
185 }
186
187 bucketp = rdsv3_find_ip_bucket(ntohl(addr), rs->rs_zoneid);
188
189 /* leave the bind lock and get the bucket lock */
190 rw_enter(&bucketp->rwlock, RW_WRITER);
191
192 do {
193 if (rover == 0)
194 rover++;
195
196 if (bucketp->port[rover] == NULL) {
197 *port = htons(rover);
198 ret = 0;
199 break;
200 }
201 } while (rover++ != last);
202
203 if (ret == 0) {
204 rs->rs_bound_addr = addr;
205 rs->rs_bound_port = *port;
206 bucketp->port[rover] = rs;
207 bucketp->nsockets++;
208 rdsv3_sock_addref(rs);
209
210 RDSV3_DPRINTF5("rdsv3_add_bound",
211 "rs %p binding to %u.%u.%u.%u:%d",
212 rs, NIPQUAD(addr), rover);
213 }
214
215 rw_exit(&bucketp->rwlock);
216
217 RDSV3_DPRINTF4("rdsv3_add_bound", "Return(ret: %d port: %d)",
218 ret, rover);
219
220
221 return (ret);
222 }
223
224 void
rdsv3_remove_bound(struct rdsv3_sock * rs)225 rdsv3_remove_bound(struct rdsv3_sock *rs)
226 {
227 RDSV3_DPRINTF4("rdsv3_remove_bound", "Enter(rs: %p)", rs);
228
229 if (rs->rs_bound_addr) {
230 struct rdsv3_ip_bucket *bucketp;
231
232 RDSV3_DPRINTF5("rdsv3_remove_bound",
233 "rs %p unbinding from %u.%u.%u.%u:%x",
234 rs, NIPQUAD(htonl(rs->rs_bound_addr)), rs->rs_bound_port);
235
236 bucketp = rdsv3_find_ip_bucket(ntohl(rs->rs_bound_addr),
237 rs->rs_zoneid);
238
239 rw_enter(&bucketp->rwlock, RW_WRITER);
240 bucketp->port[ntohs(rs->rs_bound_port)] = NULL;
241 bucketp->nsockets--;
242 rs->rs_bound_addr = 0;
243 rw_exit(&bucketp->rwlock);
244
245 rdsv3_sock_put(rs);
246 }
247
248 RDSV3_DPRINTF4("rdsv3_remove_bound", "Return(rs: %p)", rs);
249 }
250
251 /* ARGSUSED */
252 int
rdsv3_bind(sock_lower_handle_t proto_handle,struct sockaddr * sa,socklen_t len,cred_t * cr)253 rdsv3_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa,
254 socklen_t len, cred_t *cr)
255 {
256 struct rsock *sk = (struct rsock *)proto_handle;
257 sin_t *sin = (sin_t *)sa;
258 struct rdsv3_sock *rs = rdsv3_sk_to_rs(sk);
259 int ret;
260
261 if (len != sizeof (sin_t) || (sin == NULL) ||
262 !OK_32PTR((char *)sin)) {
263 RDSV3_DPRINTF2("rdsv3_bind", "address to bind not specified");
264 return (EINVAL);
265 }
266
267 RDSV3_DPRINTF4("rdsv3_bind", "Enter(rs: %p, addr: 0x%x, port: %x)",
268 rs, ntohl(sin->sin_addr.s_addr), htons(sin->sin_port));
269
270 if (sin->sin_addr.s_addr == INADDR_ANY) {
271 RDSV3_DPRINTF2("rdsv3_bind", "Invalid address");
272 return (EINVAL);
273 }
274
275 /* We don't allow multiple binds */
276 if (rs->rs_bound_addr) {
277 RDSV3_DPRINTF2("rdsv3_bind", "Multiple binds not allowed");
278 return (EINVAL);
279 }
280
281 ret = rdsv3_add_bound(rs, sin->sin_addr.s_addr, &sin->sin_port);
282 if (ret) {
283 return (ret);
284 }
285
286 rs->rs_transport = rdsv3_trans_get_preferred(sin->sin_addr.s_addr);
287 if (!rs->rs_transport) {
288 rdsv3_remove_bound(rs);
289 if (rdsv3_printk_ratelimit()) {
290 RDSV3_DPRINTF1("rdsv3_bind",
291 "RDS: rdsv3_bind() could not find a transport.\n");
292 }
293 return (EADDRNOTAVAIL);
294 }
295
296 RDSV3_DPRINTF4("rdsv3_bind", "Return: Assigned port: %x to sock: %p",
297 sin->sin_port, rs);
298
299 return (0);
300 }
301