1 /* 2 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. 3 */ 4 5 /* 6 * This file contains code imported from the OFED rds source file bind.c 7 * Oracle elects to have and use the contents of bind.c under and governed 8 * by the OpenIB.org BSD license (see below for full license text). However, 9 * the following notice accompanied the original version of this file: 10 */ 11 12 /* 13 * Copyright (c) 2006 Oracle. All rights reserved. 14 * 15 * This software is available to you under a choice of one of two 16 * licenses. You may choose to be licensed under the terms of the GNU 17 * General Public License (GPL) Version 2, available from the file 18 * COPYING in the main directory of this source tree, or the 19 * OpenIB.org BSD license below: 20 * 21 * Redistribution and use in source and binary forms, with or 22 * without modification, are permitted provided that the following 23 * conditions are met: 24 * 25 * - Redistributions of source code must retain the above 26 * copyright notice, this list of conditions and the following 27 * disclaimer. 28 * 29 * - Redistributions in binary form must reproduce the above 30 * copyright notice, this list of conditions and the following 31 * disclaimer in the documentation and/or other materials 32 * provided with the distribution. 33 * 34 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 35 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 36 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 37 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 38 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 39 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 40 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 41 * SOFTWARE. 42 * 43 */ 44 #include <sys/types.h> 45 #include <sys/sysmacros.h> 46 #include <sys/random.h> 47 #include <sys/rds.h> 48 49 #include <sys/ib/clients/rdsv3/rdsv3.h> 50 #include <sys/ib/clients/rdsv3/rdsv3_debug.h> 51 52 kmutex_t rdsv3_bind_lock; 53 avl_tree_t rdsv3_bind_tree; 54 55 /* 56 * Each node in the rdsv3_bind_tree is of this type. 57 */ 58 struct rdsv3_ip_bucket { 59 ipaddr_t ip; 60 zoneid_t zone; 61 avl_node_t ip_avl_node; 62 krwlock_t rwlock; 63 uint_t nsockets; 64 struct rdsv3_sock *port[65536]; 65 }; 66 67 static int 68 rdsv3_bind_node_compare(const void *a, const void *b) 69 { 70 struct rdsv3_ip_bucket *bp = (struct rdsv3_ip_bucket *)b; 71 72 if (*(uint64_t *)a > (((uint64_t)bp->ip << 32) | bp->zone)) 73 return (+1); 74 else if (*(uint64_t *)a < (((uint64_t)bp->ip << 32) | bp->zone)) 75 return (-1); 76 77 return (0); 78 } 79 80 void 81 rdsv3_bind_init() 82 { 83 RDSV3_DPRINTF4("rdsv3_bind_tree_init", "Enter"); 84 85 mutex_init(&rdsv3_bind_lock, NULL, MUTEX_DRIVER, NULL); 86 avl_create(&rdsv3_bind_tree, rdsv3_bind_node_compare, 87 sizeof (struct rdsv3_ip_bucket), 88 offsetof(struct rdsv3_ip_bucket, ip_avl_node)); 89 90 RDSV3_DPRINTF4("rdsv3_bind_tree_init", "Return"); 91 } 92 93 /* called on detach */ 94 void 95 rdsv3_bind_exit() 96 { 97 struct rdsv3_ip_bucket *bucketp; 98 void *cookie = NULL; 99 100 RDSV3_DPRINTF2("rdsv3_bind_tree_exit", "Enter"); 101 102 while ((bucketp = 103 avl_destroy_nodes(&rdsv3_bind_tree, &cookie)) != NULL) { 104 rw_destroy(&bucketp->rwlock); 105 kmem_free(bucketp, sizeof (struct rdsv3_ip_bucket)); 106 } 107 108 avl_destroy(&rdsv3_bind_tree); 109 mutex_destroy(&rdsv3_bind_lock); 110 111 RDSV3_DPRINTF2("rdsv3_bind_tree_exit", "Return"); 112 } 113 114 struct rdsv3_ip_bucket * 115 rdsv3_find_ip_bucket(ipaddr_t ipaddr, zoneid_t zoneid) 116 { 117 struct rdsv3_ip_bucket *bucketp; 118 avl_index_t where; 119 uint64_t needle = ((uint64_t)ipaddr << 32) | zoneid; 120 121 mutex_enter(&rdsv3_bind_lock); 122 bucketp = avl_find(&rdsv3_bind_tree, &needle, &where); 123 if (bucketp == NULL) { 124 /* allocate a new bucket for this IP & zone */ 125 bucketp = 126 kmem_zalloc(sizeof (struct rdsv3_ip_bucket), KM_SLEEP); 127 rw_init(&bucketp->rwlock, NULL, RW_DRIVER, NULL); 128 bucketp->ip = ipaddr; 129 bucketp->zone = zoneid; 130 avl_insert(&rdsv3_bind_tree, bucketp, where); 131 } 132 mutex_exit(&rdsv3_bind_lock); 133 134 return (bucketp); 135 } 136 137 /* 138 * Return the rdsv3_sock bound at the given local address. 139 * 140 * The rx path can race with rdsv3_release. We notice if rdsv3_release() has 141 * marked this socket and don't return a rs ref to the rx path. 142 */ 143 struct rdsv3_sock * 144 rdsv3_find_bound(struct rdsv3_connection *conn, uint16_be_t port) 145 { 146 struct rdsv3_sock *rs; 147 148 RDSV3_DPRINTF4("rdsv3_find_bound", "Enter(ip:port: %u.%u.%u.%u:%d)", 149 NIPQUAD(conn->c_laddr), ntohs(port)); 150 151 rw_enter(&conn->c_bucketp->rwlock, RW_READER); 152 ASSERT(ntohl(conn->c_laddr) == conn->c_bucketp->ip); 153 rs = conn->c_bucketp->port[ntohs(port)]; 154 if (rs && !rdsv3_sk_sock_flag(rdsv3_rs_to_sk(rs), SOCK_DEAD)) 155 rdsv3_sk_sock_hold(rdsv3_rs_to_sk(rs)); 156 else 157 rs = NULL; 158 rw_exit(&conn->c_bucketp->rwlock); 159 160 RDSV3_DPRINTF5("rdsv3_find_bound", "returning rs %p for %u.%u.%u.%u:%d", 161 rs, NIPQUAD(conn->c_laddr), ntohs(port)); 162 163 return (rs); 164 } 165 166 /* returns -ve errno or +ve port */ 167 static int 168 rdsv3_add_bound(struct rdsv3_sock *rs, uint32_be_t addr, uint16_be_t *port) 169 { 170 int ret = -EADDRINUSE; 171 uint16_t rover, last; 172 struct rdsv3_ip_bucket *bucketp; 173 174 RDSV3_DPRINTF4("rdsv3_add_bound", "Enter(addr:port: %x:%x)", 175 ntohl(addr), ntohs(*port)); 176 177 if (*port != 0) { 178 rover = ntohs(*port); 179 last = rover; 180 } else { 181 (void) random_get_pseudo_bytes((uint8_t *)&rover, 182 sizeof (uint16_t)); 183 rover = MAX(rover, 2); 184 last = rover - 1; 185 } 186 187 bucketp = rdsv3_find_ip_bucket(ntohl(addr), rs->rs_zoneid); 188 189 /* leave the bind lock and get the bucket lock */ 190 rw_enter(&bucketp->rwlock, RW_WRITER); 191 192 do { 193 if (rover == 0) 194 rover++; 195 196 if (bucketp->port[rover] == NULL) { 197 *port = htons(rover); 198 ret = 0; 199 break; 200 } 201 } while (rover++ != last); 202 203 if (ret == 0) { 204 rs->rs_bound_addr = addr; 205 rs->rs_bound_port = *port; 206 bucketp->port[rover] = rs; 207 bucketp->nsockets++; 208 rdsv3_sock_addref(rs); 209 210 RDSV3_DPRINTF5("rdsv3_add_bound", 211 "rs %p binding to %u.%u.%u.%u:%d", 212 rs, NIPQUAD(addr), rover); 213 } 214 215 rw_exit(&bucketp->rwlock); 216 217 RDSV3_DPRINTF4("rdsv3_add_bound", "Return(ret: %d port: %d)", 218 ret, rover); 219 220 221 return (ret); 222 } 223 224 void 225 rdsv3_remove_bound(struct rdsv3_sock *rs) 226 { 227 RDSV3_DPRINTF4("rdsv3_remove_bound", "Enter(rs: %p)", rs); 228 229 if (rs->rs_bound_addr) { 230 struct rdsv3_ip_bucket *bucketp; 231 232 RDSV3_DPRINTF5("rdsv3_remove_bound", 233 "rs %p unbinding from %u.%u.%u.%u:%x", 234 rs, NIPQUAD(htonl(rs->rs_bound_addr)), rs->rs_bound_port); 235 236 bucketp = rdsv3_find_ip_bucket(ntohl(rs->rs_bound_addr), 237 rs->rs_zoneid); 238 239 rw_enter(&bucketp->rwlock, RW_WRITER); 240 bucketp->port[ntohs(rs->rs_bound_port)] = NULL; 241 bucketp->nsockets--; 242 rs->rs_bound_addr = 0; 243 rw_exit(&bucketp->rwlock); 244 245 rdsv3_sock_put(rs); 246 } 247 248 RDSV3_DPRINTF4("rdsv3_remove_bound", "Return(rs: %p)", rs); 249 } 250 251 /* ARGSUSED */ 252 int 253 rdsv3_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa, 254 socklen_t len, cred_t *cr) 255 { 256 struct rsock *sk = (struct rsock *)proto_handle; 257 sin_t *sin = (sin_t *)sa; 258 struct rdsv3_sock *rs = rdsv3_sk_to_rs(sk); 259 int ret; 260 261 if (len != sizeof (sin_t) || (sin == NULL) || 262 !OK_32PTR((char *)sin)) { 263 RDSV3_DPRINTF2("rdsv3_bind", "address to bind not specified"); 264 return (EINVAL); 265 } 266 267 RDSV3_DPRINTF4("rdsv3_bind", "Enter(rs: %p, addr: 0x%x, port: %x)", 268 rs, ntohl(sin->sin_addr.s_addr), htons(sin->sin_port)); 269 270 if (sin->sin_addr.s_addr == INADDR_ANY) { 271 RDSV3_DPRINTF2("rdsv3_bind", "Invalid address"); 272 return (EINVAL); 273 } 274 275 /* We don't allow multiple binds */ 276 if (rs->rs_bound_addr) { 277 RDSV3_DPRINTF2("rdsv3_bind", "Multiple binds not allowed"); 278 return (EINVAL); 279 } 280 281 ret = rdsv3_add_bound(rs, sin->sin_addr.s_addr, &sin->sin_port); 282 if (ret) { 283 return (ret); 284 } 285 286 rs->rs_transport = rdsv3_trans_get_preferred(sin->sin_addr.s_addr); 287 if (!rs->rs_transport) { 288 rdsv3_remove_bound(rs); 289 if (rdsv3_printk_ratelimit()) { 290 RDSV3_DPRINTF1("rdsv3_bind", 291 "RDS: rdsv3_bind() could not find a transport.\n"); 292 } 293 return (EADDRNOTAVAIL); 294 } 295 296 RDSV3_DPRINTF4("rdsv3_bind", "Return: Assigned port: %x to sock: %p", 297 sin->sin_port, rs); 298 299 return (0); 300 } 301