1 /* 2 * Copyright (c) 2005 Voltaire Inc. All rights reserved. 3 * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved. 4 * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved. 5 * Copyright (c) 2005 Intel Corporation. All rights reserved. 6 * 7 * This Software is licensed under one of the following licenses: 8 * 9 * 1) under the terms of the "Common Public License 1.0" a copy of which is 10 * available from the Open Source Initiative, see 11 * http://www.opensource.org/licenses/cpl.php. 12 * 13 * 2) under the terms of the "The BSD License" a copy of which is 14 * available from the Open Source Initiative, see 15 * http://www.opensource.org/licenses/bsd-license.php. 16 * 17 * 3) under the terms of the "GNU General Public License (GPL) Version 2" a 18 * copy of which is available from the Open Source Initiative, see 19 * http://www.opensource.org/licenses/gpl-license.php. 20 * 21 * Licensee has the right to choose one of the above licenses. 22 * 23 * Redistributions of source code must retain the above copyright 24 * notice and one of the license notices. 25 * 26 * Redistributions in binary form must reproduce both the above copyright 27 * notice, one of the license notices in the documentation 28 * and/or other materials provided with the distribution. 29 */ 30 31 #include <linux/mutex.h> 32 #include <linux/inetdevice.h> 33 #include <linux/workqueue.h> 34 #include <linux/if_arp.h> 35 #include <net/arp.h> 36 #include <net/neighbour.h> 37 #include <net/route.h> 38 #include <net/netevent.h> 39 #include <rdma/ib_addr.h> 40 41 MODULE_AUTHOR("Sean Hefty"); 42 MODULE_DESCRIPTION("IB Address Translation"); 43 MODULE_LICENSE("Dual BSD/GPL"); 44 45 struct addr_req { 46 struct list_head list; 47 struct sockaddr src_addr; 48 struct sockaddr dst_addr; 49 struct rdma_dev_addr *addr; 50 struct rdma_addr_client *client; 51 void *context; 52 void (*callback)(int status, struct sockaddr *src_addr, 53 struct rdma_dev_addr *addr, void *context); 54 unsigned long timeout; 55 int status; 56 }; 57 58 static void process_req(void *data); 59 60 static DEFINE_MUTEX(lock); 61 static LIST_HEAD(req_list); 62 static DECLARE_WORK(work, process_req, NULL); 63 static struct workqueue_struct *addr_wq; 64 65 void rdma_addr_register_client(struct rdma_addr_client *client) 66 { 67 atomic_set(&client->refcount, 1); 68 init_completion(&client->comp); 69 } 70 EXPORT_SYMBOL(rdma_addr_register_client); 71 72 static inline void put_client(struct rdma_addr_client *client) 73 { 74 if (atomic_dec_and_test(&client->refcount)) 75 complete(&client->comp); 76 } 77 78 void rdma_addr_unregister_client(struct rdma_addr_client *client) 79 { 80 put_client(client); 81 wait_for_completion(&client->comp); 82 } 83 EXPORT_SYMBOL(rdma_addr_unregister_client); 84 85 int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev, 86 const unsigned char *dst_dev_addr) 87 { 88 switch (dev->type) { 89 case ARPHRD_INFINIBAND: 90 dev_addr->dev_type = RDMA_NODE_IB_CA; 91 break; 92 case ARPHRD_ETHER: 93 dev_addr->dev_type = RDMA_NODE_RNIC; 94 break; 95 default: 96 return -EADDRNOTAVAIL; 97 } 98 99 memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN); 100 memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN); 101 if (dst_dev_addr) 102 memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN); 103 return 0; 104 } 105 EXPORT_SYMBOL(rdma_copy_addr); 106 107 int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr) 108 { 109 struct net_device *dev; 110 __be32 ip = ((struct sockaddr_in *) addr)->sin_addr.s_addr; 111 int ret; 112 113 dev = ip_dev_find(ip); 114 if (!dev) 115 return -EADDRNOTAVAIL; 116 117 ret = rdma_copy_addr(dev_addr, dev, NULL); 118 dev_put(dev); 119 return ret; 120 } 121 EXPORT_SYMBOL(rdma_translate_ip); 122 123 static void set_timeout(unsigned long time) 124 { 125 unsigned long delay; 126 127 cancel_delayed_work(&work); 128 129 delay = time - jiffies; 130 if ((long)delay <= 0) 131 delay = 1; 132 133 queue_delayed_work(addr_wq, &work, delay); 134 } 135 136 static void queue_req(struct addr_req *req) 137 { 138 struct addr_req *temp_req; 139 140 mutex_lock(&lock); 141 list_for_each_entry_reverse(temp_req, &req_list, list) { 142 if (time_after(req->timeout, temp_req->timeout)) 143 break; 144 } 145 146 list_add(&req->list, &temp_req->list); 147 148 if (req_list.next == &req->list) 149 set_timeout(req->timeout); 150 mutex_unlock(&lock); 151 } 152 153 static void addr_send_arp(struct sockaddr_in *dst_in) 154 { 155 struct rtable *rt; 156 struct flowi fl; 157 u32 dst_ip = dst_in->sin_addr.s_addr; 158 159 memset(&fl, 0, sizeof fl); 160 fl.nl_u.ip4_u.daddr = dst_ip; 161 if (ip_route_output_key(&rt, &fl)) 162 return; 163 164 arp_send(ARPOP_REQUEST, ETH_P_ARP, rt->rt_gateway, rt->idev->dev, 165 rt->rt_src, NULL, rt->idev->dev->dev_addr, NULL); 166 ip_rt_put(rt); 167 } 168 169 static int addr_resolve_remote(struct sockaddr_in *src_in, 170 struct sockaddr_in *dst_in, 171 struct rdma_dev_addr *addr) 172 { 173 u32 src_ip = src_in->sin_addr.s_addr; 174 u32 dst_ip = dst_in->sin_addr.s_addr; 175 struct flowi fl; 176 struct rtable *rt; 177 struct neighbour *neigh; 178 int ret; 179 180 memset(&fl, 0, sizeof fl); 181 fl.nl_u.ip4_u.daddr = dst_ip; 182 fl.nl_u.ip4_u.saddr = src_ip; 183 ret = ip_route_output_key(&rt, &fl); 184 if (ret) 185 goto out; 186 187 /* If the device does ARP internally, return 'done' */ 188 if (rt->idev->dev->flags & IFF_NOARP) { 189 rdma_copy_addr(addr, rt->idev->dev, NULL); 190 goto put; 191 } 192 193 neigh = neigh_lookup(&arp_tbl, &rt->rt_gateway, rt->idev->dev); 194 if (!neigh) { 195 ret = -ENODATA; 196 goto put; 197 } 198 199 if (!(neigh->nud_state & NUD_VALID)) { 200 ret = -ENODATA; 201 goto release; 202 } 203 204 if (!src_ip) { 205 src_in->sin_family = dst_in->sin_family; 206 src_in->sin_addr.s_addr = rt->rt_src; 207 } 208 209 ret = rdma_copy_addr(addr, neigh->dev, neigh->ha); 210 release: 211 neigh_release(neigh); 212 put: 213 ip_rt_put(rt); 214 out: 215 return ret; 216 } 217 218 static void process_req(void *data) 219 { 220 struct addr_req *req, *temp_req; 221 struct sockaddr_in *src_in, *dst_in; 222 struct list_head done_list; 223 224 INIT_LIST_HEAD(&done_list); 225 226 mutex_lock(&lock); 227 list_for_each_entry_safe(req, temp_req, &req_list, list) { 228 if (req->status) { 229 src_in = (struct sockaddr_in *) &req->src_addr; 230 dst_in = (struct sockaddr_in *) &req->dst_addr; 231 req->status = addr_resolve_remote(src_in, dst_in, 232 req->addr); 233 } 234 if (req->status && time_after(jiffies, req->timeout)) 235 req->status = -ETIMEDOUT; 236 else if (req->status == -ENODATA) 237 continue; 238 239 list_del(&req->list); 240 list_add_tail(&req->list, &done_list); 241 } 242 243 if (!list_empty(&req_list)) { 244 req = list_entry(req_list.next, struct addr_req, list); 245 set_timeout(req->timeout); 246 } 247 mutex_unlock(&lock); 248 249 list_for_each_entry_safe(req, temp_req, &done_list, list) { 250 list_del(&req->list); 251 req->callback(req->status, &req->src_addr, req->addr, 252 req->context); 253 put_client(req->client); 254 kfree(req); 255 } 256 } 257 258 static int addr_resolve_local(struct sockaddr_in *src_in, 259 struct sockaddr_in *dst_in, 260 struct rdma_dev_addr *addr) 261 { 262 struct net_device *dev; 263 u32 src_ip = src_in->sin_addr.s_addr; 264 __be32 dst_ip = dst_in->sin_addr.s_addr; 265 int ret; 266 267 dev = ip_dev_find(dst_ip); 268 if (!dev) 269 return -EADDRNOTAVAIL; 270 271 if (ZERONET(src_ip)) { 272 src_in->sin_family = dst_in->sin_family; 273 src_in->sin_addr.s_addr = dst_ip; 274 ret = rdma_copy_addr(addr, dev, dev->dev_addr); 275 } else if (LOOPBACK(src_ip)) { 276 ret = rdma_translate_ip((struct sockaddr *)dst_in, addr); 277 if (!ret) 278 memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN); 279 } else { 280 ret = rdma_translate_ip((struct sockaddr *)src_in, addr); 281 if (!ret) 282 memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN); 283 } 284 285 dev_put(dev); 286 return ret; 287 } 288 289 int rdma_resolve_ip(struct rdma_addr_client *client, 290 struct sockaddr *src_addr, struct sockaddr *dst_addr, 291 struct rdma_dev_addr *addr, int timeout_ms, 292 void (*callback)(int status, struct sockaddr *src_addr, 293 struct rdma_dev_addr *addr, void *context), 294 void *context) 295 { 296 struct sockaddr_in *src_in, *dst_in; 297 struct addr_req *req; 298 int ret = 0; 299 300 req = kmalloc(sizeof *req, GFP_KERNEL); 301 if (!req) 302 return -ENOMEM; 303 memset(req, 0, sizeof *req); 304 305 if (src_addr) 306 memcpy(&req->src_addr, src_addr, ip_addr_size(src_addr)); 307 memcpy(&req->dst_addr, dst_addr, ip_addr_size(dst_addr)); 308 req->addr = addr; 309 req->callback = callback; 310 req->context = context; 311 req->client = client; 312 atomic_inc(&client->refcount); 313 314 src_in = (struct sockaddr_in *) &req->src_addr; 315 dst_in = (struct sockaddr_in *) &req->dst_addr; 316 317 req->status = addr_resolve_local(src_in, dst_in, addr); 318 if (req->status == -EADDRNOTAVAIL) 319 req->status = addr_resolve_remote(src_in, dst_in, addr); 320 321 switch (req->status) { 322 case 0: 323 req->timeout = jiffies; 324 queue_req(req); 325 break; 326 case -ENODATA: 327 req->timeout = msecs_to_jiffies(timeout_ms) + jiffies; 328 queue_req(req); 329 addr_send_arp(dst_in); 330 break; 331 default: 332 ret = req->status; 333 atomic_dec(&client->refcount); 334 kfree(req); 335 break; 336 } 337 return ret; 338 } 339 EXPORT_SYMBOL(rdma_resolve_ip); 340 341 void rdma_addr_cancel(struct rdma_dev_addr *addr) 342 { 343 struct addr_req *req, *temp_req; 344 345 mutex_lock(&lock); 346 list_for_each_entry_safe(req, temp_req, &req_list, list) { 347 if (req->addr == addr) { 348 req->status = -ECANCELED; 349 req->timeout = jiffies; 350 list_del(&req->list); 351 list_add(&req->list, &req_list); 352 set_timeout(req->timeout); 353 break; 354 } 355 } 356 mutex_unlock(&lock); 357 } 358 EXPORT_SYMBOL(rdma_addr_cancel); 359 360 static int netevent_callback(struct notifier_block *self, unsigned long event, 361 void *ctx) 362 { 363 if (event == NETEVENT_NEIGH_UPDATE) { 364 struct neighbour *neigh = ctx; 365 366 if (neigh->dev->type == ARPHRD_INFINIBAND && 367 (neigh->nud_state & NUD_VALID)) { 368 set_timeout(jiffies); 369 } 370 } 371 return 0; 372 } 373 374 static struct notifier_block nb = { 375 .notifier_call = netevent_callback 376 }; 377 378 static int addr_init(void) 379 { 380 addr_wq = create_singlethread_workqueue("ib_addr_wq"); 381 if (!addr_wq) 382 return -ENOMEM; 383 384 register_netevent_notifier(&nb); 385 return 0; 386 } 387 388 static void addr_cleanup(void) 389 { 390 unregister_netevent_notifier(&nb); 391 destroy_workqueue(addr_wq); 392 } 393 394 module_init(addr_init); 395 module_exit(addr_cleanup); 396