1ec16227eSAndy Grover /* 2ec16227eSAndy Grover * Copyright (c) 2006 Oracle. All rights reserved. 3ec16227eSAndy Grover * 4ec16227eSAndy Grover * This software is available to you under a choice of one of two 5ec16227eSAndy Grover * licenses. You may choose to be licensed under the terms of the GNU 6ec16227eSAndy Grover * General Public License (GPL) Version 2, available from the file 7ec16227eSAndy Grover * COPYING in the main directory of this source tree, or the 8ec16227eSAndy Grover * OpenIB.org BSD license below: 9ec16227eSAndy Grover * 10ec16227eSAndy Grover * Redistribution and use in source and binary forms, with or 11ec16227eSAndy Grover * without modification, are permitted provided that the following 12ec16227eSAndy Grover * conditions are met: 13ec16227eSAndy Grover * 14ec16227eSAndy Grover * - Redistributions of source code must retain the above 15ec16227eSAndy Grover * copyright notice, this list of conditions and the following 16ec16227eSAndy Grover * disclaimer. 17ec16227eSAndy Grover * 18ec16227eSAndy Grover * - Redistributions in binary form must reproduce the above 19ec16227eSAndy Grover * copyright notice, this list of conditions and the following 20ec16227eSAndy Grover * disclaimer in the documentation and/or other materials 21ec16227eSAndy Grover * provided with the distribution. 22ec16227eSAndy Grover * 23ec16227eSAndy Grover * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 24ec16227eSAndy Grover * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 25ec16227eSAndy Grover * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 26ec16227eSAndy Grover * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 27ec16227eSAndy Grover * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 28ec16227eSAndy Grover * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29ec16227eSAndy Grover * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30ec16227eSAndy Grover * SOFTWARE. 31ec16227eSAndy Grover * 32ec16227eSAndy Grover */ 33ec16227eSAndy Grover #include <linux/kernel.h> 34ec16227eSAndy Grover #include <linux/in.h> 35ec16227eSAndy Grover #include <linux/if.h> 36ec16227eSAndy Grover #include <linux/netdevice.h> 37ec16227eSAndy Grover #include <linux/inetdevice.h> 38ec16227eSAndy Grover #include <linux/if_arp.h> 39ec16227eSAndy Grover #include <linux/delay.h> 405a0e3ad6STejun Heo #include <linux/slab.h> 413a9a231dSPaul Gortmaker #include <linux/module.h> 42ec16227eSAndy Grover 430cb43965SSowmini Varadhan #include "rds_single_path.h" 44ec16227eSAndy Grover #include "rds.h" 45ec16227eSAndy Grover #include "ib.h" 46f6df683fSsantosh.shilimkar@oracle.com #include "ib_mr.h" 47ec16227eSAndy Grover 484f7bfb39SZhu Yanjun static unsigned int rds_ib_mr_1m_pool_size = RDS_MR_1M_POOL_SIZE; 494f7bfb39SZhu Yanjun static unsigned int rds_ib_mr_8k_pool_size = RDS_MR_8K_POOL_SIZE; 503ba23adeSAndy Grover unsigned int rds_ib_retry_count = RDS_IB_DEFAULT_RETRY_COUNT; 51*ebeeb1adSSowmini Varadhan static atomic_t rds_ib_unloading; 52ec16227eSAndy Grover 53f6df683fSsantosh.shilimkar@oracle.com module_param(rds_ib_mr_1m_pool_size, int, 0444); 54f6df683fSsantosh.shilimkar@oracle.com MODULE_PARM_DESC(rds_ib_mr_1m_pool_size, " Max number of 1M mr per HCA"); 55f6df683fSsantosh.shilimkar@oracle.com module_param(rds_ib_mr_8k_pool_size, int, 0444); 56f6df683fSsantosh.shilimkar@oracle.com MODULE_PARM_DESC(rds_ib_mr_8k_pool_size, " Max number of 8K mr per HCA"); 573ba23adeSAndy Grover module_param(rds_ib_retry_count, int, 0444); 583ba23adeSAndy Grover MODULE_PARM_DESC(rds_ib_retry_count, " Number of hw retries before reporting an error"); 59ec16227eSAndy Grover 60ea819867SZach Brown /* 61ea819867SZach Brown * we have a clumsy combination of RCU and a rwsem protecting this list 62ea819867SZach Brown * because it is used both in the get_mr fast path and while blocking in 63ea819867SZach Brown * the FMR flushing path. 64ea819867SZach Brown */ 65ea819867SZach Brown DECLARE_RWSEM(rds_ib_devices_lock); 66ec16227eSAndy Grover struct list_head rds_ib_devices; 67ec16227eSAndy Grover 68745cbccaSAndy Grover /* NOTE: if also grabbing ibdev lock, grab this first */ 69ec16227eSAndy Grover DEFINE_SPINLOCK(ib_nodev_conns_lock); 70ec16227eSAndy Grover LIST_HEAD(ib_nodev_conns); 71ec16227eSAndy Grover 72ff51bf84Sstephen hemminger static void rds_ib_nodev_connect(void) 73fc19de38SZach Brown { 74fc19de38SZach Brown struct rds_ib_connection *ic; 75fc19de38SZach Brown 76fc19de38SZach Brown spin_lock(&ib_nodev_conns_lock); 77fc19de38SZach Brown list_for_each_entry(ic, &ib_nodev_conns, ib_node) 78fc19de38SZach Brown rds_conn_connect_if_down(ic->conn); 79fc19de38SZach Brown spin_unlock(&ib_nodev_conns_lock); 80fc19de38SZach Brown } 81fc19de38SZach Brown 82ff51bf84Sstephen hemminger static void rds_ib_dev_shutdown(struct rds_ib_device *rds_ibdev) 83fc19de38SZach Brown { 84fc19de38SZach Brown struct rds_ib_connection *ic; 85fc19de38SZach Brown unsigned long flags; 86fc19de38SZach Brown 87fc19de38SZach Brown spin_lock_irqsave(&rds_ibdev->spinlock, flags); 88fc19de38SZach Brown list_for_each_entry(ic, &rds_ibdev->conn_list, ib_node) 89fc19de38SZach Brown rds_conn_drop(ic->conn); 90fc19de38SZach Brown spin_unlock_irqrestore(&rds_ibdev->spinlock, flags); 91fc19de38SZach Brown } 92fc19de38SZach Brown 933e0249f9SZach Brown /* 943e0249f9SZach Brown * rds_ib_destroy_mr_pool() blocks on a few things and mrs drop references 953e0249f9SZach Brown * from interrupt context so we push freing off into a work struct in krdsd. 963e0249f9SZach Brown */ 973e0249f9SZach Brown static void rds_ib_dev_free(struct work_struct *work) 983e0249f9SZach Brown { 993e0249f9SZach Brown struct rds_ib_ipaddr *i_ipaddr, *i_next; 1003e0249f9SZach Brown struct rds_ib_device *rds_ibdev = container_of(work, 1013e0249f9SZach Brown struct rds_ib_device, free_work); 1023e0249f9SZach Brown 10306766513SSantosh Shilimkar if (rds_ibdev->mr_8k_pool) 10406766513SSantosh Shilimkar rds_ib_destroy_mr_pool(rds_ibdev->mr_8k_pool); 10506766513SSantosh Shilimkar if (rds_ibdev->mr_1m_pool) 10606766513SSantosh Shilimkar rds_ib_destroy_mr_pool(rds_ibdev->mr_1m_pool); 1073e0249f9SZach Brown if (rds_ibdev->pd) 1083e0249f9SZach Brown ib_dealloc_pd(rds_ibdev->pd); 1093e0249f9SZach Brown 1103e0249f9SZach Brown list_for_each_entry_safe(i_ipaddr, i_next, &rds_ibdev->ipaddr_list, list) { 1113e0249f9SZach Brown list_del(&i_ipaddr->list); 1123e0249f9SZach Brown kfree(i_ipaddr); 1133e0249f9SZach Brown } 1143e0249f9SZach Brown 115be2f76eaSSantosh Shilimkar kfree(rds_ibdev->vector_load); 116be2f76eaSSantosh Shilimkar 1173e0249f9SZach Brown kfree(rds_ibdev); 1183e0249f9SZach Brown } 1193e0249f9SZach Brown 1203e0249f9SZach Brown void rds_ib_dev_put(struct rds_ib_device *rds_ibdev) 1213e0249f9SZach Brown { 12250d61ff7SReshetova, Elena BUG_ON(refcount_read(&rds_ibdev->refcount) == 0); 12350d61ff7SReshetova, Elena if (refcount_dec_and_test(&rds_ibdev->refcount)) 1243e0249f9SZach Brown queue_work(rds_wq, &rds_ibdev->free_work); 1253e0249f9SZach Brown } 1263e0249f9SZach Brown 127ff51bf84Sstephen hemminger static void rds_ib_add_one(struct ib_device *device) 128ec16227eSAndy Grover { 129ec16227eSAndy Grover struct rds_ib_device *rds_ibdev; 1309dff9936SAvinash Repaka bool has_fr, has_fmr; 131ec16227eSAndy Grover 132ec16227eSAndy Grover /* Only handle IB (no iWARP) devices */ 133ec16227eSAndy Grover if (device->node_type != RDMA_NODE_IB_CA) 134ec16227eSAndy Grover return; 135ec16227eSAndy Grover 1363e0249f9SZach Brown rds_ibdev = kzalloc_node(sizeof(struct rds_ib_device), GFP_KERNEL, 1373e0249f9SZach Brown ibdev_to_node(device)); 138ec16227eSAndy Grover if (!rds_ibdev) 1390353261cSOr Gerlitz return; 140ec16227eSAndy Grover 141ec16227eSAndy Grover spin_lock_init(&rds_ibdev->spinlock); 14250d61ff7SReshetova, Elena refcount_set(&rds_ibdev->refcount, 1); 1433e0249f9SZach Brown INIT_WORK(&rds_ibdev->free_work, rds_ib_dev_free); 144ec16227eSAndy Grover 1450353261cSOr Gerlitz rds_ibdev->max_wrs = device->attrs.max_qp_wr; 1460353261cSOr Gerlitz rds_ibdev->max_sge = min(device->attrs.max_sge, RDS_IB_MAX_SGE); 147ec16227eSAndy Grover 1489dff9936SAvinash Repaka has_fr = (device->attrs.device_cap_flags & 1492cb2912dSsantosh.shilimkar@oracle.com IB_DEVICE_MEM_MGT_EXTENSIONS); 1509dff9936SAvinash Repaka has_fmr = (device->alloc_fmr && device->dealloc_fmr && 1512cb2912dSsantosh.shilimkar@oracle.com device->map_phys_fmr && device->unmap_fmr); 1529dff9936SAvinash Repaka rds_ibdev->use_fastreg = (has_fr && !has_fmr); 1532cb2912dSsantosh.shilimkar@oracle.com 1540353261cSOr Gerlitz rds_ibdev->fmr_max_remaps = device->attrs.max_map_per_fmr?: 32; 155f6df683fSsantosh.shilimkar@oracle.com rds_ibdev->max_1m_mrs = device->attrs.max_mr ? 1560353261cSOr Gerlitz min_t(unsigned int, (device->attrs.max_mr / 2), 157f6df683fSsantosh.shilimkar@oracle.com rds_ib_mr_1m_pool_size) : rds_ib_mr_1m_pool_size; 15806766513SSantosh Shilimkar 159f6df683fSsantosh.shilimkar@oracle.com rds_ibdev->max_8k_mrs = device->attrs.max_mr ? 1600353261cSOr Gerlitz min_t(unsigned int, ((device->attrs.max_mr / 2) * RDS_MR_8K_SCALE), 161f6df683fSsantosh.shilimkar@oracle.com rds_ib_mr_8k_pool_size) : rds_ib_mr_8k_pool_size; 162ec16227eSAndy Grover 1630353261cSOr Gerlitz rds_ibdev->max_initiator_depth = device->attrs.max_qp_init_rd_atom; 1640353261cSOr Gerlitz rds_ibdev->max_responder_resources = device->attrs.max_qp_rd_atom; 16540589e74SAndy Grover 166be2f76eaSSantosh Shilimkar rds_ibdev->vector_load = kzalloc(sizeof(int) * device->num_comp_vectors, 167be2f76eaSSantosh Shilimkar GFP_KERNEL); 168be2f76eaSSantosh Shilimkar if (!rds_ibdev->vector_load) { 169be2f76eaSSantosh Shilimkar pr_err("RDS/IB: %s failed to allocate vector memory\n", 170be2f76eaSSantosh Shilimkar __func__); 171be2f76eaSSantosh Shilimkar goto put_dev; 172be2f76eaSSantosh Shilimkar } 173be2f76eaSSantosh Shilimkar 174ec16227eSAndy Grover rds_ibdev->dev = device; 175ed082d36SChristoph Hellwig rds_ibdev->pd = ib_alloc_pd(device, 0); 1763e0249f9SZach Brown if (IS_ERR(rds_ibdev->pd)) { 1773e0249f9SZach Brown rds_ibdev->pd = NULL; 1783e0249f9SZach Brown goto put_dev; 1793e0249f9SZach Brown } 180ec16227eSAndy Grover 18106766513SSantosh Shilimkar rds_ibdev->mr_1m_pool = 18206766513SSantosh Shilimkar rds_ib_create_mr_pool(rds_ibdev, RDS_IB_MR_1M_POOL); 18306766513SSantosh Shilimkar if (IS_ERR(rds_ibdev->mr_1m_pool)) { 18406766513SSantosh Shilimkar rds_ibdev->mr_1m_pool = NULL; 1853e0249f9SZach Brown goto put_dev; 186ec16227eSAndy Grover } 187ec16227eSAndy Grover 18806766513SSantosh Shilimkar rds_ibdev->mr_8k_pool = 18906766513SSantosh Shilimkar rds_ib_create_mr_pool(rds_ibdev, RDS_IB_MR_8K_POOL); 19006766513SSantosh Shilimkar if (IS_ERR(rds_ibdev->mr_8k_pool)) { 19106766513SSantosh Shilimkar rds_ibdev->mr_8k_pool = NULL; 19206766513SSantosh Shilimkar goto put_dev; 19306766513SSantosh Shilimkar } 19406766513SSantosh Shilimkar 195f6df683fSsantosh.shilimkar@oracle.com rdsdebug("RDS/IB: max_mr = %d, max_wrs = %d, max_sge = %d, fmr_max_remaps = %d, max_1m_mrs = %d, max_8k_mrs = %d\n", 1960353261cSOr Gerlitz device->attrs.max_fmr, rds_ibdev->max_wrs, rds_ibdev->max_sge, 197f6df683fSsantosh.shilimkar@oracle.com rds_ibdev->fmr_max_remaps, rds_ibdev->max_1m_mrs, 198f6df683fSsantosh.shilimkar@oracle.com rds_ibdev->max_8k_mrs); 19906766513SSantosh Shilimkar 2002cb2912dSsantosh.shilimkar@oracle.com pr_info("RDS/IB: %s: %s supported and preferred\n", 2012cb2912dSsantosh.shilimkar@oracle.com device->name, 2022cb2912dSsantosh.shilimkar@oracle.com rds_ibdev->use_fastreg ? "FRMR" : "FMR"); 2032cb2912dSsantosh.shilimkar@oracle.com 204ec16227eSAndy Grover INIT_LIST_HEAD(&rds_ibdev->ipaddr_list); 205ec16227eSAndy Grover INIT_LIST_HEAD(&rds_ibdev->conn_list); 206ea819867SZach Brown 207ea819867SZach Brown down_write(&rds_ib_devices_lock); 208ea819867SZach Brown list_add_tail_rcu(&rds_ibdev->list, &rds_ib_devices); 209ea819867SZach Brown up_write(&rds_ib_devices_lock); 21050d61ff7SReshetova, Elena refcount_inc(&rds_ibdev->refcount); 211ec16227eSAndy Grover 212ec16227eSAndy Grover ib_set_client_data(device, &rds_ib_client, rds_ibdev); 21350d61ff7SReshetova, Elena refcount_inc(&rds_ibdev->refcount); 214ec16227eSAndy Grover 215fc19de38SZach Brown rds_ib_nodev_connect(); 216fc19de38SZach Brown 2173e0249f9SZach Brown put_dev: 2183e0249f9SZach Brown rds_ib_dev_put(rds_ibdev); 219ec16227eSAndy Grover } 220ec16227eSAndy Grover 2213e0249f9SZach Brown /* 2223e0249f9SZach Brown * New connections use this to find the device to associate with the 2233e0249f9SZach Brown * connection. It's not in the fast path so we're not concerned about the 2243e0249f9SZach Brown * performance of the IB call. (As of this writing, it uses an interrupt 2253e0249f9SZach Brown * blocking spinlock to serialize walking a per-device list of all registered 2263e0249f9SZach Brown * clients.) 2273e0249f9SZach Brown * 2283e0249f9SZach Brown * RCU is used to handle incoming connections racing with device teardown. 2293e0249f9SZach Brown * Rather than use a lock to serialize removal from the client_data and 2303e0249f9SZach Brown * getting a new reference, we use an RCU grace period. The destruction 2313e0249f9SZach Brown * path removes the device from client_data and then waits for all RCU 2323e0249f9SZach Brown * readers to finish. 2333e0249f9SZach Brown * 2343e0249f9SZach Brown * A new connection can get NULL from this if its arriving on a 2353e0249f9SZach Brown * device that is in the process of being removed. 2363e0249f9SZach Brown */ 2373e0249f9SZach Brown struct rds_ib_device *rds_ib_get_client_data(struct ib_device *device) 2383e0249f9SZach Brown { 2393e0249f9SZach Brown struct rds_ib_device *rds_ibdev; 2403e0249f9SZach Brown 2413e0249f9SZach Brown rcu_read_lock(); 2423e0249f9SZach Brown rds_ibdev = ib_get_client_data(device, &rds_ib_client); 2433e0249f9SZach Brown if (rds_ibdev) 24450d61ff7SReshetova, Elena refcount_inc(&rds_ibdev->refcount); 2453e0249f9SZach Brown rcu_read_unlock(); 2463e0249f9SZach Brown return rds_ibdev; 2473e0249f9SZach Brown } 2483e0249f9SZach Brown 2493e0249f9SZach Brown /* 2503e0249f9SZach Brown * The IB stack is letting us know that a device is going away. This can 2513e0249f9SZach Brown * happen if the underlying HCA driver is removed or if PCI hotplug is removing 2523e0249f9SZach Brown * the pci function, for example. 2533e0249f9SZach Brown * 2543e0249f9SZach Brown * This can be called at any time and can be racing with any other RDS path. 2553e0249f9SZach Brown */ 2567c1eb45aSHaggai Eran static void rds_ib_remove_one(struct ib_device *device, void *client_data) 257ec16227eSAndy Grover { 2587c1eb45aSHaggai Eran struct rds_ib_device *rds_ibdev = client_data; 259ec16227eSAndy Grover 260ec16227eSAndy Grover if (!rds_ibdev) 261ec16227eSAndy Grover return; 262ec16227eSAndy Grover 263fc19de38SZach Brown rds_ib_dev_shutdown(rds_ibdev); 264ec16227eSAndy Grover 265ea819867SZach Brown /* stop connection attempts from getting a reference to this device. */ 2663e0249f9SZach Brown ib_set_client_data(device, &rds_ib_client, NULL); 267ea819867SZach Brown 268ea819867SZach Brown down_write(&rds_ib_devices_lock); 269ea819867SZach Brown list_del_rcu(&rds_ibdev->list); 270ea819867SZach Brown up_write(&rds_ib_devices_lock); 271ea819867SZach Brown 272ea819867SZach Brown /* 273ea819867SZach Brown * This synchronize rcu is waiting for readers of both the ib 274ea819867SZach Brown * client data and the devices list to finish before we drop 275ea819867SZach Brown * both of those references. 276ea819867SZach Brown */ 2773e0249f9SZach Brown synchronize_rcu(); 2783e0249f9SZach Brown rds_ib_dev_put(rds_ibdev); 2793e0249f9SZach Brown rds_ib_dev_put(rds_ibdev); 280ec16227eSAndy Grover } 281ec16227eSAndy Grover 282ec16227eSAndy Grover struct ib_client rds_ib_client = { 283ec16227eSAndy Grover .name = "rds_ib", 284ec16227eSAndy Grover .add = rds_ib_add_one, 285ec16227eSAndy Grover .remove = rds_ib_remove_one 286ec16227eSAndy Grover }; 287ec16227eSAndy Grover 288ec16227eSAndy Grover static int rds_ib_conn_info_visitor(struct rds_connection *conn, 289ec16227eSAndy Grover void *buffer) 290ec16227eSAndy Grover { 291ec16227eSAndy Grover struct rds_info_rdma_connection *iinfo = buffer; 292ec16227eSAndy Grover struct rds_ib_connection *ic; 293ec16227eSAndy Grover 294ec16227eSAndy Grover /* We will only ever look at IB transports */ 295ec16227eSAndy Grover if (conn->c_trans != &rds_ib_transport) 296ec16227eSAndy Grover return 0; 297ec16227eSAndy Grover 298ec16227eSAndy Grover iinfo->src_addr = conn->c_laddr; 299ec16227eSAndy Grover iinfo->dst_addr = conn->c_faddr; 300ec16227eSAndy Grover 301ec16227eSAndy Grover memset(&iinfo->src_gid, 0, sizeof(iinfo->src_gid)); 302ec16227eSAndy Grover memset(&iinfo->dst_gid, 0, sizeof(iinfo->dst_gid)); 303ec16227eSAndy Grover if (rds_conn_state(conn) == RDS_CONN_UP) { 304ec16227eSAndy Grover struct rds_ib_device *rds_ibdev; 305ec16227eSAndy Grover 306ec16227eSAndy Grover ic = conn->c_transport_data; 307ec16227eSAndy Grover 308a2e812eaSParav Pandit rdma_read_gids(ic->i_cm_id, (union ib_gid *)&iinfo->src_gid, 309a2e812eaSParav Pandit (union ib_gid *)&iinfo->dst_gid); 310ec16227eSAndy Grover 3113e0249f9SZach Brown rds_ibdev = ic->rds_ibdev; 312ec16227eSAndy Grover iinfo->max_send_wr = ic->i_send_ring.w_nr; 313ec16227eSAndy Grover iinfo->max_recv_wr = ic->i_recv_ring.w_nr; 314ec16227eSAndy Grover iinfo->max_send_sge = rds_ibdev->max_sge; 315ec16227eSAndy Grover rds_ib_get_mr_info(rds_ibdev, iinfo); 316ec16227eSAndy Grover } 317ec16227eSAndy Grover return 1; 318ec16227eSAndy Grover } 319ec16227eSAndy Grover 320ec16227eSAndy Grover static void rds_ib_ic_info(struct socket *sock, unsigned int len, 321ec16227eSAndy Grover struct rds_info_iterator *iter, 322ec16227eSAndy Grover struct rds_info_lengths *lens) 323ec16227eSAndy Grover { 324ec16227eSAndy Grover rds_for_each_conn_info(sock, len, iter, lens, 325ec16227eSAndy Grover rds_ib_conn_info_visitor, 326ec16227eSAndy Grover sizeof(struct rds_info_rdma_connection)); 327ec16227eSAndy Grover } 328ec16227eSAndy Grover 329ec16227eSAndy Grover 330ec16227eSAndy Grover /* 331ec16227eSAndy Grover * Early RDS/IB was built to only bind to an address if there is an IPoIB 332ec16227eSAndy Grover * device with that address set. 333ec16227eSAndy Grover * 334ec16227eSAndy Grover * If it were me, I'd advocate for something more flexible. Sending and 335ec16227eSAndy Grover * receiving should be device-agnostic. Transports would try and maintain 336ec16227eSAndy Grover * connections between peers who have messages queued. Userspace would be 337ec16227eSAndy Grover * allowed to influence which paths have priority. We could call userspace 338ec16227eSAndy Grover * asserting this policy "routing". 339ec16227eSAndy Grover */ 340d5a8ac28SSowmini Varadhan static int rds_ib_laddr_check(struct net *net, __be32 addr) 341ec16227eSAndy Grover { 342ec16227eSAndy Grover int ret; 343ec16227eSAndy Grover struct rdma_cm_id *cm_id; 344ec16227eSAndy Grover struct sockaddr_in sin; 345ec16227eSAndy Grover 346ec16227eSAndy Grover /* Create a CMA ID and try to bind it. This catches both 347ec16227eSAndy Grover * IB and iWARP capable NICs. 348ec16227eSAndy Grover */ 3492c0aa086SGuanglei Li cm_id = rdma_create_id(&init_net, rds_rdma_cm_event_handler, 3502c0aa086SGuanglei Li NULL, RDMA_PS_TCP, IB_QPT_RC); 35194713babSDan Carpenter if (IS_ERR(cm_id)) 35294713babSDan Carpenter return PTR_ERR(cm_id); 353ec16227eSAndy Grover 354ec16227eSAndy Grover memset(&sin, 0, sizeof(sin)); 355ec16227eSAndy Grover sin.sin_family = AF_INET; 356ec16227eSAndy Grover sin.sin_addr.s_addr = addr; 357ec16227eSAndy Grover 358ec16227eSAndy Grover /* rdma_bind_addr will only succeed for IB & iWARP devices */ 359ec16227eSAndy Grover ret = rdma_bind_addr(cm_id, (struct sockaddr *)&sin); 360ec16227eSAndy Grover /* due to this, we will claim to support iWARP devices unless we 361ec16227eSAndy Grover check node_type. */ 362c2349758SSasha Levin if (ret || !cm_id->device || 363c2349758SSasha Levin cm_id->device->node_type != RDMA_NODE_IB_CA) 364ec16227eSAndy Grover ret = -EADDRNOTAVAIL; 365ec16227eSAndy Grover 366ec16227eSAndy Grover rdsdebug("addr %pI4 ret %d node type %d\n", 367ec16227eSAndy Grover &addr, ret, 368ec16227eSAndy Grover cm_id->device ? cm_id->device->node_type : -1); 369ec16227eSAndy Grover 370ec16227eSAndy Grover rdma_destroy_id(cm_id); 371ec16227eSAndy Grover 372ec16227eSAndy Grover return ret; 373ec16227eSAndy Grover } 374ec16227eSAndy Grover 37524fa163aSZach Brown static void rds_ib_unregister_client(void) 37624fa163aSZach Brown { 37724fa163aSZach Brown ib_unregister_client(&rds_ib_client); 37824fa163aSZach Brown /* wait for rds_ib_dev_free() to complete */ 37924fa163aSZach Brown flush_workqueue(rds_wq); 38024fa163aSZach Brown } 38124fa163aSZach Brown 382*ebeeb1adSSowmini Varadhan static void rds_ib_set_unloading(void) 383*ebeeb1adSSowmini Varadhan { 384*ebeeb1adSSowmini Varadhan atomic_set(&rds_ib_unloading, 1); 385*ebeeb1adSSowmini Varadhan } 386*ebeeb1adSSowmini Varadhan 387*ebeeb1adSSowmini Varadhan static bool rds_ib_is_unloading(struct rds_connection *conn) 388*ebeeb1adSSowmini Varadhan { 389*ebeeb1adSSowmini Varadhan struct rds_conn_path *cp = &conn->c_path[0]; 390*ebeeb1adSSowmini Varadhan 391*ebeeb1adSSowmini Varadhan return (test_bit(RDS_DESTROY_PENDING, &cp->cp_flags) || 392*ebeeb1adSSowmini Varadhan atomic_read(&rds_ib_unloading) != 0); 393*ebeeb1adSSowmini Varadhan } 394*ebeeb1adSSowmini Varadhan 395ec16227eSAndy Grover void rds_ib_exit(void) 396ec16227eSAndy Grover { 397*ebeeb1adSSowmini Varadhan rds_ib_set_unloading(); 398*ebeeb1adSSowmini Varadhan synchronize_rcu(); 399ec16227eSAndy Grover rds_info_deregister_func(RDS_INFO_IB_CONNECTIONS, rds_ib_ic_info); 40024fa163aSZach Brown rds_ib_unregister_client(); 4018aeb1ba6SZach Brown rds_ib_destroy_nodev_conns(); 402ec16227eSAndy Grover rds_ib_sysctl_exit(); 403ec16227eSAndy Grover rds_ib_recv_exit(); 404ec16227eSAndy Grover rds_trans_unregister(&rds_ib_transport); 405f6df683fSsantosh.shilimkar@oracle.com rds_ib_mr_exit(); 406ec16227eSAndy Grover } 407ec16227eSAndy Grover 408ec16227eSAndy Grover struct rds_transport rds_ib_transport = { 409ec16227eSAndy Grover .laddr_check = rds_ib_laddr_check, 410226f7a7dSSowmini Varadhan .xmit_path_complete = rds_ib_xmit_path_complete, 411ec16227eSAndy Grover .xmit = rds_ib_xmit, 412ec16227eSAndy Grover .xmit_rdma = rds_ib_xmit_rdma, 41315133f6eSAndy Grover .xmit_atomic = rds_ib_xmit_atomic, 4142da43c4aSSowmini Varadhan .recv_path = rds_ib_recv_path, 415ec16227eSAndy Grover .conn_alloc = rds_ib_conn_alloc, 416ec16227eSAndy Grover .conn_free = rds_ib_conn_free, 417b04e8554SSowmini Varadhan .conn_path_connect = rds_ib_conn_path_connect, 418226f7a7dSSowmini Varadhan .conn_path_shutdown = rds_ib_conn_path_shutdown, 419ec16227eSAndy Grover .inc_copy_to_user = rds_ib_inc_copy_to_user, 420ec16227eSAndy Grover .inc_free = rds_ib_inc_free, 421ec16227eSAndy Grover .cm_initiate_connect = rds_ib_cm_initiate_connect, 422ec16227eSAndy Grover .cm_handle_connect = rds_ib_cm_handle_connect, 423ec16227eSAndy Grover .cm_connect_complete = rds_ib_cm_connect_complete, 424ec16227eSAndy Grover .stats_info_copy = rds_ib_stats_info_copy, 425ec16227eSAndy Grover .exit = rds_ib_exit, 426ec16227eSAndy Grover .get_mr = rds_ib_get_mr, 427ec16227eSAndy Grover .sync_mr = rds_ib_sync_mr, 428ec16227eSAndy Grover .free_mr = rds_ib_free_mr, 429ec16227eSAndy Grover .flush_mrs = rds_ib_flush_mrs, 430ec16227eSAndy Grover .t_owner = THIS_MODULE, 431ec16227eSAndy Grover .t_name = "infiniband", 432*ebeeb1adSSowmini Varadhan .t_unloading = rds_ib_is_unloading, 433335776bdSAndy Grover .t_type = RDS_TRANS_IB 434ec16227eSAndy Grover }; 435ec16227eSAndy Grover 436ef87b7eaSZach Brown int rds_ib_init(void) 437ec16227eSAndy Grover { 438ec16227eSAndy Grover int ret; 439ec16227eSAndy Grover 440ec16227eSAndy Grover INIT_LIST_HEAD(&rds_ib_devices); 441ec16227eSAndy Grover 442f6df683fSsantosh.shilimkar@oracle.com ret = rds_ib_mr_init(); 443515e079dSZach Brown if (ret) 444c534a107STejun Heo goto out; 445515e079dSZach Brown 446ad1d7dc0Ssantosh.shilimkar@oracle.com ret = ib_register_client(&rds_ib_client); 447ad1d7dc0Ssantosh.shilimkar@oracle.com if (ret) 448f6df683fSsantosh.shilimkar@oracle.com goto out_mr_exit; 449ad1d7dc0Ssantosh.shilimkar@oracle.com 450ec16227eSAndy Grover ret = rds_ib_sysctl_init(); 451ec16227eSAndy Grover if (ret) 452ec16227eSAndy Grover goto out_ibreg; 453ec16227eSAndy Grover 454ec16227eSAndy Grover ret = rds_ib_recv_init(); 455ec16227eSAndy Grover if (ret) 456ec16227eSAndy Grover goto out_sysctl; 457ec16227eSAndy Grover 458a8d63a53SZhu Yanjun rds_trans_register(&rds_ib_transport); 459ec16227eSAndy Grover 460ec16227eSAndy Grover rds_info_register_func(RDS_INFO_IB_CONNECTIONS, rds_ib_ic_info); 461ec16227eSAndy Grover 462ec16227eSAndy Grover goto out; 463ec16227eSAndy Grover 464ec16227eSAndy Grover out_sysctl: 465ec16227eSAndy Grover rds_ib_sysctl_exit(); 466ec16227eSAndy Grover out_ibreg: 46724fa163aSZach Brown rds_ib_unregister_client(); 468f6df683fSsantosh.shilimkar@oracle.com out_mr_exit: 469f6df683fSsantosh.shilimkar@oracle.com rds_ib_mr_exit(); 470ec16227eSAndy Grover out: 471ec16227eSAndy Grover return ret; 472ec16227eSAndy Grover } 473ec16227eSAndy Grover 474ec16227eSAndy Grover MODULE_LICENSE("GPL"); 475ec16227eSAndy Grover 476