1ec16227eSAndy Grover /* 21e2b44e7SKa-Cheong Poon * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved. 3ec16227eSAndy Grover * 4ec16227eSAndy Grover * This software is available to you under a choice of one of two 5ec16227eSAndy Grover * licenses. You may choose to be licensed under the terms of the GNU 6ec16227eSAndy Grover * General Public License (GPL) Version 2, available from the file 7ec16227eSAndy Grover * COPYING in the main directory of this source tree, or the 8ec16227eSAndy Grover * OpenIB.org BSD license below: 9ec16227eSAndy Grover * 10ec16227eSAndy Grover * Redistribution and use in source and binary forms, with or 11ec16227eSAndy Grover * without modification, are permitted provided that the following 12ec16227eSAndy Grover * conditions are met: 13ec16227eSAndy Grover * 14ec16227eSAndy Grover * - Redistributions of source code must retain the above 15ec16227eSAndy Grover * copyright notice, this list of conditions and the following 16ec16227eSAndy Grover * disclaimer. 17ec16227eSAndy Grover * 18ec16227eSAndy Grover * - Redistributions in binary form must reproduce the above 19ec16227eSAndy Grover * copyright notice, this list of conditions and the following 20ec16227eSAndy Grover * disclaimer in the documentation and/or other materials 21ec16227eSAndy Grover * provided with the distribution. 22ec16227eSAndy Grover * 23ec16227eSAndy Grover * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 24ec16227eSAndy Grover * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 25ec16227eSAndy Grover * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 26ec16227eSAndy Grover * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 27ec16227eSAndy Grover * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 28ec16227eSAndy Grover * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29ec16227eSAndy Grover * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30ec16227eSAndy Grover * SOFTWARE. 31ec16227eSAndy Grover * 32ec16227eSAndy Grover */ 33ec16227eSAndy Grover #include <linux/kernel.h> 34ec16227eSAndy Grover #include <linux/in.h> 35ec16227eSAndy Grover #include <linux/if.h> 36ec16227eSAndy Grover #include <linux/netdevice.h> 37ec16227eSAndy Grover #include <linux/inetdevice.h> 38ec16227eSAndy Grover #include <linux/if_arp.h> 39ec16227eSAndy Grover #include <linux/delay.h> 405a0e3ad6STejun Heo #include <linux/slab.h> 413a9a231dSPaul Gortmaker #include <linux/module.h> 421e2b44e7SKa-Cheong Poon #include <net/addrconf.h> 43ec16227eSAndy Grover 440cb43965SSowmini Varadhan #include "rds_single_path.h" 45ec16227eSAndy Grover #include "rds.h" 46ec16227eSAndy Grover #include "ib.h" 47f6df683fSsantosh.shilimkar@oracle.com #include "ib_mr.h" 48ec16227eSAndy Grover 494f7bfb39SZhu Yanjun static unsigned int rds_ib_mr_1m_pool_size = RDS_MR_1M_POOL_SIZE; 504f7bfb39SZhu Yanjun static unsigned int rds_ib_mr_8k_pool_size = RDS_MR_8K_POOL_SIZE; 513ba23adeSAndy Grover unsigned int rds_ib_retry_count = RDS_IB_DEFAULT_RETRY_COUNT; 52ebeeb1adSSowmini Varadhan static atomic_t rds_ib_unloading; 53ec16227eSAndy Grover 54f6df683fSsantosh.shilimkar@oracle.com module_param(rds_ib_mr_1m_pool_size, int, 0444); 55f6df683fSsantosh.shilimkar@oracle.com MODULE_PARM_DESC(rds_ib_mr_1m_pool_size, " Max number of 1M mr per HCA"); 56f6df683fSsantosh.shilimkar@oracle.com module_param(rds_ib_mr_8k_pool_size, int, 0444); 57f6df683fSsantosh.shilimkar@oracle.com MODULE_PARM_DESC(rds_ib_mr_8k_pool_size, " Max number of 8K mr per HCA"); 583ba23adeSAndy Grover module_param(rds_ib_retry_count, int, 0444); 593ba23adeSAndy Grover MODULE_PARM_DESC(rds_ib_retry_count, " Number of hw retries before reporting an error"); 60ec16227eSAndy Grover 61ea819867SZach Brown /* 62ea819867SZach Brown * we have a clumsy combination of RCU and a rwsem protecting this list 63ea819867SZach Brown * because it is used both in the get_mr fast path and while blocking in 64ea819867SZach Brown * the FMR flushing path. 65ea819867SZach Brown */ 66ea819867SZach Brown DECLARE_RWSEM(rds_ib_devices_lock); 67ec16227eSAndy Grover struct list_head rds_ib_devices; 68ec16227eSAndy Grover 69745cbccaSAndy Grover /* NOTE: if also grabbing ibdev lock, grab this first */ 70ec16227eSAndy Grover DEFINE_SPINLOCK(ib_nodev_conns_lock); 71ec16227eSAndy Grover LIST_HEAD(ib_nodev_conns); 72ec16227eSAndy Grover 73ff51bf84Sstephen hemminger static void rds_ib_nodev_connect(void) 74fc19de38SZach Brown { 75fc19de38SZach Brown struct rds_ib_connection *ic; 76fc19de38SZach Brown 77fc19de38SZach Brown spin_lock(&ib_nodev_conns_lock); 78fc19de38SZach Brown list_for_each_entry(ic, &ib_nodev_conns, ib_node) 79fc19de38SZach Brown rds_conn_connect_if_down(ic->conn); 80fc19de38SZach Brown spin_unlock(&ib_nodev_conns_lock); 81fc19de38SZach Brown } 82fc19de38SZach Brown 83ff51bf84Sstephen hemminger static void rds_ib_dev_shutdown(struct rds_ib_device *rds_ibdev) 84fc19de38SZach Brown { 85fc19de38SZach Brown struct rds_ib_connection *ic; 86fc19de38SZach Brown unsigned long flags; 87fc19de38SZach Brown 88fc19de38SZach Brown spin_lock_irqsave(&rds_ibdev->spinlock, flags); 89fc19de38SZach Brown list_for_each_entry(ic, &rds_ibdev->conn_list, ib_node) 90fc19de38SZach Brown rds_conn_drop(ic->conn); 91fc19de38SZach Brown spin_unlock_irqrestore(&rds_ibdev->spinlock, flags); 92fc19de38SZach Brown } 93fc19de38SZach Brown 943e0249f9SZach Brown /* 953e0249f9SZach Brown * rds_ib_destroy_mr_pool() blocks on a few things and mrs drop references 963e0249f9SZach Brown * from interrupt context so we push freing off into a work struct in krdsd. 973e0249f9SZach Brown */ 983e0249f9SZach Brown static void rds_ib_dev_free(struct work_struct *work) 993e0249f9SZach Brown { 1003e0249f9SZach Brown struct rds_ib_ipaddr *i_ipaddr, *i_next; 1013e0249f9SZach Brown struct rds_ib_device *rds_ibdev = container_of(work, 1023e0249f9SZach Brown struct rds_ib_device, free_work); 1033e0249f9SZach Brown 10406766513SSantosh Shilimkar if (rds_ibdev->mr_8k_pool) 10506766513SSantosh Shilimkar rds_ib_destroy_mr_pool(rds_ibdev->mr_8k_pool); 10606766513SSantosh Shilimkar if (rds_ibdev->mr_1m_pool) 10706766513SSantosh Shilimkar rds_ib_destroy_mr_pool(rds_ibdev->mr_1m_pool); 1083e0249f9SZach Brown if (rds_ibdev->pd) 1093e0249f9SZach Brown ib_dealloc_pd(rds_ibdev->pd); 1103e0249f9SZach Brown 1113e0249f9SZach Brown list_for_each_entry_safe(i_ipaddr, i_next, &rds_ibdev->ipaddr_list, list) { 1123e0249f9SZach Brown list_del(&i_ipaddr->list); 1133e0249f9SZach Brown kfree(i_ipaddr); 1143e0249f9SZach Brown } 1153e0249f9SZach Brown 116be2f76eaSSantosh Shilimkar kfree(rds_ibdev->vector_load); 117be2f76eaSSantosh Shilimkar 1183e0249f9SZach Brown kfree(rds_ibdev); 1193e0249f9SZach Brown } 1203e0249f9SZach Brown 1213e0249f9SZach Brown void rds_ib_dev_put(struct rds_ib_device *rds_ibdev) 1223e0249f9SZach Brown { 12350d61ff7SReshetova, Elena BUG_ON(refcount_read(&rds_ibdev->refcount) == 0); 12450d61ff7SReshetova, Elena if (refcount_dec_and_test(&rds_ibdev->refcount)) 1253e0249f9SZach Brown queue_work(rds_wq, &rds_ibdev->free_work); 1263e0249f9SZach Brown } 1273e0249f9SZach Brown 128ff51bf84Sstephen hemminger static void rds_ib_add_one(struct ib_device *device) 129ec16227eSAndy Grover { 130ec16227eSAndy Grover struct rds_ib_device *rds_ibdev; 1319dff9936SAvinash Repaka bool has_fr, has_fmr; 132ec16227eSAndy Grover 133ec16227eSAndy Grover /* Only handle IB (no iWARP) devices */ 134ec16227eSAndy Grover if (device->node_type != RDMA_NODE_IB_CA) 135ec16227eSAndy Grover return; 136ec16227eSAndy Grover 1373e0249f9SZach Brown rds_ibdev = kzalloc_node(sizeof(struct rds_ib_device), GFP_KERNEL, 1383e0249f9SZach Brown ibdev_to_node(device)); 139ec16227eSAndy Grover if (!rds_ibdev) 1400353261cSOr Gerlitz return; 141ec16227eSAndy Grover 142ec16227eSAndy Grover spin_lock_init(&rds_ibdev->spinlock); 14350d61ff7SReshetova, Elena refcount_set(&rds_ibdev->refcount, 1); 1443e0249f9SZach Brown INIT_WORK(&rds_ibdev->free_work, rds_ib_dev_free); 145ec16227eSAndy Grover 1460353261cSOr Gerlitz rds_ibdev->max_wrs = device->attrs.max_qp_wr; 14733023fb8SSteve Wise rds_ibdev->max_sge = min(device->attrs.max_send_sge, RDS_IB_MAX_SGE); 148ec16227eSAndy Grover 1499dff9936SAvinash Repaka has_fr = (device->attrs.device_cap_flags & 1502cb2912dSsantosh.shilimkar@oracle.com IB_DEVICE_MEM_MGT_EXTENSIONS); 151*3023a1e9SKamal Heib has_fmr = (device->ops.alloc_fmr && device->ops.dealloc_fmr && 152*3023a1e9SKamal Heib device->ops.map_phys_fmr && device->ops.unmap_fmr); 1539dff9936SAvinash Repaka rds_ibdev->use_fastreg = (has_fr && !has_fmr); 1542cb2912dSsantosh.shilimkar@oracle.com 1550353261cSOr Gerlitz rds_ibdev->fmr_max_remaps = device->attrs.max_map_per_fmr?: 32; 156f6df683fSsantosh.shilimkar@oracle.com rds_ibdev->max_1m_mrs = device->attrs.max_mr ? 1570353261cSOr Gerlitz min_t(unsigned int, (device->attrs.max_mr / 2), 158f6df683fSsantosh.shilimkar@oracle.com rds_ib_mr_1m_pool_size) : rds_ib_mr_1m_pool_size; 15906766513SSantosh Shilimkar 160f6df683fSsantosh.shilimkar@oracle.com rds_ibdev->max_8k_mrs = device->attrs.max_mr ? 1610353261cSOr Gerlitz min_t(unsigned int, ((device->attrs.max_mr / 2) * RDS_MR_8K_SCALE), 162f6df683fSsantosh.shilimkar@oracle.com rds_ib_mr_8k_pool_size) : rds_ib_mr_8k_pool_size; 163ec16227eSAndy Grover 1640353261cSOr Gerlitz rds_ibdev->max_initiator_depth = device->attrs.max_qp_init_rd_atom; 1650353261cSOr Gerlitz rds_ibdev->max_responder_resources = device->attrs.max_qp_rd_atom; 16640589e74SAndy Grover 1676396bb22SKees Cook rds_ibdev->vector_load = kcalloc(device->num_comp_vectors, 1686396bb22SKees Cook sizeof(int), 169be2f76eaSSantosh Shilimkar GFP_KERNEL); 170be2f76eaSSantosh Shilimkar if (!rds_ibdev->vector_load) { 171be2f76eaSSantosh Shilimkar pr_err("RDS/IB: %s failed to allocate vector memory\n", 172be2f76eaSSantosh Shilimkar __func__); 173be2f76eaSSantosh Shilimkar goto put_dev; 174be2f76eaSSantosh Shilimkar } 175be2f76eaSSantosh Shilimkar 176ec16227eSAndy Grover rds_ibdev->dev = device; 177ed082d36SChristoph Hellwig rds_ibdev->pd = ib_alloc_pd(device, 0); 1783e0249f9SZach Brown if (IS_ERR(rds_ibdev->pd)) { 1793e0249f9SZach Brown rds_ibdev->pd = NULL; 1803e0249f9SZach Brown goto put_dev; 1813e0249f9SZach Brown } 182ec16227eSAndy Grover 18306766513SSantosh Shilimkar rds_ibdev->mr_1m_pool = 18406766513SSantosh Shilimkar rds_ib_create_mr_pool(rds_ibdev, RDS_IB_MR_1M_POOL); 18506766513SSantosh Shilimkar if (IS_ERR(rds_ibdev->mr_1m_pool)) { 18606766513SSantosh Shilimkar rds_ibdev->mr_1m_pool = NULL; 1873e0249f9SZach Brown goto put_dev; 188ec16227eSAndy Grover } 189ec16227eSAndy Grover 19006766513SSantosh Shilimkar rds_ibdev->mr_8k_pool = 19106766513SSantosh Shilimkar rds_ib_create_mr_pool(rds_ibdev, RDS_IB_MR_8K_POOL); 19206766513SSantosh Shilimkar if (IS_ERR(rds_ibdev->mr_8k_pool)) { 19306766513SSantosh Shilimkar rds_ibdev->mr_8k_pool = NULL; 19406766513SSantosh Shilimkar goto put_dev; 19506766513SSantosh Shilimkar } 19606766513SSantosh Shilimkar 197f6df683fSsantosh.shilimkar@oracle.com rdsdebug("RDS/IB: max_mr = %d, max_wrs = %d, max_sge = %d, fmr_max_remaps = %d, max_1m_mrs = %d, max_8k_mrs = %d\n", 1980353261cSOr Gerlitz device->attrs.max_fmr, rds_ibdev->max_wrs, rds_ibdev->max_sge, 199f6df683fSsantosh.shilimkar@oracle.com rds_ibdev->fmr_max_remaps, rds_ibdev->max_1m_mrs, 200f6df683fSsantosh.shilimkar@oracle.com rds_ibdev->max_8k_mrs); 20106766513SSantosh Shilimkar 2022cb2912dSsantosh.shilimkar@oracle.com pr_info("RDS/IB: %s: %s supported and preferred\n", 2032cb2912dSsantosh.shilimkar@oracle.com device->name, 2042cb2912dSsantosh.shilimkar@oracle.com rds_ibdev->use_fastreg ? "FRMR" : "FMR"); 2052cb2912dSsantosh.shilimkar@oracle.com 206ec16227eSAndy Grover INIT_LIST_HEAD(&rds_ibdev->ipaddr_list); 207ec16227eSAndy Grover INIT_LIST_HEAD(&rds_ibdev->conn_list); 208ea819867SZach Brown 209ea819867SZach Brown down_write(&rds_ib_devices_lock); 210ea819867SZach Brown list_add_tail_rcu(&rds_ibdev->list, &rds_ib_devices); 211ea819867SZach Brown up_write(&rds_ib_devices_lock); 21250d61ff7SReshetova, Elena refcount_inc(&rds_ibdev->refcount); 213ec16227eSAndy Grover 214ec16227eSAndy Grover ib_set_client_data(device, &rds_ib_client, rds_ibdev); 21550d61ff7SReshetova, Elena refcount_inc(&rds_ibdev->refcount); 216ec16227eSAndy Grover 217fc19de38SZach Brown rds_ib_nodev_connect(); 218fc19de38SZach Brown 2193e0249f9SZach Brown put_dev: 2203e0249f9SZach Brown rds_ib_dev_put(rds_ibdev); 221ec16227eSAndy Grover } 222ec16227eSAndy Grover 2233e0249f9SZach Brown /* 2243e0249f9SZach Brown * New connections use this to find the device to associate with the 2253e0249f9SZach Brown * connection. It's not in the fast path so we're not concerned about the 2263e0249f9SZach Brown * performance of the IB call. (As of this writing, it uses an interrupt 2273e0249f9SZach Brown * blocking spinlock to serialize walking a per-device list of all registered 2283e0249f9SZach Brown * clients.) 2293e0249f9SZach Brown * 2303e0249f9SZach Brown * RCU is used to handle incoming connections racing with device teardown. 2313e0249f9SZach Brown * Rather than use a lock to serialize removal from the client_data and 2323e0249f9SZach Brown * getting a new reference, we use an RCU grace period. The destruction 2333e0249f9SZach Brown * path removes the device from client_data and then waits for all RCU 2343e0249f9SZach Brown * readers to finish. 2353e0249f9SZach Brown * 2363e0249f9SZach Brown * A new connection can get NULL from this if its arriving on a 2373e0249f9SZach Brown * device that is in the process of being removed. 2383e0249f9SZach Brown */ 2393e0249f9SZach Brown struct rds_ib_device *rds_ib_get_client_data(struct ib_device *device) 2403e0249f9SZach Brown { 2413e0249f9SZach Brown struct rds_ib_device *rds_ibdev; 2423e0249f9SZach Brown 2433e0249f9SZach Brown rcu_read_lock(); 2443e0249f9SZach Brown rds_ibdev = ib_get_client_data(device, &rds_ib_client); 2453e0249f9SZach Brown if (rds_ibdev) 24650d61ff7SReshetova, Elena refcount_inc(&rds_ibdev->refcount); 2473e0249f9SZach Brown rcu_read_unlock(); 2483e0249f9SZach Brown return rds_ibdev; 2493e0249f9SZach Brown } 2503e0249f9SZach Brown 2513e0249f9SZach Brown /* 2523e0249f9SZach Brown * The IB stack is letting us know that a device is going away. This can 2533e0249f9SZach Brown * happen if the underlying HCA driver is removed or if PCI hotplug is removing 2543e0249f9SZach Brown * the pci function, for example. 2553e0249f9SZach Brown * 2563e0249f9SZach Brown * This can be called at any time and can be racing with any other RDS path. 2573e0249f9SZach Brown */ 2587c1eb45aSHaggai Eran static void rds_ib_remove_one(struct ib_device *device, void *client_data) 259ec16227eSAndy Grover { 2607c1eb45aSHaggai Eran struct rds_ib_device *rds_ibdev = client_data; 261ec16227eSAndy Grover 262ec16227eSAndy Grover if (!rds_ibdev) 263ec16227eSAndy Grover return; 264ec16227eSAndy Grover 265fc19de38SZach Brown rds_ib_dev_shutdown(rds_ibdev); 266ec16227eSAndy Grover 267ea819867SZach Brown /* stop connection attempts from getting a reference to this device. */ 2683e0249f9SZach Brown ib_set_client_data(device, &rds_ib_client, NULL); 269ea819867SZach Brown 270ea819867SZach Brown down_write(&rds_ib_devices_lock); 271ea819867SZach Brown list_del_rcu(&rds_ibdev->list); 272ea819867SZach Brown up_write(&rds_ib_devices_lock); 273ea819867SZach Brown 274ea819867SZach Brown /* 275ea819867SZach Brown * This synchronize rcu is waiting for readers of both the ib 276ea819867SZach Brown * client data and the devices list to finish before we drop 277ea819867SZach Brown * both of those references. 278ea819867SZach Brown */ 2793e0249f9SZach Brown synchronize_rcu(); 2803e0249f9SZach Brown rds_ib_dev_put(rds_ibdev); 2813e0249f9SZach Brown rds_ib_dev_put(rds_ibdev); 282ec16227eSAndy Grover } 283ec16227eSAndy Grover 284ec16227eSAndy Grover struct ib_client rds_ib_client = { 285ec16227eSAndy Grover .name = "rds_ib", 286ec16227eSAndy Grover .add = rds_ib_add_one, 287ec16227eSAndy Grover .remove = rds_ib_remove_one 288ec16227eSAndy Grover }; 289ec16227eSAndy Grover 290ec16227eSAndy Grover static int rds_ib_conn_info_visitor(struct rds_connection *conn, 291ec16227eSAndy Grover void *buffer) 292ec16227eSAndy Grover { 293ec16227eSAndy Grover struct rds_info_rdma_connection *iinfo = buffer; 294ec16227eSAndy Grover struct rds_ib_connection *ic; 295ec16227eSAndy Grover 296ec16227eSAndy Grover /* We will only ever look at IB transports */ 297ec16227eSAndy Grover if (conn->c_trans != &rds_ib_transport) 298ec16227eSAndy Grover return 0; 2991e2b44e7SKa-Cheong Poon if (conn->c_isv6) 3001e2b44e7SKa-Cheong Poon return 0; 301ec16227eSAndy Grover 302eee2fa6aSKa-Cheong Poon iinfo->src_addr = conn->c_laddr.s6_addr32[3]; 303eee2fa6aSKa-Cheong Poon iinfo->dst_addr = conn->c_faddr.s6_addr32[3]; 304ec16227eSAndy Grover 305ec16227eSAndy Grover memset(&iinfo->src_gid, 0, sizeof(iinfo->src_gid)); 306ec16227eSAndy Grover memset(&iinfo->dst_gid, 0, sizeof(iinfo->dst_gid)); 307ec16227eSAndy Grover if (rds_conn_state(conn) == RDS_CONN_UP) { 308ec16227eSAndy Grover struct rds_ib_device *rds_ibdev; 309ec16227eSAndy Grover 310ec16227eSAndy Grover ic = conn->c_transport_data; 311ec16227eSAndy Grover 312a2e812eaSParav Pandit rdma_read_gids(ic->i_cm_id, (union ib_gid *)&iinfo->src_gid, 313a2e812eaSParav Pandit (union ib_gid *)&iinfo->dst_gid); 314ec16227eSAndy Grover 3153e0249f9SZach Brown rds_ibdev = ic->rds_ibdev; 316ec16227eSAndy Grover iinfo->max_send_wr = ic->i_send_ring.w_nr; 317ec16227eSAndy Grover iinfo->max_recv_wr = ic->i_recv_ring.w_nr; 318ec16227eSAndy Grover iinfo->max_send_sge = rds_ibdev->max_sge; 319ec16227eSAndy Grover rds_ib_get_mr_info(rds_ibdev, iinfo); 320ec16227eSAndy Grover } 321ec16227eSAndy Grover return 1; 322ec16227eSAndy Grover } 323ec16227eSAndy Grover 324e65d4d96SKa-Cheong Poon #if IS_ENABLED(CONFIG_IPV6) 325b7ff8b10SKa-Cheong Poon /* IPv6 version of rds_ib_conn_info_visitor(). */ 326b7ff8b10SKa-Cheong Poon static int rds6_ib_conn_info_visitor(struct rds_connection *conn, 327b7ff8b10SKa-Cheong Poon void *buffer) 328b7ff8b10SKa-Cheong Poon { 329b7ff8b10SKa-Cheong Poon struct rds6_info_rdma_connection *iinfo6 = buffer; 330b7ff8b10SKa-Cheong Poon struct rds_ib_connection *ic; 331b7ff8b10SKa-Cheong Poon 332b7ff8b10SKa-Cheong Poon /* We will only ever look at IB transports */ 333b7ff8b10SKa-Cheong Poon if (conn->c_trans != &rds_ib_transport) 334b7ff8b10SKa-Cheong Poon return 0; 335b7ff8b10SKa-Cheong Poon 336b7ff8b10SKa-Cheong Poon iinfo6->src_addr = conn->c_laddr; 337b7ff8b10SKa-Cheong Poon iinfo6->dst_addr = conn->c_faddr; 338b7ff8b10SKa-Cheong Poon 339b7ff8b10SKa-Cheong Poon memset(&iinfo6->src_gid, 0, sizeof(iinfo6->src_gid)); 340b7ff8b10SKa-Cheong Poon memset(&iinfo6->dst_gid, 0, sizeof(iinfo6->dst_gid)); 341b7ff8b10SKa-Cheong Poon 342b7ff8b10SKa-Cheong Poon if (rds_conn_state(conn) == RDS_CONN_UP) { 343b7ff8b10SKa-Cheong Poon struct rds_ib_device *rds_ibdev; 344b7ff8b10SKa-Cheong Poon 345b7ff8b10SKa-Cheong Poon ic = conn->c_transport_data; 34653ae914dSZhu Yanjun rdma_read_gids(ic->i_cm_id, (union ib_gid *)&iinfo6->src_gid, 347b7ff8b10SKa-Cheong Poon (union ib_gid *)&iinfo6->dst_gid); 348b7ff8b10SKa-Cheong Poon rds_ibdev = ic->rds_ibdev; 349b7ff8b10SKa-Cheong Poon iinfo6->max_send_wr = ic->i_send_ring.w_nr; 350b7ff8b10SKa-Cheong Poon iinfo6->max_recv_wr = ic->i_recv_ring.w_nr; 351b7ff8b10SKa-Cheong Poon iinfo6->max_send_sge = rds_ibdev->max_sge; 352b7ff8b10SKa-Cheong Poon rds6_ib_get_mr_info(rds_ibdev, iinfo6); 353b7ff8b10SKa-Cheong Poon } 354b7ff8b10SKa-Cheong Poon return 1; 355b7ff8b10SKa-Cheong Poon } 356e65d4d96SKa-Cheong Poon #endif 357b7ff8b10SKa-Cheong Poon 358ec16227eSAndy Grover static void rds_ib_ic_info(struct socket *sock, unsigned int len, 359ec16227eSAndy Grover struct rds_info_iterator *iter, 360ec16227eSAndy Grover struct rds_info_lengths *lens) 361ec16227eSAndy Grover { 362f1cb9d68SSalvatore Mesoraca u64 buffer[(sizeof(struct rds_info_rdma_connection) + 7) / 8]; 363f1cb9d68SSalvatore Mesoraca 364ec16227eSAndy Grover rds_for_each_conn_info(sock, len, iter, lens, 365ec16227eSAndy Grover rds_ib_conn_info_visitor, 366f1cb9d68SSalvatore Mesoraca buffer, 367ec16227eSAndy Grover sizeof(struct rds_info_rdma_connection)); 368ec16227eSAndy Grover } 369ec16227eSAndy Grover 370e65d4d96SKa-Cheong Poon #if IS_ENABLED(CONFIG_IPV6) 371b7ff8b10SKa-Cheong Poon /* IPv6 version of rds_ib_ic_info(). */ 372b7ff8b10SKa-Cheong Poon static void rds6_ib_ic_info(struct socket *sock, unsigned int len, 373b7ff8b10SKa-Cheong Poon struct rds_info_iterator *iter, 374b7ff8b10SKa-Cheong Poon struct rds_info_lengths *lens) 375b7ff8b10SKa-Cheong Poon { 376b7ff8b10SKa-Cheong Poon u64 buffer[(sizeof(struct rds6_info_rdma_connection) + 7) / 8]; 377b7ff8b10SKa-Cheong Poon 378b7ff8b10SKa-Cheong Poon rds_for_each_conn_info(sock, len, iter, lens, 379b7ff8b10SKa-Cheong Poon rds6_ib_conn_info_visitor, 380b7ff8b10SKa-Cheong Poon buffer, 381b7ff8b10SKa-Cheong Poon sizeof(struct rds6_info_rdma_connection)); 382b7ff8b10SKa-Cheong Poon } 383e65d4d96SKa-Cheong Poon #endif 384ec16227eSAndy Grover 385ec16227eSAndy Grover /* 386ec16227eSAndy Grover * Early RDS/IB was built to only bind to an address if there is an IPoIB 387ec16227eSAndy Grover * device with that address set. 388ec16227eSAndy Grover * 389ec16227eSAndy Grover * If it were me, I'd advocate for something more flexible. Sending and 390ec16227eSAndy Grover * receiving should be device-agnostic. Transports would try and maintain 391ec16227eSAndy Grover * connections between peers who have messages queued. Userspace would be 392ec16227eSAndy Grover * allowed to influence which paths have priority. We could call userspace 393ec16227eSAndy Grover * asserting this policy "routing". 394ec16227eSAndy Grover */ 395eee2fa6aSKa-Cheong Poon static int rds_ib_laddr_check(struct net *net, const struct in6_addr *addr, 396eee2fa6aSKa-Cheong Poon __u32 scope_id) 397ec16227eSAndy Grover { 398ec16227eSAndy Grover int ret; 399ec16227eSAndy Grover struct rdma_cm_id *cm_id; 400e65d4d96SKa-Cheong Poon #if IS_ENABLED(CONFIG_IPV6) 4011e2b44e7SKa-Cheong Poon struct sockaddr_in6 sin6; 402e65d4d96SKa-Cheong Poon #endif 403ec16227eSAndy Grover struct sockaddr_in sin; 4041e2b44e7SKa-Cheong Poon struct sockaddr *sa; 4051e2b44e7SKa-Cheong Poon bool isv4; 406ec16227eSAndy Grover 4071e2b44e7SKa-Cheong Poon isv4 = ipv6_addr_v4mapped(addr); 408ec16227eSAndy Grover /* Create a CMA ID and try to bind it. This catches both 409ec16227eSAndy Grover * IB and iWARP capable NICs. 410ec16227eSAndy Grover */ 4112c0aa086SGuanglei Li cm_id = rdma_create_id(&init_net, rds_rdma_cm_event_handler, 4122c0aa086SGuanglei Li NULL, RDMA_PS_TCP, IB_QPT_RC); 41394713babSDan Carpenter if (IS_ERR(cm_id)) 41494713babSDan Carpenter return PTR_ERR(cm_id); 415ec16227eSAndy Grover 4161e2b44e7SKa-Cheong Poon if (isv4) { 417ec16227eSAndy Grover memset(&sin, 0, sizeof(sin)); 418ec16227eSAndy Grover sin.sin_family = AF_INET; 419eee2fa6aSKa-Cheong Poon sin.sin_addr.s_addr = addr->s6_addr32[3]; 4201e2b44e7SKa-Cheong Poon sa = (struct sockaddr *)&sin; 4211e2b44e7SKa-Cheong Poon } else { 422e65d4d96SKa-Cheong Poon #if IS_ENABLED(CONFIG_IPV6) 4231e2b44e7SKa-Cheong Poon memset(&sin6, 0, sizeof(sin6)); 4241e2b44e7SKa-Cheong Poon sin6.sin6_family = AF_INET6; 4251e2b44e7SKa-Cheong Poon sin6.sin6_addr = *addr; 4261e2b44e7SKa-Cheong Poon sin6.sin6_scope_id = scope_id; 4271e2b44e7SKa-Cheong Poon sa = (struct sockaddr *)&sin6; 4281e2b44e7SKa-Cheong Poon 4291e2b44e7SKa-Cheong Poon /* XXX Do a special IPv6 link local address check here. The 4301e2b44e7SKa-Cheong Poon * reason is that rdma_bind_addr() always succeeds with IPv6 4311e2b44e7SKa-Cheong Poon * link local address regardless it is indeed configured in a 4321e2b44e7SKa-Cheong Poon * system. 4331e2b44e7SKa-Cheong Poon */ 4341e2b44e7SKa-Cheong Poon if (ipv6_addr_type(addr) & IPV6_ADDR_LINKLOCAL) { 4351e2b44e7SKa-Cheong Poon struct net_device *dev; 4361e2b44e7SKa-Cheong Poon 437e65d4d96SKa-Cheong Poon if (scope_id == 0) { 438e65d4d96SKa-Cheong Poon ret = -EADDRNOTAVAIL; 439e65d4d96SKa-Cheong Poon goto out; 440e65d4d96SKa-Cheong Poon } 4411e2b44e7SKa-Cheong Poon 4421e2b44e7SKa-Cheong Poon /* Use init_net for now as RDS is not network 4431e2b44e7SKa-Cheong Poon * name space aware. 4441e2b44e7SKa-Cheong Poon */ 4451e2b44e7SKa-Cheong Poon dev = dev_get_by_index(&init_net, scope_id); 446e65d4d96SKa-Cheong Poon if (!dev) { 447e65d4d96SKa-Cheong Poon ret = -EADDRNOTAVAIL; 448e65d4d96SKa-Cheong Poon goto out; 449e65d4d96SKa-Cheong Poon } 4501e2b44e7SKa-Cheong Poon if (!ipv6_chk_addr(&init_net, addr, dev, 1)) { 4511e2b44e7SKa-Cheong Poon dev_put(dev); 452e65d4d96SKa-Cheong Poon ret = -EADDRNOTAVAIL; 453e65d4d96SKa-Cheong Poon goto out; 4541e2b44e7SKa-Cheong Poon } 4551e2b44e7SKa-Cheong Poon dev_put(dev); 4561e2b44e7SKa-Cheong Poon } 457e65d4d96SKa-Cheong Poon #else 458e65d4d96SKa-Cheong Poon ret = -EADDRNOTAVAIL; 459e65d4d96SKa-Cheong Poon goto out; 460e65d4d96SKa-Cheong Poon #endif 4611e2b44e7SKa-Cheong Poon } 462ec16227eSAndy Grover 463ec16227eSAndy Grover /* rdma_bind_addr will only succeed for IB & iWARP devices */ 4641e2b44e7SKa-Cheong Poon ret = rdma_bind_addr(cm_id, sa); 465ec16227eSAndy Grover /* due to this, we will claim to support iWARP devices unless we 466ec16227eSAndy Grover check node_type. */ 467c2349758SSasha Levin if (ret || !cm_id->device || 468c2349758SSasha Levin cm_id->device->node_type != RDMA_NODE_IB_CA) 469ec16227eSAndy Grover ret = -EADDRNOTAVAIL; 470ec16227eSAndy Grover 4711e2b44e7SKa-Cheong Poon rdsdebug("addr %pI6c%%%u ret %d node type %d\n", 4721e2b44e7SKa-Cheong Poon addr, scope_id, ret, 473ec16227eSAndy Grover cm_id->device ? cm_id->device->node_type : -1); 474ec16227eSAndy Grover 475e65d4d96SKa-Cheong Poon out: 476ec16227eSAndy Grover rdma_destroy_id(cm_id); 477ec16227eSAndy Grover 478ec16227eSAndy Grover return ret; 479ec16227eSAndy Grover } 480ec16227eSAndy Grover 48124fa163aSZach Brown static void rds_ib_unregister_client(void) 48224fa163aSZach Brown { 48324fa163aSZach Brown ib_unregister_client(&rds_ib_client); 48424fa163aSZach Brown /* wait for rds_ib_dev_free() to complete */ 48524fa163aSZach Brown flush_workqueue(rds_wq); 48624fa163aSZach Brown } 48724fa163aSZach Brown 488ebeeb1adSSowmini Varadhan static void rds_ib_set_unloading(void) 489ebeeb1adSSowmini Varadhan { 490ebeeb1adSSowmini Varadhan atomic_set(&rds_ib_unloading, 1); 491ebeeb1adSSowmini Varadhan } 492ebeeb1adSSowmini Varadhan 493ebeeb1adSSowmini Varadhan static bool rds_ib_is_unloading(struct rds_connection *conn) 494ebeeb1adSSowmini Varadhan { 495ebeeb1adSSowmini Varadhan struct rds_conn_path *cp = &conn->c_path[0]; 496ebeeb1adSSowmini Varadhan 497ebeeb1adSSowmini Varadhan return (test_bit(RDS_DESTROY_PENDING, &cp->cp_flags) || 498ebeeb1adSSowmini Varadhan atomic_read(&rds_ib_unloading) != 0); 499ebeeb1adSSowmini Varadhan } 500ebeeb1adSSowmini Varadhan 501ec16227eSAndy Grover void rds_ib_exit(void) 502ec16227eSAndy Grover { 503ebeeb1adSSowmini Varadhan rds_ib_set_unloading(); 504ebeeb1adSSowmini Varadhan synchronize_rcu(); 505ec16227eSAndy Grover rds_info_deregister_func(RDS_INFO_IB_CONNECTIONS, rds_ib_ic_info); 506e65d4d96SKa-Cheong Poon #if IS_ENABLED(CONFIG_IPV6) 507b7ff8b10SKa-Cheong Poon rds_info_deregister_func(RDS6_INFO_IB_CONNECTIONS, rds6_ib_ic_info); 508e65d4d96SKa-Cheong Poon #endif 50924fa163aSZach Brown rds_ib_unregister_client(); 5108aeb1ba6SZach Brown rds_ib_destroy_nodev_conns(); 511ec16227eSAndy Grover rds_ib_sysctl_exit(); 512ec16227eSAndy Grover rds_ib_recv_exit(); 513ec16227eSAndy Grover rds_trans_unregister(&rds_ib_transport); 514f6df683fSsantosh.shilimkar@oracle.com rds_ib_mr_exit(); 515ec16227eSAndy Grover } 516ec16227eSAndy Grover 517ec16227eSAndy Grover struct rds_transport rds_ib_transport = { 518ec16227eSAndy Grover .laddr_check = rds_ib_laddr_check, 519226f7a7dSSowmini Varadhan .xmit_path_complete = rds_ib_xmit_path_complete, 520ec16227eSAndy Grover .xmit = rds_ib_xmit, 521ec16227eSAndy Grover .xmit_rdma = rds_ib_xmit_rdma, 52215133f6eSAndy Grover .xmit_atomic = rds_ib_xmit_atomic, 5232da43c4aSSowmini Varadhan .recv_path = rds_ib_recv_path, 524ec16227eSAndy Grover .conn_alloc = rds_ib_conn_alloc, 525ec16227eSAndy Grover .conn_free = rds_ib_conn_free, 526b04e8554SSowmini Varadhan .conn_path_connect = rds_ib_conn_path_connect, 527226f7a7dSSowmini Varadhan .conn_path_shutdown = rds_ib_conn_path_shutdown, 528ec16227eSAndy Grover .inc_copy_to_user = rds_ib_inc_copy_to_user, 529ec16227eSAndy Grover .inc_free = rds_ib_inc_free, 530ec16227eSAndy Grover .cm_initiate_connect = rds_ib_cm_initiate_connect, 531ec16227eSAndy Grover .cm_handle_connect = rds_ib_cm_handle_connect, 532ec16227eSAndy Grover .cm_connect_complete = rds_ib_cm_connect_complete, 533ec16227eSAndy Grover .stats_info_copy = rds_ib_stats_info_copy, 534ec16227eSAndy Grover .exit = rds_ib_exit, 535ec16227eSAndy Grover .get_mr = rds_ib_get_mr, 536ec16227eSAndy Grover .sync_mr = rds_ib_sync_mr, 537ec16227eSAndy Grover .free_mr = rds_ib_free_mr, 538ec16227eSAndy Grover .flush_mrs = rds_ib_flush_mrs, 539ec16227eSAndy Grover .t_owner = THIS_MODULE, 540ec16227eSAndy Grover .t_name = "infiniband", 541ebeeb1adSSowmini Varadhan .t_unloading = rds_ib_is_unloading, 542335776bdSAndy Grover .t_type = RDS_TRANS_IB 543ec16227eSAndy Grover }; 544ec16227eSAndy Grover 545ef87b7eaSZach Brown int rds_ib_init(void) 546ec16227eSAndy Grover { 547ec16227eSAndy Grover int ret; 548ec16227eSAndy Grover 549ec16227eSAndy Grover INIT_LIST_HEAD(&rds_ib_devices); 550ec16227eSAndy Grover 551f6df683fSsantosh.shilimkar@oracle.com ret = rds_ib_mr_init(); 552515e079dSZach Brown if (ret) 553c534a107STejun Heo goto out; 554515e079dSZach Brown 555ad1d7dc0Ssantosh.shilimkar@oracle.com ret = ib_register_client(&rds_ib_client); 556ad1d7dc0Ssantosh.shilimkar@oracle.com if (ret) 557f6df683fSsantosh.shilimkar@oracle.com goto out_mr_exit; 558ad1d7dc0Ssantosh.shilimkar@oracle.com 559ec16227eSAndy Grover ret = rds_ib_sysctl_init(); 560ec16227eSAndy Grover if (ret) 561ec16227eSAndy Grover goto out_ibreg; 562ec16227eSAndy Grover 563ec16227eSAndy Grover ret = rds_ib_recv_init(); 564ec16227eSAndy Grover if (ret) 565ec16227eSAndy Grover goto out_sysctl; 566ec16227eSAndy Grover 567a8d63a53SZhu Yanjun rds_trans_register(&rds_ib_transport); 568ec16227eSAndy Grover 569ec16227eSAndy Grover rds_info_register_func(RDS_INFO_IB_CONNECTIONS, rds_ib_ic_info); 570e65d4d96SKa-Cheong Poon #if IS_ENABLED(CONFIG_IPV6) 571b7ff8b10SKa-Cheong Poon rds_info_register_func(RDS6_INFO_IB_CONNECTIONS, rds6_ib_ic_info); 572e65d4d96SKa-Cheong Poon #endif 573ec16227eSAndy Grover 574ec16227eSAndy Grover goto out; 575ec16227eSAndy Grover 576ec16227eSAndy Grover out_sysctl: 577ec16227eSAndy Grover rds_ib_sysctl_exit(); 578ec16227eSAndy Grover out_ibreg: 57924fa163aSZach Brown rds_ib_unregister_client(); 580f6df683fSsantosh.shilimkar@oracle.com out_mr_exit: 581f6df683fSsantosh.shilimkar@oracle.com rds_ib_mr_exit(); 582ec16227eSAndy Grover out: 583ec16227eSAndy Grover return ret; 584ec16227eSAndy Grover } 585ec16227eSAndy Grover 586ec16227eSAndy Grover MODULE_LICENSE("GPL"); 587