1c0dd49bdSEiji Ota /* 2c0dd49bdSEiji Ota * CDDL HEADER START 3c0dd49bdSEiji Ota * 4c0dd49bdSEiji Ota * The contents of this file are subject to the terms of the 5c0dd49bdSEiji Ota * Common Development and Distribution License (the "License"). 6c0dd49bdSEiji Ota * You may not use this file except in compliance with the License. 7c0dd49bdSEiji Ota * 8c0dd49bdSEiji Ota * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9c0dd49bdSEiji Ota * or http://www.opensolaris.org/os/licensing. 10c0dd49bdSEiji Ota * See the License for the specific language governing permissions 11c0dd49bdSEiji Ota * and limitations under the License. 12c0dd49bdSEiji Ota * 13c0dd49bdSEiji Ota * When distributing Covered Code, include this CDDL HEADER in each 14c0dd49bdSEiji Ota * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15c0dd49bdSEiji Ota * If applicable, add the following below this CDDL HEADER, with the 16c0dd49bdSEiji Ota * fields enclosed by brackets "[]" replaced with your own identifying 17c0dd49bdSEiji Ota * information: Portions Copyright [yyyy] [name of copyright owner] 18c0dd49bdSEiji Ota * 19c0dd49bdSEiji Ota * CDDL HEADER END 20c0dd49bdSEiji Ota */ 21c0dd49bdSEiji Ota /* 22c0dd49bdSEiji Ota * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. 23c0dd49bdSEiji Ota */ 24c0dd49bdSEiji Ota #include <sys/types.h> 25c0dd49bdSEiji Ota #include <sys/stream.h> 26c0dd49bdSEiji Ota #include <sys/dlpi.h> 27c0dd49bdSEiji Ota #include <sys/stropts.h> 28c0dd49bdSEiji Ota #include <sys/strsun.h> 29c0dd49bdSEiji Ota #include <sys/sysmacros.h> 30c0dd49bdSEiji Ota #include <sys/strlog.h> 31c0dd49bdSEiji Ota #include <sys/ddi.h> 32c0dd49bdSEiji Ota #include <sys/cmn_err.h> 33c0dd49bdSEiji Ota #include <sys/socket.h> 34c0dd49bdSEiji Ota #include <net/if.h> 35c0dd49bdSEiji Ota #include <net/if_types.h> 36c0dd49bdSEiji Ota #include <netinet/in.h> 37c0dd49bdSEiji Ota #include <sys/ethernet.h> 38c0dd49bdSEiji Ota #include <inet/arp.h> 39c0dd49bdSEiji Ota #include <inet/ip.h> 40c0dd49bdSEiji Ota #include <inet/ip6.h> 41c0dd49bdSEiji Ota #include <inet/ip_ire.h> 42c0dd49bdSEiji Ota #include <inet/ip_if.h> 43c0dd49bdSEiji Ota #include <inet/ip_ftable.h> 44c0dd49bdSEiji Ota 45c0dd49bdSEiji Ota #include <sys/sunddi.h> 46c0dd49bdSEiji Ota #include <sys/ksynch.h> 47c0dd49bdSEiji Ota 48c0dd49bdSEiji Ota #include <sys/rds.h> 49c0dd49bdSEiji Ota #include <sys/socket.h> 50c0dd49bdSEiji Ota #include <sys/socketvar.h> 51c0dd49bdSEiji Ota #include <sys/sockio.h> 52c0dd49bdSEiji Ota #include <sys/sysmacros.h> 53c0dd49bdSEiji Ota #include <inet/common.h> 54c0dd49bdSEiji Ota #include <inet/ip.h> 55c0dd49bdSEiji Ota #include <net/if_types.h> 56c0dd49bdSEiji Ota 57c0dd49bdSEiji Ota #include <sys/ib/clients/rdsv3/rdsv3.h> 58c0dd49bdSEiji Ota #include <sys/ib/clients/rdsv3/rdma.h> 59c0dd49bdSEiji Ota #include <sys/ib/clients/rdsv3/ib.h> 60c0dd49bdSEiji Ota #include <sys/ib/clients/rdsv3/rdsv3_impl.h> 61c0dd49bdSEiji Ota #include <sys/ib/clients/rdsv3/rdsv3_debug.h> 62c0dd49bdSEiji Ota 63c0dd49bdSEiji Ota #include <sys/dls.h> 64c0dd49bdSEiji Ota #include <sys/mac.h> 65c0dd49bdSEiji Ota #include <sys/mac_client.h> 66c0dd49bdSEiji Ota #include <sys/mac_provider.h> 67c0dd49bdSEiji Ota #include <sys/mac_client_priv.h> 68c0dd49bdSEiji Ota 69c0dd49bdSEiji Ota ddi_taskq_t *rdsv3_taskq = NULL; 70c0dd49bdSEiji Ota extern kmem_cache_t *rdsv3_alloc_cache; 71c0dd49bdSEiji Ota 72c0dd49bdSEiji Ota extern unsigned int ip_ocsum(ushort_t *address, int halfword_count, 73c0dd49bdSEiji Ota unsigned int sum); 74c0dd49bdSEiji Ota 75c0dd49bdSEiji Ota /* 76c0dd49bdSEiji Ota * Check if the IP interface named by `lifrp' is RDS-capable. 77c0dd49bdSEiji Ota */ 78c0dd49bdSEiji Ota boolean_t 79c0dd49bdSEiji Ota rdsv3_capable_interface(struct lifreq *lifrp) 80c0dd49bdSEiji Ota { 81c0dd49bdSEiji Ota char ifname[LIFNAMSIZ]; 82c0dd49bdSEiji Ota char drv[MAXLINKNAMELEN]; 83c0dd49bdSEiji Ota uint_t ppa; 84c0dd49bdSEiji Ota char *cp; 85c0dd49bdSEiji Ota 86c0dd49bdSEiji Ota RDSV3_DPRINTF4("rdsv3_capable_interface", "Enter"); 87c0dd49bdSEiji Ota 88c0dd49bdSEiji Ota if (lifrp->lifr_type == IFT_IB) 89c0dd49bdSEiji Ota return (B_TRUE); 90c0dd49bdSEiji Ota 91c0dd49bdSEiji Ota /* 92c0dd49bdSEiji Ota * Strip off the logical interface portion before getting 93c0dd49bdSEiji Ota * intimate with the name. 94c0dd49bdSEiji Ota */ 95c0dd49bdSEiji Ota (void) strlcpy(ifname, lifrp->lifr_name, LIFNAMSIZ); 96c0dd49bdSEiji Ota if ((cp = strchr(ifname, ':')) != NULL) 97c0dd49bdSEiji Ota *cp = '\0'; 98c0dd49bdSEiji Ota 99c0dd49bdSEiji Ota if (strcmp("lo0", ifname) == 0) { 100c0dd49bdSEiji Ota /* 101c0dd49bdSEiji Ota * loopback is considered RDS-capable 102c0dd49bdSEiji Ota */ 103c0dd49bdSEiji Ota return (B_TRUE); 104c0dd49bdSEiji Ota } 105c0dd49bdSEiji Ota 106c0dd49bdSEiji Ota return (ddi_parse(ifname, drv, &ppa) == DDI_SUCCESS && 107c0dd49bdSEiji Ota rdsv3_if_lookup_by_name(drv)); 108c0dd49bdSEiji Ota } 109c0dd49bdSEiji Ota 110c0dd49bdSEiji Ota int 111c0dd49bdSEiji Ota rdsv3_do_ip_ioctl(ksocket_t so4, void **ipaddrs, int *size, int *nifs) 112c0dd49bdSEiji Ota { 113c0dd49bdSEiji Ota struct lifnum lifn; 114c0dd49bdSEiji Ota struct lifconf lifc; 115c0dd49bdSEiji Ota struct lifreq *lp, *rlp, lifr; 116c0dd49bdSEiji Ota int rval = 0; 117c0dd49bdSEiji Ota int numifs; 118c0dd49bdSEiji Ota int bufsize, rbufsize; 119c0dd49bdSEiji Ota void *buf, *rbuf; 120c0dd49bdSEiji Ota int i, j, n, rc; 121c0dd49bdSEiji Ota 122c0dd49bdSEiji Ota *ipaddrs = NULL; 123c0dd49bdSEiji Ota *size = 0; 124c0dd49bdSEiji Ota *nifs = 0; 125c0dd49bdSEiji Ota 126c0dd49bdSEiji Ota RDSV3_DPRINTF4("rdsv3_do_ip_ioctl", "Enter"); 127c0dd49bdSEiji Ota 128c0dd49bdSEiji Ota retry_count: 129c0dd49bdSEiji Ota /* snapshot the current number of interfaces */ 130c0dd49bdSEiji Ota lifn.lifn_family = PF_UNSPEC; 131c0dd49bdSEiji Ota lifn.lifn_flags = LIFC_NOXMIT | LIFC_TEMPORARY | LIFC_ALLZONES; 132c0dd49bdSEiji Ota lifn.lifn_count = 0; 133c0dd49bdSEiji Ota rval = ksocket_ioctl(so4, SIOCGLIFNUM, (intptr_t)&lifn, &rval, 134c0dd49bdSEiji Ota CRED()); 135c0dd49bdSEiji Ota if (rval != 0) { 136c0dd49bdSEiji Ota RDSV3_DPRINTF2("rdsv3_do_ip_ioctl", 137c0dd49bdSEiji Ota "ksocket_ioctl returned: %d", rval); 138c0dd49bdSEiji Ota return (rval); 139c0dd49bdSEiji Ota } 140c0dd49bdSEiji Ota 141c0dd49bdSEiji Ota numifs = lifn.lifn_count; 142c0dd49bdSEiji Ota if (numifs <= 0) { 143c0dd49bdSEiji Ota RDSV3_DPRINTF2("rdsv3_do_ip_ioctl", "No interfaces found"); 144c0dd49bdSEiji Ota return (0); 145c0dd49bdSEiji Ota } 146c0dd49bdSEiji Ota 147c0dd49bdSEiji Ota /* allocate extra room in case more interfaces appear */ 148c0dd49bdSEiji Ota numifs += 10; 149c0dd49bdSEiji Ota 150c0dd49bdSEiji Ota /* get the interface names and ip addresses */ 151c0dd49bdSEiji Ota bufsize = numifs * sizeof (struct lifreq); 152c0dd49bdSEiji Ota buf = kmem_alloc(bufsize, KM_SLEEP); 153c0dd49bdSEiji Ota 154c0dd49bdSEiji Ota lifc.lifc_family = AF_UNSPEC; 155c0dd49bdSEiji Ota lifc.lifc_flags = LIFC_NOXMIT | LIFC_TEMPORARY | LIFC_ALLZONES; 156c0dd49bdSEiji Ota lifc.lifc_len = bufsize; 157c0dd49bdSEiji Ota lifc.lifc_buf = buf; 158c0dd49bdSEiji Ota rc = ksocket_ioctl(so4, SIOCGLIFCONF, (intptr_t)&lifc, &rval, CRED()); 159c0dd49bdSEiji Ota if (rc != 0) { 160c0dd49bdSEiji Ota RDSV3_DPRINTF2("rdsv3_do_ip_ioctl", "SIOCGLIFCONF failed"); 161c0dd49bdSEiji Ota kmem_free(buf, bufsize); 162c0dd49bdSEiji Ota return (rc); 163c0dd49bdSEiji Ota } 164c0dd49bdSEiji Ota /* if our extra room is used up, try again */ 165c0dd49bdSEiji Ota if (bufsize <= lifc.lifc_len) { 166c0dd49bdSEiji Ota kmem_free(buf, bufsize); 167c0dd49bdSEiji Ota buf = NULL; 168c0dd49bdSEiji Ota goto retry_count; 169c0dd49bdSEiji Ota } 170c0dd49bdSEiji Ota /* calc actual number of ifconfs */ 171c0dd49bdSEiji Ota n = lifc.lifc_len / sizeof (struct lifreq); 172c0dd49bdSEiji Ota 173c0dd49bdSEiji Ota /* 174c0dd49bdSEiji Ota * Count the RDS interfaces 175c0dd49bdSEiji Ota */ 176c0dd49bdSEiji Ota for (i = 0, j = 0, lp = lifc.lifc_req; i < n; i++, lp++) { 177c0dd49bdSEiji Ota 178c0dd49bdSEiji Ota /* 179c0dd49bdSEiji Ota * Copy as the SIOCGLIFFLAGS ioctl is destructive 180c0dd49bdSEiji Ota */ 181c0dd49bdSEiji Ota bcopy(lp, &lifr, sizeof (struct lifreq)); 182c0dd49bdSEiji Ota /* 183c0dd49bdSEiji Ota * fetch the flags using the socket of the correct family 184c0dd49bdSEiji Ota */ 185c0dd49bdSEiji Ota switch (lifr.lifr_addr.ss_family) { 186c0dd49bdSEiji Ota case AF_INET: 187c0dd49bdSEiji Ota rc = ksocket_ioctl(so4, SIOCGLIFFLAGS, (intptr_t)&lifr, 188c0dd49bdSEiji Ota &rval, CRED()); 189c0dd49bdSEiji Ota break; 190c0dd49bdSEiji Ota default: 191c0dd49bdSEiji Ota continue; 192c0dd49bdSEiji Ota } 193c0dd49bdSEiji Ota 194c0dd49bdSEiji Ota if (rc != 0) continue; 195c0dd49bdSEiji Ota 196c0dd49bdSEiji Ota /* 197c0dd49bdSEiji Ota * If we got the flags, skip uninteresting 198c0dd49bdSEiji Ota * interfaces based on flags 199c0dd49bdSEiji Ota */ 200c0dd49bdSEiji Ota if ((lifr.lifr_flags & IFF_UP) != IFF_UP) 201c0dd49bdSEiji Ota continue; 202c0dd49bdSEiji Ota if (lifr.lifr_flags & 203c0dd49bdSEiji Ota (IFF_ANYCAST|IFF_NOLOCAL|IFF_DEPRECATED)) 204c0dd49bdSEiji Ota continue; 205c0dd49bdSEiji Ota if (!rdsv3_capable_interface(&lifr)) 206c0dd49bdSEiji Ota continue; 207c0dd49bdSEiji Ota j++; 208c0dd49bdSEiji Ota } 209c0dd49bdSEiji Ota 210c0dd49bdSEiji Ota if (j <= 0) { 211c0dd49bdSEiji Ota RDSV3_DPRINTF2("rdsv3_do_ip_ioctl", "No RDS interfaces"); 212c0dd49bdSEiji Ota kmem_free(buf, bufsize); 213c0dd49bdSEiji Ota return (rval); 214c0dd49bdSEiji Ota } 215c0dd49bdSEiji Ota 216c0dd49bdSEiji Ota numifs = j; 217c0dd49bdSEiji Ota 218c0dd49bdSEiji Ota /* This is the buffer we pass back */ 219c0dd49bdSEiji Ota rbufsize = numifs * sizeof (struct lifreq); 220c0dd49bdSEiji Ota rbuf = kmem_alloc(rbufsize, KM_SLEEP); 221c0dd49bdSEiji Ota rlp = (struct lifreq *)rbuf; 222c0dd49bdSEiji Ota 223c0dd49bdSEiji Ota /* 224c0dd49bdSEiji Ota * Examine the array of interfaces and filter uninteresting ones 225c0dd49bdSEiji Ota */ 226c0dd49bdSEiji Ota for (i = 0, lp = lifc.lifc_req; i < n; i++, lp++) { 227c0dd49bdSEiji Ota 228c0dd49bdSEiji Ota /* 229c0dd49bdSEiji Ota * Copy the address as the SIOCGLIFFLAGS ioctl is destructive 230c0dd49bdSEiji Ota */ 231c0dd49bdSEiji Ota bcopy(lp, &lifr, sizeof (struct lifreq)); 232c0dd49bdSEiji Ota /* 233c0dd49bdSEiji Ota * fetch the flags using the socket of the correct family 234c0dd49bdSEiji Ota */ 235c0dd49bdSEiji Ota switch (lifr.lifr_addr.ss_family) { 236c0dd49bdSEiji Ota case AF_INET: 237c0dd49bdSEiji Ota rc = ksocket_ioctl(so4, SIOCGLIFFLAGS, (intptr_t)&lifr, 238c0dd49bdSEiji Ota &rval, CRED()); 239c0dd49bdSEiji Ota break; 240c0dd49bdSEiji Ota default: 241c0dd49bdSEiji Ota continue; 242c0dd49bdSEiji Ota } 243c0dd49bdSEiji Ota 244c0dd49bdSEiji Ota 245c0dd49bdSEiji Ota if (rc != 0) { 246c0dd49bdSEiji Ota RDSV3_DPRINTF2("rdsv3_do_ip_ioctl", 247c0dd49bdSEiji Ota "ksocket_ioctl failed" " for %s", lifr.lifr_name); 248c0dd49bdSEiji Ota continue; 249c0dd49bdSEiji Ota } 250c0dd49bdSEiji Ota 251c0dd49bdSEiji Ota /* 252c0dd49bdSEiji Ota * If we got the flags, skip uninteresting 253c0dd49bdSEiji Ota * interfaces based on flags 254c0dd49bdSEiji Ota */ 255c0dd49bdSEiji Ota if ((lifr.lifr_flags & IFF_UP) != IFF_UP) 256c0dd49bdSEiji Ota continue; 257c0dd49bdSEiji Ota if (lifr.lifr_flags & 258c0dd49bdSEiji Ota (IFF_ANYCAST|IFF_NOLOCAL|IFF_DEPRECATED)) 259c0dd49bdSEiji Ota continue; 260c0dd49bdSEiji Ota if (!rdsv3_capable_interface(&lifr)) 261c0dd49bdSEiji Ota continue; 262c0dd49bdSEiji Ota 263c0dd49bdSEiji Ota /* save the record */ 264c0dd49bdSEiji Ota bcopy(lp, rlp, sizeof (struct lifreq)); 265c0dd49bdSEiji Ota rlp++; 266c0dd49bdSEiji Ota } 267c0dd49bdSEiji Ota 268c0dd49bdSEiji Ota kmem_free(buf, bufsize); 269c0dd49bdSEiji Ota 270c0dd49bdSEiji Ota *ipaddrs = rbuf; 271c0dd49bdSEiji Ota *size = rbufsize; 272c0dd49bdSEiji Ota *nifs = numifs; 273c0dd49bdSEiji Ota 274c0dd49bdSEiji Ota RDSV3_DPRINTF4("rdsv3_do_ip_ioctl", "Return"); 275c0dd49bdSEiji Ota 276c0dd49bdSEiji Ota return (rval); 277c0dd49bdSEiji Ota } 278c0dd49bdSEiji Ota 279c0dd49bdSEiji Ota /* 280c0dd49bdSEiji Ota * Check if the IP interface named by `ifrp' is RDS-capable. 281c0dd49bdSEiji Ota */ 282c0dd49bdSEiji Ota boolean_t 283c0dd49bdSEiji Ota rdsv3_capable_interface_old(struct ifreq *ifrp) 284c0dd49bdSEiji Ota { 285c0dd49bdSEiji Ota char ifname[IFNAMSIZ]; 286c0dd49bdSEiji Ota char drv[MAXLINKNAMELEN]; 287c0dd49bdSEiji Ota uint_t ppa; 288c0dd49bdSEiji Ota char *cp; 289c0dd49bdSEiji Ota 290c0dd49bdSEiji Ota RDSV3_DPRINTF4("rdsv3_capable_interface_old", "Enter"); 291c0dd49bdSEiji Ota 292c0dd49bdSEiji Ota /* 293c0dd49bdSEiji Ota * Strip off the logical interface portion before getting 294c0dd49bdSEiji Ota * intimate with the name. 295c0dd49bdSEiji Ota */ 296c0dd49bdSEiji Ota (void) strlcpy(ifname, ifrp->ifr_name, IFNAMSIZ); 297c0dd49bdSEiji Ota if ((cp = strchr(ifname, ':')) != NULL) 298c0dd49bdSEiji Ota *cp = '\0'; 299c0dd49bdSEiji Ota 300c0dd49bdSEiji Ota RDSV3_DPRINTF4("rdsv3_capable_interface_old", "ifname: %s", ifname); 301c0dd49bdSEiji Ota 302c0dd49bdSEiji Ota if ((strcmp("lo0", ifname) == 0) || 303c0dd49bdSEiji Ota (strncmp("ibd", ifname, 3) == 0)) { 304c0dd49bdSEiji Ota /* 305c0dd49bdSEiji Ota * loopback and IB are considered RDS-capable 306c0dd49bdSEiji Ota */ 307c0dd49bdSEiji Ota return (B_TRUE); 308c0dd49bdSEiji Ota } 309c0dd49bdSEiji Ota 310c0dd49bdSEiji Ota return (ddi_parse(ifname, drv, &ppa) == DDI_SUCCESS && 311c0dd49bdSEiji Ota rdsv3_if_lookup_by_name(drv)); 312c0dd49bdSEiji Ota } 313c0dd49bdSEiji Ota 314c0dd49bdSEiji Ota int 315c0dd49bdSEiji Ota rdsv3_do_ip_ioctl_old(ksocket_t so4, void **ipaddrs, int *size, int *nifs) 316c0dd49bdSEiji Ota { 317c0dd49bdSEiji Ota uint_t ifn; 318c0dd49bdSEiji Ota struct ifconf ifc; 319c0dd49bdSEiji Ota struct ifreq *lp, *rlp, ifr; 320c0dd49bdSEiji Ota int rval = 0; 321c0dd49bdSEiji Ota int numifs; 322c0dd49bdSEiji Ota int bufsize, rbufsize; 323c0dd49bdSEiji Ota void *buf, *rbuf; 324c0dd49bdSEiji Ota int i, j, n, rc; 325c0dd49bdSEiji Ota 326c0dd49bdSEiji Ota *ipaddrs = NULL; 327c0dd49bdSEiji Ota *size = 0; 328c0dd49bdSEiji Ota *nifs = 0; 329c0dd49bdSEiji Ota 330c0dd49bdSEiji Ota RDSV3_DPRINTF4("rdsv3_do_ip_ioctl_old", "Enter"); 331c0dd49bdSEiji Ota 332c0dd49bdSEiji Ota retry_count: 333c0dd49bdSEiji Ota rval = ksocket_ioctl(so4, SIOCGIFNUM, (intptr_t)&ifn, &rval, 334c0dd49bdSEiji Ota CRED()); 335c0dd49bdSEiji Ota if (rval != 0) { 336c0dd49bdSEiji Ota RDSV3_DPRINTF2("rdsv3_do_ip_ioctl_old", 337c0dd49bdSEiji Ota "ksocket_ioctl(SIOCGIFNUM) returned: %d", rval); 338c0dd49bdSEiji Ota return (rval); 339c0dd49bdSEiji Ota } 340c0dd49bdSEiji Ota 341c0dd49bdSEiji Ota numifs = ifn; 342c0dd49bdSEiji Ota if (numifs <= 0) { 343c0dd49bdSEiji Ota RDSV3_DPRINTF2("rdsv3_do_ip_ioctl_old", "No interfaces found"); 344c0dd49bdSEiji Ota return (0); 345c0dd49bdSEiji Ota } 346c0dd49bdSEiji Ota 347c0dd49bdSEiji Ota /* allocate extra room in case more interfaces appear */ 348c0dd49bdSEiji Ota numifs += 10; 349c0dd49bdSEiji Ota 350c0dd49bdSEiji Ota /* get the interface names and ip addresses */ 351c0dd49bdSEiji Ota bufsize = numifs * sizeof (struct ifreq); 352c0dd49bdSEiji Ota buf = kmem_alloc(bufsize, KM_SLEEP); 353c0dd49bdSEiji Ota 354c0dd49bdSEiji Ota ifc.ifc_len = bufsize; 355c0dd49bdSEiji Ota ifc.ifc_buf = buf; 356c0dd49bdSEiji Ota rc = ksocket_ioctl(so4, SIOCGIFCONF, (intptr_t)&ifc, &rval, CRED()); 357c0dd49bdSEiji Ota if (rc != 0) { 358c0dd49bdSEiji Ota RDSV3_DPRINTF2("rdsv3_do_ip_ioctl_old", 359c0dd49bdSEiji Ota "SIOCGLIFCONF failed: %d", rc); 360c0dd49bdSEiji Ota kmem_free(buf, bufsize); 361c0dd49bdSEiji Ota return (rc); 362c0dd49bdSEiji Ota } 363c0dd49bdSEiji Ota /* if our extra room is used up, try again */ 364c0dd49bdSEiji Ota if (bufsize <= ifc.ifc_len) { 365c0dd49bdSEiji Ota kmem_free(buf, bufsize); 366c0dd49bdSEiji Ota buf = NULL; 367c0dd49bdSEiji Ota goto retry_count; 368c0dd49bdSEiji Ota } 369c0dd49bdSEiji Ota /* calc actual number of ifconfs */ 370c0dd49bdSEiji Ota n = ifc.ifc_len / sizeof (struct ifreq); 371c0dd49bdSEiji Ota 372c0dd49bdSEiji Ota /* 373c0dd49bdSEiji Ota * Count the RDS interfaces 374c0dd49bdSEiji Ota */ 375c0dd49bdSEiji Ota for (i = 0, j = 0, lp = ifc.ifc_req; i < n; i++, lp++) { 376c0dd49bdSEiji Ota 377c0dd49bdSEiji Ota /* 378c0dd49bdSEiji Ota * Copy as the SIOCGIFFLAGS ioctl is destructive 379c0dd49bdSEiji Ota */ 380c0dd49bdSEiji Ota bcopy(lp, &ifr, sizeof (struct ifreq)); 381c0dd49bdSEiji Ota /* 382c0dd49bdSEiji Ota * fetch the flags using the socket of the correct family 383c0dd49bdSEiji Ota */ 384c0dd49bdSEiji Ota switch (ifr.ifr_addr.sa_family) { 385c0dd49bdSEiji Ota case AF_INET: 386c0dd49bdSEiji Ota rc = ksocket_ioctl(so4, SIOCGIFFLAGS, (intptr_t)&ifr, 387c0dd49bdSEiji Ota &rval, CRED()); 388c0dd49bdSEiji Ota break; 389c0dd49bdSEiji Ota default: 390c0dd49bdSEiji Ota continue; 391c0dd49bdSEiji Ota } 392c0dd49bdSEiji Ota 393c0dd49bdSEiji Ota if (rc != 0) continue; 394c0dd49bdSEiji Ota 395c0dd49bdSEiji Ota RDSV3_DPRINTF2("rdsv3_do_ip_ioctl_old", 396c0dd49bdSEiji Ota "1. ifr_name: %s, flags: %d", ifr.ifr_name, 397c0dd49bdSEiji Ota (ushort_t)ifr.ifr_flags); 398c0dd49bdSEiji Ota 399c0dd49bdSEiji Ota /* 400c0dd49bdSEiji Ota * If we got the flags, skip uninteresting 401c0dd49bdSEiji Ota * interfaces based on flags 402c0dd49bdSEiji Ota */ 403c0dd49bdSEiji Ota if ((((ushort_t)ifr.ifr_flags) & IFF_UP) != IFF_UP) 404c0dd49bdSEiji Ota continue; 405c0dd49bdSEiji Ota RDSV3_DPRINTF2("rdsv3_do_ip_ioctl_old", 406c0dd49bdSEiji Ota "2. ifr_name: %s, flags: %d", ifr.ifr_name, 407c0dd49bdSEiji Ota (ushort_t)ifr.ifr_flags); 408c0dd49bdSEiji Ota if (((ushort_t)ifr.ifr_flags) & 409c0dd49bdSEiji Ota (IFF_ANYCAST|IFF_NOLOCAL|IFF_DEPRECATED)) 410c0dd49bdSEiji Ota continue; 411c0dd49bdSEiji Ota RDSV3_DPRINTF2("rdsv3_do_ip_ioctl_old", 412c0dd49bdSEiji Ota "3. ifr_name: %s, flags: %d", ifr.ifr_name, 413c0dd49bdSEiji Ota (ushort_t)ifr.ifr_flags); 414c0dd49bdSEiji Ota if (!rdsv3_capable_interface_old(&ifr)) 415c0dd49bdSEiji Ota continue; 416c0dd49bdSEiji Ota RDSV3_DPRINTF2("rdsv3_do_ip_ioctl_old", 417c0dd49bdSEiji Ota "4. ifr_name: %s, flags: %d", ifr.ifr_name, 418c0dd49bdSEiji Ota (ushort_t)ifr.ifr_flags); 419c0dd49bdSEiji Ota j++; 420c0dd49bdSEiji Ota } 421c0dd49bdSEiji Ota 422c0dd49bdSEiji Ota if (j <= 0) { 423c0dd49bdSEiji Ota RDSV3_DPRINTF2("rdsv3_do_ip_ioctl_old", "No RDS interfaces"); 424c0dd49bdSEiji Ota kmem_free(buf, bufsize); 425c0dd49bdSEiji Ota return (rval); 426c0dd49bdSEiji Ota } 427c0dd49bdSEiji Ota 428c0dd49bdSEiji Ota numifs = j; 429c0dd49bdSEiji Ota 430c0dd49bdSEiji Ota /* This is the buffer we pass back */ 431c0dd49bdSEiji Ota rbufsize = numifs * sizeof (struct ifreq); 432c0dd49bdSEiji Ota rbuf = kmem_alloc(rbufsize, KM_SLEEP); 433c0dd49bdSEiji Ota rlp = (struct ifreq *)rbuf; 434c0dd49bdSEiji Ota 435c0dd49bdSEiji Ota /* 436c0dd49bdSEiji Ota * Examine the array of interfaces and filter uninteresting ones 437c0dd49bdSEiji Ota */ 438c0dd49bdSEiji Ota for (i = 0, lp = ifc.ifc_req; i < n; i++, lp++) { 439c0dd49bdSEiji Ota 440c0dd49bdSEiji Ota /* 441c0dd49bdSEiji Ota * Copy the address as the SIOCGIFFLAGS ioctl is destructive 442c0dd49bdSEiji Ota */ 443c0dd49bdSEiji Ota bcopy(lp, &ifr, sizeof (struct ifreq)); 444c0dd49bdSEiji Ota /* 445c0dd49bdSEiji Ota * fetch the flags using the socket of the correct family 446c0dd49bdSEiji Ota */ 447c0dd49bdSEiji Ota switch (ifr.ifr_addr.sa_family) { 448c0dd49bdSEiji Ota case AF_INET: 449c0dd49bdSEiji Ota rc = ksocket_ioctl(so4, SIOCGIFFLAGS, (intptr_t)&ifr, 450c0dd49bdSEiji Ota &rval, CRED()); 451c0dd49bdSEiji Ota break; 452c0dd49bdSEiji Ota default: 453c0dd49bdSEiji Ota continue; 454c0dd49bdSEiji Ota } 455c0dd49bdSEiji Ota 456c0dd49bdSEiji Ota 457c0dd49bdSEiji Ota if (rc != 0) { 458c0dd49bdSEiji Ota RDSV3_DPRINTF2("rdsv3_do_ip_ioctl_old", 459c0dd49bdSEiji Ota "ksocket_ioctl failed: %d for %s", 460c0dd49bdSEiji Ota rc, ifr.ifr_name); 461c0dd49bdSEiji Ota continue; 462c0dd49bdSEiji Ota } 463c0dd49bdSEiji Ota 464c0dd49bdSEiji Ota /* 465c0dd49bdSEiji Ota * If we got the flags, skip uninteresting 466c0dd49bdSEiji Ota * interfaces based on flags 467c0dd49bdSEiji Ota */ 468c0dd49bdSEiji Ota if ((((ushort_t)ifr.ifr_flags) & IFF_UP) != IFF_UP) 469c0dd49bdSEiji Ota continue; 470c0dd49bdSEiji Ota if (((ushort_t)ifr.ifr_flags) & 471c0dd49bdSEiji Ota (IFF_ANYCAST|IFF_NOLOCAL|IFF_DEPRECATED)) 472c0dd49bdSEiji Ota continue; 473c0dd49bdSEiji Ota if (!rdsv3_capable_interface_old(&ifr)) 474c0dd49bdSEiji Ota continue; 475c0dd49bdSEiji Ota 476c0dd49bdSEiji Ota /* save the record */ 477c0dd49bdSEiji Ota bcopy(lp, rlp, sizeof (struct ifreq)); 478c0dd49bdSEiji Ota rlp++; 479c0dd49bdSEiji Ota } 480c0dd49bdSEiji Ota 481c0dd49bdSEiji Ota kmem_free(buf, bufsize); 482c0dd49bdSEiji Ota 483c0dd49bdSEiji Ota *ipaddrs = rbuf; 484c0dd49bdSEiji Ota *size = rbufsize; 485c0dd49bdSEiji Ota *nifs = numifs; 486c0dd49bdSEiji Ota 487c0dd49bdSEiji Ota RDSV3_DPRINTF4("rdsv3_do_ip_ioctl_old", "Return"); 488c0dd49bdSEiji Ota 489c0dd49bdSEiji Ota return (rval); 490c0dd49bdSEiji Ota } 491c0dd49bdSEiji Ota 492c0dd49bdSEiji Ota boolean_t 493c0dd49bdSEiji Ota rdsv3_isloopback(ipaddr_t addr) 494c0dd49bdSEiji Ota { 495c0dd49bdSEiji Ota ip_stack_t *ipst; 496c0dd49bdSEiji Ota 497c0dd49bdSEiji Ota ipst = netstack_find_by_zoneid(GLOBAL_ZONEID)->netstack_ip; 498c0dd49bdSEiji Ota ASSERT(ipst != NULL); 499c0dd49bdSEiji Ota if (ip_type_v4(addr, ipst) != IRE_LOOPBACK) { 500c0dd49bdSEiji Ota netstack_rele(ipst->ips_netstack); 501c0dd49bdSEiji Ota return (B_FALSE); 502c0dd49bdSEiji Ota } 503c0dd49bdSEiji Ota netstack_rele(ipst->ips_netstack); 504c0dd49bdSEiji Ota return (B_TRUE); 505c0dd49bdSEiji Ota } 506c0dd49bdSEiji Ota 507c0dd49bdSEiji Ota /* 508c0dd49bdSEiji Ota * Work Queue Implementation 509c0dd49bdSEiji Ota */ 510c0dd49bdSEiji Ota 511c0dd49bdSEiji Ota #define RDSV3_WQ_THREAD_IDLE 0 512c0dd49bdSEiji Ota #define RDSV3_WQ_THREAD_RUNNING 1 513c0dd49bdSEiji Ota #define RDSV3_WQ_THREAD_FLUSHING 2 514c0dd49bdSEiji Ota #define RDSV3_WQ_THREAD_EXITING 3 515c0dd49bdSEiji Ota 516c0dd49bdSEiji Ota /* worker thread */ 517c0dd49bdSEiji Ota void 518c0dd49bdSEiji Ota rdsv3_worker_thread(void *arg) 519c0dd49bdSEiji Ota { 520c0dd49bdSEiji Ota rdsv3_workqueue_struct_t *wq = arg; 521c0dd49bdSEiji Ota rdsv3_work_t *work; 522c0dd49bdSEiji Ota 523c0dd49bdSEiji Ota RDSV3_DPRINTF4("rdsv3_worker_thread", "Enter(wq: 0x%p)", wq); 524c0dd49bdSEiji Ota 525c0dd49bdSEiji Ota mutex_enter(&wq->wq_lock); 526c0dd49bdSEiji Ota work = list_remove_head(&wq->wq_queue); 527c0dd49bdSEiji Ota while (work) { 528c0dd49bdSEiji Ota mutex_exit(&wq->wq_lock); 529c0dd49bdSEiji Ota 530c0dd49bdSEiji Ota /* process work */ 531c0dd49bdSEiji Ota work->func(work); 532c0dd49bdSEiji Ota 533c0dd49bdSEiji Ota mutex_enter(&wq->wq_lock); 534c0dd49bdSEiji Ota work = list_remove_head(&wq->wq_queue); 535c0dd49bdSEiji Ota } 536c0dd49bdSEiji Ota 537c0dd49bdSEiji Ota /* No more work, go home, until called again */ 538c0dd49bdSEiji Ota if (wq->wq_state != RDSV3_WQ_THREAD_EXITING) { 539c0dd49bdSEiji Ota wq->wq_state = RDSV3_WQ_THREAD_IDLE; 540c0dd49bdSEiji Ota } 541c0dd49bdSEiji Ota mutex_exit(&wq->wq_lock); 542c0dd49bdSEiji Ota 543c0dd49bdSEiji Ota RDSV3_DPRINTF4("rdsv3_worker_thread", "Return(wq: 0x%p)", wq); 544c0dd49bdSEiji Ota } 545c0dd49bdSEiji Ota 546c0dd49bdSEiji Ota /* XXX */ 547c0dd49bdSEiji Ota void 548c0dd49bdSEiji Ota rdsv3_flush_workqueue(rdsv3_workqueue_struct_t *wq) 549c0dd49bdSEiji Ota { 550c0dd49bdSEiji Ota RDSV3_DPRINTF4("rdsv3_flush_workqueue", "Enter(wq: %p)", wq); 551c0dd49bdSEiji Ota 552c0dd49bdSEiji Ota mutex_enter(&wq->wq_lock); 553c0dd49bdSEiji Ota switch (wq->wq_state) { 554c0dd49bdSEiji Ota case RDSV3_WQ_THREAD_IDLE: 555c0dd49bdSEiji Ota /* nothing to do */ 556c0dd49bdSEiji Ota ASSERT(list_is_empty(&wq->wq_queue)); 557c0dd49bdSEiji Ota break; 558c0dd49bdSEiji Ota 559c0dd49bdSEiji Ota case RDSV3_WQ_THREAD_RUNNING: 560c0dd49bdSEiji Ota wq->wq_state = RDSV3_WQ_THREAD_FLUSHING; 561c0dd49bdSEiji Ota /* FALLTHRU */ 562c0dd49bdSEiji Ota case RDSV3_WQ_THREAD_FLUSHING: 563c0dd49bdSEiji Ota /* already flushing, wait until the flushing is complete */ 564c0dd49bdSEiji Ota do { 565c0dd49bdSEiji Ota mutex_exit(&wq->wq_lock); 566c0dd49bdSEiji Ota delay(drv_usectohz(1000000)); 567c0dd49bdSEiji Ota mutex_enter(&wq->wq_lock); 568c0dd49bdSEiji Ota } while (wq->wq_state == RDSV3_WQ_THREAD_FLUSHING); 569c0dd49bdSEiji Ota break; 570c0dd49bdSEiji Ota case RDSV3_WQ_THREAD_EXITING: 571c0dd49bdSEiji Ota mutex_exit(&wq->wq_lock); 572c0dd49bdSEiji Ota rdsv3_worker_thread(wq); 573c0dd49bdSEiji Ota return; 574c0dd49bdSEiji Ota } 575c0dd49bdSEiji Ota mutex_exit(&wq->wq_lock); 576c0dd49bdSEiji Ota 577c0dd49bdSEiji Ota RDSV3_DPRINTF4("rdsv3_flush_workqueue", "Return(wq: %p)", wq); 578c0dd49bdSEiji Ota } 579c0dd49bdSEiji Ota 580c0dd49bdSEiji Ota void 581c0dd49bdSEiji Ota rdsv3_queue_work(rdsv3_workqueue_struct_t *wq, rdsv3_work_t *wp) 582c0dd49bdSEiji Ota { 583c0dd49bdSEiji Ota RDSV3_DPRINTF4("rdsv3_queue_work", "Enter(wq: %p, wp: %p)", wq, wp); 584c0dd49bdSEiji Ota 585c0dd49bdSEiji Ota mutex_enter(&wq->wq_lock); 586c0dd49bdSEiji Ota 587c0dd49bdSEiji Ota if (list_link_active(&wp->work_item)) { 588c0dd49bdSEiji Ota /* This is already in the queue, ignore this call */ 589c0dd49bdSEiji Ota mutex_exit(&wq->wq_lock); 590c0dd49bdSEiji Ota RDSV3_DPRINTF3("rdsv3_queue_work", "already queued: %p", wp); 591c0dd49bdSEiji Ota return; 592c0dd49bdSEiji Ota } 593c0dd49bdSEiji Ota 594c0dd49bdSEiji Ota switch (wq->wq_state) { 595c0dd49bdSEiji Ota case RDSV3_WQ_THREAD_RUNNING: 596c0dd49bdSEiji Ota list_insert_tail(&wq->wq_queue, wp); 597c0dd49bdSEiji Ota mutex_exit(&wq->wq_lock); 598c0dd49bdSEiji Ota break; 599c0dd49bdSEiji Ota 600c0dd49bdSEiji Ota case RDSV3_WQ_THREAD_FLUSHING: 601c0dd49bdSEiji Ota do { 602c0dd49bdSEiji Ota mutex_exit(&wq->wq_lock); 603c0dd49bdSEiji Ota delay(drv_usectohz(1000000)); 604c0dd49bdSEiji Ota mutex_enter(&wq->wq_lock); 605c0dd49bdSEiji Ota } while (wq->wq_state == RDSV3_WQ_THREAD_FLUSHING); 606c0dd49bdSEiji Ota 607c0dd49bdSEiji Ota if (wq->wq_state == RDSV3_WQ_THREAD_RUNNING) { 608c0dd49bdSEiji Ota list_insert_tail(&wq->wq_queue, wp); 609c0dd49bdSEiji Ota mutex_exit(&wq->wq_lock); 610c0dd49bdSEiji Ota break; 611c0dd49bdSEiji Ota } 612c0dd49bdSEiji Ota /* FALLTHRU */ 613c0dd49bdSEiji Ota 614c0dd49bdSEiji Ota case RDSV3_WQ_THREAD_IDLE: 615c0dd49bdSEiji Ota list_insert_tail(&wq->wq_queue, wp); 616c0dd49bdSEiji Ota wq->wq_state = RDSV3_WQ_THREAD_RUNNING; 617c0dd49bdSEiji Ota mutex_exit(&wq->wq_lock); 618c0dd49bdSEiji Ota 619c0dd49bdSEiji Ota (void) ddi_taskq_dispatch(rdsv3_taskq, rdsv3_worker_thread, wq, 620c0dd49bdSEiji Ota DDI_SLEEP); 621c0dd49bdSEiji Ota break; 622c0dd49bdSEiji Ota 623c0dd49bdSEiji Ota case RDSV3_WQ_THREAD_EXITING: 624c0dd49bdSEiji Ota mutex_exit(&wq->wq_lock); 625c0dd49bdSEiji Ota break; 626c0dd49bdSEiji Ota } 627c0dd49bdSEiji Ota 628c0dd49bdSEiji Ota RDSV3_DPRINTF4("rdsv3_queue_work", "Return(wq: %p, wp: %p)", wq, wp); 629c0dd49bdSEiji Ota } 630c0dd49bdSEiji Ota 631c0dd49bdSEiji Ota /* timeout handler for delayed work queuing */ 632c0dd49bdSEiji Ota void 633c0dd49bdSEiji Ota rdsv3_work_timeout_handler(void *arg) 634c0dd49bdSEiji Ota { 635c0dd49bdSEiji Ota rdsv3_delayed_work_t *dwp = (rdsv3_delayed_work_t *)arg; 636c0dd49bdSEiji Ota 637c0dd49bdSEiji Ota RDSV3_DPRINTF4("rdsv3_work_timeout_handler", 638c0dd49bdSEiji Ota "Enter(wq: %p, wp: %p)", dwp->wq, &dwp->work); 639c0dd49bdSEiji Ota 640c0dd49bdSEiji Ota mutex_enter(&dwp->lock); 641c0dd49bdSEiji Ota dwp->timeid = 0; 642c0dd49bdSEiji Ota mutex_exit(&dwp->lock); 643c0dd49bdSEiji Ota 644c0dd49bdSEiji Ota mutex_enter(&dwp->wq->wq_lock); 645c0dd49bdSEiji Ota dwp->wq->wq_pending--; 646c0dd49bdSEiji Ota if (dwp->wq->wq_state == RDSV3_WQ_THREAD_EXITING) { 647c0dd49bdSEiji Ota mutex_exit(&dwp->wq->wq_lock); 648c0dd49bdSEiji Ota return; 649c0dd49bdSEiji Ota } 650c0dd49bdSEiji Ota mutex_exit(&dwp->wq->wq_lock); 651c0dd49bdSEiji Ota 652c0dd49bdSEiji Ota rdsv3_queue_work(dwp->wq, &dwp->work); 653c0dd49bdSEiji Ota 654c0dd49bdSEiji Ota RDSV3_DPRINTF4("rdsv3_work_timeout_handler", 655c0dd49bdSEiji Ota "Return(wq: %p, wp: %p)", dwp->wq, &dwp->work); 656c0dd49bdSEiji Ota } 657c0dd49bdSEiji Ota 658c0dd49bdSEiji Ota void 659c0dd49bdSEiji Ota rdsv3_queue_delayed_work(rdsv3_workqueue_struct_t *wq, 660c0dd49bdSEiji Ota rdsv3_delayed_work_t *dwp, uint_t delay) 661c0dd49bdSEiji Ota { 662c0dd49bdSEiji Ota RDSV3_DPRINTF4("rdsv3_queue_delayed_work", 663c0dd49bdSEiji Ota "Enter(wq: %p, wp: %p)", wq, dwp); 664c0dd49bdSEiji Ota 665c0dd49bdSEiji Ota if (delay == 0) { 666c0dd49bdSEiji Ota rdsv3_queue_work(wq, &dwp->work); 667c0dd49bdSEiji Ota return; 668c0dd49bdSEiji Ota } 669c0dd49bdSEiji Ota 670c0dd49bdSEiji Ota mutex_enter(&wq->wq_lock); 671c0dd49bdSEiji Ota if (wq->wq_state == RDSV3_WQ_THREAD_EXITING) { 672c0dd49bdSEiji Ota mutex_exit(&wq->wq_lock); 673c0dd49bdSEiji Ota RDSV3_DPRINTF4("rdsv3_queue_delayed_work", 674c0dd49bdSEiji Ota "WQ exiting - don't queue (wq: %p, wp: %p)", wq, dwp); 675c0dd49bdSEiji Ota return; 676c0dd49bdSEiji Ota } 677c0dd49bdSEiji Ota wq->wq_pending++; 678c0dd49bdSEiji Ota mutex_exit(&wq->wq_lock); 679c0dd49bdSEiji Ota 680c0dd49bdSEiji Ota mutex_enter(&dwp->lock); 681c0dd49bdSEiji Ota if (dwp->timeid == 0) { 682c0dd49bdSEiji Ota dwp->wq = wq; 683c0dd49bdSEiji Ota dwp->timeid = timeout(rdsv3_work_timeout_handler, dwp, 684c0dd49bdSEiji Ota jiffies + (delay * rdsv3_one_sec_in_hz)); 685c0dd49bdSEiji Ota mutex_exit(&dwp->lock); 686c0dd49bdSEiji Ota } else { 687c0dd49bdSEiji Ota mutex_exit(&dwp->lock); 688c0dd49bdSEiji Ota RDSV3_DPRINTF4("rdsv3_queue_delayed_work", "Already queued: %p", 689c0dd49bdSEiji Ota dwp); 690c0dd49bdSEiji Ota mutex_enter(&wq->wq_lock); 691c0dd49bdSEiji Ota wq->wq_pending--; 692c0dd49bdSEiji Ota mutex_exit(&wq->wq_lock); 693c0dd49bdSEiji Ota } 694c0dd49bdSEiji Ota 695c0dd49bdSEiji Ota RDSV3_DPRINTF4("rdsv3_queue_delayed_work", 696c0dd49bdSEiji Ota "Return(wq: %p, wp: %p)", wq, dwp); 697c0dd49bdSEiji Ota } 698c0dd49bdSEiji Ota 699c0dd49bdSEiji Ota void 700c0dd49bdSEiji Ota rdsv3_cancel_delayed_work(rdsv3_delayed_work_t *dwp) 701c0dd49bdSEiji Ota { 702c0dd49bdSEiji Ota RDSV3_DPRINTF4("rdsv3_cancel_delayed_work", 703c0dd49bdSEiji Ota "Enter(wq: %p, dwp: %p)", dwp->wq, dwp); 704c0dd49bdSEiji Ota 705c0dd49bdSEiji Ota mutex_enter(&dwp->lock); 706c0dd49bdSEiji Ota if (dwp->timeid != 0) { 707c0dd49bdSEiji Ota (void) untimeout(dwp->timeid); 708c0dd49bdSEiji Ota dwp->timeid = 0; 709c0dd49bdSEiji Ota } else { 710c0dd49bdSEiji Ota RDSV3_DPRINTF4("rdsv3_cancel_delayed_work", 711c0dd49bdSEiji Ota "Nothing to cancel (wq: %p, dwp: %p)", dwp->wq, dwp); 712c0dd49bdSEiji Ota mutex_exit(&dwp->lock); 713c0dd49bdSEiji Ota return; 714c0dd49bdSEiji Ota } 715c0dd49bdSEiji Ota mutex_exit(&dwp->lock); 716c0dd49bdSEiji Ota 717c0dd49bdSEiji Ota mutex_enter(&dwp->wq->wq_lock); 718c0dd49bdSEiji Ota dwp->wq->wq_pending--; 719c0dd49bdSEiji Ota mutex_exit(&dwp->wq->wq_lock); 720c0dd49bdSEiji Ota 721c0dd49bdSEiji Ota RDSV3_DPRINTF4("rdsv3_cancel_delayed_work", 722c0dd49bdSEiji Ota "Return(wq: %p, dwp: %p)", dwp->wq, dwp); 723c0dd49bdSEiji Ota } 724c0dd49bdSEiji Ota 725c0dd49bdSEiji Ota void 726c0dd49bdSEiji Ota rdsv3_destroy_task_workqueue(rdsv3_workqueue_struct_t *wq) 727c0dd49bdSEiji Ota { 728c0dd49bdSEiji Ota RDSV3_DPRINTF2("rdsv3_destroy_workqueue", "Enter"); 729c0dd49bdSEiji Ota 730c0dd49bdSEiji Ota ASSERT(wq); 731c0dd49bdSEiji Ota 732c0dd49bdSEiji Ota mutex_enter(&wq->wq_lock); 733c0dd49bdSEiji Ota wq->wq_state = RDSV3_WQ_THREAD_EXITING; 734c0dd49bdSEiji Ota 735c0dd49bdSEiji Ota while (wq->wq_pending > 0) { 736c0dd49bdSEiji Ota mutex_exit(&wq->wq_lock); 737c0dd49bdSEiji Ota delay(drv_usectohz(1000000)); 738c0dd49bdSEiji Ota mutex_enter(&wq->wq_lock); 739c0dd49bdSEiji Ota }; 740c0dd49bdSEiji Ota mutex_exit(&wq->wq_lock); 741c0dd49bdSEiji Ota 742c0dd49bdSEiji Ota rdsv3_flush_workqueue(wq); 743c0dd49bdSEiji Ota 744c0dd49bdSEiji Ota list_destroy(&wq->wq_queue); 745c0dd49bdSEiji Ota mutex_destroy(&wq->wq_lock); 746c0dd49bdSEiji Ota kmem_free(wq, sizeof (rdsv3_workqueue_struct_t)); 747c0dd49bdSEiji Ota 748c0dd49bdSEiji Ota ASSERT(rdsv3_taskq); 749c0dd49bdSEiji Ota ddi_taskq_destroy(rdsv3_taskq); 750c0dd49bdSEiji Ota 751c0dd49bdSEiji Ota wq = NULL; 752c0dd49bdSEiji Ota rdsv3_taskq = NULL; 753c0dd49bdSEiji Ota 754c0dd49bdSEiji Ota RDSV3_DPRINTF2("rdsv3_destroy_workqueue", "Return"); 755c0dd49bdSEiji Ota } 756c0dd49bdSEiji Ota 757c0dd49bdSEiji Ota /* ARGSUSED */ 758c0dd49bdSEiji Ota void 759c0dd49bdSEiji Ota rdsv3_rdma_init_worker(struct rdsv3_work_s *work) 760c0dd49bdSEiji Ota { 761c0dd49bdSEiji Ota rdsv3_rdma_init(); 762c0dd49bdSEiji Ota } 763c0dd49bdSEiji Ota 764c0dd49bdSEiji Ota #define RDSV3_NUM_TASKQ_THREADS 4 765c0dd49bdSEiji Ota rdsv3_workqueue_struct_t * 766c0dd49bdSEiji Ota rdsv3_create_task_workqueue(char *name) 767c0dd49bdSEiji Ota { 768c0dd49bdSEiji Ota rdsv3_workqueue_struct_t *wq; 769c0dd49bdSEiji Ota 770c0dd49bdSEiji Ota RDSV3_DPRINTF2("create_singlethread_workqueue", "Enter (dip: %p)", 771c0dd49bdSEiji Ota rdsv3_dev_info); 772c0dd49bdSEiji Ota 773c0dd49bdSEiji Ota rdsv3_taskq = ddi_taskq_create(rdsv3_dev_info, name, 774c0dd49bdSEiji Ota RDSV3_NUM_TASKQ_THREADS, TASKQ_DEFAULTPRI, 0); 775c0dd49bdSEiji Ota if (rdsv3_taskq == NULL) { 7766e18d381Sagiri RDSV3_DPRINTF2(__FILE__, 777c0dd49bdSEiji Ota "ddi_taskq_create failed for rdsv3_taskq"); 778c0dd49bdSEiji Ota return (NULL); 779c0dd49bdSEiji Ota } 780c0dd49bdSEiji Ota 781c0dd49bdSEiji Ota wq = kmem_zalloc(sizeof (rdsv3_workqueue_struct_t), KM_NOSLEEP); 782c0dd49bdSEiji Ota if (wq == NULL) { 7836e18d381Sagiri RDSV3_DPRINTF2(__FILE__, "kmem_zalloc failed for wq"); 784c0dd49bdSEiji Ota ddi_taskq_destroy(rdsv3_taskq); 785c0dd49bdSEiji Ota return (NULL); 786c0dd49bdSEiji Ota } 787c0dd49bdSEiji Ota 788c0dd49bdSEiji Ota list_create(&wq->wq_queue, sizeof (struct rdsv3_work_s), 789c0dd49bdSEiji Ota offsetof(struct rdsv3_work_s, work_item)); 790c0dd49bdSEiji Ota mutex_init(&wq->wq_lock, NULL, MUTEX_DRIVER, NULL); 791c0dd49bdSEiji Ota wq->wq_state = RDSV3_WQ_THREAD_IDLE; 792c0dd49bdSEiji Ota wq->wq_pending = 0; 793c0dd49bdSEiji Ota rdsv3_one_sec_in_hz = drv_usectohz(1000000); 794c0dd49bdSEiji Ota 795c0dd49bdSEiji Ota RDSV3_DPRINTF2("create_singlethread_workqueue", "Return"); 796c0dd49bdSEiji Ota 797c0dd49bdSEiji Ota return (wq); 798c0dd49bdSEiji Ota } 799c0dd49bdSEiji Ota 800c0dd49bdSEiji Ota /* 801c0dd49bdSEiji Ota * Implementation for struct sock 802c0dd49bdSEiji Ota */ 803c0dd49bdSEiji Ota 804c0dd49bdSEiji Ota void 805c0dd49bdSEiji Ota rdsv3_sock_exit_data(struct rsock *sk) 806c0dd49bdSEiji Ota { 807c0dd49bdSEiji Ota struct rdsv3_sock *rs = sk->sk_protinfo; 808c0dd49bdSEiji Ota 809c0dd49bdSEiji Ota RDSV3_DPRINTF4("rdsv3_sock_exit_data", "rs: %p sk: %p", rs, sk); 810c0dd49bdSEiji Ota 811c0dd49bdSEiji Ota ASSERT(rs != NULL); 812c0dd49bdSEiji Ota ASSERT(rdsv3_sk_sock_flag(sk, SOCK_DEAD)); 813c0dd49bdSEiji Ota 814c0dd49bdSEiji Ota rs->rs_sk = NULL; 815c0dd49bdSEiji Ota 816c0dd49bdSEiji Ota list_destroy(&rs->rs_send_queue); 817c0dd49bdSEiji Ota list_destroy(&rs->rs_notify_queue); 818c0dd49bdSEiji Ota list_destroy(&rs->rs_recv_queue); 819c0dd49bdSEiji Ota 820c0dd49bdSEiji Ota rw_destroy(&rs->rs_recv_lock); 821c0dd49bdSEiji Ota mutex_destroy(&rs->rs_lock); 822c0dd49bdSEiji Ota 823c0dd49bdSEiji Ota mutex_destroy(&rs->rs_rdma_lock); 824c0dd49bdSEiji Ota avl_destroy(&rs->rs_rdma_keys); 825c0dd49bdSEiji Ota 826c0dd49bdSEiji Ota rdsv3_exit_waitqueue(sk->sk_sleep); 827c0dd49bdSEiji Ota kmem_free(sk->sk_sleep, sizeof (rdsv3_wait_queue_t)); 828c0dd49bdSEiji Ota mutex_destroy(&sk->sk_lock); 829c0dd49bdSEiji Ota 830c0dd49bdSEiji Ota kmem_cache_free(rdsv3_alloc_cache, sk); 831c0dd49bdSEiji Ota RDSV3_DPRINTF4("rdsv3_sock_exit_data", "rs: %p sk: %p", rs, sk); 832c0dd49bdSEiji Ota } 833c0dd49bdSEiji Ota 834c0dd49bdSEiji Ota /* XXX - figure out right values */ 835c0dd49bdSEiji Ota #define RDSV3_RECV_HIWATER (256 * 1024) 836c0dd49bdSEiji Ota #define RDSV3_RECV_LOWATER 128 837c0dd49bdSEiji Ota #define RDSV3_XMIT_HIWATER (256 * 1024) 838c0dd49bdSEiji Ota #define RDSV3_XMIT_LOWATER 1024 839c0dd49bdSEiji Ota 840c0dd49bdSEiji Ota struct rsock * 841c0dd49bdSEiji Ota rdsv3_sk_alloc() 842c0dd49bdSEiji Ota { 843c0dd49bdSEiji Ota struct rsock *sk; 844c0dd49bdSEiji Ota 845c0dd49bdSEiji Ota sk = kmem_cache_alloc(rdsv3_alloc_cache, KM_SLEEP); 846c0dd49bdSEiji Ota if (sk == NULL) { 847c0dd49bdSEiji Ota RDSV3_DPRINTF2("rdsv3_create", "kmem_cache_alloc failed"); 848c0dd49bdSEiji Ota return (NULL); 849c0dd49bdSEiji Ota } 850c0dd49bdSEiji Ota 851c0dd49bdSEiji Ota bzero(sk, sizeof (struct rsock) + sizeof (struct rdsv3_sock)); 852c0dd49bdSEiji Ota return (sk); 853c0dd49bdSEiji Ota } 854c0dd49bdSEiji Ota 855c0dd49bdSEiji Ota void 856c0dd49bdSEiji Ota rdsv3_sock_init_data(struct rsock *sk) 857c0dd49bdSEiji Ota { 858c0dd49bdSEiji Ota sk->sk_sleep = kmem_zalloc(sizeof (rdsv3_wait_queue_t), KM_SLEEP); 859c0dd49bdSEiji Ota rdsv3_init_waitqueue(sk->sk_sleep); 860c0dd49bdSEiji Ota 861c0dd49bdSEiji Ota mutex_init(&sk->sk_lock, NULL, MUTEX_DRIVER, NULL); 862c0dd49bdSEiji Ota sk->sk_refcount = 1; 863c0dd49bdSEiji Ota sk->sk_protinfo = (struct rdsv3_sock *)(sk + 1); 864c0dd49bdSEiji Ota sk->sk_sndbuf = RDSV3_XMIT_HIWATER; 865c0dd49bdSEiji Ota sk->sk_rcvbuf = RDSV3_RECV_HIWATER; 866c0dd49bdSEiji Ota } 867c0dd49bdSEiji Ota 868c0dd49bdSEiji Ota /* 869c0dd49bdSEiji Ota * Connection cache 870c0dd49bdSEiji Ota */ 871c0dd49bdSEiji Ota /* ARGSUSED */ 872c0dd49bdSEiji Ota int 873c0dd49bdSEiji Ota rdsv3_conn_constructor(void *buf, void *arg, int kmflags) 874c0dd49bdSEiji Ota { 875c0dd49bdSEiji Ota struct rdsv3_connection *conn = buf; 876c0dd49bdSEiji Ota 877c0dd49bdSEiji Ota bzero(conn, sizeof (struct rdsv3_connection)); 878c0dd49bdSEiji Ota 879c0dd49bdSEiji Ota conn->c_next_tx_seq = 1; 880c0dd49bdSEiji Ota mutex_init(&conn->c_lock, NULL, MUTEX_DRIVER, NULL); 881c0dd49bdSEiji Ota mutex_init(&conn->c_send_lock, NULL, MUTEX_DRIVER, NULL); 882c0dd49bdSEiji Ota list_create(&conn->c_send_queue, sizeof (struct rdsv3_message), 883c0dd49bdSEiji Ota offsetof(struct rdsv3_message, m_conn_item)); 884c0dd49bdSEiji Ota list_create(&conn->c_retrans, sizeof (struct rdsv3_message), 885c0dd49bdSEiji Ota offsetof(struct rdsv3_message, m_conn_item)); 886c0dd49bdSEiji Ota return (0); 887c0dd49bdSEiji Ota } 888c0dd49bdSEiji Ota 889c0dd49bdSEiji Ota /* ARGSUSED */ 890c0dd49bdSEiji Ota void 891c0dd49bdSEiji Ota rdsv3_conn_destructor(void *buf, void *arg) 892c0dd49bdSEiji Ota { 893c0dd49bdSEiji Ota struct rdsv3_connection *conn = buf; 894c0dd49bdSEiji Ota 895c0dd49bdSEiji Ota ASSERT(list_is_empty(&conn->c_send_queue)); 896c0dd49bdSEiji Ota ASSERT(list_is_empty(&conn->c_retrans)); 897c0dd49bdSEiji Ota list_destroy(&conn->c_send_queue); 898c0dd49bdSEiji Ota list_destroy(&conn->c_retrans); 899c0dd49bdSEiji Ota mutex_destroy(&conn->c_send_lock); 900c0dd49bdSEiji Ota mutex_destroy(&conn->c_lock); 901c0dd49bdSEiji Ota } 902c0dd49bdSEiji Ota 903c0dd49bdSEiji Ota int 904c0dd49bdSEiji Ota rdsv3_conn_compare(const void *conn1, const void *conn2) 905c0dd49bdSEiji Ota { 906c0dd49bdSEiji Ota uint32_be_t laddr1, faddr1, laddr2, faddr2; 907c0dd49bdSEiji Ota 908c0dd49bdSEiji Ota laddr1 = ((rdsv3_conn_info_t *)conn1)->c_laddr; 909c0dd49bdSEiji Ota laddr2 = ((struct rdsv3_connection *)conn2)->c_laddr; 910c0dd49bdSEiji Ota 911c0dd49bdSEiji Ota if (laddr1 == laddr2) { 912c0dd49bdSEiji Ota faddr1 = ((rdsv3_conn_info_t *)conn1)->c_faddr; 913c0dd49bdSEiji Ota faddr2 = ((struct rdsv3_connection *)conn2)->c_faddr; 914c0dd49bdSEiji Ota if (faddr1 == faddr2) 915c0dd49bdSEiji Ota return (0); 916c0dd49bdSEiji Ota if (faddr1 < faddr2) 917c0dd49bdSEiji Ota return (-1); 918c0dd49bdSEiji Ota return (1); 919c0dd49bdSEiji Ota } 920c0dd49bdSEiji Ota 921c0dd49bdSEiji Ota if (laddr1 < laddr2) 922c0dd49bdSEiji Ota return (-1); 923c0dd49bdSEiji Ota 924c0dd49bdSEiji Ota return (1); 925c0dd49bdSEiji Ota } 926c0dd49bdSEiji Ota 927*d2b539e7Sagiri /* rdsv3_ib_incoming cache */ 928*d2b539e7Sagiri /* ARGSUSED */ 929*d2b539e7Sagiri int 930*d2b539e7Sagiri rdsv3_ib_inc_constructor(void *buf, void *arg, int kmflags) 931*d2b539e7Sagiri { 932*d2b539e7Sagiri list_create(&((struct rdsv3_ib_incoming *)buf)->ii_frags, 933*d2b539e7Sagiri sizeof (struct rdsv3_page_frag), 934*d2b539e7Sagiri offsetof(struct rdsv3_page_frag, f_item)); 935*d2b539e7Sagiri 936*d2b539e7Sagiri return (0); 937*d2b539e7Sagiri } 938*d2b539e7Sagiri 939*d2b539e7Sagiri /* ARGSUSED */ 940*d2b539e7Sagiri void 941*d2b539e7Sagiri rdsv3_ib_inc_destructor(void *buf, void *arg) 942*d2b539e7Sagiri { 943*d2b539e7Sagiri list_destroy(&((struct rdsv3_ib_incoming *)buf)->ii_frags); 944*d2b539e7Sagiri } 945*d2b539e7Sagiri 946*d2b539e7Sagiri /* ib_frag_slab cache */ 947*d2b539e7Sagiri /* ARGSUSED */ 948*d2b539e7Sagiri int 949*d2b539e7Sagiri rdsv3_ib_frag_constructor(void *buf, void *arg, int kmflags) 950*d2b539e7Sagiri { 951*d2b539e7Sagiri struct rdsv3_page_frag *frag = (struct rdsv3_page_frag *)buf; 952*d2b539e7Sagiri struct rdsv3_ib_device *rds_ibdev = (struct rdsv3_ib_device *)arg; 953*d2b539e7Sagiri ibt_iov_attr_t iov_attr; 954*d2b539e7Sagiri ibt_iov_t iov_arr[1]; 955*d2b539e7Sagiri ibt_all_wr_t wr; 956*d2b539e7Sagiri 957*d2b539e7Sagiri bzero(frag, sizeof (struct rdsv3_page_frag)); 958*d2b539e7Sagiri list_link_init(&frag->f_item); 959*d2b539e7Sagiri 960*d2b539e7Sagiri frag->f_page = kmem_alloc(PAGE_SIZE, kmflags); 961*d2b539e7Sagiri if (frag->f_page == NULL) { 962*d2b539e7Sagiri RDSV3_DPRINTF2("rdsv3_ib_frag_constructor", 963*d2b539e7Sagiri "kmem_alloc for %d failed", PAGE_SIZE); 964*d2b539e7Sagiri return (-1); 965*d2b539e7Sagiri } 966*d2b539e7Sagiri frag->f_offset = 0; 967*d2b539e7Sagiri 968*d2b539e7Sagiri iov_attr.iov_as = NULL; 969*d2b539e7Sagiri iov_attr.iov = &iov_arr[0]; 970*d2b539e7Sagiri iov_attr.iov_buf = NULL; 971*d2b539e7Sagiri iov_attr.iov_list_len = 1; 972*d2b539e7Sagiri iov_attr.iov_wr_nds = 1; 973*d2b539e7Sagiri iov_attr.iov_lso_hdr_sz = 0; 974*d2b539e7Sagiri iov_attr.iov_flags = IBT_IOV_SLEEP | IBT_IOV_RECV; 975*d2b539e7Sagiri 976*d2b539e7Sagiri iov_arr[0].iov_addr = frag->f_page; 977*d2b539e7Sagiri iov_arr[0].iov_len = PAGE_SIZE; 978*d2b539e7Sagiri 979*d2b539e7Sagiri wr.recv.wr_nds = 1; 980*d2b539e7Sagiri wr.recv.wr_sgl = &frag->f_sge; 981*d2b539e7Sagiri 982*d2b539e7Sagiri if (ibt_map_mem_iov(ib_get_ibt_hca_hdl(rds_ibdev->dev), 983*d2b539e7Sagiri &iov_attr, &wr, &frag->f_mapped) != IBT_SUCCESS) { 984*d2b539e7Sagiri RDSV3_DPRINTF2("rdsv3_ib_frag_constructor", 985*d2b539e7Sagiri "ibt_map_mem_iov failed"); 986*d2b539e7Sagiri kmem_free(frag->f_page, PAGE_SIZE); 987*d2b539e7Sagiri return (-1); 988*d2b539e7Sagiri } 989*d2b539e7Sagiri 990*d2b539e7Sagiri return (0); 991*d2b539e7Sagiri } 992*d2b539e7Sagiri 993*d2b539e7Sagiri /* ARGSUSED */ 994*d2b539e7Sagiri void 995*d2b539e7Sagiri rdsv3_ib_frag_destructor(void *buf, void *arg) 996*d2b539e7Sagiri { 997*d2b539e7Sagiri struct rdsv3_page_frag *frag = (struct rdsv3_page_frag *)buf; 998*d2b539e7Sagiri struct rdsv3_ib_device *rds_ibdev = (struct rdsv3_ib_device *)arg; 999*d2b539e7Sagiri 1000*d2b539e7Sagiri /* unmap the page */ 1001*d2b539e7Sagiri if (ibt_unmap_mem_iov(ib_get_ibt_hca_hdl(rds_ibdev->dev), 1002*d2b539e7Sagiri frag->f_mapped) != IBT_SUCCESS) 1003*d2b539e7Sagiri RDSV3_DPRINTF2("rdsv3_ib_frag_destructor", 1004*d2b539e7Sagiri "ibt_unmap_mem_iov failed"); 1005*d2b539e7Sagiri 1006*d2b539e7Sagiri /* free the page */ 1007*d2b539e7Sagiri kmem_free(frag->f_page, PAGE_SIZE); 1008*d2b539e7Sagiri } 1009*d2b539e7Sagiri 1010c0dd49bdSEiji Ota /* loop.c */ 1011c0dd49bdSEiji Ota extern kmutex_t loop_conns_lock; 1012c0dd49bdSEiji Ota extern list_t loop_conns; 1013c0dd49bdSEiji Ota 1014c0dd49bdSEiji Ota struct rdsv3_loop_connection 1015c0dd49bdSEiji Ota { 1016c0dd49bdSEiji Ota struct list_node loop_node; 1017c0dd49bdSEiji Ota struct rdsv3_connection *conn; 1018c0dd49bdSEiji Ota }; 1019c0dd49bdSEiji Ota 1020c0dd49bdSEiji Ota void 1021c0dd49bdSEiji Ota rdsv3_loop_init(void) 1022c0dd49bdSEiji Ota { 1023c0dd49bdSEiji Ota list_create(&loop_conns, sizeof (struct rdsv3_loop_connection), 1024c0dd49bdSEiji Ota offsetof(struct rdsv3_loop_connection, loop_node)); 1025c0dd49bdSEiji Ota mutex_init(&loop_conns_lock, NULL, MUTEX_DRIVER, NULL); 1026c0dd49bdSEiji Ota } 1027c0dd49bdSEiji Ota 1028c0dd49bdSEiji Ota /* rdma.c */ 1029c0dd49bdSEiji Ota /* IB Rkey is used here for comparison */ 1030c0dd49bdSEiji Ota int 1031c0dd49bdSEiji Ota rdsv3_mr_compare(const void *mr1, const void *mr2) 1032c0dd49bdSEiji Ota { 1033c0dd49bdSEiji Ota uint32_t key1 = *(uint32_t *)mr1; 1034c0dd49bdSEiji Ota uint32_t key2 = ((struct rdsv3_mr *)mr2)->r_key; 1035c0dd49bdSEiji Ota 1036c0dd49bdSEiji Ota if (key1 < key2) 1037c0dd49bdSEiji Ota return (-1); 1038c0dd49bdSEiji Ota if (key1 > key2) 1039c0dd49bdSEiji Ota return (1); 1040c0dd49bdSEiji Ota return (0); 1041c0dd49bdSEiji Ota } 1042c0dd49bdSEiji Ota 1043c0dd49bdSEiji Ota /* transport.c */ 1044cadbfdc3SEiji Ota extern struct rdsv3_transport *transports[]; 1045c0dd49bdSEiji Ota extern krwlock_t trans_sem; 1046c0dd49bdSEiji Ota 1047c0dd49bdSEiji Ota void 1048c0dd49bdSEiji Ota rdsv3_trans_exit(void) 1049c0dd49bdSEiji Ota { 1050c0dd49bdSEiji Ota struct rdsv3_transport *trans; 1051cadbfdc3SEiji Ota int i; 1052c0dd49bdSEiji Ota 1053c0dd49bdSEiji Ota RDSV3_DPRINTF2("rdsv3_trans_exit", "Enter"); 1054c0dd49bdSEiji Ota 1055c0dd49bdSEiji Ota /* currently, only IB transport */ 1056c0dd49bdSEiji Ota rw_enter(&trans_sem, RW_READER); 1057c0dd49bdSEiji Ota trans = NULL; 1058cadbfdc3SEiji Ota for (i = 0; i < RDS_TRANS_COUNT; i++) { 1059cadbfdc3SEiji Ota if (transports[i]) { 1060cadbfdc3SEiji Ota trans = transports[i]; 1061cadbfdc3SEiji Ota break; 1062cadbfdc3SEiji Ota } 1063cadbfdc3SEiji Ota } 1064c0dd49bdSEiji Ota rw_exit(&trans_sem); 1065c0dd49bdSEiji Ota 1066c0dd49bdSEiji Ota /* trans->exit() will remove the trans from the list */ 1067c0dd49bdSEiji Ota if (trans) 1068c0dd49bdSEiji Ota trans->exit(); 1069c0dd49bdSEiji Ota 1070c0dd49bdSEiji Ota rw_destroy(&trans_sem); 1071c0dd49bdSEiji Ota 1072c0dd49bdSEiji Ota RDSV3_DPRINTF2("rdsv3_trans_exit", "Return"); 1073c0dd49bdSEiji Ota } 1074c0dd49bdSEiji Ota 1075c0dd49bdSEiji Ota void 1076c0dd49bdSEiji Ota rdsv3_trans_init() 1077c0dd49bdSEiji Ota { 1078c0dd49bdSEiji Ota RDSV3_DPRINTF2("rdsv3_trans_init", "Enter"); 1079c0dd49bdSEiji Ota 1080c0dd49bdSEiji Ota rw_init(&trans_sem, NULL, RW_DRIVER, NULL); 1081c0dd49bdSEiji Ota 1082c0dd49bdSEiji Ota RDSV3_DPRINTF2("rdsv3_trans_init", "Return"); 1083c0dd49bdSEiji Ota } 1084c0dd49bdSEiji Ota 1085c0dd49bdSEiji Ota int 1086c0dd49bdSEiji Ota rdsv3_put_cmsg(struct nmsghdr *msg, int level, int type, size_t size, 1087c0dd49bdSEiji Ota void *payload) 1088c0dd49bdSEiji Ota { 1089c0dd49bdSEiji Ota struct cmsghdr *cp; 1090c0dd49bdSEiji Ota char *bp; 1091c0dd49bdSEiji Ota size_t cmlen; 1092c0dd49bdSEiji Ota size_t cmspace; 1093c0dd49bdSEiji Ota size_t bufsz; 1094c0dd49bdSEiji Ota 1095c0dd49bdSEiji Ota RDSV3_DPRINTF4("rdsv3_put_cmsg", 1096c0dd49bdSEiji Ota "Enter(msg: %p level: %d type: %d sz: %d)", 1097c0dd49bdSEiji Ota msg, level, type, size); 1098c0dd49bdSEiji Ota 1099c0dd49bdSEiji Ota if (msg == NULL || msg->msg_controllen == 0 || payload == NULL) { 1100c0dd49bdSEiji Ota return (0); 1101c0dd49bdSEiji Ota } 1102c0dd49bdSEiji Ota /* check for first cmsg or this is another cmsg to be appended */ 1103c0dd49bdSEiji Ota if (msg->msg_control == NULL) 1104c0dd49bdSEiji Ota msg->msg_controllen = 0; 1105c0dd49bdSEiji Ota 1106c0dd49bdSEiji Ota cmlen = CMSG_LEN(size); 1107c0dd49bdSEiji Ota cmspace = CMSG_SPACE(size); 1108c0dd49bdSEiji Ota bufsz = msg->msg_controllen + cmspace; 1109c0dd49bdSEiji Ota 1110c0dd49bdSEiji Ota /* extend the existing cmsg to append the next cmsg */ 1111c0dd49bdSEiji Ota bp = kmem_alloc(bufsz, KM_SLEEP); 1112c0dd49bdSEiji Ota if (msg->msg_control) { 1113c0dd49bdSEiji Ota bcopy(msg->msg_control, bp, msg->msg_controllen); 1114c0dd49bdSEiji Ota kmem_free(msg->msg_control, (size_t)msg->msg_controllen); 1115c0dd49bdSEiji Ota } 1116c0dd49bdSEiji Ota 1117c0dd49bdSEiji Ota /* assign payload the proper cmsg location */ 1118c0dd49bdSEiji Ota cp = (struct cmsghdr *)(bp + msg->msg_controllen); 1119c0dd49bdSEiji Ota cp->cmsg_len = cmlen; 1120c0dd49bdSEiji Ota cp->cmsg_level = level; 1121c0dd49bdSEiji Ota cp->cmsg_type = type; 1122c0dd49bdSEiji Ota 1123c0dd49bdSEiji Ota bcopy(payload, CMSG_DATA(cp), cmlen - 1124c0dd49bdSEiji Ota (unsigned int)_CMSG_DATA_ALIGN(sizeof (struct cmsghdr))); 1125c0dd49bdSEiji Ota 1126c0dd49bdSEiji Ota msg->msg_control = bp; 1127c0dd49bdSEiji Ota msg->msg_controllen = bufsz; 1128c0dd49bdSEiji Ota 1129c0dd49bdSEiji Ota RDSV3_DPRINTF4("rdsv3_put_cmsg", "Return(cmsg_len: %d)", cp->cmsg_len); 1130c0dd49bdSEiji Ota 1131c0dd49bdSEiji Ota return (0); 1132c0dd49bdSEiji Ota } 1133c0dd49bdSEiji Ota 1134c0dd49bdSEiji Ota /* bind.c */ 1135c0dd49bdSEiji Ota extern kmutex_t rdsv3_bind_lock; 1136c0dd49bdSEiji Ota extern avl_tree_t rdsv3_bind_tree; 1137c0dd49bdSEiji Ota 1138c0dd49bdSEiji Ota /* ARGSUSED */ 1139c0dd49bdSEiji Ota int 1140c0dd49bdSEiji Ota rdsv3_verify_bind_address(ipaddr_t addr) 1141c0dd49bdSEiji Ota { 1142c0dd49bdSEiji Ota return (1); 1143c0dd49bdSEiji Ota } 1144c0dd49bdSEiji Ota 1145c0dd49bdSEiji Ota /* XXX - need to enhance to compare IP address and port */ 1146c0dd49bdSEiji Ota int 1147c0dd49bdSEiji Ota rdsv3_bind_node_compare(const void *a, const void *b) 1148c0dd49bdSEiji Ota { 1149c0dd49bdSEiji Ota uint16_be_t port = *(in_port_t *)a; 1150c0dd49bdSEiji Ota struct rdsv3_sock *rs = (struct rdsv3_sock *)b; 1151c0dd49bdSEiji Ota 1152c0dd49bdSEiji Ota RDSV3_DPRINTF5("rdsv3_bind_node_compare", "Enter (%x %x)", port, 1153c0dd49bdSEiji Ota rs->rs_bound_port); 1154c0dd49bdSEiji Ota 1155c0dd49bdSEiji Ota if (port > rs->rs_bound_port) 1156c0dd49bdSEiji Ota return (+1); 1157c0dd49bdSEiji Ota else if (port < rs->rs_bound_port) 1158c0dd49bdSEiji Ota return (-1); 1159c0dd49bdSEiji Ota 1160c0dd49bdSEiji Ota return (0); 1161c0dd49bdSEiji Ota } 1162c0dd49bdSEiji Ota 1163c0dd49bdSEiji Ota void 1164c0dd49bdSEiji Ota rdsv3_bind_tree_init() 1165c0dd49bdSEiji Ota { 1166c0dd49bdSEiji Ota RDSV3_DPRINTF4("rdsv3_bind_tree_init", "Enter"); 1167c0dd49bdSEiji Ota 1168c0dd49bdSEiji Ota mutex_init(&rdsv3_bind_lock, NULL, MUTEX_DRIVER, NULL); 1169c0dd49bdSEiji Ota avl_create(&rdsv3_bind_tree, rdsv3_bind_node_compare, 1170c0dd49bdSEiji Ota sizeof (struct rdsv3_sock), 1171c0dd49bdSEiji Ota offsetof(struct rdsv3_sock, rs_bound_node)); 1172c0dd49bdSEiji Ota 1173c0dd49bdSEiji Ota RDSV3_DPRINTF4("rdsv3_bind_tree_init", "Return"); 1174c0dd49bdSEiji Ota } 1175c0dd49bdSEiji Ota 1176c0dd49bdSEiji Ota void 1177c0dd49bdSEiji Ota rdsv3_bind_tree_exit() 1178c0dd49bdSEiji Ota { 1179c0dd49bdSEiji Ota RDSV3_DPRINTF2("rdsv3_bind_tree_exit", "Enter"); 1180c0dd49bdSEiji Ota 1181c0dd49bdSEiji Ota ASSERT(avl_is_empty(&rdsv3_bind_tree)); 1182c0dd49bdSEiji Ota avl_destroy(&rdsv3_bind_tree); 1183c0dd49bdSEiji Ota mutex_destroy(&rdsv3_bind_lock); 1184c0dd49bdSEiji Ota 1185c0dd49bdSEiji Ota RDSV3_DPRINTF2("rdsv3_bind_tree_exit", "Return"); 1186c0dd49bdSEiji Ota } 1187c0dd49bdSEiji Ota 1188c0dd49bdSEiji Ota /* checksum */ 1189c0dd49bdSEiji Ota uint16_t 1190c0dd49bdSEiji Ota rdsv3_ip_fast_csum(void *hdr, size_t length) 1191c0dd49bdSEiji Ota { 1192c0dd49bdSEiji Ota return (0xffff & 1193c0dd49bdSEiji Ota (uint16_t)(~ip_ocsum((ushort_t *)hdr, (int)length <<1, 0))); 1194c0dd49bdSEiji Ota } 1195c0dd49bdSEiji Ota 1196c0dd49bdSEiji Ota /* scatterlist implementation */ 1197c0dd49bdSEiji Ota /* ARGSUSED */ 1198c0dd49bdSEiji Ota caddr_t 1199c0dd49bdSEiji Ota rdsv3_ib_sg_dma_address(ib_device_t *dev, struct rdsv3_scatterlist *scat, 1200c0dd49bdSEiji Ota uint_t offset) 1201c0dd49bdSEiji Ota { 1202c0dd49bdSEiji Ota return (0); 1203c0dd49bdSEiji Ota } 1204c0dd49bdSEiji Ota 1205c0dd49bdSEiji Ota uint_t 1206c0dd49bdSEiji Ota rdsv3_ib_dma_map_sg(struct ib_device *dev, struct rdsv3_scatterlist *scat, 1207c0dd49bdSEiji Ota uint_t num) 1208c0dd49bdSEiji Ota { 1209c0dd49bdSEiji Ota struct rdsv3_scatterlist *s, *first; 1210c0dd49bdSEiji Ota ibt_iov_t *iov; 1211c0dd49bdSEiji Ota ibt_wr_ds_t *sgl; 1212c0dd49bdSEiji Ota ibt_iov_attr_t iov_attr; 1213c0dd49bdSEiji Ota ibt_send_wr_t swr; 1214c0dd49bdSEiji Ota uint_t i; 1215c0dd49bdSEiji Ota 1216c0dd49bdSEiji Ota RDSV3_DPRINTF4("rdsv3_ib_dma_map_sg", "scat %p, num: %d", scat, num); 1217c0dd49bdSEiji Ota 1218c0dd49bdSEiji Ota s = first = &scat[0]; 1219c0dd49bdSEiji Ota ASSERT(first->mihdl == NULL); 1220c0dd49bdSEiji Ota 1221c0dd49bdSEiji Ota iov = kmem_alloc(num * sizeof (ibt_iov_t), KM_SLEEP); 1222c0dd49bdSEiji Ota sgl = kmem_zalloc((num * 2) * sizeof (ibt_wr_ds_t), KM_SLEEP); 1223c0dd49bdSEiji Ota 1224c0dd49bdSEiji Ota for (i = 0; i < num; i++, s++) { 1225c0dd49bdSEiji Ota iov[i].iov_addr = s->vaddr; 1226c0dd49bdSEiji Ota iov[i].iov_len = s->length; 1227c0dd49bdSEiji Ota } 1228c0dd49bdSEiji Ota 1229c0dd49bdSEiji Ota iov_attr.iov_as = NULL; 1230c0dd49bdSEiji Ota iov_attr.iov = iov; 1231c0dd49bdSEiji Ota iov_attr.iov_buf = NULL; 1232c0dd49bdSEiji Ota iov_attr.iov_list_len = num; 1233c0dd49bdSEiji Ota iov_attr.iov_wr_nds = num * 2; 1234c0dd49bdSEiji Ota iov_attr.iov_lso_hdr_sz = 0; 1235c0dd49bdSEiji Ota iov_attr.iov_flags = IBT_IOV_SLEEP; 1236c0dd49bdSEiji Ota 1237c0dd49bdSEiji Ota swr.wr_sgl = sgl; 1238c0dd49bdSEiji Ota 1239c0dd49bdSEiji Ota i = ibt_map_mem_iov(ib_get_ibt_hca_hdl(dev), 1240c0dd49bdSEiji Ota &iov_attr, (ibt_all_wr_t *)&swr, &first->mihdl); 1241c0dd49bdSEiji Ota kmem_free(iov, num * sizeof (ibt_iov_t)); 1242c0dd49bdSEiji Ota if (i != IBT_SUCCESS) { 1243c0dd49bdSEiji Ota RDSV3_DPRINTF2("rdsv3_ib_dma_map_sg", 1244c0dd49bdSEiji Ota "ibt_map_mem_iov returned: %d", i); 1245c0dd49bdSEiji Ota return (0); 1246c0dd49bdSEiji Ota } 1247c0dd49bdSEiji Ota 1248c0dd49bdSEiji Ota s = first; 1249c0dd49bdSEiji Ota for (i = 0; i < num; i++, s++, sgl++) { 1250c0dd49bdSEiji Ota s->sgl = sgl; 1251c0dd49bdSEiji Ota } 1252c0dd49bdSEiji Ota 1253c0dd49bdSEiji Ota return (num); 1254c0dd49bdSEiji Ota } 1255c0dd49bdSEiji Ota 1256c0dd49bdSEiji Ota void 1257c0dd49bdSEiji Ota rdsv3_ib_dma_unmap_sg(ib_device_t *dev, struct rdsv3_scatterlist *scat, 1258c0dd49bdSEiji Ota uint_t num) 1259c0dd49bdSEiji Ota { 1260c0dd49bdSEiji Ota /* Zero length messages have no scatter gather entries */ 1261c0dd49bdSEiji Ota if (num != 0) { 1262c0dd49bdSEiji Ota ASSERT(scat->mihdl != NULL); 1263c0dd49bdSEiji Ota ASSERT(scat->sgl != NULL); 1264c0dd49bdSEiji Ota 1265c0dd49bdSEiji Ota (void) ibt_unmap_mem_iov(ib_get_ibt_hca_hdl(dev), scat->mihdl); 1266c0dd49bdSEiji Ota 1267c0dd49bdSEiji Ota kmem_free(scat->sgl, (num * 2) * sizeof (ibt_wr_ds_t)); 1268c0dd49bdSEiji Ota scat->sgl = NULL; 1269c0dd49bdSEiji Ota scat->mihdl = NULL; 1270c0dd49bdSEiji Ota } 1271c0dd49bdSEiji Ota } 1272c0dd49bdSEiji Ota 1273c0dd49bdSEiji Ota int 1274c0dd49bdSEiji Ota rdsv3_ib_alloc_hdrs(ib_device_t *dev, struct rdsv3_ib_connection *ic) 1275c0dd49bdSEiji Ota { 1276c0dd49bdSEiji Ota caddr_t addr; 1277c0dd49bdSEiji Ota size_t size; 1278c0dd49bdSEiji Ota ibt_mr_attr_t mr_attr; 1279c0dd49bdSEiji Ota ibt_mr_desc_t mr_desc; 1280c0dd49bdSEiji Ota ibt_mr_hdl_t mr_hdl; 1281c0dd49bdSEiji Ota int ret; 1282c0dd49bdSEiji Ota 1283c0dd49bdSEiji Ota RDSV3_DPRINTF4("rdsv3_ib_alloc_hdrs", "Enter(dev: %p)", dev); 1284c0dd49bdSEiji Ota 1285c0dd49bdSEiji Ota ASSERT(ic->i_mr == NULL); 1286c0dd49bdSEiji Ota 1287c0dd49bdSEiji Ota size = (ic->i_send_ring.w_nr + ic->i_recv_ring.w_nr + 1) * 1288c0dd49bdSEiji Ota sizeof (struct rdsv3_header); 1289c0dd49bdSEiji Ota 1290c0dd49bdSEiji Ota addr = kmem_zalloc(size, KM_NOSLEEP); 1291c0dd49bdSEiji Ota if (addr == NULL) 1292c0dd49bdSEiji Ota return (-1); 1293c0dd49bdSEiji Ota 1294c0dd49bdSEiji Ota mr_attr.mr_vaddr = (ib_vaddr_t)(uintptr_t)addr; 1295c0dd49bdSEiji Ota mr_attr.mr_len = size; 1296c0dd49bdSEiji Ota mr_attr.mr_as = NULL; 1297c0dd49bdSEiji Ota mr_attr.mr_flags = IBT_MR_ENABLE_LOCAL_WRITE; 1298c0dd49bdSEiji Ota ret = ibt_register_mr(ib_get_ibt_hca_hdl(dev), RDSV3_PD2PDHDL(ic->i_pd), 1299c0dd49bdSEiji Ota &mr_attr, &mr_hdl, &mr_desc); 1300c0dd49bdSEiji Ota if (ret != IBT_SUCCESS) { 1301c0dd49bdSEiji Ota RDSV3_DPRINTF2("rdsv3_ib_alloc_hdrs", 1302c0dd49bdSEiji Ota "ibt_register_mr returned: " "%d", ret); 1303c0dd49bdSEiji Ota return (-1); 1304c0dd49bdSEiji Ota } 1305c0dd49bdSEiji Ota 1306c0dd49bdSEiji Ota ic->i_mr = 1307c0dd49bdSEiji Ota (struct rdsv3_hdrs_mr *)kmem_alloc(sizeof (struct rdsv3_hdrs_mr), 1308c0dd49bdSEiji Ota KM_SLEEP); 1309c0dd49bdSEiji Ota ic->i_mr->addr = addr; 1310c0dd49bdSEiji Ota ic->i_mr->size = size; 1311c0dd49bdSEiji Ota ic->i_mr->hdl = mr_hdl; 1312c0dd49bdSEiji Ota ic->i_mr->lkey = mr_desc.md_lkey; 1313c0dd49bdSEiji Ota 1314c0dd49bdSEiji Ota ic->i_send_hdrs = (struct rdsv3_header *)addr; 1315c0dd49bdSEiji Ota ic->i_send_hdrs_dma = (uint64_t)(uintptr_t)addr; 1316c0dd49bdSEiji Ota 1317c0dd49bdSEiji Ota ic->i_recv_hdrs = (struct rdsv3_header *)(addr + 1318c0dd49bdSEiji Ota (ic->i_send_ring.w_nr * sizeof (struct rdsv3_header))); 1319c0dd49bdSEiji Ota ic->i_recv_hdrs_dma = (uint64_t)(uintptr_t)(addr + 1320c0dd49bdSEiji Ota (ic->i_send_ring.w_nr * sizeof (struct rdsv3_header))); 1321c0dd49bdSEiji Ota ic->i_recv_tasklet_cpuid = -1; 1322c0dd49bdSEiji Ota 1323c0dd49bdSEiji Ota ic->i_ack = (struct rdsv3_header *)(addr + 1324c0dd49bdSEiji Ota ((ic->i_send_ring.w_nr + ic->i_recv_ring.w_nr) * 1325c0dd49bdSEiji Ota sizeof (struct rdsv3_header))); 1326c0dd49bdSEiji Ota ic->i_ack_dma = (uint64_t)(uintptr_t)(addr + 1327c0dd49bdSEiji Ota ((ic->i_send_ring.w_nr + ic->i_recv_ring.w_nr) * 1328c0dd49bdSEiji Ota sizeof (struct rdsv3_header))); 1329c0dd49bdSEiji Ota 1330c0dd49bdSEiji Ota RDSV3_DPRINTF4("rdsv3_ib_alloc_hdrs", "Return(dev: %p)", dev); 1331c0dd49bdSEiji Ota 1332c0dd49bdSEiji Ota return (0); 1333c0dd49bdSEiji Ota } 1334c0dd49bdSEiji Ota 1335c0dd49bdSEiji Ota void 1336c0dd49bdSEiji Ota rdsv3_ib_free_hdrs(ib_device_t *dev, struct rdsv3_ib_connection *ic) 1337c0dd49bdSEiji Ota { 1338c0dd49bdSEiji Ota RDSV3_DPRINTF4("rdsv3_ib_free_hdrs", "Enter(dev: %p)", dev); 1339c0dd49bdSEiji Ota ASSERT(ic->i_mr != NULL); 1340c0dd49bdSEiji Ota 1341c0dd49bdSEiji Ota ic->i_send_hdrs = NULL; 1342c0dd49bdSEiji Ota ic->i_send_hdrs_dma = NULL; 1343c0dd49bdSEiji Ota 1344c0dd49bdSEiji Ota ic->i_recv_hdrs = NULL; 1345c0dd49bdSEiji Ota ic->i_recv_hdrs_dma = NULL; 1346c0dd49bdSEiji Ota 1347c0dd49bdSEiji Ota ic->i_ack = NULL; 1348c0dd49bdSEiji Ota ic->i_ack_dma = NULL; 1349c0dd49bdSEiji Ota 1350c0dd49bdSEiji Ota (void) ibt_deregister_mr(ib_get_ibt_hca_hdl(dev), ic->i_mr->hdl); 1351c0dd49bdSEiji Ota 1352c0dd49bdSEiji Ota kmem_free(ic->i_mr->addr, ic->i_mr->size); 1353c0dd49bdSEiji Ota kmem_free(ic->i_mr, sizeof (struct rdsv3_hdrs_mr)); 1354c0dd49bdSEiji Ota 1355c0dd49bdSEiji Ota ic->i_mr = NULL; 1356c0dd49bdSEiji Ota RDSV3_DPRINTF4("rdsv3_ib_free_hdrs", "Return(dev: %p)", dev); 1357c0dd49bdSEiji Ota } 1358cadbfdc3SEiji Ota 1359cadbfdc3SEiji Ota /* 1360cadbfdc3SEiji Ota * atomic_add_unless - add unless the number is a given value 1361cadbfdc3SEiji Ota * @v: pointer of type atomic_t 1362cadbfdc3SEiji Ota * @a: the amount to add to v... 1363cadbfdc3SEiji Ota * @u: ...unless v is equal to u. 1364cadbfdc3SEiji Ota * 1365cadbfdc3SEiji Ota * Atomically adds @a to @v, so long as it was not @u. 1366cadbfdc3SEiji Ota * Returns non-zero if @v was not @u, and zero otherwise. 1367cadbfdc3SEiji Ota */ 1368cadbfdc3SEiji Ota int 1369cadbfdc3SEiji Ota atomic_add_unless(atomic_t *v, uint_t a, ulong_t u) 1370cadbfdc3SEiji Ota { 1371cadbfdc3SEiji Ota uint_t c, old; 1372cadbfdc3SEiji Ota 1373cadbfdc3SEiji Ota c = *v; 1374cadbfdc3SEiji Ota while (c != u && (old = atomic_cas_uint(v, c, c + a)) != c) { 1375cadbfdc3SEiji Ota c = old; 1376cadbfdc3SEiji Ota } 1377cadbfdc3SEiji Ota return ((ulong_t)c != u); 1378cadbfdc3SEiji Ota } 1379