1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Shared Memory Communications over RDMA (SMC-R) and RoCE 4 * 5 * Generic netlink support functions to configure an SMC-R PNET table 6 * 7 * Copyright IBM Corp. 2016 8 * 9 * Author(s): Thomas Richter <tmricht@linux.vnet.ibm.com> 10 */ 11 12 #include <linux/module.h> 13 #include <linux/list.h> 14 #include <linux/ctype.h> 15 #include <net/netlink.h> 16 #include <net/genetlink.h> 17 18 #include <uapi/linux/if.h> 19 #include <uapi/linux/smc.h> 20 21 #include <rdma/ib_verbs.h> 22 23 #include <net/netns/generic.h> 24 #include "smc_netns.h" 25 26 #include "smc_pnet.h" 27 #include "smc_ib.h" 28 #include "smc_ism.h" 29 #include "smc_core.h" 30 31 #define SMC_ASCII_BLANK 32 32 33 static struct net_device *pnet_find_base_ndev(struct net_device *ndev); 34 35 static struct nla_policy smc_pnet_policy[SMC_PNETID_MAX + 1] = { 36 [SMC_PNETID_NAME] = { 37 .type = NLA_NUL_STRING, 38 .len = SMC_MAX_PNETID_LEN 39 }, 40 [SMC_PNETID_ETHNAME] = { 41 .type = NLA_NUL_STRING, 42 .len = IFNAMSIZ - 1 43 }, 44 [SMC_PNETID_IBNAME] = { 45 .type = NLA_NUL_STRING, 46 .len = IB_DEVICE_NAME_MAX - 1 47 }, 48 [SMC_PNETID_IBPORT] = { .type = NLA_U8 } 49 }; 50 51 static struct genl_family smc_pnet_nl_family; 52 53 enum smc_pnet_nametype { 54 SMC_PNET_ETH = 1, 55 SMC_PNET_IB = 2, 56 }; 57 58 /* pnet entry stored in pnet table */ 59 struct smc_pnetentry { 60 struct list_head list; 61 char pnet_name[SMC_MAX_PNETID_LEN + 1]; 62 enum smc_pnet_nametype type; 63 union { 64 struct { 65 char eth_name[IFNAMSIZ + 1]; 66 struct net_device *ndev; 67 }; 68 struct { 69 char ib_name[IB_DEVICE_NAME_MAX + 1]; 70 u8 ib_port; 71 }; 72 }; 73 }; 74 75 /* Check if two given pnetids match */ 76 static bool smc_pnet_match(u8 *pnetid1, u8 *pnetid2) 77 { 78 int i; 79 80 for (i = 0; i < SMC_MAX_PNETID_LEN; i++) { 81 if ((pnetid1[i] == 0 || pnetid1[i] == SMC_ASCII_BLANK) && 82 (pnetid2[i] == 0 || pnetid2[i] == SMC_ASCII_BLANK)) 83 break; 84 if (pnetid1[i] != pnetid2[i]) 85 return false; 86 } 87 return true; 88 } 89 90 /* Remove a pnetid from the pnet table. 91 */ 92 static int smc_pnet_remove_by_pnetid(struct net *net, char *pnet_name) 93 { 94 struct smc_pnetentry *pnetelem, *tmp_pe; 95 struct smc_pnettable *pnettable; 96 struct smc_ib_device *ibdev; 97 struct smcd_dev *smcd_dev; 98 struct smc_net *sn; 99 int rc = -ENOENT; 100 int ibport; 101 102 /* get pnettable for namespace */ 103 sn = net_generic(net, smc_net_id); 104 pnettable = &sn->pnettable; 105 106 /* remove table entry */ 107 write_lock(&pnettable->lock); 108 list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist, 109 list) { 110 if (!pnet_name || 111 smc_pnet_match(pnetelem->pnet_name, pnet_name)) { 112 list_del(&pnetelem->list); 113 if (pnetelem->type == SMC_PNET_ETH && pnetelem->ndev) 114 dev_put(pnetelem->ndev); 115 kfree(pnetelem); 116 rc = 0; 117 } 118 } 119 write_unlock(&pnettable->lock); 120 121 /* if this is not the initial namespace, stop here */ 122 if (net != &init_net) 123 return rc; 124 125 /* remove ib devices */ 126 spin_lock(&smc_ib_devices.lock); 127 list_for_each_entry(ibdev, &smc_ib_devices.list, list) { 128 for (ibport = 0; ibport < SMC_MAX_PORTS; ibport++) { 129 if (ibdev->pnetid_by_user[ibport] && 130 (!pnet_name || 131 smc_pnet_match(pnet_name, 132 ibdev->pnetid[ibport]))) { 133 memset(ibdev->pnetid[ibport], 0, 134 SMC_MAX_PNETID_LEN); 135 ibdev->pnetid_by_user[ibport] = false; 136 rc = 0; 137 } 138 } 139 } 140 spin_unlock(&smc_ib_devices.lock); 141 /* remove smcd devices */ 142 spin_lock(&smcd_dev_list.lock); 143 list_for_each_entry(smcd_dev, &smcd_dev_list.list, list) { 144 if (smcd_dev->pnetid_by_user && 145 (!pnet_name || 146 smc_pnet_match(pnet_name, smcd_dev->pnetid))) { 147 memset(smcd_dev->pnetid, 0, SMC_MAX_PNETID_LEN); 148 smcd_dev->pnetid_by_user = false; 149 rc = 0; 150 } 151 } 152 spin_unlock(&smcd_dev_list.lock); 153 return rc; 154 } 155 156 /* Add the reference to a given network device to the pnet table. 157 */ 158 static int smc_pnet_add_by_ndev(struct net_device *ndev) 159 { 160 struct smc_pnetentry *pnetelem, *tmp_pe; 161 struct smc_pnettable *pnettable; 162 struct net *net = dev_net(ndev); 163 struct smc_net *sn; 164 int rc = -ENOENT; 165 166 /* get pnettable for namespace */ 167 sn = net_generic(net, smc_net_id); 168 pnettable = &sn->pnettable; 169 170 write_lock(&pnettable->lock); 171 list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist, list) { 172 if (pnetelem->type == SMC_PNET_ETH && !pnetelem->ndev && 173 !strncmp(pnetelem->eth_name, ndev->name, IFNAMSIZ)) { 174 dev_hold(ndev); 175 pnetelem->ndev = ndev; 176 rc = 0; 177 break; 178 } 179 } 180 write_unlock(&pnettable->lock); 181 return rc; 182 } 183 184 /* Remove the reference to a given network device from the pnet table. 185 */ 186 static int smc_pnet_remove_by_ndev(struct net_device *ndev) 187 { 188 struct smc_pnetentry *pnetelem, *tmp_pe; 189 struct smc_pnettable *pnettable; 190 struct net *net = dev_net(ndev); 191 struct smc_net *sn; 192 int rc = -ENOENT; 193 194 /* get pnettable for namespace */ 195 sn = net_generic(net, smc_net_id); 196 pnettable = &sn->pnettable; 197 198 write_lock(&pnettable->lock); 199 list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist, list) { 200 if (pnetelem->type == SMC_PNET_ETH && pnetelem->ndev == ndev) { 201 dev_put(pnetelem->ndev); 202 pnetelem->ndev = NULL; 203 rc = 0; 204 break; 205 } 206 } 207 write_unlock(&pnettable->lock); 208 return rc; 209 } 210 211 /* Apply pnetid to ib device when no pnetid is set. 212 */ 213 static bool smc_pnet_apply_ib(struct smc_ib_device *ib_dev, u8 ib_port, 214 char *pnet_name) 215 { 216 u8 pnet_null[SMC_MAX_PNETID_LEN] = {0}; 217 bool applied = false; 218 219 spin_lock(&smc_ib_devices.lock); 220 if (smc_pnet_match(ib_dev->pnetid[ib_port - 1], pnet_null)) { 221 memcpy(ib_dev->pnetid[ib_port - 1], pnet_name, 222 SMC_MAX_PNETID_LEN); 223 ib_dev->pnetid_by_user[ib_port - 1] = true; 224 applied = true; 225 } 226 spin_unlock(&smc_ib_devices.lock); 227 return applied; 228 } 229 230 /* Apply pnetid to smcd device when no pnetid is set. 231 */ 232 static bool smc_pnet_apply_smcd(struct smcd_dev *smcd_dev, char *pnet_name) 233 { 234 u8 pnet_null[SMC_MAX_PNETID_LEN] = {0}; 235 bool applied = false; 236 237 spin_lock(&smcd_dev_list.lock); 238 if (smc_pnet_match(smcd_dev->pnetid, pnet_null)) { 239 memcpy(smcd_dev->pnetid, pnet_name, SMC_MAX_PNETID_LEN); 240 smcd_dev->pnetid_by_user = true; 241 applied = true; 242 } 243 spin_unlock(&smcd_dev_list.lock); 244 return applied; 245 } 246 247 /* The limit for pnetid is 16 characters. 248 * Valid characters should be (single-byte character set) a-z, A-Z, 0-9. 249 * Lower case letters are converted to upper case. 250 * Interior blanks should not be used. 251 */ 252 static bool smc_pnetid_valid(const char *pnet_name, char *pnetid) 253 { 254 char *bf = skip_spaces(pnet_name); 255 size_t len = strlen(bf); 256 char *end = bf + len; 257 258 if (!len) 259 return false; 260 while (--end >= bf && isspace(*end)) 261 ; 262 if (end - bf >= SMC_MAX_PNETID_LEN) 263 return false; 264 while (bf <= end) { 265 if (!isalnum(*bf)) 266 return false; 267 *pnetid++ = islower(*bf) ? toupper(*bf) : *bf; 268 bf++; 269 } 270 *pnetid = '\0'; 271 return true; 272 } 273 274 /* Find an infiniband device by a given name. The device might not exist. */ 275 static struct smc_ib_device *smc_pnet_find_ib(char *ib_name) 276 { 277 struct smc_ib_device *ibdev; 278 279 spin_lock(&smc_ib_devices.lock); 280 list_for_each_entry(ibdev, &smc_ib_devices.list, list) { 281 if (!strncmp(ibdev->ibdev->name, ib_name, 282 sizeof(ibdev->ibdev->name)) || 283 !strncmp(dev_name(ibdev->ibdev->dev.parent), ib_name, 284 IB_DEVICE_NAME_MAX - 1)) { 285 goto out; 286 } 287 } 288 ibdev = NULL; 289 out: 290 spin_unlock(&smc_ib_devices.lock); 291 return ibdev; 292 } 293 294 /* Find an smcd device by a given name. The device might not exist. */ 295 static struct smcd_dev *smc_pnet_find_smcd(char *smcd_name) 296 { 297 struct smcd_dev *smcd_dev; 298 299 spin_lock(&smcd_dev_list.lock); 300 list_for_each_entry(smcd_dev, &smcd_dev_list.list, list) { 301 if (!strncmp(dev_name(&smcd_dev->dev), smcd_name, 302 IB_DEVICE_NAME_MAX - 1)) 303 goto out; 304 } 305 smcd_dev = NULL; 306 out: 307 spin_unlock(&smcd_dev_list.lock); 308 return smcd_dev; 309 } 310 311 static int smc_pnet_add_eth(struct smc_pnettable *pnettable, struct net *net, 312 char *eth_name, char *pnet_name) 313 { 314 struct smc_pnetentry *tmp_pe, *new_pe; 315 struct net_device *ndev, *base_ndev; 316 u8 ndev_pnetid[SMC_MAX_PNETID_LEN]; 317 bool new_netdev; 318 int rc; 319 320 /* check if (base) netdev already has a pnetid. If there is one, we do 321 * not want to add a pnet table entry 322 */ 323 rc = -EEXIST; 324 ndev = dev_get_by_name(net, eth_name); /* dev_hold() */ 325 if (ndev) { 326 base_ndev = pnet_find_base_ndev(ndev); 327 if (!smc_pnetid_by_dev_port(base_ndev->dev.parent, 328 base_ndev->dev_port, ndev_pnetid)) 329 goto out_put; 330 } 331 332 /* add a new netdev entry to the pnet table if there isn't one */ 333 rc = -ENOMEM; 334 new_pe = kzalloc(sizeof(*new_pe), GFP_KERNEL); 335 if (!new_pe) 336 goto out_put; 337 new_pe->type = SMC_PNET_ETH; 338 memcpy(new_pe->pnet_name, pnet_name, SMC_MAX_PNETID_LEN); 339 strncpy(new_pe->eth_name, eth_name, IFNAMSIZ); 340 new_pe->ndev = ndev; 341 342 rc = -EEXIST; 343 new_netdev = true; 344 write_lock(&pnettable->lock); 345 list_for_each_entry(tmp_pe, &pnettable->pnetlist, list) { 346 if (tmp_pe->type == SMC_PNET_ETH && 347 !strncmp(tmp_pe->eth_name, eth_name, IFNAMSIZ)) { 348 new_netdev = false; 349 break; 350 } 351 } 352 if (new_netdev) { 353 list_add_tail(&new_pe->list, &pnettable->pnetlist); 354 write_unlock(&pnettable->lock); 355 } else { 356 write_unlock(&pnettable->lock); 357 kfree(new_pe); 358 goto out_put; 359 } 360 return 0; 361 362 out_put: 363 if (ndev) 364 dev_put(ndev); 365 return rc; 366 } 367 368 static int smc_pnet_add_ib(struct smc_pnettable *pnettable, char *ib_name, 369 u8 ib_port, char *pnet_name) 370 { 371 struct smc_pnetentry *tmp_pe, *new_pe; 372 struct smc_ib_device *ib_dev; 373 bool smcddev_applied = true; 374 bool ibdev_applied = true; 375 struct smcd_dev *smcd_dev; 376 bool new_ibdev; 377 378 /* try to apply the pnetid to active devices */ 379 ib_dev = smc_pnet_find_ib(ib_name); 380 if (ib_dev) 381 ibdev_applied = smc_pnet_apply_ib(ib_dev, ib_port, pnet_name); 382 smcd_dev = smc_pnet_find_smcd(ib_name); 383 if (smcd_dev) 384 smcddev_applied = smc_pnet_apply_smcd(smcd_dev, pnet_name); 385 /* Apply fails when a device has a hardware-defined pnetid set, do not 386 * add a pnet table entry in that case. 387 */ 388 if (!ibdev_applied || !smcddev_applied) 389 return -EEXIST; 390 391 /* add a new ib entry to the pnet table if there isn't one */ 392 new_pe = kzalloc(sizeof(*new_pe), GFP_KERNEL); 393 if (!new_pe) 394 return -ENOMEM; 395 new_pe->type = SMC_PNET_IB; 396 memcpy(new_pe->pnet_name, pnet_name, SMC_MAX_PNETID_LEN); 397 strncpy(new_pe->ib_name, ib_name, IB_DEVICE_NAME_MAX); 398 new_pe->ib_port = ib_port; 399 400 new_ibdev = true; 401 write_lock(&pnettable->lock); 402 list_for_each_entry(tmp_pe, &pnettable->pnetlist, list) { 403 if (tmp_pe->type == SMC_PNET_IB && 404 !strncmp(tmp_pe->ib_name, ib_name, IB_DEVICE_NAME_MAX)) { 405 new_ibdev = false; 406 break; 407 } 408 } 409 if (new_ibdev) { 410 list_add_tail(&new_pe->list, &pnettable->pnetlist); 411 write_unlock(&pnettable->lock); 412 } else { 413 write_unlock(&pnettable->lock); 414 kfree(new_pe); 415 } 416 return (new_ibdev) ? 0 : -EEXIST; 417 } 418 419 /* Append a pnetid to the end of the pnet table if not already on this list. 420 */ 421 static int smc_pnet_enter(struct net *net, struct nlattr *tb[]) 422 { 423 char pnet_name[SMC_MAX_PNETID_LEN + 1]; 424 struct smc_pnettable *pnettable; 425 bool new_netdev = false; 426 bool new_ibdev = false; 427 struct smc_net *sn; 428 u8 ibport = 1; 429 char *string; 430 int rc; 431 432 /* get pnettable for namespace */ 433 sn = net_generic(net, smc_net_id); 434 pnettable = &sn->pnettable; 435 436 rc = -EINVAL; 437 if (!tb[SMC_PNETID_NAME]) 438 goto error; 439 string = (char *)nla_data(tb[SMC_PNETID_NAME]); 440 if (!smc_pnetid_valid(string, pnet_name)) 441 goto error; 442 443 if (tb[SMC_PNETID_ETHNAME]) { 444 string = (char *)nla_data(tb[SMC_PNETID_ETHNAME]); 445 rc = smc_pnet_add_eth(pnettable, net, string, pnet_name); 446 if (!rc) 447 new_netdev = true; 448 else if (rc != -EEXIST) 449 goto error; 450 } 451 452 /* if this is not the initial namespace, stop here */ 453 if (net != &init_net) 454 return new_netdev ? 0 : -EEXIST; 455 456 rc = -EINVAL; 457 if (tb[SMC_PNETID_IBNAME]) { 458 string = (char *)nla_data(tb[SMC_PNETID_IBNAME]); 459 string = strim(string); 460 if (tb[SMC_PNETID_IBPORT]) { 461 ibport = nla_get_u8(tb[SMC_PNETID_IBPORT]); 462 if (ibport < 1 || ibport > SMC_MAX_PORTS) 463 goto error; 464 } 465 rc = smc_pnet_add_ib(pnettable, string, ibport, pnet_name); 466 if (!rc) 467 new_ibdev = true; 468 else if (rc != -EEXIST) 469 goto error; 470 } 471 return (new_netdev || new_ibdev) ? 0 : -EEXIST; 472 473 error: 474 return rc; 475 } 476 477 /* Convert an smc_pnetentry to a netlink attribute sequence */ 478 static int smc_pnet_set_nla(struct sk_buff *msg, 479 struct smc_pnetentry *pnetelem) 480 { 481 if (nla_put_string(msg, SMC_PNETID_NAME, pnetelem->pnet_name)) 482 return -1; 483 if (pnetelem->type == SMC_PNET_ETH) { 484 if (nla_put_string(msg, SMC_PNETID_ETHNAME, 485 pnetelem->eth_name)) 486 return -1; 487 } else { 488 if (nla_put_string(msg, SMC_PNETID_ETHNAME, "n/a")) 489 return -1; 490 } 491 if (pnetelem->type == SMC_PNET_IB) { 492 if (nla_put_string(msg, SMC_PNETID_IBNAME, pnetelem->ib_name) || 493 nla_put_u8(msg, SMC_PNETID_IBPORT, pnetelem->ib_port)) 494 return -1; 495 } else { 496 if (nla_put_string(msg, SMC_PNETID_IBNAME, "n/a") || 497 nla_put_u8(msg, SMC_PNETID_IBPORT, 0xff)) 498 return -1; 499 } 500 501 return 0; 502 } 503 504 static int smc_pnet_add(struct sk_buff *skb, struct genl_info *info) 505 { 506 struct net *net = genl_info_net(info); 507 508 return smc_pnet_enter(net, info->attrs); 509 } 510 511 static int smc_pnet_del(struct sk_buff *skb, struct genl_info *info) 512 { 513 struct net *net = genl_info_net(info); 514 515 if (!info->attrs[SMC_PNETID_NAME]) 516 return -EINVAL; 517 return smc_pnet_remove_by_pnetid(net, 518 (char *)nla_data(info->attrs[SMC_PNETID_NAME])); 519 } 520 521 static int smc_pnet_dump_start(struct netlink_callback *cb) 522 { 523 cb->args[0] = 0; 524 return 0; 525 } 526 527 static int smc_pnet_dumpinfo(struct sk_buff *skb, 528 u32 portid, u32 seq, u32 flags, 529 struct smc_pnetentry *pnetelem) 530 { 531 void *hdr; 532 533 hdr = genlmsg_put(skb, portid, seq, &smc_pnet_nl_family, 534 flags, SMC_PNETID_GET); 535 if (!hdr) 536 return -ENOMEM; 537 if (smc_pnet_set_nla(skb, pnetelem) < 0) { 538 genlmsg_cancel(skb, hdr); 539 return -EMSGSIZE; 540 } 541 genlmsg_end(skb, hdr); 542 return 0; 543 } 544 545 static int _smc_pnet_dump(struct net *net, struct sk_buff *skb, u32 portid, 546 u32 seq, u8 *pnetid, int start_idx) 547 { 548 struct smc_pnettable *pnettable; 549 struct smc_pnetentry *pnetelem; 550 struct smc_net *sn; 551 int idx = 0; 552 553 /* get pnettable for namespace */ 554 sn = net_generic(net, smc_net_id); 555 pnettable = &sn->pnettable; 556 557 /* dump pnettable entries */ 558 read_lock(&pnettable->lock); 559 list_for_each_entry(pnetelem, &pnettable->pnetlist, list) { 560 if (pnetid && !smc_pnet_match(pnetelem->pnet_name, pnetid)) 561 continue; 562 if (idx++ < start_idx) 563 continue; 564 /* if this is not the initial namespace, dump only netdev */ 565 if (net != &init_net && pnetelem->type != SMC_PNET_ETH) 566 continue; 567 if (smc_pnet_dumpinfo(skb, portid, seq, NLM_F_MULTI, 568 pnetelem)) { 569 --idx; 570 break; 571 } 572 } 573 read_unlock(&pnettable->lock); 574 return idx; 575 } 576 577 static int smc_pnet_dump(struct sk_buff *skb, struct netlink_callback *cb) 578 { 579 struct net *net = sock_net(skb->sk); 580 int idx; 581 582 idx = _smc_pnet_dump(net, skb, NETLINK_CB(cb->skb).portid, 583 cb->nlh->nlmsg_seq, NULL, cb->args[0]); 584 585 cb->args[0] = idx; 586 return skb->len; 587 } 588 589 /* Retrieve one PNETID entry */ 590 static int smc_pnet_get(struct sk_buff *skb, struct genl_info *info) 591 { 592 struct net *net = genl_info_net(info); 593 struct sk_buff *msg; 594 void *hdr; 595 596 if (!info->attrs[SMC_PNETID_NAME]) 597 return -EINVAL; 598 599 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 600 if (!msg) 601 return -ENOMEM; 602 603 _smc_pnet_dump(net, msg, info->snd_portid, info->snd_seq, 604 nla_data(info->attrs[SMC_PNETID_NAME]), 0); 605 606 /* finish multi part message and send it */ 607 hdr = nlmsg_put(msg, info->snd_portid, info->snd_seq, NLMSG_DONE, 0, 608 NLM_F_MULTI); 609 if (!hdr) { 610 nlmsg_free(msg); 611 return -EMSGSIZE; 612 } 613 return genlmsg_reply(msg, info); 614 } 615 616 /* Remove and delete all pnetids from pnet table. 617 */ 618 static int smc_pnet_flush(struct sk_buff *skb, struct genl_info *info) 619 { 620 struct net *net = genl_info_net(info); 621 622 smc_pnet_remove_by_pnetid(net, NULL); 623 return 0; 624 } 625 626 /* SMC_PNETID generic netlink operation definition */ 627 static const struct genl_ops smc_pnet_ops[] = { 628 { 629 .cmd = SMC_PNETID_GET, 630 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 631 /* can be retrieved by unprivileged users */ 632 .doit = smc_pnet_get, 633 .dumpit = smc_pnet_dump, 634 .start = smc_pnet_dump_start 635 }, 636 { 637 .cmd = SMC_PNETID_ADD, 638 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 639 .flags = GENL_ADMIN_PERM, 640 .doit = smc_pnet_add 641 }, 642 { 643 .cmd = SMC_PNETID_DEL, 644 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 645 .flags = GENL_ADMIN_PERM, 646 .doit = smc_pnet_del 647 }, 648 { 649 .cmd = SMC_PNETID_FLUSH, 650 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 651 .flags = GENL_ADMIN_PERM, 652 .doit = smc_pnet_flush 653 } 654 }; 655 656 /* SMC_PNETID family definition */ 657 static struct genl_family smc_pnet_nl_family __ro_after_init = { 658 .hdrsize = 0, 659 .name = SMCR_GENL_FAMILY_NAME, 660 .version = SMCR_GENL_FAMILY_VERSION, 661 .maxattr = SMC_PNETID_MAX, 662 .policy = smc_pnet_policy, 663 .netnsok = true, 664 .module = THIS_MODULE, 665 .ops = smc_pnet_ops, 666 .n_ops = ARRAY_SIZE(smc_pnet_ops) 667 }; 668 669 static int smc_pnet_netdev_event(struct notifier_block *this, 670 unsigned long event, void *ptr) 671 { 672 struct net_device *event_dev = netdev_notifier_info_to_dev(ptr); 673 674 switch (event) { 675 case NETDEV_REBOOT: 676 case NETDEV_UNREGISTER: 677 smc_pnet_remove_by_ndev(event_dev); 678 return NOTIFY_OK; 679 case NETDEV_REGISTER: 680 smc_pnet_add_by_ndev(event_dev); 681 return NOTIFY_OK; 682 default: 683 return NOTIFY_DONE; 684 } 685 } 686 687 static struct notifier_block smc_netdev_notifier = { 688 .notifier_call = smc_pnet_netdev_event 689 }; 690 691 /* init network namespace */ 692 int smc_pnet_net_init(struct net *net) 693 { 694 struct smc_net *sn = net_generic(net, smc_net_id); 695 struct smc_pnettable *pnettable = &sn->pnettable; 696 697 INIT_LIST_HEAD(&pnettable->pnetlist); 698 rwlock_init(&pnettable->lock); 699 700 return 0; 701 } 702 703 int __init smc_pnet_init(void) 704 { 705 int rc; 706 707 rc = genl_register_family(&smc_pnet_nl_family); 708 if (rc) 709 return rc; 710 rc = register_netdevice_notifier(&smc_netdev_notifier); 711 if (rc) 712 genl_unregister_family(&smc_pnet_nl_family); 713 return rc; 714 } 715 716 /* exit network namespace */ 717 void smc_pnet_net_exit(struct net *net) 718 { 719 /* flush pnet table */ 720 smc_pnet_remove_by_pnetid(net, NULL); 721 } 722 723 void smc_pnet_exit(void) 724 { 725 unregister_netdevice_notifier(&smc_netdev_notifier); 726 genl_unregister_family(&smc_pnet_nl_family); 727 } 728 729 /* Determine one base device for stacked net devices. 730 * If the lower device level contains more than one devices 731 * (for instance with bonding slaves), just the first device 732 * is used to reach a base device. 733 */ 734 static struct net_device *pnet_find_base_ndev(struct net_device *ndev) 735 { 736 int i, nest_lvl; 737 738 rtnl_lock(); 739 nest_lvl = ndev->lower_level; 740 for (i = 0; i < nest_lvl; i++) { 741 struct list_head *lower = &ndev->adj_list.lower; 742 743 if (list_empty(lower)) 744 break; 745 lower = lower->next; 746 ndev = netdev_lower_get_next(ndev, &lower); 747 } 748 rtnl_unlock(); 749 return ndev; 750 } 751 752 static int smc_pnet_find_ndev_pnetid_by_table(struct net_device *ndev, 753 u8 *pnetid) 754 { 755 struct smc_pnettable *pnettable; 756 struct net *net = dev_net(ndev); 757 struct smc_pnetentry *pnetelem; 758 struct smc_net *sn; 759 int rc = -ENOENT; 760 761 /* get pnettable for namespace */ 762 sn = net_generic(net, smc_net_id); 763 pnettable = &sn->pnettable; 764 765 read_lock(&pnettable->lock); 766 list_for_each_entry(pnetelem, &pnettable->pnetlist, list) { 767 if (pnetelem->type == SMC_PNET_ETH && ndev == pnetelem->ndev) { 768 /* get pnetid of netdev device */ 769 memcpy(pnetid, pnetelem->pnet_name, SMC_MAX_PNETID_LEN); 770 rc = 0; 771 break; 772 } 773 } 774 read_unlock(&pnettable->lock); 775 return rc; 776 } 777 778 /* find a roce device for the given pnetid */ 779 static void _smc_pnet_find_roce_by_pnetid(u8 *pnet_id, 780 struct smc_init_info *ini, 781 struct smc_ib_device *known_dev) 782 { 783 struct smc_ib_device *ibdev; 784 int i; 785 786 ini->ib_dev = NULL; 787 spin_lock(&smc_ib_devices.lock); 788 list_for_each_entry(ibdev, &smc_ib_devices.list, list) { 789 if (ibdev == known_dev) 790 continue; 791 for (i = 1; i <= SMC_MAX_PORTS; i++) { 792 if (!rdma_is_port_valid(ibdev->ibdev, i)) 793 continue; 794 if (smc_pnet_match(ibdev->pnetid[i - 1], pnet_id) && 795 smc_ib_port_active(ibdev, i) && 796 !test_bit(i - 1, ibdev->ports_going_away) && 797 !smc_ib_determine_gid(ibdev, i, ini->vlan_id, 798 ini->ib_gid, NULL)) { 799 ini->ib_dev = ibdev; 800 ini->ib_port = i; 801 goto out; 802 } 803 } 804 } 805 out: 806 spin_unlock(&smc_ib_devices.lock); 807 } 808 809 /* find alternate roce device with same pnet_id and vlan_id */ 810 void smc_pnet_find_alt_roce(struct smc_link_group *lgr, 811 struct smc_init_info *ini, 812 struct smc_ib_device *known_dev) 813 { 814 _smc_pnet_find_roce_by_pnetid(lgr->pnet_id, ini, known_dev); 815 } 816 817 /* if handshake network device belongs to a roce device, return its 818 * IB device and port 819 */ 820 static void smc_pnet_find_rdma_dev(struct net_device *netdev, 821 struct smc_init_info *ini) 822 { 823 struct smc_ib_device *ibdev; 824 825 spin_lock(&smc_ib_devices.lock); 826 list_for_each_entry(ibdev, &smc_ib_devices.list, list) { 827 struct net_device *ndev; 828 int i; 829 830 for (i = 1; i <= SMC_MAX_PORTS; i++) { 831 if (!rdma_is_port_valid(ibdev->ibdev, i)) 832 continue; 833 if (!ibdev->ibdev->ops.get_netdev) 834 continue; 835 ndev = ibdev->ibdev->ops.get_netdev(ibdev->ibdev, i); 836 if (!ndev) 837 continue; 838 dev_put(ndev); 839 if (netdev == ndev && 840 smc_ib_port_active(ibdev, i) && 841 !test_bit(i - 1, ibdev->ports_going_away) && 842 !smc_ib_determine_gid(ibdev, i, ini->vlan_id, 843 ini->ib_gid, NULL)) { 844 ini->ib_dev = ibdev; 845 ini->ib_port = i; 846 break; 847 } 848 } 849 } 850 spin_unlock(&smc_ib_devices.lock); 851 } 852 853 /* Determine the corresponding IB device port based on the hardware PNETID. 854 * Searching stops at the first matching active IB device port with vlan_id 855 * configured. 856 * If nothing found, check pnetid table. 857 * If nothing found, try to use handshake device 858 */ 859 static void smc_pnet_find_roce_by_pnetid(struct net_device *ndev, 860 struct smc_init_info *ini) 861 { 862 u8 ndev_pnetid[SMC_MAX_PNETID_LEN]; 863 864 ndev = pnet_find_base_ndev(ndev); 865 if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port, 866 ndev_pnetid) && 867 smc_pnet_find_ndev_pnetid_by_table(ndev, ndev_pnetid)) { 868 smc_pnet_find_rdma_dev(ndev, ini); 869 return; /* pnetid could not be determined */ 870 } 871 _smc_pnet_find_roce_by_pnetid(ndev_pnetid, ini, NULL); 872 } 873 874 static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev, 875 struct smc_init_info *ini) 876 { 877 u8 ndev_pnetid[SMC_MAX_PNETID_LEN]; 878 struct smcd_dev *ismdev; 879 880 ndev = pnet_find_base_ndev(ndev); 881 if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port, 882 ndev_pnetid) && 883 smc_pnet_find_ndev_pnetid_by_table(ndev, ndev_pnetid)) 884 return; /* pnetid could not be determined */ 885 886 spin_lock(&smcd_dev_list.lock); 887 list_for_each_entry(ismdev, &smcd_dev_list.list, list) { 888 if (smc_pnet_match(ismdev->pnetid, ndev_pnetid) && 889 !ismdev->going_away) { 890 ini->ism_dev = ismdev; 891 break; 892 } 893 } 894 spin_unlock(&smcd_dev_list.lock); 895 } 896 897 /* PNET table analysis for a given sock: 898 * determine ib_device and port belonging to used internal TCP socket 899 * ethernet interface. 900 */ 901 void smc_pnet_find_roce_resource(struct sock *sk, struct smc_init_info *ini) 902 { 903 struct dst_entry *dst = sk_dst_get(sk); 904 905 ini->ib_dev = NULL; 906 ini->ib_port = 0; 907 if (!dst) 908 goto out; 909 if (!dst->dev) 910 goto out_rel; 911 912 smc_pnet_find_roce_by_pnetid(dst->dev, ini); 913 914 out_rel: 915 dst_release(dst); 916 out: 917 return; 918 } 919 920 void smc_pnet_find_ism_resource(struct sock *sk, struct smc_init_info *ini) 921 { 922 struct dst_entry *dst = sk_dst_get(sk); 923 924 ini->ism_dev = NULL; 925 if (!dst) 926 goto out; 927 if (!dst->dev) 928 goto out_rel; 929 930 smc_pnet_find_ism_by_pnetid(dst->dev, ini); 931 932 out_rel: 933 dst_release(dst); 934 out: 935 return; 936 } 937 938 /* Lookup and apply a pnet table entry to the given ib device. 939 */ 940 int smc_pnetid_by_table_ib(struct smc_ib_device *smcibdev, u8 ib_port) 941 { 942 char *ib_name = smcibdev->ibdev->name; 943 struct smc_pnettable *pnettable; 944 struct smc_pnetentry *tmp_pe; 945 struct smc_net *sn; 946 int rc = -ENOENT; 947 948 /* get pnettable for init namespace */ 949 sn = net_generic(&init_net, smc_net_id); 950 pnettable = &sn->pnettable; 951 952 read_lock(&pnettable->lock); 953 list_for_each_entry(tmp_pe, &pnettable->pnetlist, list) { 954 if (tmp_pe->type == SMC_PNET_IB && 955 !strncmp(tmp_pe->ib_name, ib_name, IB_DEVICE_NAME_MAX) && 956 tmp_pe->ib_port == ib_port) { 957 smc_pnet_apply_ib(smcibdev, ib_port, tmp_pe->pnet_name); 958 rc = 0; 959 break; 960 } 961 } 962 read_unlock(&pnettable->lock); 963 964 return rc; 965 } 966 967 /* Lookup and apply a pnet table entry to the given smcd device. 968 */ 969 int smc_pnetid_by_table_smcd(struct smcd_dev *smcddev) 970 { 971 const char *ib_name = dev_name(&smcddev->dev); 972 struct smc_pnettable *pnettable; 973 struct smc_pnetentry *tmp_pe; 974 struct smc_net *sn; 975 int rc = -ENOENT; 976 977 /* get pnettable for init namespace */ 978 sn = net_generic(&init_net, smc_net_id); 979 pnettable = &sn->pnettable; 980 981 read_lock(&pnettable->lock); 982 list_for_each_entry(tmp_pe, &pnettable->pnetlist, list) { 983 if (tmp_pe->type == SMC_PNET_IB && 984 !strncmp(tmp_pe->ib_name, ib_name, IB_DEVICE_NAME_MAX)) { 985 smc_pnet_apply_smcd(smcddev, tmp_pe->pnet_name); 986 rc = 0; 987 break; 988 } 989 } 990 read_unlock(&pnettable->lock); 991 992 return rc; 993 } 994