1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Shared Memory Communications over RDMA (SMC-R) and RoCE 4 * 5 * Generic netlink support functions to configure an SMC-R PNET table 6 * 7 * Copyright IBM Corp. 2016 8 * 9 * Author(s): Thomas Richter <tmricht@linux.vnet.ibm.com> 10 */ 11 12 #include <linux/module.h> 13 #include <linux/list.h> 14 #include <linux/ctype.h> 15 #include <net/netlink.h> 16 #include <net/genetlink.h> 17 18 #include <uapi/linux/if.h> 19 #include <uapi/linux/smc.h> 20 21 #include <rdma/ib_verbs.h> 22 23 #include <net/netns/generic.h> 24 #include "smc_netns.h" 25 26 #include "smc_pnet.h" 27 #include "smc_ib.h" 28 #include "smc_ism.h" 29 30 #define SMC_ASCII_BLANK 32 31 32 static struct net_device *pnet_find_base_ndev(struct net_device *ndev); 33 34 static struct nla_policy smc_pnet_policy[SMC_PNETID_MAX + 1] = { 35 [SMC_PNETID_NAME] = { 36 .type = NLA_NUL_STRING, 37 .len = SMC_MAX_PNETID_LEN 38 }, 39 [SMC_PNETID_ETHNAME] = { 40 .type = NLA_NUL_STRING, 41 .len = IFNAMSIZ - 1 42 }, 43 [SMC_PNETID_IBNAME] = { 44 .type = NLA_NUL_STRING, 45 .len = IB_DEVICE_NAME_MAX - 1 46 }, 47 [SMC_PNETID_IBPORT] = { .type = NLA_U8 } 48 }; 49 50 static struct genl_family smc_pnet_nl_family; 51 52 /** 53 * struct smc_user_pnetentry - pnet identifier name entry for/from user 54 * @list: List node. 55 * @pnet_name: Pnet identifier name 56 * @ndev: pointer to network device. 57 * @smcibdev: Pointer to IB device. 58 * @ib_port: Port of IB device. 59 * @smcd_dev: Pointer to smcd device. 
60 */ 61 struct smc_user_pnetentry { 62 struct list_head list; 63 char pnet_name[SMC_MAX_PNETID_LEN + 1]; 64 struct net_device *ndev; 65 struct smc_ib_device *smcibdev; 66 u8 ib_port; 67 struct smcd_dev *smcd_dev; 68 }; 69 70 /* pnet entry stored in pnet table */ 71 struct smc_pnetentry { 72 struct list_head list; 73 char pnet_name[SMC_MAX_PNETID_LEN + 1]; 74 struct net_device *ndev; 75 }; 76 77 /* Check if two given pnetids match */ 78 static bool smc_pnet_match(u8 *pnetid1, u8 *pnetid2) 79 { 80 int i; 81 82 for (i = 0; i < SMC_MAX_PNETID_LEN; i++) { 83 if ((pnetid1[i] == 0 || pnetid1[i] == SMC_ASCII_BLANK) && 84 (pnetid2[i] == 0 || pnetid2[i] == SMC_ASCII_BLANK)) 85 break; 86 if (pnetid1[i] != pnetid2[i]) 87 return false; 88 } 89 return true; 90 } 91 92 /* Remove a pnetid from the pnet table. 93 */ 94 static int smc_pnet_remove_by_pnetid(struct net *net, char *pnet_name) 95 { 96 struct smc_pnetentry *pnetelem, *tmp_pe; 97 struct smc_pnettable *pnettable; 98 struct smc_ib_device *ibdev; 99 struct smcd_dev *smcd_dev; 100 struct smc_net *sn; 101 int rc = -ENOENT; 102 int ibport; 103 104 /* get pnettable for namespace */ 105 sn = net_generic(net, smc_net_id); 106 pnettable = &sn->pnettable; 107 108 /* remove netdevices */ 109 write_lock(&pnettable->lock); 110 list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist, 111 list) { 112 if (!pnet_name || 113 smc_pnet_match(pnetelem->pnet_name, pnet_name)) { 114 list_del(&pnetelem->list); 115 dev_put(pnetelem->ndev); 116 kfree(pnetelem); 117 rc = 0; 118 } 119 } 120 write_unlock(&pnettable->lock); 121 122 /* if this is not the initial namespace, stop here */ 123 if (net != &init_net) 124 return rc; 125 126 /* remove ib devices */ 127 spin_lock(&smc_ib_devices.lock); 128 list_for_each_entry(ibdev, &smc_ib_devices.list, list) { 129 for (ibport = 0; ibport < SMC_MAX_PORTS; ibport++) { 130 if (ibdev->pnetid_by_user[ibport] && 131 (!pnet_name || 132 smc_pnet_match(pnet_name, 133 ibdev->pnetid[ibport]))) { 134 
memset(ibdev->pnetid[ibport], 0, 135 SMC_MAX_PNETID_LEN); 136 ibdev->pnetid_by_user[ibport] = false; 137 rc = 0; 138 } 139 } 140 } 141 spin_unlock(&smc_ib_devices.lock); 142 /* remove smcd devices */ 143 spin_lock(&smcd_dev_list.lock); 144 list_for_each_entry(smcd_dev, &smcd_dev_list.list, list) { 145 if (smcd_dev->pnetid_by_user && 146 (!pnet_name || 147 smc_pnet_match(pnet_name, smcd_dev->pnetid))) { 148 memset(smcd_dev->pnetid, 0, SMC_MAX_PNETID_LEN); 149 smcd_dev->pnetid_by_user = false; 150 rc = 0; 151 } 152 } 153 spin_unlock(&smcd_dev_list.lock); 154 return rc; 155 } 156 157 /* Remove a pnet entry mentioning a given network device from the pnet table. 158 */ 159 static int smc_pnet_remove_by_ndev(struct net_device *ndev) 160 { 161 struct smc_pnetentry *pnetelem, *tmp_pe; 162 struct smc_pnettable *pnettable; 163 struct net *net = dev_net(ndev); 164 struct smc_net *sn; 165 int rc = -ENOENT; 166 167 /* get pnettable for namespace */ 168 sn = net_generic(net, smc_net_id); 169 pnettable = &sn->pnettable; 170 171 write_lock(&pnettable->lock); 172 list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist, list) { 173 if (pnetelem->ndev == ndev) { 174 list_del(&pnetelem->list); 175 dev_put(pnetelem->ndev); 176 kfree(pnetelem); 177 rc = 0; 178 break; 179 } 180 } 181 write_unlock(&pnettable->lock); 182 return rc; 183 } 184 185 /* Append a pnetid to the end of the pnet table if not already on this list. 
186 */ 187 static int smc_pnet_enter(struct smc_pnettable *pnettable, 188 struct smc_user_pnetentry *new_pnetelem) 189 { 190 u8 pnet_null[SMC_MAX_PNETID_LEN] = {0}; 191 u8 ndev_pnetid[SMC_MAX_PNETID_LEN]; 192 struct smc_pnetentry *tmp_pnetelem; 193 struct smc_pnetentry *pnetelem; 194 bool new_smcddev = false; 195 struct net_device *ndev; 196 bool new_netdev = true; 197 bool new_ibdev = false; 198 199 if (new_pnetelem->smcibdev) { 200 struct smc_ib_device *ib_dev = new_pnetelem->smcibdev; 201 int ib_port = new_pnetelem->ib_port; 202 203 spin_lock(&smc_ib_devices.lock); 204 if (smc_pnet_match(ib_dev->pnetid[ib_port - 1], pnet_null)) { 205 memcpy(ib_dev->pnetid[ib_port - 1], 206 new_pnetelem->pnet_name, SMC_MAX_PNETID_LEN); 207 ib_dev->pnetid_by_user[ib_port - 1] = true; 208 new_ibdev = true; 209 } 210 spin_unlock(&smc_ib_devices.lock); 211 } 212 if (new_pnetelem->smcd_dev) { 213 struct smcd_dev *smcd_dev = new_pnetelem->smcd_dev; 214 215 spin_lock(&smcd_dev_list.lock); 216 if (smc_pnet_match(smcd_dev->pnetid, pnet_null)) { 217 memcpy(smcd_dev->pnetid, new_pnetelem->pnet_name, 218 SMC_MAX_PNETID_LEN); 219 smcd_dev->pnetid_by_user = true; 220 new_smcddev = true; 221 } 222 spin_unlock(&smcd_dev_list.lock); 223 } 224 225 if (!new_pnetelem->ndev) 226 return (new_ibdev || new_smcddev) ? 0 : -EEXIST; 227 228 /* check if (base) netdev already has a pnetid. If there is one, we do 229 * not want to add a pnet table entry 230 */ 231 ndev = pnet_find_base_ndev(new_pnetelem->ndev); 232 if (!smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port, 233 ndev_pnetid)) 234 return (new_ibdev || new_smcddev) ? 
0 : -EEXIST; 235 236 /* add a new netdev entry to the pnet table if there isn't one */ 237 tmp_pnetelem = kzalloc(sizeof(*pnetelem), GFP_KERNEL); 238 if (!tmp_pnetelem) 239 return -ENOMEM; 240 memcpy(tmp_pnetelem->pnet_name, new_pnetelem->pnet_name, 241 SMC_MAX_PNETID_LEN); 242 tmp_pnetelem->ndev = new_pnetelem->ndev; 243 244 write_lock(&pnettable->lock); 245 list_for_each_entry(pnetelem, &pnettable->pnetlist, list) { 246 if (pnetelem->ndev == new_pnetelem->ndev) 247 new_netdev = false; 248 } 249 if (new_netdev) { 250 dev_hold(tmp_pnetelem->ndev); 251 list_add_tail(&tmp_pnetelem->list, &pnettable->pnetlist); 252 write_unlock(&pnettable->lock); 253 } else { 254 write_unlock(&pnettable->lock); 255 kfree(tmp_pnetelem); 256 } 257 258 return (new_netdev || new_ibdev || new_smcddev) ? 0 : -EEXIST; 259 } 260 261 /* The limit for pnetid is 16 characters. 262 * Valid characters should be (single-byte character set) a-z, A-Z, 0-9. 263 * Lower case letters are converted to upper case. 264 * Interior blanks should not be used. 265 */ 266 static bool smc_pnetid_valid(const char *pnet_name, char *pnetid) 267 { 268 char *bf = skip_spaces(pnet_name); 269 size_t len = strlen(bf); 270 char *end = bf + len; 271 272 if (!len) 273 return false; 274 while (--end >= bf && isspace(*end)) 275 ; 276 if (end - bf >= SMC_MAX_PNETID_LEN) 277 return false; 278 while (bf <= end) { 279 if (!isalnum(*bf)) 280 return false; 281 *pnetid++ = islower(*bf) ? toupper(*bf) : *bf; 282 bf++; 283 } 284 *pnetid = '\0'; 285 return true; 286 } 287 288 /* Find an infiniband device by a given name. The device might not exist. 
*/ 289 static struct smc_ib_device *smc_pnet_find_ib(char *ib_name) 290 { 291 struct smc_ib_device *ibdev; 292 293 spin_lock(&smc_ib_devices.lock); 294 list_for_each_entry(ibdev, &smc_ib_devices.list, list) { 295 if (!strncmp(ibdev->ibdev->name, ib_name, 296 sizeof(ibdev->ibdev->name)) || 297 !strncmp(dev_name(ibdev->ibdev->dev.parent), ib_name, 298 IB_DEVICE_NAME_MAX - 1)) { 299 goto out; 300 } 301 } 302 ibdev = NULL; 303 out: 304 spin_unlock(&smc_ib_devices.lock); 305 return ibdev; 306 } 307 308 /* Find an smcd device by a given name. The device might not exist. */ 309 static struct smcd_dev *smc_pnet_find_smcd(char *smcd_name) 310 { 311 struct smcd_dev *smcd_dev; 312 313 spin_lock(&smcd_dev_list.lock); 314 list_for_each_entry(smcd_dev, &smcd_dev_list.list, list) { 315 if (!strncmp(dev_name(&smcd_dev->dev), smcd_name, 316 IB_DEVICE_NAME_MAX - 1)) 317 goto out; 318 } 319 smcd_dev = NULL; 320 out: 321 spin_unlock(&smcd_dev_list.lock); 322 return smcd_dev; 323 } 324 325 /* Parse the supplied netlink attributes and fill a pnetentry structure. 326 * For ethernet and infiniband device names verify that the devices exist. 
327 */ 328 static int smc_pnet_fill_entry(struct net *net, 329 struct smc_user_pnetentry *pnetelem, 330 struct nlattr *tb[]) 331 { 332 char *string, *ibname; 333 int rc; 334 335 memset(pnetelem, 0, sizeof(*pnetelem)); 336 INIT_LIST_HEAD(&pnetelem->list); 337 338 rc = -EINVAL; 339 if (!tb[SMC_PNETID_NAME]) 340 goto error; 341 string = (char *)nla_data(tb[SMC_PNETID_NAME]); 342 if (!smc_pnetid_valid(string, pnetelem->pnet_name)) 343 goto error; 344 345 rc = -EINVAL; 346 if (tb[SMC_PNETID_ETHNAME]) { 347 string = (char *)nla_data(tb[SMC_PNETID_ETHNAME]); 348 pnetelem->ndev = dev_get_by_name(net, string); 349 if (!pnetelem->ndev) 350 goto error; 351 } 352 353 /* if this is not the initial namespace, stop here */ 354 if (net != &init_net) 355 return 0; 356 357 rc = -EINVAL; 358 if (tb[SMC_PNETID_IBNAME]) { 359 ibname = (char *)nla_data(tb[SMC_PNETID_IBNAME]); 360 ibname = strim(ibname); 361 pnetelem->smcibdev = smc_pnet_find_ib(ibname); 362 pnetelem->smcd_dev = smc_pnet_find_smcd(ibname); 363 if (!pnetelem->smcibdev && !pnetelem->smcd_dev) 364 goto error; 365 if (pnetelem->smcibdev) { 366 if (!tb[SMC_PNETID_IBPORT]) 367 goto error; 368 pnetelem->ib_port = nla_get_u8(tb[SMC_PNETID_IBPORT]); 369 if (pnetelem->ib_port < 1 || 370 pnetelem->ib_port > SMC_MAX_PORTS) 371 goto error; 372 } 373 } 374 375 return 0; 376 377 error: 378 if (pnetelem->ndev) 379 dev_put(pnetelem->ndev); 380 return rc; 381 } 382 383 /* Convert an smc_pnetentry to a netlink attribute sequence */ 384 static int smc_pnet_set_nla(struct sk_buff *msg, 385 struct smc_user_pnetentry *pnetelem) 386 { 387 if (nla_put_string(msg, SMC_PNETID_NAME, pnetelem->pnet_name)) 388 return -1; 389 if (pnetelem->ndev) { 390 if (nla_put_string(msg, SMC_PNETID_ETHNAME, 391 pnetelem->ndev->name)) 392 return -1; 393 } else { 394 if (nla_put_string(msg, SMC_PNETID_ETHNAME, "n/a")) 395 return -1; 396 } 397 if (pnetelem->smcibdev) { 398 if (nla_put_string(msg, SMC_PNETID_IBNAME, 399 dev_name(pnetelem->smcibdev->ibdev->dev.parent)) 
|| 400 nla_put_u8(msg, SMC_PNETID_IBPORT, pnetelem->ib_port)) 401 return -1; 402 } else if (pnetelem->smcd_dev) { 403 if (nla_put_string(msg, SMC_PNETID_IBNAME, 404 dev_name(&pnetelem->smcd_dev->dev)) || 405 nla_put_u8(msg, SMC_PNETID_IBPORT, 1)) 406 return -1; 407 } else { 408 if (nla_put_string(msg, SMC_PNETID_IBNAME, "n/a") || 409 nla_put_u8(msg, SMC_PNETID_IBPORT, 0xff)) 410 return -1; 411 } 412 413 return 0; 414 } 415 416 static int smc_pnet_add(struct sk_buff *skb, struct genl_info *info) 417 { 418 struct net *net = genl_info_net(info); 419 struct smc_user_pnetentry pnetelem; 420 struct smc_pnettable *pnettable; 421 struct smc_net *sn; 422 int rc; 423 424 /* get pnettable for namespace */ 425 sn = net_generic(net, smc_net_id); 426 pnettable = &sn->pnettable; 427 428 rc = smc_pnet_fill_entry(net, &pnetelem, info->attrs); 429 if (!rc) 430 rc = smc_pnet_enter(pnettable, &pnetelem); 431 if (pnetelem.ndev) 432 dev_put(pnetelem.ndev); 433 return rc; 434 } 435 436 static int smc_pnet_del(struct sk_buff *skb, struct genl_info *info) 437 { 438 struct net *net = genl_info_net(info); 439 440 if (!info->attrs[SMC_PNETID_NAME]) 441 return -EINVAL; 442 return smc_pnet_remove_by_pnetid(net, 443 (char *)nla_data(info->attrs[SMC_PNETID_NAME])); 444 } 445 446 static int smc_pnet_dump_start(struct netlink_callback *cb) 447 { 448 cb->args[0] = 0; 449 return 0; 450 } 451 452 static int smc_pnet_dumpinfo(struct sk_buff *skb, 453 u32 portid, u32 seq, u32 flags, 454 struct smc_user_pnetentry *pnetelem) 455 { 456 void *hdr; 457 458 hdr = genlmsg_put(skb, portid, seq, &smc_pnet_nl_family, 459 flags, SMC_PNETID_GET); 460 if (!hdr) 461 return -ENOMEM; 462 if (smc_pnet_set_nla(skb, pnetelem) < 0) { 463 genlmsg_cancel(skb, hdr); 464 return -EMSGSIZE; 465 } 466 genlmsg_end(skb, hdr); 467 return 0; 468 } 469 470 static int _smc_pnet_dump(struct net *net, struct sk_buff *skb, u32 portid, 471 u32 seq, u8 *pnetid, int start_idx) 472 { 473 struct smc_user_pnetentry tmp_entry; 474 struct 
smc_pnettable *pnettable; 475 struct smc_pnetentry *pnetelem; 476 struct smc_ib_device *ibdev; 477 struct smcd_dev *smcd_dev; 478 struct smc_net *sn; 479 int idx = 0; 480 int ibport; 481 482 /* get pnettable for namespace */ 483 sn = net_generic(net, smc_net_id); 484 pnettable = &sn->pnettable; 485 486 /* dump netdevices */ 487 read_lock(&pnettable->lock); 488 list_for_each_entry(pnetelem, &pnettable->pnetlist, list) { 489 if (pnetid && !smc_pnet_match(pnetelem->pnet_name, pnetid)) 490 continue; 491 if (idx++ < start_idx) 492 continue; 493 memset(&tmp_entry, 0, sizeof(tmp_entry)); 494 memcpy(&tmp_entry.pnet_name, pnetelem->pnet_name, 495 SMC_MAX_PNETID_LEN); 496 tmp_entry.ndev = pnetelem->ndev; 497 if (smc_pnet_dumpinfo(skb, portid, seq, NLM_F_MULTI, 498 &tmp_entry)) { 499 --idx; 500 break; 501 } 502 } 503 read_unlock(&pnettable->lock); 504 505 /* if this is not the initial namespace, stop here */ 506 if (net != &init_net) 507 return idx; 508 509 /* dump ib devices */ 510 spin_lock(&smc_ib_devices.lock); 511 list_for_each_entry(ibdev, &smc_ib_devices.list, list) { 512 for (ibport = 0; ibport < SMC_MAX_PORTS; ibport++) { 513 if (ibdev->pnetid_by_user[ibport]) { 514 if (pnetid && 515 !smc_pnet_match(ibdev->pnetid[ibport], 516 pnetid)) 517 continue; 518 if (idx++ < start_idx) 519 continue; 520 memset(&tmp_entry, 0, sizeof(tmp_entry)); 521 memcpy(&tmp_entry.pnet_name, 522 ibdev->pnetid[ibport], 523 SMC_MAX_PNETID_LEN); 524 tmp_entry.smcibdev = ibdev; 525 tmp_entry.ib_port = ibport + 1; 526 if (smc_pnet_dumpinfo(skb, portid, seq, 527 NLM_F_MULTI, 528 &tmp_entry)) { 529 --idx; 530 break; 531 } 532 } 533 } 534 } 535 spin_unlock(&smc_ib_devices.lock); 536 537 /* dump smcd devices */ 538 spin_lock(&smcd_dev_list.lock); 539 list_for_each_entry(smcd_dev, &smcd_dev_list.list, list) { 540 if (smcd_dev->pnetid_by_user) { 541 if (pnetid && !smc_pnet_match(smcd_dev->pnetid, pnetid)) 542 continue; 543 if (idx++ < start_idx) 544 continue; 545 memset(&tmp_entry, 0, 
sizeof(tmp_entry)); 546 memcpy(&tmp_entry.pnet_name, smcd_dev->pnetid, 547 SMC_MAX_PNETID_LEN); 548 tmp_entry.smcd_dev = smcd_dev; 549 if (smc_pnet_dumpinfo(skb, portid, seq, NLM_F_MULTI, 550 &tmp_entry)) { 551 --idx; 552 break; 553 } 554 } 555 } 556 spin_unlock(&smcd_dev_list.lock); 557 558 return idx; 559 } 560 561 static int smc_pnet_dump(struct sk_buff *skb, struct netlink_callback *cb) 562 { 563 struct net *net = sock_net(skb->sk); 564 int idx; 565 566 idx = _smc_pnet_dump(net, skb, NETLINK_CB(cb->skb).portid, 567 cb->nlh->nlmsg_seq, NULL, cb->args[0]); 568 569 cb->args[0] = idx; 570 return skb->len; 571 } 572 573 /* Retrieve one PNETID entry */ 574 static int smc_pnet_get(struct sk_buff *skb, struct genl_info *info) 575 { 576 struct net *net = genl_info_net(info); 577 struct sk_buff *msg; 578 void *hdr; 579 580 if (!info->attrs[SMC_PNETID_NAME]) 581 return -EINVAL; 582 583 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 584 if (!msg) 585 return -ENOMEM; 586 587 _smc_pnet_dump(net, msg, info->snd_portid, info->snd_seq, 588 nla_data(info->attrs[SMC_PNETID_NAME]), 0); 589 590 /* finish multi part message and send it */ 591 hdr = nlmsg_put(msg, info->snd_portid, info->snd_seq, NLMSG_DONE, 0, 592 NLM_F_MULTI); 593 if (!hdr) { 594 nlmsg_free(msg); 595 return -EMSGSIZE; 596 } 597 return genlmsg_reply(msg, info); 598 } 599 600 /* Remove and delete all pnetids from pnet table. 
601 */ 602 static int smc_pnet_flush(struct sk_buff *skb, struct genl_info *info) 603 { 604 struct net *net = genl_info_net(info); 605 606 smc_pnet_remove_by_pnetid(net, NULL); 607 return 0; 608 } 609 610 /* SMC_PNETID generic netlink operation definition */ 611 static const struct genl_ops smc_pnet_ops[] = { 612 { 613 .cmd = SMC_PNETID_GET, 614 .flags = GENL_ADMIN_PERM, 615 .policy = smc_pnet_policy, 616 .doit = smc_pnet_get, 617 .dumpit = smc_pnet_dump, 618 .start = smc_pnet_dump_start 619 }, 620 { 621 .cmd = SMC_PNETID_ADD, 622 .flags = GENL_ADMIN_PERM, 623 .policy = smc_pnet_policy, 624 .doit = smc_pnet_add 625 }, 626 { 627 .cmd = SMC_PNETID_DEL, 628 .flags = GENL_ADMIN_PERM, 629 .policy = smc_pnet_policy, 630 .doit = smc_pnet_del 631 }, 632 { 633 .cmd = SMC_PNETID_FLUSH, 634 .flags = GENL_ADMIN_PERM, 635 .policy = smc_pnet_policy, 636 .doit = smc_pnet_flush 637 } 638 }; 639 640 /* SMC_PNETID family definition */ 641 static struct genl_family smc_pnet_nl_family __ro_after_init = { 642 .hdrsize = 0, 643 .name = SMCR_GENL_FAMILY_NAME, 644 .version = SMCR_GENL_FAMILY_VERSION, 645 .maxattr = SMC_PNETID_MAX, 646 .netnsok = true, 647 .module = THIS_MODULE, 648 .ops = smc_pnet_ops, 649 .n_ops = ARRAY_SIZE(smc_pnet_ops) 650 }; 651 652 static int smc_pnet_netdev_event(struct notifier_block *this, 653 unsigned long event, void *ptr) 654 { 655 struct net_device *event_dev = netdev_notifier_info_to_dev(ptr); 656 657 switch (event) { 658 case NETDEV_REBOOT: 659 case NETDEV_UNREGISTER: 660 smc_pnet_remove_by_ndev(event_dev); 661 return NOTIFY_OK; 662 default: 663 return NOTIFY_DONE; 664 } 665 } 666 667 static struct notifier_block smc_netdev_notifier = { 668 .notifier_call = smc_pnet_netdev_event 669 }; 670 671 /* init network namespace */ 672 int smc_pnet_net_init(struct net *net) 673 { 674 struct smc_net *sn = net_generic(net, smc_net_id); 675 struct smc_pnettable *pnettable = &sn->pnettable; 676 677 INIT_LIST_HEAD(&pnettable->pnetlist); 678 rwlock_init(&pnettable->lock); 
679 680 return 0; 681 } 682 683 int __init smc_pnet_init(void) 684 { 685 int rc; 686 687 rc = genl_register_family(&smc_pnet_nl_family); 688 if (rc) 689 return rc; 690 rc = register_netdevice_notifier(&smc_netdev_notifier); 691 if (rc) 692 genl_unregister_family(&smc_pnet_nl_family); 693 return rc; 694 } 695 696 /* exit network namespace */ 697 void smc_pnet_net_exit(struct net *net) 698 { 699 /* flush pnet table */ 700 smc_pnet_remove_by_pnetid(net, NULL); 701 } 702 703 void smc_pnet_exit(void) 704 { 705 unregister_netdevice_notifier(&smc_netdev_notifier); 706 genl_unregister_family(&smc_pnet_nl_family); 707 } 708 709 /* Determine one base device for stacked net devices. 710 * If the lower device level contains more than one devices 711 * (for instance with bonding slaves), just the first device 712 * is used to reach a base device. 713 */ 714 static struct net_device *pnet_find_base_ndev(struct net_device *ndev) 715 { 716 int i, nest_lvl; 717 718 rtnl_lock(); 719 nest_lvl = dev_get_nest_level(ndev); 720 for (i = 0; i < nest_lvl; i++) { 721 struct list_head *lower = &ndev->adj_list.lower; 722 723 if (list_empty(lower)) 724 break; 725 lower = lower->next; 726 ndev = netdev_lower_get_next(ndev, &lower); 727 } 728 rtnl_unlock(); 729 return ndev; 730 } 731 732 static int smc_pnet_find_ndev_pnetid_by_table(struct net_device *ndev, 733 u8 *pnetid) 734 { 735 struct smc_pnettable *pnettable; 736 struct net *net = dev_net(ndev); 737 struct smc_pnetentry *pnetelem; 738 struct smc_net *sn; 739 int rc = -ENOENT; 740 741 /* get pnettable for namespace */ 742 sn = net_generic(net, smc_net_id); 743 pnettable = &sn->pnettable; 744 745 read_lock(&pnettable->lock); 746 list_for_each_entry(pnetelem, &pnettable->pnetlist, list) { 747 if (ndev == pnetelem->ndev) { 748 /* get pnetid of netdev device */ 749 memcpy(pnetid, pnetelem->pnet_name, SMC_MAX_PNETID_LEN); 750 rc = 0; 751 break; 752 } 753 } 754 read_unlock(&pnettable->lock); 755 return rc; 756 } 757 758 /* if handshake network 
device belongs to a roce device, return its
 * IB device and port
 */
/* smc_pnet_find_rdma_dev() - find the IB device port backed by a net device
 * @netdev: net device to look for
 * @smcibdev: set to the IB device found; left untouched if nothing is found
 * @ibport: set to the port found (counting from 1); left untouched if
 *	    nothing is found
 * @vlan_id: vlan id passed to smc_ib_determine_gid()
 * @gid: buffer passed to smc_ib_determine_gid()
 *
 * A port qualifies if its net device is @netdev, the port is active and
 * smc_ib_determine_gid() succeeds for it. The search stops at the first
 * qualifying port, so that no further devices are probed while the device
 * list lock is held and a later port cannot replace the result.
 */
static void smc_pnet_find_rdma_dev(struct net_device *netdev,
				   struct smc_ib_device **smcibdev,
				   u8 *ibport, unsigned short vlan_id, u8 gid[])
{
	struct smc_ib_device *ibdev;

	spin_lock(&smc_ib_devices.lock);
	list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
		struct net_device *ndev;
		int i;

		/* without get_netdev no port of this device can be mapped */
		if (!ibdev->ibdev->ops.get_netdev)
			continue;

		for (i = 1; i <= SMC_MAX_PORTS; i++) {
			if (!rdma_is_port_valid(ibdev->ibdev, i))
				continue;
			ndev = ibdev->ibdev->ops.get_netdev(ibdev->ibdev, i);
			if (!ndev)
				continue;
			/* ndev is only compared below, never dereferenced,
			 * so it can be put right away
			 */
			dev_put(ndev);
			if (netdev == ndev &&
			    smc_ib_port_active(ibdev, i) &&
			    !smc_ib_determine_gid(ibdev, i, vlan_id, gid,
						  NULL)) {
				*smcibdev = ibdev;
				*ibport = i;
				goto out;
			}
		}
	}
out:
	spin_unlock(&smc_ib_devices.lock);
}

/* Determine the corresponding IB device port based on the hardware PNETID.
 * Searching stops at the first matching active IB device port with vlan_id
 * configured.
 * If nothing found, check pnetid table.
798 * If nothing found, try to use handshake device 799 */ 800 static void smc_pnet_find_roce_by_pnetid(struct net_device *ndev, 801 struct smc_ib_device **smcibdev, 802 u8 *ibport, unsigned short vlan_id, 803 u8 gid[]) 804 { 805 u8 ndev_pnetid[SMC_MAX_PNETID_LEN]; 806 struct smc_ib_device *ibdev; 807 int i; 808 809 ndev = pnet_find_base_ndev(ndev); 810 if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port, 811 ndev_pnetid) && 812 smc_pnet_find_ndev_pnetid_by_table(ndev, ndev_pnetid)) { 813 smc_pnet_find_rdma_dev(ndev, smcibdev, ibport, vlan_id, gid); 814 return; /* pnetid could not be determined */ 815 } 816 817 spin_lock(&smc_ib_devices.lock); 818 list_for_each_entry(ibdev, &smc_ib_devices.list, list) { 819 for (i = 1; i <= SMC_MAX_PORTS; i++) { 820 if (!rdma_is_port_valid(ibdev->ibdev, i)) 821 continue; 822 if (smc_pnet_match(ibdev->pnetid[i - 1], ndev_pnetid) && 823 smc_ib_port_active(ibdev, i) && 824 !smc_ib_determine_gid(ibdev, i, vlan_id, gid, 825 NULL)) { 826 *smcibdev = ibdev; 827 *ibport = i; 828 goto out; 829 } 830 } 831 } 832 out: 833 spin_unlock(&smc_ib_devices.lock); 834 } 835 836 static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev, 837 struct smcd_dev **smcismdev) 838 { 839 u8 ndev_pnetid[SMC_MAX_PNETID_LEN]; 840 struct smcd_dev *ismdev; 841 842 ndev = pnet_find_base_ndev(ndev); 843 if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port, 844 ndev_pnetid) && 845 smc_pnet_find_ndev_pnetid_by_table(ndev, ndev_pnetid)) 846 return; /* pnetid could not be determined */ 847 848 spin_lock(&smcd_dev_list.lock); 849 list_for_each_entry(ismdev, &smcd_dev_list.list, list) { 850 if (smc_pnet_match(ismdev->pnetid, ndev_pnetid)) { 851 *smcismdev = ismdev; 852 break; 853 } 854 } 855 spin_unlock(&smcd_dev_list.lock); 856 } 857 858 /* PNET table analysis for a given sock: 859 * determine ib_device and port belonging to used internal TCP socket 860 * ethernet interface. 
861 */ 862 void smc_pnet_find_roce_resource(struct sock *sk, 863 struct smc_ib_device **smcibdev, u8 *ibport, 864 unsigned short vlan_id, u8 gid[]) 865 { 866 struct dst_entry *dst = sk_dst_get(sk); 867 868 *smcibdev = NULL; 869 *ibport = 0; 870 871 if (!dst) 872 goto out; 873 if (!dst->dev) 874 goto out_rel; 875 876 smc_pnet_find_roce_by_pnetid(dst->dev, smcibdev, ibport, vlan_id, gid); 877 878 out_rel: 879 dst_release(dst); 880 out: 881 return; 882 } 883 884 void smc_pnet_find_ism_resource(struct sock *sk, struct smcd_dev **smcismdev) 885 { 886 struct dst_entry *dst = sk_dst_get(sk); 887 888 *smcismdev = NULL; 889 if (!dst) 890 goto out; 891 if (!dst->dev) 892 goto out_rel; 893 894 smc_pnet_find_ism_by_pnetid(dst->dev, smcismdev); 895 896 out_rel: 897 dst_release(dst); 898 out: 899 return; 900 } 901