1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Shared Memory Communications over RDMA (SMC-R) and RoCE 4 * 5 * Generic netlink support functions to configure an SMC-R PNET table 6 * 7 * Copyright IBM Corp. 2016 8 * 9 * Author(s): Thomas Richter <tmricht@linux.vnet.ibm.com> 10 */ 11 12 #include <linux/module.h> 13 #include <linux/list.h> 14 #include <linux/ctype.h> 15 #include <net/netlink.h> 16 #include <net/genetlink.h> 17 18 #include <uapi/linux/if.h> 19 #include <uapi/linux/smc.h> 20 21 #include <rdma/ib_verbs.h> 22 23 #include <net/netns/generic.h> 24 #include "smc_netns.h" 25 26 #include "smc_pnet.h" 27 #include "smc_ib.h" 28 #include "smc_ism.h" 29 #include "smc_core.h" 30 31 #define SMC_ASCII_BLANK 32 32 33 static struct net_device *pnet_find_base_ndev(struct net_device *ndev); 34 35 static struct nla_policy smc_pnet_policy[SMC_PNETID_MAX + 1] = { 36 [SMC_PNETID_NAME] = { 37 .type = NLA_NUL_STRING, 38 .len = SMC_MAX_PNETID_LEN 39 }, 40 [SMC_PNETID_ETHNAME] = { 41 .type = NLA_NUL_STRING, 42 .len = IFNAMSIZ - 1 43 }, 44 [SMC_PNETID_IBNAME] = { 45 .type = NLA_NUL_STRING, 46 .len = IB_DEVICE_NAME_MAX - 1 47 }, 48 [SMC_PNETID_IBPORT] = { .type = NLA_U8 } 49 }; 50 51 static struct genl_family smc_pnet_nl_family; 52 53 /** 54 * struct smc_user_pnetentry - pnet identifier name entry for/from user 55 * @list: List node. 56 * @pnet_name: Pnet identifier name 57 * @ndev: pointer to network device. 58 * @smcibdev: Pointer to IB device. 59 * @ib_port: Port of IB device. 60 * @smcd_dev: Pointer to smcd device. 61 */ 62 struct smc_user_pnetentry { 63 struct list_head list; 64 char pnet_name[SMC_MAX_PNETID_LEN + 1]; 65 struct net_device *ndev; 66 struct smc_ib_device *smcibdev; 67 u8 ib_port; 68 struct smcd_dev *smcd_dev; 69 }; 70 71 /* pnet entry stored in pnet table */ 72 struct smc_pnetentry { 73 struct list_head list; 74 char pnet_name[SMC_MAX_PNETID_LEN + 1]; 75 struct net_device *ndev; 76 }; 77 78 /* Check if two given pnetids match */ 79 static bool smc_pnet_match(u8 *pnetid1, u8 *pnetid2) 80 { 81 int i; 82 83 for (i = 0; i < SMC_MAX_PNETID_LEN; i++) { 84 if ((pnetid1[i] == 0 || pnetid1[i] == SMC_ASCII_BLANK) && 85 (pnetid2[i] == 0 || pnetid2[i] == SMC_ASCII_BLANK)) 86 break; 87 if (pnetid1[i] != pnetid2[i]) 88 return false; 89 } 90 return true; 91 } 92 93 /* Remove a pnetid from the pnet table. 94 */ 95 static int smc_pnet_remove_by_pnetid(struct net *net, char *pnet_name) 96 { 97 struct smc_pnetentry *pnetelem, *tmp_pe; 98 struct smc_pnettable *pnettable; 99 struct smc_ib_device *ibdev; 100 struct smcd_dev *smcd_dev; 101 struct smc_net *sn; 102 int rc = -ENOENT; 103 int ibport; 104 105 /* get pnettable for namespace */ 106 sn = net_generic(net, smc_net_id); 107 pnettable = &sn->pnettable; 108 109 /* remove netdevices */ 110 write_lock(&pnettable->lock); 111 list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist, 112 list) { 113 if (!pnet_name || 114 smc_pnet_match(pnetelem->pnet_name, pnet_name)) { 115 list_del(&pnetelem->list); 116 dev_put(pnetelem->ndev); 117 kfree(pnetelem); 118 rc = 0; 119 } 120 } 121 write_unlock(&pnettable->lock); 122 123 /* if this is not the initial namespace, stop here */ 124 if (net != &init_net) 125 return rc; 126 127 /* remove ib devices */ 128 spin_lock(&smc_ib_devices.lock); 129 list_for_each_entry(ibdev, &smc_ib_devices.list, list) { 130 for (ibport = 0; ibport < SMC_MAX_PORTS; ibport++) { 131 if (ibdev->pnetid_by_user[ibport] && 132 (!pnet_name || 133 smc_pnet_match(pnet_name, 134 ibdev->pnetid[ibport]))) { 135 memset(ibdev->pnetid[ibport], 0, 136 SMC_MAX_PNETID_LEN); 137 ibdev->pnetid_by_user[ibport] = false; 138 rc = 0; 139 } 140 } 141 } 142 spin_unlock(&smc_ib_devices.lock); 143 /* remove smcd devices */ 144 spin_lock(&smcd_dev_list.lock); 145 list_for_each_entry(smcd_dev, &smcd_dev_list.list, list) { 146 if (smcd_dev->pnetid_by_user && 147 (!pnet_name || 148 smc_pnet_match(pnet_name, smcd_dev->pnetid))) { 149 memset(smcd_dev->pnetid, 0, SMC_MAX_PNETID_LEN); 150 smcd_dev->pnetid_by_user = false; 151 rc = 0; 152 } 153 } 154 spin_unlock(&smcd_dev_list.lock); 155 return rc; 156 } 157 158 /* Remove a pnet entry mentioning a given network device from the pnet table. 159 */ 160 static int smc_pnet_remove_by_ndev(struct net_device *ndev) 161 { 162 struct smc_pnetentry *pnetelem, *tmp_pe; 163 struct smc_pnettable *pnettable; 164 struct net *net = dev_net(ndev); 165 struct smc_net *sn; 166 int rc = -ENOENT; 167 168 /* get pnettable for namespace */ 169 sn = net_generic(net, smc_net_id); 170 pnettable = &sn->pnettable; 171 172 write_lock(&pnettable->lock); 173 list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist, list) { 174 if (pnetelem->ndev == ndev) { 175 list_del(&pnetelem->list); 176 dev_put(pnetelem->ndev); 177 kfree(pnetelem); 178 rc = 0; 179 break; 180 } 181 } 182 write_unlock(&pnettable->lock); 183 return rc; 184 } 185 186 /* Append a pnetid to the end of the pnet table if not already on this list. 187 */ 188 static int smc_pnet_enter(struct smc_pnettable *pnettable, 189 struct smc_user_pnetentry *new_pnetelem) 190 { 191 u8 pnet_null[SMC_MAX_PNETID_LEN] = {0}; 192 u8 ndev_pnetid[SMC_MAX_PNETID_LEN]; 193 struct smc_pnetentry *tmp_pnetelem; 194 struct smc_pnetentry *pnetelem; 195 bool new_smcddev = false; 196 struct net_device *ndev; 197 bool new_netdev = true; 198 bool new_ibdev = false; 199 200 if (new_pnetelem->smcibdev) { 201 struct smc_ib_device *ib_dev = new_pnetelem->smcibdev; 202 int ib_port = new_pnetelem->ib_port; 203 204 spin_lock(&smc_ib_devices.lock); 205 if (smc_pnet_match(ib_dev->pnetid[ib_port - 1], pnet_null)) { 206 memcpy(ib_dev->pnetid[ib_port - 1], 207 new_pnetelem->pnet_name, SMC_MAX_PNETID_LEN); 208 ib_dev->pnetid_by_user[ib_port - 1] = true; 209 new_ibdev = true; 210 } 211 spin_unlock(&smc_ib_devices.lock); 212 } 213 if (new_pnetelem->smcd_dev) { 214 struct smcd_dev *smcd_dev = new_pnetelem->smcd_dev; 215 216 spin_lock(&smcd_dev_list.lock); 217 if (smc_pnet_match(smcd_dev->pnetid, pnet_null)) { 218 memcpy(smcd_dev->pnetid, new_pnetelem->pnet_name, 219 SMC_MAX_PNETID_LEN); 220 smcd_dev->pnetid_by_user = true; 221 new_smcddev = true; 222 } 223 spin_unlock(&smcd_dev_list.lock); 224 } 225 226 if (!new_pnetelem->ndev) 227 return (new_ibdev || new_smcddev) ? 0 : -EEXIST; 228 229 /* check if (base) netdev already has a pnetid. If there is one, we do 230 * not want to add a pnet table entry 231 */ 232 ndev = pnet_find_base_ndev(new_pnetelem->ndev); 233 if (!smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port, 234 ndev_pnetid)) 235 return (new_ibdev || new_smcddev) ? 0 : -EEXIST; 236 237 /* add a new netdev entry to the pnet table if there isn't one */ 238 tmp_pnetelem = kzalloc(sizeof(*pnetelem), GFP_KERNEL); 239 if (!tmp_pnetelem) 240 return -ENOMEM; 241 memcpy(tmp_pnetelem->pnet_name, new_pnetelem->pnet_name, 242 SMC_MAX_PNETID_LEN); 243 tmp_pnetelem->ndev = new_pnetelem->ndev; 244 245 write_lock(&pnettable->lock); 246 list_for_each_entry(pnetelem, &pnettable->pnetlist, list) { 247 if (pnetelem->ndev == new_pnetelem->ndev) 248 new_netdev = false; 249 } 250 if (new_netdev) { 251 dev_hold(tmp_pnetelem->ndev); 252 list_add_tail(&tmp_pnetelem->list, &pnettable->pnetlist); 253 write_unlock(&pnettable->lock); 254 } else { 255 write_unlock(&pnettable->lock); 256 kfree(tmp_pnetelem); 257 } 258 259 return (new_netdev || new_ibdev || new_smcddev) ? 0 : -EEXIST; 260 } 261 262 /* The limit for pnetid is 16 characters. 263 * Valid characters should be (single-byte character set) a-z, A-Z, 0-9. 264 * Lower case letters are converted to upper case. 265 * Interior blanks should not be used. 266 */ 267 static bool smc_pnetid_valid(const char *pnet_name, char *pnetid) 268 { 269 char *bf = skip_spaces(pnet_name); 270 size_t len = strlen(bf); 271 char *end = bf + len; 272 273 if (!len) 274 return false; 275 while (--end >= bf && isspace(*end)) 276 ; 277 if (end - bf >= SMC_MAX_PNETID_LEN) 278 return false; 279 while (bf <= end) { 280 if (!isalnum(*bf)) 281 return false; 282 *pnetid++ = islower(*bf) ? toupper(*bf) : *bf; 283 bf++; 284 } 285 *pnetid = '\0'; 286 return true; 287 } 288 289 /* Find an infiniband device by a given name. The device might not exist. */ 290 static struct smc_ib_device *smc_pnet_find_ib(char *ib_name) 291 { 292 struct smc_ib_device *ibdev; 293 294 spin_lock(&smc_ib_devices.lock); 295 list_for_each_entry(ibdev, &smc_ib_devices.list, list) { 296 if (!strncmp(ibdev->ibdev->name, ib_name, 297 sizeof(ibdev->ibdev->name)) || 298 !strncmp(dev_name(ibdev->ibdev->dev.parent), ib_name, 299 IB_DEVICE_NAME_MAX - 1)) { 300 goto out; 301 } 302 } 303 ibdev = NULL; 304 out: 305 spin_unlock(&smc_ib_devices.lock); 306 return ibdev; 307 } 308 309 /* Find an smcd device by a given name. The device might not exist. */ 310 static struct smcd_dev *smc_pnet_find_smcd(char *smcd_name) 311 { 312 struct smcd_dev *smcd_dev; 313 314 spin_lock(&smcd_dev_list.lock); 315 list_for_each_entry(smcd_dev, &smcd_dev_list.list, list) { 316 if (!strncmp(dev_name(&smcd_dev->dev), smcd_name, 317 IB_DEVICE_NAME_MAX - 1)) 318 goto out; 319 } 320 smcd_dev = NULL; 321 out: 322 spin_unlock(&smcd_dev_list.lock); 323 return smcd_dev; 324 } 325 326 /* Parse the supplied netlink attributes and fill a pnetentry structure. 327 * For ethernet and infiniband device names verify that the devices exist. 328 */ 329 static int smc_pnet_fill_entry(struct net *net, 330 struct smc_user_pnetentry *pnetelem, 331 struct nlattr *tb[]) 332 { 333 char *string, *ibname; 334 int rc; 335 336 memset(pnetelem, 0, sizeof(*pnetelem)); 337 INIT_LIST_HEAD(&pnetelem->list); 338 339 rc = -EINVAL; 340 if (!tb[SMC_PNETID_NAME]) 341 goto error; 342 string = (char *)nla_data(tb[SMC_PNETID_NAME]); 343 if (!smc_pnetid_valid(string, pnetelem->pnet_name)) 344 goto error; 345 346 rc = -EINVAL; 347 if (tb[SMC_PNETID_ETHNAME]) { 348 string = (char *)nla_data(tb[SMC_PNETID_ETHNAME]); 349 pnetelem->ndev = dev_get_by_name(net, string); 350 if (!pnetelem->ndev) 351 goto error; 352 } 353 354 /* if this is not the initial namespace, stop here */ 355 if (net != &init_net) 356 return 0; 357 358 rc = -EINVAL; 359 if (tb[SMC_PNETID_IBNAME]) { 360 ibname = (char *)nla_data(tb[SMC_PNETID_IBNAME]); 361 ibname = strim(ibname); 362 pnetelem->smcibdev = smc_pnet_find_ib(ibname); 363 pnetelem->smcd_dev = smc_pnet_find_smcd(ibname); 364 if (!pnetelem->smcibdev && !pnetelem->smcd_dev) 365 goto error; 366 if (pnetelem->smcibdev) { 367 if (!tb[SMC_PNETID_IBPORT]) 368 goto error; 369 pnetelem->ib_port = nla_get_u8(tb[SMC_PNETID_IBPORT]); 370 if (pnetelem->ib_port < 1 || 371 pnetelem->ib_port > SMC_MAX_PORTS) 372 goto error; 373 } 374 } 375 376 return 0; 377 378 error: 379 if (pnetelem->ndev) 380 dev_put(pnetelem->ndev); 381 return rc; 382 } 383 384 /* Convert an smc_pnetentry to a netlink attribute sequence */ 385 static int smc_pnet_set_nla(struct sk_buff *msg, 386 struct smc_user_pnetentry *pnetelem) 387 { 388 if (nla_put_string(msg, SMC_PNETID_NAME, pnetelem->pnet_name)) 389 return -1; 390 if (pnetelem->ndev) { 391 if (nla_put_string(msg, SMC_PNETID_ETHNAME, 392 pnetelem->ndev->name)) 393 return -1; 394 } else { 395 if (nla_put_string(msg, SMC_PNETID_ETHNAME, "n/a")) 396 return -1; 397 } 398 if (pnetelem->smcibdev) { 399 if (nla_put_string(msg, SMC_PNETID_IBNAME, 400 dev_name(pnetelem->smcibdev->ibdev->dev.parent)) || 401 nla_put_u8(msg, SMC_PNETID_IBPORT, pnetelem->ib_port)) 402 return -1; 403 } else if (pnetelem->smcd_dev) { 404 if (nla_put_string(msg, SMC_PNETID_IBNAME, 405 dev_name(&pnetelem->smcd_dev->dev)) || 406 nla_put_u8(msg, SMC_PNETID_IBPORT, 1)) 407 return -1; 408 } else { 409 if (nla_put_string(msg, SMC_PNETID_IBNAME, "n/a") || 410 nla_put_u8(msg, SMC_PNETID_IBPORT, 0xff)) 411 return -1; 412 } 413 414 return 0; 415 } 416 417 static int smc_pnet_add(struct sk_buff *skb, struct genl_info *info) 418 { 419 struct net *net = genl_info_net(info); 420 struct smc_user_pnetentry pnetelem; 421 struct smc_pnettable *pnettable; 422 struct smc_net *sn; 423 int rc; 424 425 /* get pnettable for namespace */ 426 sn = net_generic(net, smc_net_id); 427 pnettable = &sn->pnettable; 428 429 rc = smc_pnet_fill_entry(net, &pnetelem, info->attrs); 430 if (!rc) 431 rc = smc_pnet_enter(pnettable, &pnetelem); 432 if (pnetelem.ndev) 433 dev_put(pnetelem.ndev); 434 return rc; 435 } 436 437 static int smc_pnet_del(struct sk_buff *skb, struct genl_info *info) 438 { 439 struct net *net = genl_info_net(info); 440 441 if (!info->attrs[SMC_PNETID_NAME]) 442 return -EINVAL; 443 return smc_pnet_remove_by_pnetid(net, 444 (char *)nla_data(info->attrs[SMC_PNETID_NAME])); 445 } 446 447 static int smc_pnet_dump_start(struct netlink_callback *cb) 448 { 449 cb->args[0] = 0; 450 return 0; 451 } 452 453 static int smc_pnet_dumpinfo(struct sk_buff *skb, 454 u32 portid, u32 seq, u32 flags, 455 struct smc_user_pnetentry *pnetelem) 456 { 457 void *hdr; 458 459 hdr = genlmsg_put(skb, portid, seq, &smc_pnet_nl_family, 460 flags, SMC_PNETID_GET); 461 if (!hdr) 462 return -ENOMEM; 463 if (smc_pnet_set_nla(skb, pnetelem) < 0) { 464 genlmsg_cancel(skb, hdr); 465 return -EMSGSIZE; 466 } 467 genlmsg_end(skb, hdr); 468 return 0; 469 } 470 471 static int _smc_pnet_dump(struct net *net, struct sk_buff *skb, u32 portid, 472 u32 seq, u8 *pnetid, int start_idx) 473 { 474 struct smc_user_pnetentry tmp_entry; 475 struct smc_pnettable *pnettable; 476 struct smc_pnetentry *pnetelem; 477 struct smc_ib_device *ibdev; 478 struct smcd_dev *smcd_dev; 479 struct smc_net *sn; 480 int idx = 0; 481 int ibport; 482 483 /* get pnettable for namespace */ 484 sn = net_generic(net, smc_net_id); 485 pnettable = &sn->pnettable; 486 487 /* dump netdevices */ 488 read_lock(&pnettable->lock); 489 list_for_each_entry(pnetelem, &pnettable->pnetlist, list) { 490 if (pnetid && !smc_pnet_match(pnetelem->pnet_name, pnetid)) 491 continue; 492 if (idx++ < start_idx) 493 continue; 494 memset(&tmp_entry, 0, sizeof(tmp_entry)); 495 memcpy(&tmp_entry.pnet_name, pnetelem->pnet_name, 496 SMC_MAX_PNETID_LEN); 497 tmp_entry.ndev = pnetelem->ndev; 498 if (smc_pnet_dumpinfo(skb, portid, seq, NLM_F_MULTI, 499 &tmp_entry)) { 500 --idx; 501 break; 502 } 503 } 504 read_unlock(&pnettable->lock); 505 506 /* if this is not the initial namespace, stop here */ 507 if (net != &init_net) 508 return idx; 509 510 /* dump ib devices */ 511 spin_lock(&smc_ib_devices.lock); 512 list_for_each_entry(ibdev, &smc_ib_devices.list, list) { 513 for (ibport = 0; ibport < SMC_MAX_PORTS; ibport++) { 514 if (ibdev->pnetid_by_user[ibport]) { 515 if (pnetid && 516 !smc_pnet_match(ibdev->pnetid[ibport], 517 pnetid)) 518 continue; 519 if (idx++ < start_idx) 520 continue; 521 memset(&tmp_entry, 0, sizeof(tmp_entry)); 522 memcpy(&tmp_entry.pnet_name, 523 ibdev->pnetid[ibport], 524 SMC_MAX_PNETID_LEN); 525 tmp_entry.smcibdev = ibdev; 526 tmp_entry.ib_port = ibport + 1; 527 if (smc_pnet_dumpinfo(skb, portid, seq, 528 NLM_F_MULTI, 529 &tmp_entry)) { 530 --idx; 531 break; 532 } 533 } 534 } 535 } 536 spin_unlock(&smc_ib_devices.lock); 537 538 /* dump smcd devices */ 539 spin_lock(&smcd_dev_list.lock); 540 list_for_each_entry(smcd_dev, &smcd_dev_list.list, list) { 541 if (smcd_dev->pnetid_by_user) { 542 if (pnetid && !smc_pnet_match(smcd_dev->pnetid, pnetid)) 543 continue; 544 if (idx++ < start_idx) 545 continue; 546 memset(&tmp_entry, 0, sizeof(tmp_entry)); 547 memcpy(&tmp_entry.pnet_name, smcd_dev->pnetid, 548 SMC_MAX_PNETID_LEN); 549 tmp_entry.smcd_dev = smcd_dev; 550 if (smc_pnet_dumpinfo(skb, portid, seq, NLM_F_MULTI, 551 &tmp_entry)) { 552 --idx; 553 break; 554 } 555 } 556 } 557 spin_unlock(&smcd_dev_list.lock); 558 559 return idx; 560 } 561 562 static int smc_pnet_dump(struct sk_buff *skb, struct netlink_callback *cb) 563 { 564 struct net *net = sock_net(skb->sk); 565 int idx; 566 567 idx = _smc_pnet_dump(net, skb, NETLINK_CB(cb->skb).portid, 568 cb->nlh->nlmsg_seq, NULL, cb->args[0]); 569 570 cb->args[0] = idx; 571 return skb->len; 572 } 573 574 /* Retrieve one PNETID entry */ 575 static int smc_pnet_get(struct sk_buff *skb, struct genl_info *info) 576 { 577 struct net *net = genl_info_net(info); 578 struct sk_buff *msg; 579 void *hdr; 580 581 if (!info->attrs[SMC_PNETID_NAME]) 582 return -EINVAL; 583 584 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 585 if (!msg) 586 return -ENOMEM; 587 588 _smc_pnet_dump(net, msg, info->snd_portid, info->snd_seq, 589 nla_data(info->attrs[SMC_PNETID_NAME]), 0); 590 591 /* finish multi part message and send it */ 592 hdr = nlmsg_put(msg, info->snd_portid, info->snd_seq, NLMSG_DONE, 0, 593 NLM_F_MULTI); 594 if (!hdr) { 595 nlmsg_free(msg); 596 return -EMSGSIZE; 597 } 598 return genlmsg_reply(msg, info); 599 } 600 601 /* Remove and delete all pnetids from pnet table. 602 */ 603 static int smc_pnet_flush(struct sk_buff *skb, struct genl_info *info) 604 { 605 struct net *net = genl_info_net(info); 606 607 smc_pnet_remove_by_pnetid(net, NULL); 608 return 0; 609 } 610 611 /* SMC_PNETID generic netlink operation definition */ 612 static const struct genl_ops smc_pnet_ops[] = { 613 { 614 .cmd = SMC_PNETID_GET, 615 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 616 .flags = GENL_ADMIN_PERM, 617 .doit = smc_pnet_get, 618 .dumpit = smc_pnet_dump, 619 .start = smc_pnet_dump_start 620 }, 621 { 622 .cmd = SMC_PNETID_ADD, 623 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 624 .flags = GENL_ADMIN_PERM, 625 .doit = smc_pnet_add 626 }, 627 { 628 .cmd = SMC_PNETID_DEL, 629 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 630 .flags = GENL_ADMIN_PERM, 631 .doit = smc_pnet_del 632 }, 633 { 634 .cmd = SMC_PNETID_FLUSH, 635 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 636 .flags = GENL_ADMIN_PERM, 637 .doit = smc_pnet_flush 638 } 639 }; 640 641 /* SMC_PNETID family definition */ 642 static struct genl_family smc_pnet_nl_family __ro_after_init = { 643 .hdrsize = 0, 644 .name = SMCR_GENL_FAMILY_NAME, 645 .version = SMCR_GENL_FAMILY_VERSION, 646 .maxattr = SMC_PNETID_MAX, 647 .policy = smc_pnet_policy, 648 .netnsok = true, 649 .module = THIS_MODULE, 650 .ops = smc_pnet_ops, 651 .n_ops = ARRAY_SIZE(smc_pnet_ops) 652 }; 653 654 static int smc_pnet_netdev_event(struct notifier_block *this, 655 unsigned long event, void *ptr) 656 { 657 struct net_device *event_dev = netdev_notifier_info_to_dev(ptr); 658 659 switch (event) { 660 case NETDEV_REBOOT: 661 case NETDEV_UNREGISTER: 662 smc_pnet_remove_by_ndev(event_dev); 663 return NOTIFY_OK; 664 default: 665 return NOTIFY_DONE; 666 } 667 } 668 669 static struct notifier_block smc_netdev_notifier = { 670 .notifier_call = smc_pnet_netdev_event 671 }; 672 673 /* init network namespace */ 674 int smc_pnet_net_init(struct net *net) 675 { 676 struct smc_net *sn = net_generic(net, smc_net_id); 677 struct smc_pnettable *pnettable = &sn->pnettable; 678 679 INIT_LIST_HEAD(&pnettable->pnetlist); 680 rwlock_init(&pnettable->lock); 681 682 return 0; 683 } 684 685 int __init smc_pnet_init(void) 686 { 687 int rc; 688 689 rc = genl_register_family(&smc_pnet_nl_family); 690 if (rc) 691 return rc; 692 rc = register_netdevice_notifier(&smc_netdev_notifier); 693 if (rc) 694 genl_unregister_family(&smc_pnet_nl_family); 695 return rc; 696 } 697 698 /* exit network namespace */ 699 void smc_pnet_net_exit(struct net *net) 700 { 701 /* flush pnet table */ 702 smc_pnet_remove_by_pnetid(net, NULL); 703 } 704 705 void smc_pnet_exit(void) 706 { 707 unregister_netdevice_notifier(&smc_netdev_notifier); 708 genl_unregister_family(&smc_pnet_nl_family); 709 } 710 711 /* Determine one base device for stacked net devices. 712 * If the lower device level contains more than one devices 713 * (for instance with bonding slaves), just the first device 714 * is used to reach a base device. 715 */ 716 static struct net_device *pnet_find_base_ndev(struct net_device *ndev) 717 { 718 int i, nest_lvl; 719 720 rtnl_lock(); 721 nest_lvl = dev_get_nest_level(ndev); 722 for (i = 0; i < nest_lvl; i++) { 723 struct list_head *lower = &ndev->adj_list.lower; 724 725 if (list_empty(lower)) 726 break; 727 lower = lower->next; 728 ndev = netdev_lower_get_next(ndev, &lower); 729 } 730 rtnl_unlock(); 731 return ndev; 732 } 733 734 static int smc_pnet_find_ndev_pnetid_by_table(struct net_device *ndev, 735 u8 *pnetid) 736 { 737 struct smc_pnettable *pnettable; 738 struct net *net = dev_net(ndev); 739 struct smc_pnetentry *pnetelem; 740 struct smc_net *sn; 741 int rc = -ENOENT; 742 743 /* get pnettable for namespace */ 744 sn = net_generic(net, smc_net_id); 745 pnettable = &sn->pnettable; 746 747 read_lock(&pnettable->lock); 748 list_for_each_entry(pnetelem, &pnettable->pnetlist, list) { 749 if (ndev == pnetelem->ndev) { 750 /* get pnetid of netdev device */ 751 memcpy(pnetid, pnetelem->pnet_name, SMC_MAX_PNETID_LEN); 752 rc = 0; 753 break; 754 } 755 } 756 read_unlock(&pnettable->lock); 757 return rc; 758 } 759 760 /* if handshake network device belongs to a roce device, return its 761 * IB device and port 762 */ 763 static void smc_pnet_find_rdma_dev(struct net_device *netdev, 764 struct smc_init_info *ini) 765 { 766 struct smc_ib_device *ibdev; 767 768 spin_lock(&smc_ib_devices.lock); 769 list_for_each_entry(ibdev, &smc_ib_devices.list, list) { 770 struct net_device *ndev; 771 int i; 772 773 for (i = 1; i <= SMC_MAX_PORTS; i++) { 774 if (!rdma_is_port_valid(ibdev->ibdev, i)) 775 continue; 776 if (!ibdev->ibdev->ops.get_netdev) 777 continue; 778 ndev = ibdev->ibdev->ops.get_netdev(ibdev->ibdev, i); 779 if (!ndev) 780 continue; 781 dev_put(ndev); 782 if (netdev == ndev && 783 smc_ib_port_active(ibdev, i) && 784 !smc_ib_determine_gid(ibdev, i, ini->vlan_id, 785 ini->ib_gid, NULL)) { 786 ini->ib_dev = ibdev; 787 ini->ib_port = i; 788 break; 789 } 790 } 791 } 792 spin_unlock(&smc_ib_devices.lock); 793 } 794 795 /* Determine the corresponding IB device port based on the hardware PNETID. 796 * Searching stops at the first matching active IB device port with vlan_id 797 * configured. 798 * If nothing found, check pnetid table. 799 * If nothing found, try to use handshake device 800 */ 801 static void smc_pnet_find_roce_by_pnetid(struct net_device *ndev, 802 struct smc_init_info *ini) 803 { 804 u8 ndev_pnetid[SMC_MAX_PNETID_LEN]; 805 struct smc_ib_device *ibdev; 806 int i; 807 808 ndev = pnet_find_base_ndev(ndev); 809 if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port, 810 ndev_pnetid) && 811 smc_pnet_find_ndev_pnetid_by_table(ndev, ndev_pnetid)) { 812 smc_pnet_find_rdma_dev(ndev, ini); 813 return; /* pnetid could not be determined */ 814 } 815 816 spin_lock(&smc_ib_devices.lock); 817 list_for_each_entry(ibdev, &smc_ib_devices.list, list) { 818 for (i = 1; i <= SMC_MAX_PORTS; i++) { 819 if (!rdma_is_port_valid(ibdev->ibdev, i)) 820 continue; 821 if (smc_pnet_match(ibdev->pnetid[i - 1], ndev_pnetid) && 822 smc_ib_port_active(ibdev, i) && 823 !smc_ib_determine_gid(ibdev, i, ini->vlan_id, 824 ini->ib_gid, NULL)) { 825 ini->ib_dev = ibdev; 826 ini->ib_port = i; 827 goto out; 828 } 829 } 830 } 831 out: 832 spin_unlock(&smc_ib_devices.lock); 833 } 834 835 static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev, 836 struct smc_init_info *ini) 837 { 838 u8 ndev_pnetid[SMC_MAX_PNETID_LEN]; 839 struct smcd_dev *ismdev; 840 841 ndev = pnet_find_base_ndev(ndev); 842 if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port, 843 ndev_pnetid) && 844 smc_pnet_find_ndev_pnetid_by_table(ndev, ndev_pnetid)) 845 return; /* pnetid could not be determined */ 846 847 spin_lock(&smcd_dev_list.lock); 848 list_for_each_entry(ismdev, &smcd_dev_list.list, list) { 849 if (smc_pnet_match(ismdev->pnetid, ndev_pnetid)) { 850 ini->ism_dev = ismdev; 851 break; 852 } 853 } 854 spin_unlock(&smcd_dev_list.lock); 855 } 856 857 /* PNET table analysis for a given sock: 858 * determine ib_device and port belonging to used internal TCP socket 859 * ethernet interface. 860 */ 861 void smc_pnet_find_roce_resource(struct sock *sk, struct smc_init_info *ini) 862 { 863 struct dst_entry *dst = sk_dst_get(sk); 864 865 ini->ib_dev = NULL; 866 ini->ib_port = 0; 867 if (!dst) 868 goto out; 869 if (!dst->dev) 870 goto out_rel; 871 872 smc_pnet_find_roce_by_pnetid(dst->dev, ini); 873 874 out_rel: 875 dst_release(dst); 876 out: 877 return; 878 } 879 880 void smc_pnet_find_ism_resource(struct sock *sk, struct smc_init_info *ini) 881 { 882 struct dst_entry *dst = sk_dst_get(sk); 883 884 ini->ism_dev = NULL; 885 if (!dst) 886 goto out; 887 if (!dst->dev) 888 goto out_rel; 889 890 smc_pnet_find_ism_by_pnetid(dst->dev, ini); 891 892 out_rel: 893 dst_release(dst); 894 out: 895 return; 896 } 897