// SPDX-License-Identifier: GPL-2.0
/*
 * Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 * Basic Transport Functions exploiting Infiniband API
 *
 * Copyright IBM Corp. 2016
 *
 * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com>
 */

#include <linux/socket.h>
#include <linux/if_vlan.h>
#include <linux/random.h>
#include <linux/workqueue.h>
#include <net/tcp.h>
#include <net/sock.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_cache.h>

#include "smc.h"
#include "smc_clc.h"
#include "smc_core.h"
#include "smc_ib.h"
#include "smc_wr.h"
#include "smc_llc.h"
#include "smc_cdc.h"
#include "smc_close.h"
#include "smc_ism.h"

#define SMC_LGR_NUM_INCR	256
#define SMC_LGR_FREE_DELAY_SERV	(600 * HZ)
#define SMC_LGR_FREE_DELAY_CLNT	(SMC_LGR_FREE_DELAY_SERV + 10 * HZ)
#define SMC_LGR_FREE_DELAY_FAST	(8 * HZ)

static struct smc_lgr_list smc_lgr_list = {	/* established link groups */
	.lock = __SPIN_LOCK_UNLOCKED(smc_lgr_list.lock),
	.list = LIST_HEAD_INIT(smc_lgr_list.list),
	.num = 0,
};

static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
			 struct smc_buf_desc *buf_desc);

static void smc_lgr_schedule_free_work(struct smc_link_group *lgr)
{
	/* client link group creation always follows the server link group
	 * creation. For client use a somewhat higher removal delay time,
	 * otherwise there is a risk of out-of-sync link groups.
	 */
	mod_delayed_work(system_wq, &lgr->free_work,
			 (!lgr->is_smcd && lgr->role == SMC_CLNT) ?
			 SMC_LGR_FREE_DELAY_CLNT : SMC_LGR_FREE_DELAY_SERV);
}

void smc_lgr_schedule_free_work_fast(struct smc_link_group *lgr)
{
	mod_delayed_work(system_wq, &lgr->free_work, SMC_LGR_FREE_DELAY_FAST);
}

/* Register connection's alert token in our lookup structure.
 * To use rbtrees we have to implement our own insert core.
 * Requires @conns_lock
 * @conn	connection to register
 */
static void smc_lgr_add_alert_token(struct smc_connection *conn)
{
	struct rb_node **link, *parent = NULL;
	u32 token = conn->alert_token_local;

	link = &conn->lgr->conns_all.rb_node;
	while (*link) {
		struct smc_connection *cur = rb_entry(*link,
					struct smc_connection, alert_node);

		parent = *link;
		if (cur->alert_token_local > token)
			link = &parent->rb_left;
		else
			link = &parent->rb_right;
	}
	/* Put the new node there */
	rb_link_node(&conn->alert_node, parent, link);
	rb_insert_color(&conn->alert_node, &conn->lgr->conns_all);
}
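/* The lookup counterpart to the insert above is smc_lgr_find_conn() (used
 * by smc_lgr_register_conn() below); it walks the same ordering, roughly:
 *
 *	node = lgr->conns_all.rb_node;
 *	while (node) {
 *		cur = rb_entry(node, struct smc_connection, alert_node);
 *		if (cur->alert_token_local > token)
 *			node = node->rb_left;
 *		else if (cur->alert_token_local < token)
 *			node = node->rb_right;
 *		else
 *			return cur;		- token found
 *	}
 *
 * Duplicate tokens cannot end up in the tree because smc_lgr_register_conn()
 * only assigns a token once smc_lgr_find_conn() fails to find it.
 */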
/* Register connection in link group by assigning an alert token
 * registered in a search tree.
 * Requires @conns_lock
 * Note that '0' is a reserved value and not assigned.
 */
static void smc_lgr_register_conn(struct smc_connection *conn)
{
	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
	static atomic_t nexttoken = ATOMIC_INIT(0);

	/* find a new alert_token_local value not yet used by some connection
	 * in this link group
	 */
	sock_hold(&smc->sk); /* sock_put in smc_lgr_unregister_conn() */
	while (!conn->alert_token_local) {
		conn->alert_token_local = atomic_inc_return(&nexttoken);
		if (smc_lgr_find_conn(conn->alert_token_local, conn->lgr))
			conn->alert_token_local = 0;
	}
	smc_lgr_add_alert_token(conn);
	conn->lgr->conns_num++;
}

/* Unregister connection and reset the alert token of the given connection
 */
static void __smc_lgr_unregister_conn(struct smc_connection *conn)
{
	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
	struct smc_link_group *lgr = conn->lgr;

	rb_erase(&conn->alert_node, &lgr->conns_all);
	lgr->conns_num--;
	conn->alert_token_local = 0;
	sock_put(&smc->sk); /* sock_hold in smc_lgr_register_conn() */
}

/* Unregister connection from lgr
 */
static void smc_lgr_unregister_conn(struct smc_connection *conn)
{
	struct smc_link_group *lgr = conn->lgr;

	if (!lgr)
		return;
	write_lock_bh(&lgr->conns_lock);
	if (conn->alert_token_local)
		__smc_lgr_unregister_conn(conn);
	write_unlock_bh(&lgr->conns_lock);
}

/* Send delete link, either as client to request the initiation
 * of the DELETE LINK sequence from server; or as server to
 * initiate the delete processing. See smc_llc_rx_delete_link().
 */
static int smc_link_send_delete(struct smc_link *lnk)
{
	if (lnk->state == SMC_LNK_ACTIVE &&
	    !smc_llc_send_delete_link(lnk, SMC_LLC_REQ, true)) {
		smc_llc_link_deleting(lnk);
		return 0;
	}
	return -ENOTCONN;
}

static void smc_lgr_free(struct smc_link_group *lgr);

static void smc_lgr_free_work(struct work_struct *work)
{
	struct smc_link_group *lgr = container_of(to_delayed_work(work),
						  struct smc_link_group,
						  free_work);
	bool conns;

	spin_lock_bh(&smc_lgr_list.lock);
	read_lock_bh(&lgr->conns_lock);
	conns = RB_EMPTY_ROOT(&lgr->conns_all);
	read_unlock_bh(&lgr->conns_lock);
	if (!conns) { /* number of lgr connections is no longer zero */
		spin_unlock_bh(&smc_lgr_list.lock);
		return;
	}
	if (!list_empty(&lgr->list))
		list_del_init(&lgr->list); /* remove from smc_lgr_list */
	spin_unlock_bh(&smc_lgr_list.lock);

	if (!lgr->is_smcd && !lgr->terminating) {
		struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];

		/* try to send del link msg, on error free lgr immediately */
		if (lnk->state == SMC_LNK_ACTIVE &&
		    !smc_link_send_delete(lnk)) {
			/* reschedule in case we never receive a response */
			smc_lgr_schedule_free_work(lgr);
			return;
		}
	}

	if (!delayed_work_pending(&lgr->free_work)) {
		struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];

		if (!lgr->is_smcd && lnk->state != SMC_LNK_INACTIVE)
			smc_llc_link_inactive(lnk);
		if (lgr->is_smcd)
			smc_ism_signal_shutdown(lgr);
		smc_lgr_free(lgr);
	}
}
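/* A note on link group ids: smc_lgr_create() below derives the 4-byte
 * lgr->id from smc_lgr_list.num, which is bumped by SMC_LGR_NUM_INCR (256)
 * for every new link group, so consecutive groups get the counter values
 * 0x100, 0x200, 0x300, ... and the ids only repeat after 2^24 link group
 * creations, when the 32-bit counter wraps.
 */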
/* create a new SMC link group */
static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
{
	struct smc_link_group *lgr;
	struct smc_link *lnk;
	u8 rndvec[3];
	int rc = 0;
	int i;

	if (ini->is_smcd && ini->vlan_id) {
		if (smc_ism_get_vlan(ini->ism_dev, ini->vlan_id)) {
			rc = SMC_CLC_DECL_ISMVLANERR;
			goto out;
		}
	}

	lgr = kzalloc(sizeof(*lgr), GFP_KERNEL);
	if (!lgr) {
		rc = SMC_CLC_DECL_MEM;
		goto ism_put_vlan;
	}
	lgr->is_smcd = ini->is_smcd;
	lgr->sync_err = 0;
	lgr->vlan_id = ini->vlan_id;
	rwlock_init(&lgr->sndbufs_lock);
	rwlock_init(&lgr->rmbs_lock);
	rwlock_init(&lgr->conns_lock);
	for (i = 0; i < SMC_RMBE_SIZES; i++) {
		INIT_LIST_HEAD(&lgr->sndbufs[i]);
		INIT_LIST_HEAD(&lgr->rmbs[i]);
	}
	smc_lgr_list.num += SMC_LGR_NUM_INCR;
	memcpy(&lgr->id, (u8 *)&smc_lgr_list.num, SMC_LGR_ID_SIZE);
	INIT_DELAYED_WORK(&lgr->free_work, smc_lgr_free_work);
	lgr->conns_all = RB_ROOT;
	if (ini->is_smcd) {
		/* SMC-D specific settings */
		lgr->peer_gid = ini->ism_gid;
		lgr->smcd = ini->ism_dev;
	} else {
		/* SMC-R specific settings */
		lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
		memcpy(lgr->peer_systemid, ini->ib_lcl->id_for_peer,
		       SMC_SYSTEMID_LEN);

		lnk = &lgr->lnk[SMC_SINGLE_LINK];
		/* initialize link */
		lnk->state = SMC_LNK_ACTIVATING;
		lnk->link_id = SMC_SINGLE_LINK;
		lnk->smcibdev = ini->ib_dev;
		lnk->ibport = ini->ib_port;
		lnk->path_mtu =
			ini->ib_dev->pattr[ini->ib_port - 1].active_mtu;
		if (!ini->ib_dev->initialized)
			smc_ib_setup_per_ibdev(ini->ib_dev);
		get_random_bytes(rndvec, sizeof(rndvec));
		lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) +
				   (rndvec[2] << 16);
		rc = smc_ib_determine_gid(lnk->smcibdev, lnk->ibport,
					  ini->vlan_id, lnk->gid,
					  &lnk->sgid_index);
		if (rc)
			goto free_lgr;
		rc = smc_llc_link_init(lnk);
		if (rc)
			goto free_lgr;
		rc = smc_wr_alloc_link_mem(lnk);
		if (rc)
			goto clear_llc_lnk;
		rc = smc_ib_create_protection_domain(lnk);
		if (rc)
			goto free_link_mem;
		rc = smc_ib_create_queue_pair(lnk);
		if (rc)
			goto dealloc_pd;
		rc = smc_wr_create_link(lnk);
		if (rc)
			goto destroy_qp;
	}
	smc->conn.lgr = lgr;
	spin_lock_bh(&smc_lgr_list.lock);
	list_add(&lgr->list, &smc_lgr_list.list);
	spin_unlock_bh(&smc_lgr_list.lock);
	return 0;

destroy_qp:
	smc_ib_destroy_queue_pair(lnk);
dealloc_pd:
	smc_ib_dealloc_protection_domain(lnk);
free_link_mem:
	smc_wr_free_link_mem(lnk);
clear_llc_lnk:
	smc_llc_link_clear(lnk);
free_lgr:
	kfree(lgr);
ism_put_vlan:
	if (ini->is_smcd && ini->vlan_id)
		smc_ism_put_vlan(ini->ism_dev, ini->vlan_id);
out:
	if (rc < 0) {
		if (rc == -ENOMEM)
			rc = SMC_CLC_DECL_MEM;
		else
			rc = SMC_CLC_DECL_INTERR;
	}
	return rc;
}
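/* Two details of smc_lgr_create() worth spelling out:
 * - lnk->psn_initial packs three random bytes into the 24-bit packet
 *   sequence number space used by the IB verbs layer; e.g. rndvec[] =
 *   {0x12, 0x34, 0x56} yields psn_initial = 0x563412.
 * - the function returns either a positive SMC_CLC_DECL_* reason code
 *   directly or converts a negative errno into one at the out: label; the
 *   callers (see smc_conn_create() below) hand such reason codes back so
 *   a CLC decline can be sent to the peer.
 */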
static void smc_buf_unuse(struct smc_connection *conn,
			  struct smc_link_group *lgr)
{
	if (conn->sndbuf_desc)
		conn->sndbuf_desc->used = 0;
	if (conn->rmb_desc) {
		if (!conn->rmb_desc->regerr) {
			if (!lgr->is_smcd) {
				/* unregister rmb with peer */
				smc_llc_do_delete_rkey(
						&lgr->lnk[SMC_SINGLE_LINK],
						conn->rmb_desc);
			}
			conn->rmb_desc->used = 0;
		} else {
			/* buf registration failed, reuse not possible */
			write_lock_bh(&lgr->rmbs_lock);
			list_del(&conn->rmb_desc->list);
			write_unlock_bh(&lgr->rmbs_lock);

			smc_buf_free(lgr, true, conn->rmb_desc);
		}
	}
}

/* remove a finished connection from its link group */
void smc_conn_free(struct smc_connection *conn)
{
	struct smc_link_group *lgr = conn->lgr;

	if (!lgr)
		return;
	if (lgr->is_smcd) {
		smc_ism_unset_conn(conn);
		tasklet_kill(&conn->rx_tsklet);
	} else {
		smc_cdc_tx_dismiss_slots(conn);
	}
	smc_lgr_unregister_conn(conn);
	smc_buf_unuse(conn, lgr); /* allow buffer reuse */
	conn->lgr = NULL;

	if (!lgr->conns_num)
		smc_lgr_schedule_free_work(lgr);
}

static void smc_link_clear(struct smc_link *lnk)
{
	lnk->peer_qpn = 0;
	smc_llc_link_clear(lnk);
	smc_ib_modify_qp_reset(lnk);
	smc_wr_free_link(lnk);
	smc_ib_destroy_queue_pair(lnk);
	smc_ib_dealloc_protection_domain(lnk);
	smc_wr_free_link_mem(lnk);
}

static void smcr_buf_free(struct smc_link_group *lgr, bool is_rmb,
			  struct smc_buf_desc *buf_desc)
{
	struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];

	if (is_rmb) {
		if (buf_desc->mr_rx[SMC_SINGLE_LINK])
			smc_ib_put_memory_region(
					buf_desc->mr_rx[SMC_SINGLE_LINK]);
		smc_ib_buf_unmap_sg(lnk->smcibdev, buf_desc,
				    DMA_FROM_DEVICE);
	} else {
		smc_ib_buf_unmap_sg(lnk->smcibdev, buf_desc,
				    DMA_TO_DEVICE);
	}
	sg_free_table(&buf_desc->sgt[SMC_SINGLE_LINK]);
	if (buf_desc->pages)
		__free_pages(buf_desc->pages, buf_desc->order);
	kfree(buf_desc);
}

static void smcd_buf_free(struct smc_link_group *lgr, bool is_dmb,
			  struct smc_buf_desc *buf_desc)
{
	if (is_dmb) {
		/* restore original buf len */
		buf_desc->len += sizeof(struct smcd_cdc_msg);
		smc_ism_unregister_dmb(lgr->smcd, buf_desc);
	} else {
		kfree(buf_desc->cpu_addr);
	}
	kfree(buf_desc);
}

static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
			 struct smc_buf_desc *buf_desc)
{
	if (lgr->is_smcd)
		smcd_buf_free(lgr, is_rmb, buf_desc);
	else
		smcr_buf_free(lgr, is_rmb, buf_desc);
}

static void __smc_lgr_free_bufs(struct smc_link_group *lgr, bool is_rmb)
{
	struct smc_buf_desc *buf_desc, *bf_desc;
	struct list_head *buf_list;
	int i;

	for (i = 0; i < SMC_RMBE_SIZES; i++) {
		if (is_rmb)
			buf_list = &lgr->rmbs[i];
		else
			buf_list = &lgr->sndbufs[i];
		list_for_each_entry_safe(buf_desc, bf_desc, buf_list,
					 list) {
			list_del(&buf_desc->list);
			smc_buf_free(lgr, is_rmb, buf_desc);
		}
	}
}

static void smc_lgr_free_bufs(struct smc_link_group *lgr)
{
	/* free send buffers */
	__smc_lgr_free_bufs(lgr, false);
	/* free rmbs */
	__smc_lgr_free_bufs(lgr, true);
}

/* remove a link group */
static void smc_lgr_free(struct smc_link_group *lgr)
{
	smc_lgr_free_bufs(lgr);
	if (lgr->is_smcd)
		smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
	else
		smc_link_clear(&lgr->lnk[SMC_SINGLE_LINK]);
	kfree(lgr);
}

void smc_lgr_forget(struct smc_link_group *lgr)
{
	spin_lock_bh(&smc_lgr_list.lock);
	/* do not use this link group for new connections */
	if (!list_empty(&lgr->list))
		list_del_init(&lgr->list);
	spin_unlock_bh(&smc_lgr_list.lock);
}
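/* __smc_lgr_terminate() below iterates its rbtree with a restart pattern:
 * each pass takes conns_lock, picks rb_first(), unregisters that node, then
 * drops the lock before scheduling the connection's close work. Restarting
 * from rb_first() after re-acquiring the lock keeps the walk safe, since
 * the node visited last is already gone by the time the lock is released.
 */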
/* terminate link group abnormally */
static void __smc_lgr_terminate(struct smc_link_group *lgr)
{
	struct smc_connection *conn;
	struct smc_sock *smc;
	struct rb_node *node;

	if (lgr->terminating)
		return;	/* lgr already terminating */
	lgr->terminating = 1;
	if (!list_empty(&lgr->list)) /* forget lgr */
		list_del_init(&lgr->list);
	if (!lgr->is_smcd)
		smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]);

	write_lock_bh(&lgr->conns_lock);
	node = rb_first(&lgr->conns_all);
	while (node) {
		conn = rb_entry(node, struct smc_connection, alert_node);
		smc = container_of(conn, struct smc_sock, conn);
		sock_hold(&smc->sk); /* sock_put in close work */
		conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
		__smc_lgr_unregister_conn(conn);
		conn->lgr = NULL;
		write_unlock_bh(&lgr->conns_lock);
		if (!schedule_work(&conn->close_work))
			sock_put(&smc->sk);
		write_lock_bh(&lgr->conns_lock);
		node = rb_first(&lgr->conns_all);
	}
	write_unlock_bh(&lgr->conns_lock);
	if (!lgr->is_smcd)
		wake_up(&lgr->lnk[SMC_SINGLE_LINK].wr_reg_wait);
	smc_lgr_schedule_free_work(lgr);
}

void smc_lgr_terminate(struct smc_link_group *lgr)
{
	spin_lock_bh(&smc_lgr_list.lock);
	__smc_lgr_terminate(lgr);
	spin_unlock_bh(&smc_lgr_list.lock);
}

/* Called when IB port is terminated */
void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport)
{
	struct smc_link_group *lgr, *l;

	spin_lock_bh(&smc_lgr_list.lock);
	list_for_each_entry_safe(lgr, l, &smc_lgr_list.list, list) {
		if (!lgr->is_smcd &&
		    lgr->lnk[SMC_SINGLE_LINK].smcibdev == smcibdev &&
		    lgr->lnk[SMC_SINGLE_LINK].ibport == ibport)
			__smc_lgr_terminate(lgr);
	}
	spin_unlock_bh(&smc_lgr_list.lock);
}

/* Called when SMC-D device is terminated or peer is lost */
void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan)
{
	struct smc_link_group *lgr, *l;
	LIST_HEAD(lgr_free_list);

	/* run common cleanup function and build free list */
	spin_lock_bh(&smc_lgr_list.lock);
	list_for_each_entry_safe(lgr, l, &smc_lgr_list.list, list) {
		if (lgr->is_smcd && lgr->smcd == dev &&
		    (!peer_gid || lgr->peer_gid == peer_gid) &&
		    (vlan == VLAN_VID_MASK || lgr->vlan_id == vlan)) {
			__smc_lgr_terminate(lgr);
			list_move(&lgr->list, &lgr_free_list);
		}
	}
	spin_unlock_bh(&smc_lgr_list.lock);

	/* cancel the regular free workers and actually free lgrs */
	list_for_each_entry_safe(lgr, l, &lgr_free_list, list) {
		list_del_init(&lgr->list);
		cancel_delayed_work_sync(&lgr->free_work);
		if (!peer_gid && vlan == VLAN_VID_MASK) /* dev terminated? */
			smc_ism_signal_shutdown(lgr);
		smc_lgr_free(lgr);
	}
}
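/* A worked example for smc_vlan_by_tcpsk() below, assuming a hypothetical
 * setup: if the CLC socket's route points directly at the vlan device
 * "bond0.100", is_vlan_dev() is true right away and vlan_dev_vlan_id()
 * yields 100. If the route instead points at e.g. "br0" stacked over
 * "bond0.100", the rtnl-protected loop descends one lower device per
 * nesting level via adj_list.lower until it reaches the vlan device.
 */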
/* Determine vlan of internal TCP socket.
 * @ini: the determined vlan id is stored into ini->vlan_id
 */
int smc_vlan_by_tcpsk(struct socket *clcsock, struct smc_init_info *ini)
{
	struct dst_entry *dst = sk_dst_get(clcsock->sk);
	struct net_device *ndev;
	int i, nest_lvl, rc = 0;

	ini->vlan_id = 0;
	if (!dst) {
		rc = -ENOTCONN;
		goto out;
	}
	if (!dst->dev) {
		rc = -ENODEV;
		goto out_rel;
	}

	ndev = dst->dev;
	if (is_vlan_dev(ndev)) {
		ini->vlan_id = vlan_dev_vlan_id(ndev);
		goto out_rel;
	}

	rtnl_lock();
	nest_lvl = ndev->lower_level;
	for (i = 0; i < nest_lvl; i++) {
		struct list_head *lower = &ndev->adj_list.lower;

		if (list_empty(lower))
			break;
		lower = lower->next;
		ndev = (struct net_device *)netdev_lower_get_next(ndev, &lower);
		if (is_vlan_dev(ndev)) {
			ini->vlan_id = vlan_dev_vlan_id(ndev);
			break;
		}
	}
	rtnl_unlock();

out_rel:
	dst_release(dst);
out:
	return rc;
}

static bool smcr_lgr_match(struct smc_link_group *lgr,
			   struct smc_clc_msg_local *lcl,
			   enum smc_lgr_role role, u32 clcqpn)
{
	return !memcmp(lgr->peer_systemid, lcl->id_for_peer,
		       SMC_SYSTEMID_LEN) &&
		!memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_gid, &lcl->gid,
			SMC_GID_SIZE) &&
		!memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_mac, lcl->mac,
			sizeof(lcl->mac)) &&
		lgr->role == role &&
		(lgr->role == SMC_SERV ||
		 lgr->lnk[SMC_SINGLE_LINK].peer_qpn == clcqpn);
}

static bool smcd_lgr_match(struct smc_link_group *lgr,
			   struct smcd_dev *smcismdev, u64 peer_gid)
{
	return lgr->peer_gid == peer_gid && lgr->smcd == smcismdev;
}
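/* The two match helpers above carry the reuse policy of smc_conn_create()
 * below: SMC-R reuse requires the same peer system id, gid and mac (plus,
 * for the client role, the peer QP number from the CLC handshake), while
 * SMC-D reuse only requires the same ISM device and peer gid. On top of
 * that, smc_conn_create() insists on a matching vlan id, no sync_err, and,
 * for servers, fewer than SMC_RMBS_PER_LGR_MAX connections in the group.
 */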
/* create a new SMC connection (and a new link group if necessary) */
int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
{
	struct smc_connection *conn = &smc->conn;
	struct smc_link_group *lgr;
	enum smc_lgr_role role;
	int rc = 0;

	ini->cln_first_contact = SMC_FIRST_CONTACT;
	role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
	if (role == SMC_CLNT && ini->srv_first_contact)
		/* create new link group as well */
		goto create;

	/* determine if an existing link group can be reused */
	spin_lock_bh(&smc_lgr_list.lock);
	list_for_each_entry(lgr, &smc_lgr_list.list, list) {
		write_lock_bh(&lgr->conns_lock);
		if ((ini->is_smcd ?
		     smcd_lgr_match(lgr, ini->ism_dev, ini->ism_gid) :
		     smcr_lgr_match(lgr, ini->ib_lcl, role, ini->ib_clcqpn)) &&
		    !lgr->sync_err &&
		    lgr->vlan_id == ini->vlan_id &&
		    (role == SMC_CLNT ||
		     lgr->conns_num < SMC_RMBS_PER_LGR_MAX)) {
			/* link group found */
			ini->cln_first_contact = SMC_REUSE_CONTACT;
			conn->lgr = lgr;
			smc_lgr_register_conn(conn); /* add smc conn to lgr */
			if (delayed_work_pending(&lgr->free_work))
				cancel_delayed_work(&lgr->free_work);
			write_unlock_bh(&lgr->conns_lock);
			break;
		}
		write_unlock_bh(&lgr->conns_lock);
	}
	spin_unlock_bh(&smc_lgr_list.lock);

	if (role == SMC_CLNT && !ini->srv_first_contact &&
	    ini->cln_first_contact == SMC_FIRST_CONTACT) {
		/* Server reuses a link group, but Client wants to start
		 * a new one: send out_of_sync decline, reason
		 * synchronization error
		 */
		return SMC_CLC_DECL_SYNCERR;
	}

create:
	if (ini->cln_first_contact == SMC_FIRST_CONTACT) {
		rc = smc_lgr_create(smc, ini);
		if (rc)
			goto out;
		lgr = conn->lgr;
		write_lock_bh(&lgr->conns_lock);
		smc_lgr_register_conn(conn); /* add smc conn to lgr */
		write_unlock_bh(&lgr->conns_lock);
	}
	conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE;
	conn->local_tx_ctrl.len = SMC_WR_TX_SIZE;
	conn->urg_state = SMC_URG_READ;
	if (ini->is_smcd) {
		conn->rx_off = sizeof(struct smcd_cdc_msg);
		smcd_cdc_rx_init(conn); /* init tasklet for this conn */
	}
#ifndef KERNEL_HAS_ATOMIC64
	spin_lock_init(&conn->acurs_lock);
#endif

out:
	return rc;
}

/* convert the RMB size into the compressed notation - minimum 16K.
 * In contrast to plain ilog2, this rounds towards the next power of 2,
 * so the socket application gets at least its desired sndbuf / rcvbuf size.
 */
static u8 smc_compress_bufsize(int size)
{
	u8 compressed;

	if (size <= SMC_BUF_MIN_SIZE)
		return 0;

	size = (size - 1) >> 14;
	compressed = ilog2(size) + 1;
	if (compressed >= SMC_RMBE_SIZES)
		compressed = SMC_RMBE_SIZES - 1;
	return compressed;
}

/* convert the RMB size from compressed notation into integer */
int smc_uncompress_bufsize(u8 compressed)
{
	u32 size;

	size = 0x00000001 << (((int)compressed) + 14);
	return (int)size;
}

/* try to reuse a sndbuf or rmb description slot for a certain
 * buffer size; if not available, return NULL
 */
static struct smc_buf_desc *smc_buf_get_slot(int compressed_bufsize,
					     rwlock_t *lock,
					     struct list_head *buf_list)
{
	struct smc_buf_desc *buf_slot;

	read_lock_bh(lock);
	list_for_each_entry(buf_slot, buf_list, list) {
		if (cmpxchg(&buf_slot->used, 0, 1) == 0) {
			read_unlock_bh(lock);
			return buf_slot;
		}
	}
	read_unlock_bh(lock);
	return NULL;
}

/* one of the conditions for announcing a receiver's current window size is
 * that it "results in a minimum increase in the window size of 10% of the
 * receive buffer space" [RFC7609]
 */
static inline int smc_rmb_wnd_update_limit(int rmbe_size)
{
	return min_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2);
}
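/* Worked examples for the conversion helpers above:
 * - smc_compress_bufsize(65536):  (65536 - 1) >> 14 = 3, ilog2(3) + 1 = 2;
 *   smc_uncompress_bufsize(2) = 1 << (2 + 14) = 65536, i.e. exact powers
 *   of two survive the round trip unchanged.
 * - smc_compress_bufsize(100000): (100000 - 1) >> 14 = 6, ilog2(6) + 1 = 3;
 *   smc_uncompress_bufsize(3) = 131072, i.e. sizes between powers of two
 *   are rounded up, never down.
 * - smc_rmb_wnd_update_limit(131072) = min(13107, SOCK_MIN_SNDBUF / 2), so
 *   the 10% rule governs small RMBs while SOCK_MIN_SNDBUF / 2 caps the
 *   update limit for large ones.
 */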
static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr,
						bool is_rmb, int bufsize)
{
	struct smc_buf_desc *buf_desc;
	struct smc_link *lnk;
	int rc;

	/* try to alloc a new buffer */
	buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
	if (!buf_desc)
		return ERR_PTR(-ENOMEM);

	buf_desc->order = get_order(bufsize);
	buf_desc->pages = alloc_pages(GFP_KERNEL | __GFP_NOWARN |
				      __GFP_NOMEMALLOC | __GFP_COMP |
				      __GFP_NORETRY | __GFP_ZERO,
				      buf_desc->order);
	if (!buf_desc->pages) {
		kfree(buf_desc);
		return ERR_PTR(-EAGAIN);
	}
	buf_desc->cpu_addr = (void *)page_address(buf_desc->pages);

	/* build the sg table from the pages */
	lnk = &lgr->lnk[SMC_SINGLE_LINK];
	rc = sg_alloc_table(&buf_desc->sgt[SMC_SINGLE_LINK], 1,
			    GFP_KERNEL);
	if (rc) {
		smc_buf_free(lgr, is_rmb, buf_desc);
		return ERR_PTR(rc);
	}
	sg_set_buf(buf_desc->sgt[SMC_SINGLE_LINK].sgl,
		   buf_desc->cpu_addr, bufsize);

	/* map sg table to DMA address */
	rc = smc_ib_buf_map_sg(lnk->smcibdev, buf_desc,
			       is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
	/* SMC protocol depends on mapping to one DMA address only */
	if (rc != 1) {
		smc_buf_free(lgr, is_rmb, buf_desc);
		return ERR_PTR(-EAGAIN);
	}

	/* create a new memory region for the RMB */
	if (is_rmb) {
		rc = smc_ib_get_memory_region(lnk->roce_pd,
					      IB_ACCESS_REMOTE_WRITE |
					      IB_ACCESS_LOCAL_WRITE,
					      buf_desc);
		if (rc) {
			smc_buf_free(lgr, is_rmb, buf_desc);
			return ERR_PTR(rc);
		}
	}

	buf_desc->len = bufsize;
	return buf_desc;
}

#define SMCD_DMBE_SIZES		7 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */

static struct smc_buf_desc *smcd_new_buf_create(struct smc_link_group *lgr,
						bool is_dmb, int bufsize)
{
	struct smc_buf_desc *buf_desc;
	int rc;

	if (smc_compress_bufsize(bufsize) > SMCD_DMBE_SIZES)
		return ERR_PTR(-EAGAIN);

	/* try to alloc a new DMB */
	buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
	if (!buf_desc)
		return ERR_PTR(-ENOMEM);
	if (is_dmb) {
		rc = smc_ism_register_dmb(lgr, bufsize, buf_desc);
		if (rc) {
			kfree(buf_desc);
			return ERR_PTR(-EAGAIN);
		}
		buf_desc->pages = virt_to_page(buf_desc->cpu_addr);
		/* CDC header stored in buf. So, pretend it was smaller */
		buf_desc->len = bufsize - sizeof(struct smcd_cdc_msg);
	} else {
		buf_desc->cpu_addr = kzalloc(bufsize, GFP_KERNEL |
					     __GFP_NOWARN | __GFP_NORETRY |
					     __GFP_NOMEMALLOC);
		if (!buf_desc->cpu_addr) {
			kfree(buf_desc);
			return ERR_PTR(-EAGAIN);
		}
		buf_desc->len = bufsize;
	}
	return buf_desc;
}
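/* __smc_buf_create() below searches downwards through the size classes,
 * starting at the class matching half the socket buffer size: each pass
 * first tries to recycle an unused descriptor of that class from the link
 * group (smc_buf_get_slot()), then attempts a fresh allocation, and only
 * falls back to the next smaller class on -EAGAIN. A fresh allocation
 * failing with -ENOMEM ends the search at once, since the failing
 * kzalloc() of the small descriptor would not fare better for a smaller
 * data buffer either.
 */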
static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
{
	struct smc_buf_desc *buf_desc = ERR_PTR(-ENOMEM);
	struct smc_connection *conn = &smc->conn;
	struct smc_link_group *lgr = conn->lgr;
	struct list_head *buf_list;
	int bufsize, bufsize_short;
	int sk_buf_size;
	rwlock_t *lock;

	if (is_rmb)
		/* use socket recv buffer size (w/o overhead) as start value */
		sk_buf_size = smc->sk.sk_rcvbuf / 2;
	else
		/* use socket send buffer size (w/o overhead) as start value */
		sk_buf_size = smc->sk.sk_sndbuf / 2;

	for (bufsize_short = smc_compress_bufsize(sk_buf_size);
	     bufsize_short >= 0; bufsize_short--) {

		if (is_rmb) {
			lock = &lgr->rmbs_lock;
			buf_list = &lgr->rmbs[bufsize_short];
		} else {
			lock = &lgr->sndbufs_lock;
			buf_list = &lgr->sndbufs[bufsize_short];
		}
		bufsize = smc_uncompress_bufsize(bufsize_short);
		if ((1 << get_order(bufsize)) > SG_MAX_SINGLE_ALLOC)
			continue;

		/* check for reusable slot in the link group */
		buf_desc = smc_buf_get_slot(bufsize_short, lock, buf_list);
		if (buf_desc) {
			memset(buf_desc->cpu_addr, 0, bufsize);
			break; /* found reusable slot */
		}

		if (is_smcd)
			buf_desc = smcd_new_buf_create(lgr, is_rmb, bufsize);
		else
			buf_desc = smcr_new_buf_create(lgr, is_rmb, bufsize);

		if (PTR_ERR(buf_desc) == -ENOMEM)
			break;
		if (IS_ERR(buf_desc))
			continue;

		buf_desc->used = 1;
		write_lock_bh(lock);
		list_add(&buf_desc->list, buf_list);
		write_unlock_bh(lock);
		break; /* found */
	}

	if (IS_ERR(buf_desc))
		return -ENOMEM;

	if (is_rmb) {
		conn->rmb_desc = buf_desc;
		conn->rmbe_size_short = bufsize_short;
		smc->sk.sk_rcvbuf = bufsize * 2;
		atomic_set(&conn->bytes_to_rcv, 0);
		conn->rmbe_update_limit =
			smc_rmb_wnd_update_limit(buf_desc->len);
		if (is_smcd)
			smc_ism_set_conn(conn); /* map RMB/smcd_dev to conn */
	} else {
		conn->sndbuf_desc = buf_desc;
		smc->sk.sk_sndbuf = bufsize * 2;
		atomic_set(&conn->sndbuf_space, bufsize);
	}
	return 0;
}

void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn)
{
	struct smc_link_group *lgr = conn->lgr;

	if (!conn->lgr || conn->lgr->is_smcd)
		return;
	smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
			       conn->sndbuf_desc, DMA_TO_DEVICE);
}

void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn)
{
	struct smc_link_group *lgr = conn->lgr;

	if (!conn->lgr || conn->lgr->is_smcd)
		return;
	smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
				  conn->sndbuf_desc, DMA_TO_DEVICE);
}

void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn)
{
	struct smc_link_group *lgr = conn->lgr;

	if (!conn->lgr || conn->lgr->is_smcd)
		return;
	smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
			       conn->rmb_desc, DMA_FROM_DEVICE);
}

void smc_rmb_sync_sg_for_device(struct smc_connection *conn)
{
	struct smc_link_group *lgr = conn->lgr;

	if (!conn->lgr || conn->lgr->is_smcd)
		return;
	smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
				  conn->rmb_desc, DMA_FROM_DEVICE);
}

/* create the send and receive buffer for an SMC socket;
 * receive buffers are called RMBs;
 * (even though the SMC protocol allows more than one RMB-element per RMB,
 * the Linux implementation uses just one RMB-element per RMB, i.e. uses an
 * extra RMB for every connection in a link group)
 */
int smc_buf_create(struct smc_sock *smc, bool is_smcd)
{
	int rc;

	/* create send buffer */
	rc = __smc_buf_create(smc, is_smcd, false);
	if (rc)
		return rc;
	/* create rmb */
	rc = __smc_buf_create(smc, is_smcd, true);
	if (rc)
		smc_buf_free(smc->conn.lgr, false, smc->conn.sndbuf_desc);
	return rc;
}

static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr)
{
	int i;

	for_each_clear_bit(i, lgr->rtokens_used_mask, SMC_RMBS_PER_LGR_MAX) {
		if (!test_and_set_bit(i, lgr->rtokens_used_mask))
			return i;
	}
	return -ENOSPC;
}

/* add a new rtoken from peer */
int smc_rtoken_add(struct smc_link_group *lgr, __be64 nw_vaddr, __be32 nw_rkey)
{
	u64 dma_addr = be64_to_cpu(nw_vaddr);
	u32 rkey = ntohl(nw_rkey);
	int i;

	for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
		if ((lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey) &&
		    (lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr == dma_addr) &&
		    test_bit(i, lgr->rtokens_used_mask)) {
			/* already in list */
			return i;
		}
	}
	i = smc_rmb_reserve_rtoken_idx(lgr);
	if (i < 0)
		return i;
	lgr->rtokens[i][SMC_SINGLE_LINK].rkey = rkey;
	lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr = dma_addr;
	return i;
}

/* delete an rtoken */
int smc_rtoken_delete(struct smc_link_group *lgr, __be32 nw_rkey)
{
	u32 rkey = ntohl(nw_rkey);
	int i;

	for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
		if (lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey &&
		    test_bit(i, lgr->rtokens_used_mask)) {
			lgr->rtokens[i][SMC_SINGLE_LINK].rkey = 0;
			lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr = 0;

			clear_bit(i, lgr->rtokens_used_mask);
			return 0;
		}
	}
	return -ENOENT;
}
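/* The rtoken table maintained above pairs the peer's RMB rkey with its DMA
 * address; conn->rtoken_idx, set in smc_rmb_rtoken_handling() below, is the
 * index the transmit path later uses to address its RDMA writes into the
 * peer's RMB.
 */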
/* save rkey and dma_addr received from peer during clc handshake */
int smc_rmb_rtoken_handling(struct smc_connection *conn,
			    struct smc_clc_msg_accept_confirm *clc)
{
	conn->rtoken_idx = smc_rtoken_add(conn->lgr, clc->rmb_dma_addr,
					  clc->rmb_rkey);
	if (conn->rtoken_idx < 0)
		return conn->rtoken_idx;
	return 0;
}

/* Called (from smc_exit) when module is removed */
void smc_core_exit(void)
{
	struct smc_link_group *lgr, *lg;
	LIST_HEAD(lgr_freeing_list);

	spin_lock_bh(&smc_lgr_list.lock);
	if (!list_empty(&smc_lgr_list.list))
		list_splice_init(&smc_lgr_list.list, &lgr_freeing_list);
	spin_unlock_bh(&smc_lgr_list.lock);
	list_for_each_entry_safe(lgr, lg, &lgr_freeing_list, list) {
		list_del_init(&lgr->list);
		if (!lgr->is_smcd) {
			struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];

			if (lnk->state == SMC_LNK_ACTIVE)
				smc_llc_send_delete_link(lnk, SMC_LLC_REQ,
							 false);
			smc_llc_link_inactive(lnk);
		}
		cancel_delayed_work_sync(&lgr->free_work);
		if (lgr->is_smcd)
			smc_ism_signal_shutdown(lgr);
		smc_lgr_free(lgr); /* free link group */
	}
}