// SPDX-License-Identifier: GPL-2.0
/*
 * Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 * Basic Transport Functions exploiting Infiniband API
 *
 * Copyright IBM Corp. 2016
 *
 * Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
 */

#include <linux/socket.h>
#include <linux/if_vlan.h>
#include <linux/random.h>
#include <linux/workqueue.h>
#include <linux/wait.h>
#include <linux/reboot.h>
#include <net/tcp.h>
#include <net/sock.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_cache.h>

#include "smc.h"
#include "smc_clc.h"
#include "smc_core.h"
#include "smc_ib.h"
#include "smc_wr.h"
#include "smc_llc.h"
#include "smc_cdc.h"
#include "smc_close.h"
#include "smc_ism.h"

#define SMC_LGR_NUM_INCR		256
#define SMC_LGR_FREE_DELAY_SERV		(600 * HZ)
#define SMC_LGR_FREE_DELAY_CLNT		(SMC_LGR_FREE_DELAY_SERV + 10 * HZ)
#define SMC_LGR_FREE_DELAY_FAST		(8 * HZ)

static struct smc_lgr_list smc_lgr_list = {	/* established link groups */
	.lock = __SPIN_LOCK_UNLOCKED(smc_lgr_list.lock),
	.list = LIST_HEAD_INIT(smc_lgr_list.list),
	.num = 0,
};

static atomic_t lgr_cnt = ATOMIC_INIT(0); /* number of existing link groups */
static DECLARE_WAIT_QUEUE_HEAD(lgrs_deleted);

struct smc_ib_up_work {
	struct work_struct	work;
	struct smc_link_group	*lgr;
	struct smc_ib_device	*smcibdev;
	u8			ibport;
};

static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
			 struct smc_buf_desc *buf_desc);
static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft);

static void smc_link_up_work(struct work_struct *work);
static void smc_link_down_work(struct work_struct *work);

/* return head of link group list and its lock for a given link group */
static inline struct list_head *smc_lgr_list_head(struct smc_link_group *lgr,
						  spinlock_t **lgr_lock)
{
	if (lgr->is_smcd) {
		*lgr_lock = &lgr->smcd->lgr_lock;
		return &lgr->smcd->lgr_list;
	}

	*lgr_lock = &smc_lgr_list.lock;
	return &smc_lgr_list.list;
}

static void smc_lgr_schedule_free_work(struct smc_link_group *lgr)
{
	/* client link group creation always follows the server link group
	 * creation. For client use a somewhat higher removal delay time,
	 * otherwise there is a risk of out-of-sync link groups.
	 */
	if (!lgr->freeing && !lgr->freefast) {
		mod_delayed_work(system_wq, &lgr->free_work,
				 (!lgr->is_smcd && lgr->role == SMC_CLNT) ?
						SMC_LGR_FREE_DELAY_CLNT :
						SMC_LGR_FREE_DELAY_SERV);
	}
}

void smc_lgr_schedule_free_work_fast(struct smc_link_group *lgr)
{
	if (!lgr->freeing && !lgr->freefast) {
		lgr->freefast = 1;
		mod_delayed_work(system_wq, &lgr->free_work,
				 SMC_LGR_FREE_DELAY_FAST);
	}
}

/* Register connection's alert token in our lookup structure.
 * To use rbtrees we have to implement our own insert core.
 * Requires @conns_lock
 * @smc		connection to register
 * Returns 0 on success, != 0 otherwise.
 */
static void smc_lgr_add_alert_token(struct smc_connection *conn)
{
	struct rb_node **link, *parent = NULL;
	u32 token = conn->alert_token_local;

	link = &conn->lgr->conns_all.rb_node;
	while (*link) {
		struct smc_connection *cur = rb_entry(*link,
					struct smc_connection, alert_node);

		parent = *link;
		if (cur->alert_token_local > token)
			link = &parent->rb_left;
		else
			link = &parent->rb_right;
	}
	/* Put the new node there */
	rb_link_node(&conn->alert_node, parent, link);
	rb_insert_color(&conn->alert_node, &conn->lgr->conns_all);
}

/* Register connection in link group by assigning an alert token
 * registered in a search tree.
 * Requires @conns_lock
 * Note that '0' is a reserved value and not assigned.
 */
static int smc_lgr_register_conn(struct smc_connection *conn)
{
	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
	static atomic_t nexttoken = ATOMIC_INIT(0);

	/* find a new alert_token_local value not yet used by some connection
	 * in this link group
	 */
	sock_hold(&smc->sk); /* sock_put in smc_lgr_unregister_conn() */
	while (!conn->alert_token_local) {
		conn->alert_token_local = atomic_inc_return(&nexttoken);
		if (smc_lgr_find_conn(conn->alert_token_local, conn->lgr))
			conn->alert_token_local = 0;
	}
	smc_lgr_add_alert_token(conn);

	/* assign the new connection to a link */
	if (!conn->lgr->is_smcd) {
		struct smc_link *lnk;
		int i;

		/* tbd - link balancing */
		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
			lnk = &conn->lgr->lnk[i];
			if (lnk->state == SMC_LNK_ACTIVATING ||
			    lnk->state == SMC_LNK_ACTIVE)
				conn->lnk = lnk;
		}
		if (!conn->lnk)
			return SMC_CLC_DECL_NOACTLINK;
	}
	conn->lgr->conns_num++;
	return 0;
}

/* Unregister connection and reset the alert token of the given connection
 */
static void __smc_lgr_unregister_conn(struct smc_connection *conn)
{
	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
	struct smc_link_group *lgr = conn->lgr;

	rb_erase(&conn->alert_node, &lgr->conns_all);
	lgr->conns_num--;
	conn->alert_token_local = 0;
	sock_put(&smc->sk); /* sock_hold in smc_lgr_register_conn() */
}

/* Unregister connection from lgr
 */
static void smc_lgr_unregister_conn(struct smc_connection *conn)
{
	struct smc_link_group *lgr = conn->lgr;

	if (!lgr)
		return;
	write_lock_bh(&lgr->conns_lock);
	if (conn->alert_token_local) {
		__smc_lgr_unregister_conn(conn);
	}
	write_unlock_bh(&lgr->conns_lock);
	conn->lgr = NULL;
}

void smc_lgr_cleanup_early(struct smc_connection *conn)
{
	struct smc_link_group *lgr = conn->lgr;
	struct list_head *lgr_list;
	spinlock_t *lgr_lock;

	if (!lgr)
		return;

	smc_conn_free(conn);
	lgr_list = smc_lgr_list_head(lgr, &lgr_lock);
	spin_lock_bh(lgr_lock);
	/* do not use this link group for new connections */
	if (!list_empty(lgr_list))
		list_del_init(lgr_list);
	spin_unlock_bh(lgr_lock);
	smc_lgr_schedule_free_work_fast(lgr);
}

static void smc_lgr_free(struct smc_link_group *lgr);

static void smc_lgr_free_work(struct work_struct *work)
{
	struct smc_link_group *lgr = container_of(to_delayed_work(work),
						  struct smc_link_group,
						  free_work);
	spinlock_t *lgr_lock;
	bool conns;
	int i;

	smc_lgr_list_head(lgr, &lgr_lock);
	spin_lock_bh(lgr_lock);
	if (lgr->freeing) {
		spin_unlock_bh(lgr_lock);
		return;
	}
	read_lock_bh(&lgr->conns_lock);
	conns = RB_EMPTY_ROOT(&lgr->conns_all);
	read_unlock_bh(&lgr->conns_lock);
	if (!conns) { /* number of lgr connections is no longer zero */
		spin_unlock_bh(lgr_lock);
		return;
	}
	list_del_init(&lgr->list); /* remove from smc_lgr_list */
	lgr->freeing = 1; /* this instance does the freeing, no new schedule */
	spin_unlock_bh(lgr_lock);
	cancel_delayed_work(&lgr->free_work);

	if (lgr->is_smcd && !lgr->terminating)
		smc_ism_signal_shutdown(lgr);
	if (!lgr->is_smcd) {
		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
			struct smc_link *lnk = &lgr->lnk[i];

			if (smc_link_usable(lnk))
				lnk->state = SMC_LNK_INACTIVE;
		}
		wake_up_interruptible_all(&lgr->llc_waiter);
	}
	smc_lgr_free(lgr);
}

static void smc_lgr_terminate_work(struct work_struct *work)
{
	struct smc_link_group *lgr = container_of(work, struct smc_link_group,
						  terminate_work);

	__smc_lgr_terminate(lgr, true);
}

/* return next unique link id for the lgr */
static u8 smcr_next_link_id(struct smc_link_group *lgr)
{
	u8 link_id;
	int i;

	while (1) {
		link_id = ++lgr->next_link_id;
		if (!link_id)	/* skip zero as link_id */
			link_id = ++lgr->next_link_id;
		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
			if (smc_link_usable(&lgr->lnk[i]) &&
			    lgr->lnk[i].link_id == link_id)
				continue;
		}
		break;
	}
	return link_id;
}

int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
		   u8 link_idx, struct smc_init_info *ini)
{
	u8 rndvec[3];
	int rc;

	get_device(&ini->ib_dev->ibdev->dev);
	atomic_inc(&ini->ib_dev->lnk_cnt);
	lnk->state = SMC_LNK_ACTIVATING;
	lnk->link_id = smcr_next_link_id(lgr);
	lnk->lgr = lgr;
	lnk->link_idx = link_idx;
	lnk->smcibdev = ini->ib_dev;
	lnk->ibport = ini->ib_port;
	lnk->path_mtu = ini->ib_dev->pattr[ini->ib_port - 1].active_mtu;
	INIT_WORK(&lnk->link_down_wrk, smc_link_down_work);
	if (!ini->ib_dev->initialized) {
		rc = (int)smc_ib_setup_per_ibdev(ini->ib_dev);
		if (rc)
			goto out;
	}
	get_random_bytes(rndvec, sizeof(rndvec));
	lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) +
		(rndvec[2] << 16);
	rc = smc_ib_determine_gid(lnk->smcibdev, lnk->ibport,
				  ini->vlan_id, lnk->gid, &lnk->sgid_index);
	if (rc)
		goto out;
	rc = smc_llc_link_init(lnk);
	if (rc)
		goto out;
	rc = smc_wr_alloc_link_mem(lnk);
	if (rc)
		goto clear_llc_lnk;
	rc = smc_ib_create_protection_domain(lnk);
	if (rc)
		goto free_link_mem;
	rc = smc_ib_create_queue_pair(lnk);
	if (rc)
		goto dealloc_pd;
	rc = smc_wr_create_link(lnk);
	if (rc)
		goto destroy_qp;
	return 0;

destroy_qp:
	smc_ib_destroy_queue_pair(lnk);
dealloc_pd:
	smc_ib_dealloc_protection_domain(lnk);
free_link_mem:
	smc_wr_free_link_mem(lnk);
clear_llc_lnk:
	smc_llc_link_clear(lnk);
out:
	put_device(&ini->ib_dev->ibdev->dev);
	memset(lnk, 0, sizeof(struct smc_link));
	lnk->state = SMC_LNK_UNUSED;
	if (!atomic_dec_return(&ini->ib_dev->lnk_cnt))
		wake_up(&ini->ib_dev->lnks_deleted);
	return rc;
}

/* create a new SMC link group */
static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
{
	struct smc_link_group *lgr;
	struct list_head *lgr_list;
	struct smc_link *lnk;
	spinlock_t *lgr_lock;
	u8 link_idx;
	int rc = 0;
	int i;

	if (ini->is_smcd && ini->vlan_id) {
		if (smc_ism_get_vlan(ini->ism_dev, ini->vlan_id)) {
			rc = SMC_CLC_DECL_ISMVLANERR;
			goto out;
		}
	}

	lgr = kzalloc(sizeof(*lgr), GFP_KERNEL);
	if (!lgr) {
		rc = SMC_CLC_DECL_MEM;
		goto ism_put_vlan;
	}
	lgr->is_smcd = ini->is_smcd;
	lgr->sync_err = 0;
	lgr->terminating = 0;
	lgr->freefast = 0;
	lgr->freeing = 0;
	lgr->vlan_id = ini->vlan_id;
	mutex_init(&lgr->sndbufs_lock);
	mutex_init(&lgr->rmbs_lock);
	rwlock_init(&lgr->conns_lock);
	for (i = 0; i < SMC_RMBE_SIZES; i++) {
		INIT_LIST_HEAD(&lgr->sndbufs[i]);
		INIT_LIST_HEAD(&lgr->rmbs[i]);
	}
	lgr->next_link_id = 0;
	smc_lgr_list.num += SMC_LGR_NUM_INCR;
	memcpy(&lgr->id, (u8 *)&smc_lgr_list.num, SMC_LGR_ID_SIZE);
	INIT_DELAYED_WORK(&lgr->free_work, smc_lgr_free_work);
	INIT_WORK(&lgr->terminate_work, smc_lgr_terminate_work);
	lgr->conns_all = RB_ROOT;
	if (ini->is_smcd) {
		/* SMC-D specific settings */
		get_device(&ini->ism_dev->dev);
		lgr->peer_gid = ini->ism_gid;
		lgr->smcd = ini->ism_dev;
		lgr_list = &ini->ism_dev->lgr_list;
		lgr_lock = &lgr->smcd->lgr_lock;
		lgr->peer_shutdown = 0;
		atomic_inc(&ini->ism_dev->lgr_cnt);
	} else {
		/* SMC-R specific settings */
		lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
		memcpy(lgr->peer_systemid, ini->ib_lcl->id_for_peer,
		       SMC_SYSTEMID_LEN);
		memcpy(lgr->pnet_id, ini->ib_dev->pnetid[ini->ib_port - 1],
		       SMC_MAX_PNETID_LEN);
		smc_llc_lgr_init(lgr, smc);

		link_idx = SMC_SINGLE_LINK;
		lnk = &lgr->lnk[link_idx];
		rc = smcr_link_init(lgr, lnk, link_idx, ini);
		if (rc)
			goto free_lgr;
		lgr_list = &smc_lgr_list.list;
		lgr_lock = &smc_lgr_list.lock;
		atomic_inc(&lgr_cnt);
	}
	smc->conn.lgr = lgr;
	spin_lock_bh(lgr_lock);
	list_add(&lgr->list, lgr_list);
	spin_unlock_bh(lgr_lock);
	return 0;

free_lgr:
	kfree(lgr);
ism_put_vlan:
	if (ini->is_smcd && ini->vlan_id)
		smc_ism_put_vlan(ini->ism_dev, ini->vlan_id);
out:
	if (rc < 0) {
		if (rc == -ENOMEM)
			rc = SMC_CLC_DECL_MEM;
		else
			rc = SMC_CLC_DECL_INTERR;
	}
	return rc;
}

static void smcr_buf_unuse(struct smc_buf_desc *rmb_desc,
			   struct smc_link_group *lgr)
{
	int rc;

	if (rmb_desc->is_conf_rkey && !list_empty(&lgr->list)) {
		/* unregister rmb with peer */
		rc = smc_llc_flow_initiate(lgr, SMC_LLC_FLOW_RKEY);
		if (!rc) {
			/* protect against smc_llc_cli_rkey_exchange() */
			mutex_lock(&lgr->llc_conf_mutex);
			smc_llc_do_delete_rkey(lgr, rmb_desc);
			rmb_desc->is_conf_rkey = false;
			mutex_unlock(&lgr->llc_conf_mutex);
			smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
		}
	}

	if (rmb_desc->is_reg_err) {
		/* buf registration failed, reuse not possible */
		mutex_lock(&lgr->rmbs_lock);
		list_del(&rmb_desc->list);
		mutex_unlock(&lgr->rmbs_lock);

		smc_buf_free(lgr, true, rmb_desc);
	} else {
		rmb_desc->used = 0;
	}
}

static void smc_buf_unuse(struct smc_connection *conn,
			  struct smc_link_group *lgr)
{
	if (conn->sndbuf_desc)
		conn->sndbuf_desc->used = 0;
	if (conn->rmb_desc && lgr->is_smcd)
		conn->rmb_desc->used = 0;
	else if (conn->rmb_desc)
		smcr_buf_unuse(conn->rmb_desc, lgr);
}

/* remove a finished connection from its link group */
void smc_conn_free(struct smc_connection *conn)
{
	struct smc_link_group *lgr = conn->lgr;

	if (!lgr)
		return;
	if (lgr->is_smcd) {
		if (!list_empty(&lgr->list))
			smc_ism_unset_conn(conn);
		tasklet_kill(&conn->rx_tsklet);
	} else {
		smc_cdc_tx_dismiss_slots(conn);
	}
	if (!list_empty(&lgr->list)) {
		smc_lgr_unregister_conn(conn);
		smc_buf_unuse(conn, lgr); /* allow buffer reuse */
	}

	if (!lgr->conns_num)
		smc_lgr_schedule_free_work(lgr);
}

/* unregister a link from a buf_desc */
static void smcr_buf_unmap_link(struct smc_buf_desc *buf_desc, bool is_rmb,
				struct smc_link *lnk)
{
	if (is_rmb)
		buf_desc->is_reg_mr[lnk->link_idx] = false;
	if (!buf_desc->is_map_ib[lnk->link_idx])
		return;
	if (is_rmb) {
		if (buf_desc->mr_rx[lnk->link_idx]) {
			smc_ib_put_memory_region(
					buf_desc->mr_rx[lnk->link_idx]);
			buf_desc->mr_rx[lnk->link_idx] = NULL;
		}
		smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_FROM_DEVICE);
	} else {
		smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_TO_DEVICE);
	}
	sg_free_table(&buf_desc->sgt[lnk->link_idx]);
	buf_desc->is_map_ib[lnk->link_idx] = false;
}

/* unmap all buffers of lgr for a deleted link */
static void smcr_buf_unmap_lgr(struct smc_link *lnk)
{
	struct smc_link_group *lgr = lnk->lgr;
	struct smc_buf_desc *buf_desc, *bf;
	int i;

	for (i = 0; i < SMC_RMBE_SIZES; i++) {
		mutex_lock(&lgr->rmbs_lock);
		list_for_each_entry_safe(buf_desc, bf, &lgr->rmbs[i], list)
			smcr_buf_unmap_link(buf_desc, true, lnk);
		mutex_unlock(&lgr->rmbs_lock);
		mutex_lock(&lgr->sndbufs_lock);
		list_for_each_entry_safe(buf_desc, bf, &lgr->sndbufs[i],
					 list)
			smcr_buf_unmap_link(buf_desc, false, lnk);
		mutex_unlock(&lgr->sndbufs_lock);
	}
}

static void smcr_rtoken_clear_link(struct smc_link *lnk)
{
	struct smc_link_group *lgr = lnk->lgr;
	int i;

	for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
		lgr->rtokens[i][lnk->link_idx].rkey = 0;
		lgr->rtokens[i][lnk->link_idx].dma_addr = 0;
	}
}

/* must be called under lgr->llc_conf_mutex lock */
void smcr_link_clear(struct smc_link *lnk)
{
	struct smc_ib_device *smcibdev;

	if (!lnk->lgr || lnk->state == SMC_LNK_UNUSED)
		return;
	lnk->peer_qpn = 0;
	smc_llc_link_clear(lnk);
	smcr_buf_unmap_lgr(lnk);
	smcr_rtoken_clear_link(lnk);
	smc_ib_modify_qp_reset(lnk);
	smc_wr_free_link(lnk);
	smc_ib_destroy_queue_pair(lnk);
	smc_ib_dealloc_protection_domain(lnk);
	smc_wr_free_link_mem(lnk);
	put_device(&lnk->smcibdev->ibdev->dev);
	smcibdev = lnk->smcibdev;
	memset(lnk, 0, sizeof(struct smc_link));
	lnk->state = SMC_LNK_UNUSED;
	if (!atomic_dec_return(&smcibdev->lnk_cnt))
		wake_up(&smcibdev->lnks_deleted);
}

static void smcr_buf_free(struct smc_link_group *lgr, bool is_rmb,
			  struct smc_buf_desc *buf_desc)
{
	int i;

	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++)
		smcr_buf_unmap_link(buf_desc, is_rmb, &lgr->lnk[i]);

	if (buf_desc->pages)
		__free_pages(buf_desc->pages, buf_desc->order);
	kfree(buf_desc);
}

static void smcd_buf_free(struct smc_link_group *lgr, bool is_dmb,
			  struct smc_buf_desc *buf_desc)
{
	if (is_dmb) {
		/* restore original buf len */
		buf_desc->len += sizeof(struct smcd_cdc_msg);
		smc_ism_unregister_dmb(lgr->smcd, buf_desc);
	} else {
		kfree(buf_desc->cpu_addr);
	}
	kfree(buf_desc);
}

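/* free a send buffer or RMB, dispatching to the SMC-D or SMC-R variant */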
static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
			 struct smc_buf_desc *buf_desc)
{
	if (lgr->is_smcd)
		smcd_buf_free(lgr, is_rmb, buf_desc);
	else
		smcr_buf_free(lgr, is_rmb, buf_desc);
}

static void __smc_lgr_free_bufs(struct smc_link_group *lgr, bool is_rmb)
{
	struct smc_buf_desc *buf_desc, *bf_desc;
	struct list_head *buf_list;
	int i;

	for (i = 0; i < SMC_RMBE_SIZES; i++) {
		if (is_rmb)
			buf_list = &lgr->rmbs[i];
		else
			buf_list = &lgr->sndbufs[i];
		list_for_each_entry_safe(buf_desc, bf_desc, buf_list,
					 list) {
			list_del(&buf_desc->list);
			smc_buf_free(lgr, is_rmb, buf_desc);
		}
	}
}

static void smc_lgr_free_bufs(struct smc_link_group *lgr)
{
	/* free send buffers */
	__smc_lgr_free_bufs(lgr, false);
	/* free rmbs */
	__smc_lgr_free_bufs(lgr, true);
}

/* remove a link group */
static void smc_lgr_free(struct smc_link_group *lgr)
{
	int i;

	smc_lgr_free_bufs(lgr);
	if (lgr->is_smcd) {
		if (!lgr->terminating) {
			smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
			put_device(&lgr->smcd->dev);
		}
		if (!atomic_dec_return(&lgr->smcd->lgr_cnt))
			wake_up(&lgr->smcd->lgrs_deleted);
	} else {
		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
			if (lgr->lnk[i].state != SMC_LNK_UNUSED)
				smcr_link_clear(&lgr->lnk[i]);
		}
		smc_llc_lgr_clear(lgr);
		if (!atomic_dec_return(&lgr_cnt))
			wake_up(&lgrs_deleted);
	}
	kfree(lgr);
}

static void smcd_unregister_all_dmbs(struct smc_link_group *lgr)
{
	int i;

	for (i = 0; i < SMC_RMBE_SIZES; i++) {
		struct smc_buf_desc *buf_desc;

		list_for_each_entry(buf_desc, &lgr->rmbs[i], list) {
			buf_desc->len += sizeof(struct smcd_cdc_msg);
			smc_ism_unregister_dmb(lgr->smcd, buf_desc);
		}
	}
}

static void smc_sk_wake_ups(struct smc_sock *smc)
{
	smc->sk.sk_write_space(&smc->sk);
	smc->sk.sk_data_ready(&smc->sk);
	smc->sk.sk_state_change(&smc->sk);
}

/* kill a connection */
static void smc_conn_kill(struct smc_connection *conn, bool soft)
{
	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);

	if (conn->lgr->is_smcd && conn->lgr->peer_shutdown)
		conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
	else
		smc_close_abort(conn);
	conn->killed = 1;
	smc->sk.sk_err = ECONNABORTED;
	smc_sk_wake_ups(smc);
	if (conn->lgr->is_smcd) {
		smc_ism_unset_conn(conn);
		if (soft)
			tasklet_kill(&conn->rx_tsklet);
		else
			tasklet_unlock_wait(&conn->rx_tsklet);
	} else {
		smc_cdc_tx_dismiss_slots(conn);
	}
	smc_lgr_unregister_conn(conn);
	smc_close_active_abort(smc);
}

static void smc_lgr_cleanup(struct smc_link_group *lgr)
{
	int i;

	if (lgr->is_smcd) {
		smc_ism_signal_shutdown(lgr);
		smcd_unregister_all_dmbs(lgr);
		smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
		put_device(&lgr->smcd->dev);
	} else {
		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
			struct smc_link *lnk = &lgr->lnk[i];

			if (smc_link_usable(lnk))
				lnk->state = SMC_LNK_INACTIVE;
		}
		wake_up_interruptible_all(&lgr->llc_waiter);
	}
}

/* terminate link group
 * @soft: true if link group shutdown can take its time
 *	  false if immediate link group shutdown is required
 */
static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft)
{
	struct smc_connection *conn;
	struct smc_sock *smc;
	struct rb_node *node;

	if (lgr->terminating)
		return;	/* lgr already terminating */
	if (!soft)
		cancel_delayed_work_sync(&lgr->free_work);
	lgr->terminating = 1;

	/* kill remaining link group connections */
	read_lock_bh(&lgr->conns_lock);
	node = rb_first(&lgr->conns_all);
	while (node) {
		read_unlock_bh(&lgr->conns_lock);
		conn = rb_entry(node, struct smc_connection, alert_node);
		smc = container_of(conn, struct smc_sock, conn);
		sock_hold(&smc->sk); /* sock_put below */
		lock_sock(&smc->sk);
		smc_conn_kill(conn, soft);
		release_sock(&smc->sk);
		sock_put(&smc->sk); /* sock_hold above */
		read_lock_bh(&lgr->conns_lock);
		node = rb_first(&lgr->conns_all);
	}
	read_unlock_bh(&lgr->conns_lock);
	smc_lgr_cleanup(lgr);
	if (soft)
		smc_lgr_schedule_free_work_fast(lgr);
	else
		smc_lgr_free(lgr);
}

/* unlink link group and schedule termination */
void smc_lgr_terminate_sched(struct smc_link_group *lgr)
{
	spinlock_t *lgr_lock;

	smc_lgr_list_head(lgr, &lgr_lock);
	spin_lock_bh(lgr_lock);
	if (list_empty(&lgr->list) || lgr->terminating || lgr->freeing) {
		spin_unlock_bh(lgr_lock);
		return;	/* lgr already terminating */
	}
	list_del_init(&lgr->list);
	spin_unlock_bh(lgr_lock);
	schedule_work(&lgr->terminate_work);
}

/* Called when peer lgr shutdown (regularly or abnormally) is received */
void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan)
{
	struct smc_link_group *lgr, *l;
	LIST_HEAD(lgr_free_list);

	/* run common cleanup function and build free list */
	spin_lock_bh(&dev->lgr_lock);
	list_for_each_entry_safe(lgr, l, &dev->lgr_list, list) {
		if ((!peer_gid || lgr->peer_gid == peer_gid) &&
		    (vlan == VLAN_VID_MASK || lgr->vlan_id == vlan)) {
			if (peer_gid) /* peer triggered termination */
				lgr->peer_shutdown = 1;
			list_move(&lgr->list, &lgr_free_list);
		}
	}
	spin_unlock_bh(&dev->lgr_lock);

	/* cancel the regular free workers and actually free lgrs */
	list_for_each_entry_safe(lgr, l, &lgr_free_list, list) {
		list_del_init(&lgr->list);
		schedule_work(&lgr->terminate_work);
	}
}

/* Called when an SMCD device is removed or the smc module is unloaded */
void smc_smcd_terminate_all(struct smcd_dev *smcd)
{
	struct smc_link_group *lgr, *lg;
	LIST_HEAD(lgr_free_list);

	spin_lock_bh(&smcd->lgr_lock);
	list_splice_init(&smcd->lgr_list, &lgr_free_list);
	list_for_each_entry(lgr, &lgr_free_list, list)
		lgr->freeing = 1;
	spin_unlock_bh(&smcd->lgr_lock);

	list_for_each_entry_safe(lgr, lg, &lgr_free_list, list) {
		list_del_init(&lgr->list);
		__smc_lgr_terminate(lgr, false);
	}

	if (atomic_read(&smcd->lgr_cnt))
		wait_event(smcd->lgrs_deleted, !atomic_read(&smcd->lgr_cnt));
}

/* Called when an SMCR device is removed or the smc module is unloaded.
 * If smcibdev is given, all SMCR link groups using this device are terminated.
 * If smcibdev is NULL, all SMCR link groups are terminated.
 */
void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
{
	struct smc_link_group *lgr, *lg;
	LIST_HEAD(lgr_free_list);
	int i;

	spin_lock_bh(&smc_lgr_list.lock);
	if (!smcibdev) {
		list_splice_init(&smc_lgr_list.list, &lgr_free_list);
		list_for_each_entry(lgr, &lgr_free_list, list)
			lgr->freeing = 1;
	} else {
		list_for_each_entry_safe(lgr, lg, &smc_lgr_list.list, list) {
			for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
				if (lgr->lnk[i].smcibdev == smcibdev)
					smcr_link_down_cond_sched(&lgr->lnk[i]);
			}
		}
	}
	spin_unlock_bh(&smc_lgr_list.lock);

	list_for_each_entry_safe(lgr, lg, &lgr_free_list, list) {
		list_del_init(&lgr->list);
		__smc_lgr_terminate(lgr, false);
	}

	if (smcibdev) {
		if (atomic_read(&smcibdev->lnk_cnt))
			wait_event(smcibdev->lnks_deleted,
				   !atomic_read(&smcibdev->lnk_cnt));
	} else {
		if (atomic_read(&lgr_cnt))
			wait_event(lgrs_deleted, !atomic_read(&lgr_cnt));
	}
}

/* link is up - establish alternate link if applicable */
static void smcr_link_up(struct smc_link_group *lgr,
			 struct smc_ib_device *smcibdev, u8 ibport)
{
	struct smc_link *link = NULL;

	if (list_empty(&lgr->list) ||
	    lgr->type == SMC_LGR_SYMMETRIC ||
	    lgr->type == SMC_LGR_ASYMMETRIC_PEER)
		return;

	if (lgr->role == SMC_SERV) {
		/* trigger local add link processing */
		link = smc_llc_usable_link(lgr);
		if (!link)
			return;
		smc_llc_srv_add_link_local(link);
	} else {
		/* invite server to start add link processing */
		u8 gid[SMC_GID_SIZE];

		if (smc_ib_determine_gid(smcibdev, ibport, lgr->vlan_id, gid,
					 NULL))
			return;
		if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) {
			/* some other llc task is ongoing */
			wait_event_interruptible_timeout(lgr->llc_waiter,
				(lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE),
				SMC_LLC_WAIT_TIME);
		}
		if (list_empty(&lgr->list) ||
		    !smc_ib_port_active(smcibdev, ibport))
			return; /* lgr or device no longer active */
		link = smc_llc_usable_link(lgr);
		if (!link)
			return;
		smc_llc_send_add_link(link, smcibdev->mac[ibport - 1], gid,
				      NULL, SMC_LLC_REQ);
	}
}

void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport)
{
	struct smc_ib_up_work *ib_work;
	struct smc_link_group *lgr, *n;

	list_for_each_entry_safe(lgr, n, &smc_lgr_list.list, list) {
		if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id,
			    SMC_MAX_PNETID_LEN) ||
		    lgr->type == SMC_LGR_SYMMETRIC ||
		    lgr->type == SMC_LGR_ASYMMETRIC_PEER)
			continue;
		ib_work = kmalloc(sizeof(*ib_work), GFP_KERNEL);
		if (!ib_work)
			continue;
		INIT_WORK(&ib_work->work, smc_link_up_work);
		ib_work->lgr = lgr;
		ib_work->smcibdev = smcibdev;
		ib_work->ibport = ibport;
		schedule_work(&ib_work->work);
	}
}

/* link is down - switch connections to alternate link,
 * must be called under lgr->llc_conf_mutex lock
 */
static void smcr_link_down(struct smc_link *lnk)
{
	struct smc_link_group *lgr = lnk->lgr;
	struct smc_link *to_lnk;
	int del_link_id;

	if (!lgr || lnk->state == SMC_LNK_UNUSED || list_empty(&lgr->list))
		return;

	smc_ib_modify_qp_reset(lnk);
	to_lnk = NULL;
	/* tbd: call to_lnk = smc_switch_conns(lgr, lnk, true); */
	if (!to_lnk) { /* no backup link available */
		smcr_link_clear(lnk);
		return;
	}
	lgr->type = SMC_LGR_SINGLE;
	del_link_id = lnk->link_id;

	if (lgr->role == SMC_SERV) {
		/* trigger local delete link processing */
		smc_llc_srv_delete_link_local(to_lnk, del_link_id);
	} else {
		if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) {
			/* another llc task is ongoing */
			mutex_unlock(&lgr->llc_conf_mutex);
			wait_event_interruptible_timeout(lgr->llc_waiter,
				(lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE),
				SMC_LLC_WAIT_TIME);
			mutex_lock(&lgr->llc_conf_mutex);
		}
		smc_llc_send_delete_link(to_lnk, del_link_id, SMC_LLC_REQ, true,
					 SMC_LLC_DEL_LOST_PATH);
	}
}

/* must be called under lgr->llc_conf_mutex lock */
void smcr_link_down_cond(struct smc_link *lnk)
{
	if (smc_link_downing(&lnk->state))
		smcr_link_down(lnk);
}

/* will get the lgr->llc_conf_mutex lock */
void smcr_link_down_cond_sched(struct smc_link *lnk)
{
	if (smc_link_downing(&lnk->state))
		schedule_work(&lnk->link_down_wrk);
}

void smcr_port_err(struct smc_ib_device *smcibdev, u8 ibport)
{
	struct smc_link_group *lgr, *n;
	int i;

	list_for_each_entry_safe(lgr, n, &smc_lgr_list.list, list) {
		if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id,
			    SMC_MAX_PNETID_LEN))
			continue; /* lgr is not affected */
		if (list_empty(&lgr->list))
			continue;
		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
			struct smc_link *lnk = &lgr->lnk[i];

			if (smc_link_usable(lnk) &&
			    lnk->smcibdev == smcibdev && lnk->ibport == ibport)
				smcr_link_down_cond_sched(lnk);
		}
	}
}

static void smc_link_up_work(struct work_struct *work)
{
	struct smc_ib_up_work *ib_work = container_of(work,
						      struct smc_ib_up_work,
						      work);
	struct smc_link_group *lgr = ib_work->lgr;

	if (list_empty(&lgr->list))
		goto out;
	smcr_link_up(lgr, ib_work->smcibdev, ib_work->ibport);
out:
	kfree(ib_work);
}

static void smc_link_down_work(struct work_struct *work)
{
	struct smc_link *link = container_of(work, struct smc_link,
					     link_down_wrk);
	struct smc_link_group *lgr = link->lgr;

	if (list_empty(&lgr->list))
		return;
	wake_up_interruptible_all(&lgr->llc_waiter);
	mutex_lock(&lgr->llc_conf_mutex);
	smcr_link_down(link);
	mutex_unlock(&lgr->llc_conf_mutex);
}

/* Determine vlan of internal TCP socket.
 * @vlan_id: address to store the determined vlan id into
 */
int smc_vlan_by_tcpsk(struct socket *clcsock, struct smc_init_info *ini)
{
	struct dst_entry *dst = sk_dst_get(clcsock->sk);
	struct net_device *ndev;
	int i, nest_lvl, rc = 0;

	ini->vlan_id = 0;
	if (!dst) {
		rc = -ENOTCONN;
		goto out;
	}
	if (!dst->dev) {
		rc = -ENODEV;
		goto out_rel;
	}

	ndev = dst->dev;
	if (is_vlan_dev(ndev)) {
		ini->vlan_id = vlan_dev_vlan_id(ndev);
		goto out_rel;
	}

	rtnl_lock();
	nest_lvl = ndev->lower_level;
	for (i = 0; i < nest_lvl; i++) {
		struct list_head *lower = &ndev->adj_list.lower;

		if (list_empty(lower))
			break;
		lower = lower->next;
		ndev = (struct net_device *)netdev_lower_get_next(ndev, &lower);
		if (is_vlan_dev(ndev)) {
			ini->vlan_id = vlan_dev_vlan_id(ndev);
			break;
		}
	}
	rtnl_unlock();

out_rel:
	dst_release(dst);
out:
	return rc;
}

static bool smcr_lgr_match(struct smc_link_group *lgr,
			   struct smc_clc_msg_local *lcl,
			   enum smc_lgr_role role, u32 clcqpn)
{
	int i;

	if (memcmp(lgr->peer_systemid, lcl->id_for_peer, SMC_SYSTEMID_LEN) ||
	    lgr->role != role)
		return false;

	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		if (lgr->lnk[i].state != SMC_LNK_ACTIVE)
			continue;
		if ((lgr->role == SMC_SERV || lgr->lnk[i].peer_qpn == clcqpn) &&
		    !memcmp(lgr->lnk[i].peer_gid, &lcl->gid, SMC_GID_SIZE) &&
		    !memcmp(lgr->lnk[i].peer_mac, lcl->mac, sizeof(lcl->mac)))
			return true;
	}
	return false;
}

static bool smcd_lgr_match(struct smc_link_group *lgr,
			   struct smcd_dev *smcismdev, u64 peer_gid)
{
	return lgr->peer_gid == peer_gid && lgr->smcd == smcismdev;
}

/* create a new SMC connection (and a new link group if necessary) */
int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
{
	struct smc_connection *conn = &smc->conn;
	struct list_head *lgr_list;
	struct smc_link_group *lgr;
	enum smc_lgr_role role;
	spinlock_t *lgr_lock;
	int rc = 0;

	lgr_list = ini->is_smcd ? &ini->ism_dev->lgr_list : &smc_lgr_list.list;
	lgr_lock = ini->is_smcd ? &ini->ism_dev->lgr_lock : &smc_lgr_list.lock;
	ini->cln_first_contact = SMC_FIRST_CONTACT;
	role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
	if (role == SMC_CLNT && ini->srv_first_contact)
		/* create new link group as well */
		goto create;

	/* determine if an existing link group can be reused */
	spin_lock_bh(lgr_lock);
	list_for_each_entry(lgr, lgr_list, list) {
		write_lock_bh(&lgr->conns_lock);
		if ((ini->is_smcd ?
		     smcd_lgr_match(lgr, ini->ism_dev, ini->ism_gid) :
		     smcr_lgr_match(lgr, ini->ib_lcl, role, ini->ib_clcqpn)) &&
		    !lgr->sync_err &&
		    lgr->vlan_id == ini->vlan_id &&
		    (role == SMC_CLNT ||
		     lgr->conns_num < SMC_RMBS_PER_LGR_MAX)) {
			/* link group found */
			ini->cln_first_contact = SMC_REUSE_CONTACT;
			conn->lgr = lgr;
			rc = smc_lgr_register_conn(conn); /* add conn to lgr */
			write_unlock_bh(&lgr->conns_lock);
			if (!rc && delayed_work_pending(&lgr->free_work))
				cancel_delayed_work(&lgr->free_work);
			break;
		}
		write_unlock_bh(&lgr->conns_lock);
	}
	spin_unlock_bh(lgr_lock);
	if (rc)
		return rc;

	if (role == SMC_CLNT && !ini->srv_first_contact &&
	    ini->cln_first_contact == SMC_FIRST_CONTACT) {
		/* Server reuses a link group, but Client wants to start
		 * a new one
		 * send out_of_sync decline, reason synchr. error
		 */
		return SMC_CLC_DECL_SYNCERR;
	}

create:
	if (ini->cln_first_contact == SMC_FIRST_CONTACT) {
		rc = smc_lgr_create(smc, ini);
		if (rc)
			goto out;
		lgr = conn->lgr;
		write_lock_bh(&lgr->conns_lock);
		rc = smc_lgr_register_conn(conn); /* add smc conn to lgr */
		write_unlock_bh(&lgr->conns_lock);
		if (rc)
			goto out;
	}
	conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE;
	conn->local_tx_ctrl.len = SMC_WR_TX_SIZE;
	conn->urg_state = SMC_URG_READ;
	if (ini->is_smcd) {
		conn->rx_off = sizeof(struct smcd_cdc_msg);
		smcd_cdc_rx_init(conn); /* init tasklet for this conn */
	}
#ifndef KERNEL_HAS_ATOMIC64
	spin_lock_init(&conn->acurs_lock);
#endif

out:
	return rc;
}

/* convert the RMB size into the compressed notation - minimum 16K.
 * In contrast to plain ilog2, this rounds towards the next power of 2,
 * so the socket application gets at least its desired sndbuf / rcvbuf size.
 */
static u8 smc_compress_bufsize(int size)
{
	u8 compressed;

	if (size <= SMC_BUF_MIN_SIZE)
		return 0;

	size = (size - 1) >> 14;
	compressed = ilog2(size) + 1;
	if (compressed >= SMC_RMBE_SIZES)
		compressed = SMC_RMBE_SIZES - 1;
	return compressed;
}

/* convert the RMB size from compressed notation into integer */
int smc_uncompress_bufsize(u8 compressed)
{
	u32 size;

	size = 0x00000001 << (((int)compressed) + 14);
	return (int)size;
}

/* try to reuse a sndbuf or rmb description slot for a certain
 * buffer size; if not available, return NULL
 */
static struct smc_buf_desc *smc_buf_get_slot(int compressed_bufsize,
					     struct mutex *lock,
					     struct list_head *buf_list)
{
	struct smc_buf_desc *buf_slot;

	mutex_lock(lock);
	list_for_each_entry(buf_slot, buf_list, list) {
		if (cmpxchg(&buf_slot->used, 0, 1) == 0) {
			mutex_unlock(lock);
			return buf_slot;
		}
	}
	mutex_unlock(lock);
	return NULL;
}

/* one of the conditions for announcing a receiver's current window size is
 * that it "results in a minimum increase in the window size of 10% of the
 * receive buffer space" [RFC7609]
 */
static inline int smc_rmb_wnd_update_limit(int rmbe_size)
{
	return min_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2);
}

/* map an rmb buf to a link */
static int smcr_buf_map_link(struct smc_buf_desc *buf_desc, bool is_rmb,
			     struct smc_link *lnk)
{
	int rc;

	if (buf_desc->is_map_ib[lnk->link_idx])
		return 0;

	rc = sg_alloc_table(&buf_desc->sgt[lnk->link_idx], 1, GFP_KERNEL);
	if (rc)
		return rc;
	sg_set_buf(buf_desc->sgt[lnk->link_idx].sgl,
		   buf_desc->cpu_addr, buf_desc->len);

	/* map sg table to DMA address */
	rc = smc_ib_buf_map_sg(lnk, buf_desc,
			       is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
	/* SMC protocol depends on mapping to one DMA address only */
	if (rc != 1) {
		rc = -EAGAIN;
		goto free_table;
	}

	/* create a new memory region for the RMB */
	if (is_rmb) {
		rc = smc_ib_get_memory_region(lnk->roce_pd,
					      IB_ACCESS_REMOTE_WRITE |
					      IB_ACCESS_LOCAL_WRITE,
					      buf_desc, lnk->link_idx);
		if (rc)
			goto buf_unmap;
		smc_ib_sync_sg_for_device(lnk, buf_desc, DMA_FROM_DEVICE);
	}
	buf_desc->is_map_ib[lnk->link_idx] = true;
	return 0;

buf_unmap:
	smc_ib_buf_unmap_sg(lnk, buf_desc,
			    is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
free_table:
	sg_free_table(&buf_desc->sgt[lnk->link_idx]);
	return rc;
}

/* register a new rmb on IB device,
 * must be called under lgr->llc_conf_mutex lock
 */
int smcr_link_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc)
{
	if (list_empty(&link->lgr->list))
		return -ENOLINK;
	if (!rmb_desc->is_reg_mr[link->link_idx]) {
		/* register memory region for new rmb */
		if (smc_wr_reg_send(link, rmb_desc->mr_rx[link->link_idx])) {
			rmb_desc->is_reg_err = true;
			return -EFAULT;
		}
		rmb_desc->is_reg_mr[link->link_idx] = true;
	}
	return 0;
}

static int _smcr_buf_map_lgr(struct smc_link *lnk, struct mutex *lock,
			     struct list_head *lst, bool is_rmb)
{
	struct smc_buf_desc *buf_desc, *bf;
	int rc = 0;

	mutex_lock(lock);
	list_for_each_entry_safe(buf_desc, bf, lst, list) {
		if (!buf_desc->used)
			continue;
		rc = smcr_buf_map_link(buf_desc, is_rmb, lnk);
		if (rc)
			goto out;
	}
out:
	mutex_unlock(lock);
	return rc;
}

/* map all used buffers of lgr for a new link */
int smcr_buf_map_lgr(struct smc_link *lnk)
{
	struct smc_link_group *lgr = lnk->lgr;
	int i, rc = 0;

	for (i = 0; i < SMC_RMBE_SIZES; i++) {
		rc = _smcr_buf_map_lgr(lnk, &lgr->rmbs_lock,
				       &lgr->rmbs[i], true);
		if (rc)
			return rc;
		rc = _smcr_buf_map_lgr(lnk, &lgr->sndbufs_lock,
				       &lgr->sndbufs[i], false);
		if (rc)
			return rc;
	}
	return 0;
}

/* register all used buffers of lgr for a new link,
 * must be called under lgr->llc_conf_mutex lock
 */
int smcr_buf_reg_lgr(struct smc_link *lnk)
{
	struct smc_link_group *lgr = lnk->lgr;
	struct smc_buf_desc *buf_desc, *bf;
	int i, rc = 0;

	mutex_lock(&lgr->rmbs_lock);
	for (i = 0; i < SMC_RMBE_SIZES; i++) {
		list_for_each_entry_safe(buf_desc, bf, &lgr->rmbs[i], list) {
			if (!buf_desc->used)
				continue;
			rc = smcr_link_reg_rmb(lnk, buf_desc);
			if (rc)
				goto out;
		}
	}
out:
	mutex_unlock(&lgr->rmbs_lock);
	return rc;
}

static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr,
						bool is_rmb, int bufsize)
{
	struct smc_buf_desc *buf_desc;

	/* try to alloc a new buffer */
	buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
	if (!buf_desc)
		return ERR_PTR(-ENOMEM);

	buf_desc->order = get_order(bufsize);
	buf_desc->pages = alloc_pages(GFP_KERNEL | __GFP_NOWARN |
				      __GFP_NOMEMALLOC | __GFP_COMP |
				      __GFP_NORETRY | __GFP_ZERO,
				      buf_desc->order);
	if (!buf_desc->pages) {
		kfree(buf_desc);
		return ERR_PTR(-EAGAIN);
	}
	buf_desc->cpu_addr = (void *)page_address(buf_desc->pages);
	buf_desc->len = bufsize;
	return buf_desc;
}

/* map buf_desc on all usable links,
 * unused buffers stay mapped as long as the link is up
 */
static int smcr_buf_map_usable_links(struct smc_link_group *lgr,
				     struct smc_buf_desc *buf_desc, bool is_rmb)
{
	int i, rc = 0;

	/* protect against parallel link reconfiguration */
	mutex_lock(&lgr->llc_conf_mutex);
	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		struct smc_link *lnk = &lgr->lnk[i];

		if (!smc_link_usable(lnk))
			continue;
		if (smcr_buf_map_link(buf_desc, is_rmb, lnk)) {
			rc = -ENOMEM;
			goto out;
		}
	}
out:
	mutex_unlock(&lgr->llc_conf_mutex);
	return rc;
}

#define SMCD_DMBE_SIZES		7 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */

static struct smc_buf_desc *smcd_new_buf_create(struct smc_link_group *lgr,
						bool is_dmb, int bufsize)
{
	struct smc_buf_desc *buf_desc;
	int rc;

	if (smc_compress_bufsize(bufsize) > SMCD_DMBE_SIZES)
		return ERR_PTR(-EAGAIN);

	/* try to alloc a new DMB */
	buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
	if (!buf_desc)
		return ERR_PTR(-ENOMEM);
	if (is_dmb) {
		rc = smc_ism_register_dmb(lgr, bufsize, buf_desc);
		if (rc) {
			kfree(buf_desc);
			return ERR_PTR(-EAGAIN);
		}
		buf_desc->pages = virt_to_page(buf_desc->cpu_addr);
		/* CDC header stored in buf. So, pretend it was smaller */
		buf_desc->len = bufsize - sizeof(struct smcd_cdc_msg);
	} else {
		buf_desc->cpu_addr = kzalloc(bufsize, GFP_KERNEL |
					     __GFP_NOWARN | __GFP_NORETRY |
					     __GFP_NOMEMALLOC);
		if (!buf_desc->cpu_addr) {
			kfree(buf_desc);
			return ERR_PTR(-EAGAIN);
		}
		buf_desc->len = bufsize;
	}
	return buf_desc;
}

static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
{
	struct smc_buf_desc *buf_desc = ERR_PTR(-ENOMEM);
	struct smc_connection *conn = &smc->conn;
	struct smc_link_group *lgr = conn->lgr;
	struct list_head *buf_list;
	int bufsize, bufsize_short;
	struct mutex *lock;	/* lock buffer list */
	int sk_buf_size;

	if (is_rmb)
		/* use socket recv buffer size (w/o overhead) as start value */
		sk_buf_size = smc->sk.sk_rcvbuf / 2;
	else
		/* use socket send buffer size (w/o overhead) as start value */
		sk_buf_size = smc->sk.sk_sndbuf / 2;

	for (bufsize_short = smc_compress_bufsize(sk_buf_size);
	     bufsize_short >= 0; bufsize_short--) {

		if (is_rmb) {
			lock = &lgr->rmbs_lock;
			buf_list = &lgr->rmbs[bufsize_short];
		} else {
			lock = &lgr->sndbufs_lock;
			buf_list = &lgr->sndbufs[bufsize_short];
		}
		bufsize = smc_uncompress_bufsize(bufsize_short);
		if ((1 << get_order(bufsize)) > SG_MAX_SINGLE_ALLOC)
			continue;

		/* check for reusable slot in the link group */
		buf_desc = smc_buf_get_slot(bufsize_short, lock, buf_list);
		if (buf_desc) {
			memset(buf_desc->cpu_addr, 0, bufsize);
			break; /* found reusable slot */
		}

		if (is_smcd)
			buf_desc = smcd_new_buf_create(lgr, is_rmb, bufsize);
		else
			buf_desc = smcr_new_buf_create(lgr, is_rmb, bufsize);

		if (PTR_ERR(buf_desc) == -ENOMEM)
			break;
		if (IS_ERR(buf_desc))
			continue;

		buf_desc->used = 1;
		mutex_lock(lock);
		list_add(&buf_desc->list, buf_list);
		mutex_unlock(lock);
		break; /* found */
	}

	if (IS_ERR(buf_desc))
		return -ENOMEM;

	if (!is_smcd) {
		if (smcr_buf_map_usable_links(lgr, buf_desc, is_rmb)) {
			smcr_buf_unuse(buf_desc, lgr);
			return -ENOMEM;
		}
	}

	if (is_rmb) {
		conn->rmb_desc = buf_desc;
		conn->rmbe_size_short = bufsize_short;
		smc->sk.sk_rcvbuf = bufsize * 2;
		atomic_set(&conn->bytes_to_rcv, 0);
		conn->rmbe_update_limit =
			smc_rmb_wnd_update_limit(buf_desc->len);
		if (is_smcd)
			smc_ism_set_conn(conn); /* map RMB/smcd_dev to conn */
	} else {
		conn->sndbuf_desc = buf_desc;
		smc->sk.sk_sndbuf = bufsize * 2;
		atomic_set(&conn->sndbuf_space, bufsize);
	}
	return 0;
}

void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn)
{
	if (!conn->lgr || conn->lgr->is_smcd || !smc_link_usable(conn->lnk))
		return;
	smc_ib_sync_sg_for_cpu(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE);
}

void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn)
{
	if (!conn->lgr || conn->lgr->is_smcd || !smc_link_usable(conn->lnk))
		return;
	smc_ib_sync_sg_for_device(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE);
}

void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn)
{
	int i;

	if (!conn->lgr || conn->lgr->is_smcd)
		return;
	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		if (!smc_link_usable(&conn->lgr->lnk[i]))
			continue;
		smc_ib_sync_sg_for_cpu(&conn->lgr->lnk[i], conn->rmb_desc,
				       DMA_FROM_DEVICE);
	}
}

void smc_rmb_sync_sg_for_device(struct smc_connection *conn)
{
	int i;

	if (!conn->lgr || conn->lgr->is_smcd)
		return;
	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		if (!smc_link_usable(&conn->lgr->lnk[i]))
			continue;
		smc_ib_sync_sg_for_device(&conn->lgr->lnk[i], conn->rmb_desc,
					  DMA_FROM_DEVICE);
	}
}

/* create the send and receive buffer for an SMC socket;
 * receive buffers are called RMBs;
 * (even though the SMC protocol allows more than one RMB-element per RMB,
 * the Linux implementation uses just one RMB-element per RMB, i.e. uses an
 * extra RMB for every connection in a link group
 */
int smc_buf_create(struct smc_sock *smc, bool is_smcd)
{
	int rc;

	/* create send buffer */
	rc = __smc_buf_create(smc, is_smcd, false);
	if (rc)
		return rc;
	/* create rmb */
	rc = __smc_buf_create(smc, is_smcd, true);
	if (rc)
		smc_buf_free(smc->conn.lgr, false, smc->conn.sndbuf_desc);
	return rc;
}

static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr)
{
	int i;

	for_each_clear_bit(i, lgr->rtokens_used_mask, SMC_RMBS_PER_LGR_MAX) {
		if (!test_and_set_bit(i, lgr->rtokens_used_mask))
			return i;
	}
	return -ENOSPC;
}

static int smc_rtoken_find_by_link(struct smc_link_group *lgr, int lnk_idx,
				   u32 rkey)
{
	int i;

	for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
		if (test_bit(i, lgr->rtokens_used_mask) &&
		    lgr->rtokens[i][lnk_idx].rkey == rkey)
			return i;
	}
	return -ENOENT;
}

/* set rtoken for a new link to an existing rmb */
void smc_rtoken_set(struct smc_link_group *lgr, int link_idx, int link_idx_new,
		    __be32 nw_rkey_known, __be64 nw_vaddr, __be32 nw_rkey)
{
	int rtok_idx;

	rtok_idx = smc_rtoken_find_by_link(lgr, link_idx, ntohl(nw_rkey_known));
	if (rtok_idx == -ENOENT)
		return;
	lgr->rtokens[rtok_idx][link_idx_new].rkey = ntohl(nw_rkey);
	lgr->rtokens[rtok_idx][link_idx_new].dma_addr = be64_to_cpu(nw_vaddr);
}

/* set rtoken for a new link whose link_id is given */
void smc_rtoken_set2(struct smc_link_group *lgr, int rtok_idx, int link_id,
		     __be64 nw_vaddr, __be32 nw_rkey)
{
	u64 dma_addr = be64_to_cpu(nw_vaddr);
	u32 rkey = ntohl(nw_rkey);
	bool found = false;
	int link_idx;

	for (link_idx = 0; link_idx < SMC_LINKS_PER_LGR_MAX; link_idx++) {
		if (lgr->lnk[link_idx].link_id == link_id) {
			found = true;
			break;
		}
	}
	if (!found)
		return;
	lgr->rtokens[rtok_idx][link_idx].rkey = rkey;
	lgr->rtokens[rtok_idx][link_idx].dma_addr = dma_addr;
}

/* add a new rtoken from peer */
int smc_rtoken_add(struct smc_link *lnk, __be64 nw_vaddr, __be32 nw_rkey)
{
	struct smc_link_group *lgr = smc_get_lgr(lnk);
	u64 dma_addr = be64_to_cpu(nw_vaddr);
	u32 rkey = ntohl(nw_rkey);
	int i;

	for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
		if (lgr->rtokens[i][lnk->link_idx].rkey == rkey &&
		    lgr->rtokens[i][lnk->link_idx].dma_addr == dma_addr &&
		    test_bit(i, lgr->rtokens_used_mask)) {
			/* already in list */
			return i;
		}
	}
	i = smc_rmb_reserve_rtoken_idx(lgr);
	if (i < 0)
		return i;
	lgr->rtokens[i][lnk->link_idx].rkey = rkey;
	lgr->rtokens[i][lnk->link_idx].dma_addr = dma_addr;
	return i;
}

/* delete an rtoken from all links */
int smc_rtoken_delete(struct smc_link *lnk, __be32 nw_rkey)
{
	struct smc_link_group *lgr = smc_get_lgr(lnk);
	u32 rkey = ntohl(nw_rkey);
	int i, j;

	for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
		if (lgr->rtokens[i][lnk->link_idx].rkey == rkey &&
		    test_bit(i, lgr->rtokens_used_mask)) {
			for (j = 0; j < SMC_LINKS_PER_LGR_MAX; j++) {
				lgr->rtokens[i][j].rkey = 0;
				lgr->rtokens[i][j].dma_addr = 0;
			}
			clear_bit(i, lgr->rtokens_used_mask);
			return 0;
		}
	}
	return -ENOENT;
}

/* save rkey and dma_addr received from peer during clc handshake */
int smc_rmb_rtoken_handling(struct smc_connection *conn,
			    struct smc_link *lnk,
			    struct smc_clc_msg_accept_confirm *clc)
{
	conn->rtoken_idx = smc_rtoken_add(lnk, clc->rmb_dma_addr,
					  clc->rmb_rkey);
	if (conn->rtoken_idx < 0)
		return conn->rtoken_idx;
	return 0;
}

static void smc_core_going_away(void)
{
	struct smc_ib_device *smcibdev;
	struct smcd_dev *smcd;

	spin_lock(&smc_ib_devices.lock);
	list_for_each_entry(smcibdev, &smc_ib_devices.list, list) {
		int i;

		for (i = 0; i < SMC_MAX_PORTS; i++)
			set_bit(i, smcibdev->ports_going_away);
	}
	spin_unlock(&smc_ib_devices.lock);

	spin_lock(&smcd_dev_list.lock);
	list_for_each_entry(smcd, &smcd_dev_list.list, list) {
		smcd->going_away = 1;
	}
	spin_unlock(&smcd_dev_list.lock);
}

/* Clean up all SMC link groups */
static void smc_lgrs_shutdown(void)
{
	struct smcd_dev *smcd;

	smc_core_going_away();

	smc_smcr_terminate_all(NULL);

	spin_lock(&smcd_dev_list.lock);
	list_for_each_entry(smcd, &smcd_dev_list.list, list)
		smc_smcd_terminate_all(smcd);
	spin_unlock(&smcd_dev_list.lock);
}

static int smc_core_reboot_event(struct notifier_block *this,
				 unsigned long event, void *ptr)
{
	smc_lgrs_shutdown();
	smc_ib_unregister_client();
	return 0;
}

static struct notifier_block smc_reboot_notifier = {
	.notifier_call = smc_core_reboot_event,
};

int __init smc_core_init(void)
{
	return register_reboot_notifier(&smc_reboot_notifier);
}

/* Called (from smc_exit) when module is removed */
void smc_core_exit(void)
{
	unregister_reboot_notifier(&smc_reboot_notifier);
	smc_lgrs_shutdown();
}