// SPDX-License-Identifier: GPL-2.0
/*
 * Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 * Basic Transport Functions exploiting Infiniband API
 *
 * Copyright IBM Corp. 2016
 *
 * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com>
 */

#include <linux/socket.h>
#include <linux/if_vlan.h>
#include <linux/random.h>
#include <linux/workqueue.h>
#include <linux/wait.h>
#include <linux/reboot.h>
#include <net/tcp.h>
#include <net/sock.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_cache.h>

#include "smc.h"
#include "smc_clc.h"
#include "smc_core.h"
#include "smc_ib.h"
#include "smc_wr.h"
#include "smc_llc.h"
#include "smc_cdc.h"
#include "smc_close.h"
#include "smc_ism.h"

#define SMC_LGR_NUM_INCR		256
#define SMC_LGR_FREE_DELAY_SERV		(600 * HZ)
#define SMC_LGR_FREE_DELAY_CLNT		(SMC_LGR_FREE_DELAY_SERV + 10 * HZ)
#define SMC_LGR_FREE_DELAY_FAST		(8 * HZ)

static struct smc_lgr_list smc_lgr_list = {	/* established link groups */
	.lock = __SPIN_LOCK_UNLOCKED(smc_lgr_list.lock),
	.list = LIST_HEAD_INIT(smc_lgr_list.list),
	.num = 0,
};

static atomic_t lgr_cnt = ATOMIC_INIT(0); /* number of existing link groups */
static DECLARE_WAIT_QUEUE_HEAD(lgrs_deleted);

static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
			 struct smc_buf_desc *buf_desc);
static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft);

/* return head of link group list and its lock for a given link group */
static inline struct list_head *smc_lgr_list_head(struct smc_link_group *lgr,
						  spinlock_t **lgr_lock)
{
	if (lgr->is_smcd) {
		*lgr_lock = &lgr->smcd->lgr_lock;
		return &lgr->smcd->lgr_list;
	}

	*lgr_lock = &smc_lgr_list.lock;
	return &smc_lgr_list.list;
}

static void smc_lgr_schedule_free_work(struct smc_link_group *lgr)
{
	/* client link group creation always follows the server link group
	 * creation. For client use a somewhat higher removal delay time,
	 * otherwise there is a risk of out-of-sync link groups.
	 */
	if (!lgr->freeing && !lgr->freefast) {
		mod_delayed_work(system_wq, &lgr->free_work,
				 (!lgr->is_smcd && lgr->role == SMC_CLNT) ?
						SMC_LGR_FREE_DELAY_CLNT :
						SMC_LGR_FREE_DELAY_SERV);
	}
}

void smc_lgr_schedule_free_work_fast(struct smc_link_group *lgr)
{
	if (!lgr->freeing && !lgr->freefast) {
		lgr->freefast = 1;
		mod_delayed_work(system_wq, &lgr->free_work,
				 SMC_LGR_FREE_DELAY_FAST);
	}
}

/* Register connection's alert token in our lookup structure.
 * To use rbtrees we have to implement our own insert core.
 * Requires @conns_lock
 * @conn	connection to register
 */
static void smc_lgr_add_alert_token(struct smc_connection *conn)
{
	struct rb_node **link, *parent = NULL;
	u32 token = conn->alert_token_local;

	link = &conn->lgr->conns_all.rb_node;
	while (*link) {
		struct smc_connection *cur = rb_entry(*link,
					struct smc_connection, alert_node);

		parent = *link;
		if (cur->alert_token_local > token)
			link = &parent->rb_left;
		else
			link = &parent->rb_right;
	}
	/* Put the new node there */
	rb_link_node(&conn->alert_node, parent, link);
	rb_insert_color(&conn->alert_node, &conn->lgr->conns_all);
}
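
/* The alert token inserted above is what identifies a connection within its
 * link group: it is exchanged with the peer during the CLC handshake and
 * echoed back in CDC messages, so smc_lgr_find_conn() can look up the
 * affected connection in the conns_all tree.
 */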

/* Register connection in link group by assigning an alert token
 * registered in a search tree.
 * Requires @conns_lock
 * Note that '0' is a reserved value and not assigned.
 */
static int smc_lgr_register_conn(struct smc_connection *conn)
{
	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
	static atomic_t nexttoken = ATOMIC_INIT(0);

	/* find a new alert_token_local value not yet used by some connection
	 * in this link group
	 */
	sock_hold(&smc->sk); /* sock_put in smc_lgr_unregister_conn() */
	while (!conn->alert_token_local) {
		conn->alert_token_local = atomic_inc_return(&nexttoken);
		if (smc_lgr_find_conn(conn->alert_token_local, conn->lgr))
			conn->alert_token_local = 0;
	}
	smc_lgr_add_alert_token(conn);

	/* assign the new connection to a link */
	if (!conn->lgr->is_smcd) {
		struct smc_link *lnk;
		int i;

		/* tbd - link balancing */
		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
			lnk = &conn->lgr->lnk[i];
			if (lnk->state == SMC_LNK_ACTIVATING ||
			    lnk->state == SMC_LNK_ACTIVE)
				conn->lnk = lnk;
		}
		if (!conn->lnk)
			return SMC_CLC_DECL_NOACTLINK;
	}
	conn->lgr->conns_num++;
	return 0;
}

/* Unregister connection and reset the alert token of the given connection.
 */
static void __smc_lgr_unregister_conn(struct smc_connection *conn)
{
	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
	struct smc_link_group *lgr = conn->lgr;

	rb_erase(&conn->alert_node, &lgr->conns_all);
	lgr->conns_num--;
	conn->alert_token_local = 0;
	sock_put(&smc->sk); /* sock_hold in smc_lgr_register_conn() */
}

/* Unregister connection from lgr
 */
static void smc_lgr_unregister_conn(struct smc_connection *conn)
{
	struct smc_link_group *lgr = conn->lgr;

	if (!lgr)
		return;
	write_lock_bh(&lgr->conns_lock);
	if (conn->alert_token_local) {
		__smc_lgr_unregister_conn(conn);
	}
	write_unlock_bh(&lgr->conns_lock);
	conn->lgr = NULL;
}

void smc_lgr_cleanup_early(struct smc_connection *conn)
{
	struct smc_link_group *lgr = conn->lgr;

	if (!lgr)
		return;

	smc_conn_free(conn);
	smc_lgr_forget(lgr);
	smc_lgr_schedule_free_work_fast(lgr);
}

/* Send delete link, either as client to request the initiation
 * of the DELETE LINK sequence from server; or as server to
 * initiate the delete processing. See smc_llc_rx_delete_link().
 */
static int smcr_link_send_delete(struct smc_link *lnk, bool orderly)
{
	if (lnk->state == SMC_LNK_ACTIVE &&
	    !smc_llc_send_delete_link(lnk, SMC_LLC_REQ, orderly)) {
		return 0;
	}
	return -ENOTCONN;
}

static void smc_lgr_free(struct smc_link_group *lgr);

static void smc_lgr_free_work(struct work_struct *work)
{
	struct smc_link_group *lgr = container_of(to_delayed_work(work),
						  struct smc_link_group,
						  free_work);
	spinlock_t *lgr_lock;
	bool conns;
	int i;

	smc_lgr_list_head(lgr, &lgr_lock);
	spin_lock_bh(lgr_lock);
	if (lgr->freeing) {
		spin_unlock_bh(lgr_lock);
		return;
	}
	read_lock_bh(&lgr->conns_lock);
	conns = RB_EMPTY_ROOT(&lgr->conns_all);
	read_unlock_bh(&lgr->conns_lock);
	if (!conns) { /* number of lgr connections is no longer zero */
		spin_unlock_bh(lgr_lock);
		return;
	}
	list_del_init(&lgr->list); /* remove from smc_lgr_list */

	if (!lgr->is_smcd && !lgr->terminating) {
		bool do_wait = false;

		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
			struct smc_link *lnk = &lgr->lnk[i];
			/* try to send del link msg, on err free immediately */
			if (lnk->state == SMC_LNK_ACTIVE &&
			    !smcr_link_send_delete(lnk, true)) {
				/* reschedule in case we never receive a resp */
				smc_lgr_schedule_free_work(lgr);
				do_wait = true;
			}
		}
		if (do_wait) {
			spin_unlock_bh(lgr_lock);
			return; /* wait for resp, see smc_llc_rx_delete_link */
		}
	}
	lgr->freeing = 1; /* this instance does the freeing, no new schedule */
	spin_unlock_bh(lgr_lock);
	cancel_delayed_work(&lgr->free_work);

	if (lgr->is_smcd && !lgr->terminating)
		smc_ism_signal_shutdown(lgr);
	if (!lgr->is_smcd) {
		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
			struct smc_link *lnk = &lgr->lnk[i];

			if (smc_link_usable(lnk))
				lnk->state = SMC_LNK_INACTIVE;
		}
		wake_up_interruptible_all(&lgr->llc_waiter);
	}
	smc_lgr_free(lgr);
}

static void smc_lgr_terminate_work(struct work_struct *work)
{
	struct smc_link_group *lgr = container_of(work, struct smc_link_group,
						  terminate_work);

	__smc_lgr_terminate(lgr, true);
}

/* return next unique link id for the lgr */
static u8 smcr_next_link_id(struct smc_link_group *lgr)
{
	u8 link_id;
	int i;

	while (1) {
		link_id = ++lgr->next_link_id;
		if (!link_id)	/* skip zero as link_id */
			link_id = ++lgr->next_link_id;
		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
			if (smc_link_usable(&lgr->lnk[i]) &&
			    lgr->lnk[i].link_id == link_id)
				continue;
		}
		break;
	}
	return link_id;
}

static int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
			  u8 link_idx, struct smc_init_info *ini)
{
	u8 rndvec[3];
	int rc;

	get_device(&ini->ib_dev->ibdev->dev);
	atomic_inc(&ini->ib_dev->lnk_cnt);
	lnk->state = SMC_LNK_ACTIVATING;
	lnk->link_id = smcr_next_link_id(lgr);
	lnk->lgr = lgr;
	lnk->link_idx = link_idx;
	lnk->smcibdev = ini->ib_dev;
	lnk->ibport = ini->ib_port;
	lnk->path_mtu = ini->ib_dev->pattr[ini->ib_port - 1].active_mtu;
	if (!ini->ib_dev->initialized) {
		rc = (int)smc_ib_setup_per_ibdev(ini->ib_dev);
		if (rc)
			goto out;
	}
	get_random_bytes(rndvec, sizeof(rndvec));
	lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) +
		(rndvec[2] << 16);
	rc = smc_ib_determine_gid(lnk->smcibdev, lnk->ibport,
				  ini->vlan_id, lnk->gid, &lnk->sgid_index);
	if (rc)
		goto out;
	rc = smc_llc_link_init(lnk);
	if (rc)
		goto out;
	rc = smc_wr_alloc_link_mem(lnk);
	if (rc)
		goto clear_llc_lnk;
	rc = smc_ib_create_protection_domain(lnk);
	if (rc)
		goto free_link_mem;
	rc = smc_ib_create_queue_pair(lnk);
	if (rc)
		goto dealloc_pd;
	rc = smc_wr_create_link(lnk);
	if (rc)
		goto destroy_qp;
	return 0;

destroy_qp:
	smc_ib_destroy_queue_pair(lnk);
dealloc_pd:
	smc_ib_dealloc_protection_domain(lnk);
free_link_mem:
	smc_wr_free_link_mem(lnk);
clear_llc_lnk:
	smc_llc_link_clear(lnk);
out:
	put_device(&ini->ib_dev->ibdev->dev);
	memset(lnk, 0, sizeof(struct smc_link));
	lnk->state = SMC_LNK_UNUSED;
	if (!atomic_dec_return(&ini->ib_dev->lnk_cnt))
		wake_up(&ini->ib_dev->lnks_deleted);
	return rc;
}

/* create a new SMC link group */
static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
{
	struct smc_link_group *lgr;
	struct list_head *lgr_list;
	struct smc_link *lnk;
	spinlock_t *lgr_lock;
	u8 link_idx;
	int rc = 0;
	int i;

	if (ini->is_smcd && ini->vlan_id) {
		if (smc_ism_get_vlan(ini->ism_dev, ini->vlan_id)) {
			rc = SMC_CLC_DECL_ISMVLANERR;
			goto out;
		}
	}

	lgr = kzalloc(sizeof(*lgr), GFP_KERNEL);
	if (!lgr) {
		rc = SMC_CLC_DECL_MEM;
		goto ism_put_vlan;
	}
	lgr->is_smcd = ini->is_smcd;
	lgr->sync_err = 0;
	lgr->terminating = 0;
	lgr->freefast = 0;
	lgr->freeing = 0;
	lgr->vlan_id = ini->vlan_id;
	mutex_init(&lgr->sndbufs_lock);
	mutex_init(&lgr->rmbs_lock);
	rwlock_init(&lgr->conns_lock);
	for (i = 0; i < SMC_RMBE_SIZES; i++) {
		INIT_LIST_HEAD(&lgr->sndbufs[i]);
		INIT_LIST_HEAD(&lgr->rmbs[i]);
	}
	lgr->next_link_id = 0;
	smc_lgr_list.num += SMC_LGR_NUM_INCR;
	memcpy(&lgr->id, (u8 *)&smc_lgr_list.num, SMC_LGR_ID_SIZE);
	INIT_DELAYED_WORK(&lgr->free_work, smc_lgr_free_work);
	INIT_WORK(&lgr->terminate_work, smc_lgr_terminate_work);
	lgr->conns_all = RB_ROOT;
	if (ini->is_smcd) {
		/* SMC-D specific settings */
		get_device(&ini->ism_dev->dev);
		lgr->peer_gid = ini->ism_gid;
		lgr->smcd = ini->ism_dev;
		lgr_list = &ini->ism_dev->lgr_list;
		lgr_lock = &lgr->smcd->lgr_lock;
		lgr->peer_shutdown = 0;
		atomic_inc(&ini->ism_dev->lgr_cnt);
	} else {
		/* SMC-R specific settings */
		lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
		memcpy(lgr->peer_systemid, ini->ib_lcl->id_for_peer,
		       SMC_SYSTEMID_LEN);
		smc_llc_lgr_init(lgr, smc);

		link_idx = SMC_SINGLE_LINK;
		lnk = &lgr->lnk[link_idx];
		rc = smcr_link_init(lgr, lnk, link_idx, ini);
		if (rc)
			goto free_lgr;
		lgr_list = &smc_lgr_list.list;
		lgr_lock = &smc_lgr_list.lock;
		atomic_inc(&lgr_cnt);
	}
	smc->conn.lgr = lgr;
	spin_lock_bh(lgr_lock);
	list_add(&lgr->list, lgr_list);
	spin_unlock_bh(lgr_lock);
	return 0;

free_lgr:
	kfree(lgr);
ism_put_vlan:
	if (ini->is_smcd && ini->vlan_id)
		smc_ism_put_vlan(ini->ism_dev, ini->vlan_id);
out:
	if (rc < 0) {
		if (rc == -ENOMEM)
			rc = SMC_CLC_DECL_MEM;
		else
			rc = SMC_CLC_DECL_INTERR;
	}
	return rc;
}

static void smcr_buf_unuse(struct smc_buf_desc *rmb_desc,
			   struct smc_link_group *lgr)
{
	if (rmb_desc->is_conf_rkey && !list_empty(&lgr->list)) {
		/* unregister rmb with peer */
		smc_llc_do_delete_rkey(lgr, rmb_desc);
		rmb_desc->is_conf_rkey = false;
	}
	if (rmb_desc->is_reg_err) {
		/* buf registration failed, reuse not possible */
		mutex_lock(&lgr->rmbs_lock);
		list_del(&rmb_desc->list);
		mutex_unlock(&lgr->rmbs_lock);

		smc_buf_free(lgr, true, rmb_desc);
	} else {
		rmb_desc->used = 0;
	}
}

static void smc_buf_unuse(struct smc_connection *conn,
			  struct smc_link_group *lgr)
{
	if (conn->sndbuf_desc)
		conn->sndbuf_desc->used = 0;
	if (conn->rmb_desc && lgr->is_smcd)
		conn->rmb_desc->used = 0;
	else if (conn->rmb_desc)
		smcr_buf_unuse(conn->rmb_desc, lgr);
}

/* remove a finished connection from its link group */
void smc_conn_free(struct smc_connection *conn)
{
	struct smc_link_group *lgr = conn->lgr;

	if (!lgr)
		return;
	if (lgr->is_smcd) {
		if (!list_empty(&lgr->list))
			smc_ism_unset_conn(conn);
		tasklet_kill(&conn->rx_tsklet);
	} else {
		smc_cdc_tx_dismiss_slots(conn);
	}
	if (!list_empty(&lgr->list)) {
		smc_lgr_unregister_conn(conn);
		smc_buf_unuse(conn, lgr); /* allow buffer reuse */
	}

	if (!lgr->conns_num)
		smc_lgr_schedule_free_work(lgr);
}

static void smcr_link_clear(struct smc_link *lnk)
{
	struct smc_ib_device *smcibdev;

	if (lnk->peer_qpn == 0)
		return;
	lnk->peer_qpn = 0;
	smc_llc_link_clear(lnk);
	smc_ib_modify_qp_reset(lnk);
	smc_wr_free_link(lnk);
	smc_ib_destroy_queue_pair(lnk);
	smc_ib_dealloc_protection_domain(lnk);
	smc_wr_free_link_mem(lnk);
	put_device(&lnk->smcibdev->ibdev->dev);
	smcibdev = lnk->smcibdev;
	memset(lnk, 0, sizeof(struct smc_link));
	lnk->state = SMC_LNK_UNUSED;
	if (!atomic_dec_return(&smcibdev->lnk_cnt))
		wake_up(&smcibdev->lnks_deleted);
}

static void smcr_buf_free(struct smc_link_group *lgr, bool is_rmb,
			  struct smc_buf_desc *buf_desc)
{
	struct smc_link *lnk;
	int i;

	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		lnk = &lgr->lnk[i];
		if (!buf_desc->is_map_ib[lnk->link_idx])
			continue;
		if (is_rmb) {
			if (buf_desc->mr_rx[lnk->link_idx])
				smc_ib_put_memory_region(
						buf_desc->mr_rx[lnk->link_idx]);
			smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_FROM_DEVICE);
		} else {
			smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_TO_DEVICE);
		}
		sg_free_table(&buf_desc->sgt[lnk->link_idx]);
	}

	if (buf_desc->pages)
		__free_pages(buf_desc->pages, buf_desc->order);
	kfree(buf_desc);
}

static void smcd_buf_free(struct smc_link_group *lgr, bool is_dmb,
			  struct smc_buf_desc *buf_desc)
{
	if (is_dmb) {
		/* restore original buf len */
		buf_desc->len += sizeof(struct smcd_cdc_msg);
		smc_ism_unregister_dmb(lgr->smcd, buf_desc);
	} else {
		kfree(buf_desc->cpu_addr);
	}
	kfree(buf_desc);
}

static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
			 struct smc_buf_desc *buf_desc)
{
	if (lgr->is_smcd)
		smcd_buf_free(lgr, is_rmb, buf_desc);
	else
		smcr_buf_free(lgr, is_rmb, buf_desc);
}

static void __smc_lgr_free_bufs(struct smc_link_group *lgr, bool is_rmb)
{
	struct smc_buf_desc *buf_desc, *bf_desc;
	struct list_head *buf_list;
	int i;

	for (i = 0; i < SMC_RMBE_SIZES; i++) {
		if (is_rmb)
			buf_list = &lgr->rmbs[i];
		else
			buf_list = &lgr->sndbufs[i];
		list_for_each_entry_safe(buf_desc, bf_desc, buf_list,
					 list) {
			list_del(&buf_desc->list);
			smc_buf_free(lgr, is_rmb, buf_desc);
		}
	}
}

static void smc_lgr_free_bufs(struct smc_link_group *lgr)
{
	/* free send buffers */
	__smc_lgr_free_bufs(lgr, false);
	/* free rmbs */
	__smc_lgr_free_bufs(lgr, true);
}

/* remove a link group */
static void smc_lgr_free(struct smc_link_group *lgr)
{
	int i;

	smc_lgr_free_bufs(lgr);
	if (lgr->is_smcd) {
		if (!lgr->terminating) {
			smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
			put_device(&lgr->smcd->dev);
		}
		if (!atomic_dec_return(&lgr->smcd->lgr_cnt))
			wake_up(&lgr->smcd->lgrs_deleted);
	} else {
		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
			if (lgr->lnk[i].state != SMC_LNK_UNUSED)
				smcr_link_clear(&lgr->lnk[i]);
		}
		smc_llc_lgr_clear(lgr);
		if (!atomic_dec_return(&lgr_cnt))
			wake_up(&lgrs_deleted);
	}
	kfree(lgr);
}

void smc_lgr_forget(struct smc_link_group *lgr)
{
	struct list_head *lgr_list;
	spinlock_t *lgr_lock;

	lgr_list = smc_lgr_list_head(lgr, &lgr_lock);
	spin_lock_bh(lgr_lock);
	/* do not use this link group for new connections */
	if (!list_empty(lgr_list))
		list_del_init(lgr_list);
	spin_unlock_bh(lgr_lock);
}

static void smcd_unregister_all_dmbs(struct smc_link_group *lgr)
{
	int i;

	for (i = 0; i < SMC_RMBE_SIZES; i++) {
		struct smc_buf_desc *buf_desc;

		list_for_each_entry(buf_desc, &lgr->rmbs[i], list) {
			buf_desc->len += sizeof(struct smcd_cdc_msg);
			smc_ism_unregister_dmb(lgr->smcd, buf_desc);
		}
	}
}

static void smc_sk_wake_ups(struct smc_sock *smc)
{
	smc->sk.sk_write_space(&smc->sk);
	smc->sk.sk_data_ready(&smc->sk);
	smc->sk.sk_state_change(&smc->sk);
}

/* kill a connection */
static void smc_conn_kill(struct smc_connection *conn, bool soft)
{
	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);

	if (conn->lgr->is_smcd && conn->lgr->peer_shutdown)
		conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
	else
		smc_close_abort(conn);
	conn->killed = 1;
	smc->sk.sk_err = ECONNABORTED;
	smc_sk_wake_ups(smc);
	if (conn->lgr->is_smcd) {
		smc_ism_unset_conn(conn);
		if (soft)
			tasklet_kill(&conn->rx_tsklet);
		else
			tasklet_unlock_wait(&conn->rx_tsklet);
	} else {
		smc_cdc_tx_dismiss_slots(conn);
	}
	smc_lgr_unregister_conn(conn);
	smc_close_active_abort(smc);
}

static void smc_lgr_cleanup(struct smc_link_group *lgr)
{
	int i;

	if (lgr->is_smcd) {
		smc_ism_signal_shutdown(lgr);
		smcd_unregister_all_dmbs(lgr);
		smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
		put_device(&lgr->smcd->dev);
	} else {
		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
			struct smc_link *lnk = &lgr->lnk[i];

			if (smc_link_usable(lnk))
				lnk->state = SMC_LNK_INACTIVE;
		}
		wake_up_interruptible_all(&lgr->llc_waiter);
	}
}

/* terminate link group
 * @soft: true if link group shutdown can take its time
 *	  false if immediate link group shutdown is required
 */
static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft)
{
	struct smc_connection *conn;
	struct smc_sock *smc;
	struct rb_node *node;

	if (lgr->terminating)
		return;	/* lgr already terminating */
	if (!soft)
		cancel_delayed_work_sync(&lgr->free_work);
	lgr->terminating = 1;

	/* kill remaining link group connections */
	read_lock_bh(&lgr->conns_lock);
	node = rb_first(&lgr->conns_all);
	while (node) {
		read_unlock_bh(&lgr->conns_lock);
		conn = rb_entry(node, struct smc_connection, alert_node);
		smc = container_of(conn, struct smc_sock, conn);
		sock_hold(&smc->sk); /* sock_put below */
		lock_sock(&smc->sk);
		smc_conn_kill(conn, soft);
		release_sock(&smc->sk);
		sock_put(&smc->sk); /* sock_hold above */
		read_lock_bh(&lgr->conns_lock);
		node = rb_first(&lgr->conns_all);
	}
	read_unlock_bh(&lgr->conns_lock);
	smc_lgr_cleanup(lgr);
	if (soft)
		smc_lgr_schedule_free_work_fast(lgr);
	else
		smc_lgr_free(lgr);
}

/* unlink link group and schedule termination */
void smc_lgr_terminate_sched(struct smc_link_group *lgr)
{
	spinlock_t *lgr_lock;

	smc_lgr_list_head(lgr, &lgr_lock);
	spin_lock_bh(lgr_lock);
	if (list_empty(&lgr->list) || lgr->terminating || lgr->freeing) {
		spin_unlock_bh(lgr_lock);
		return;	/* lgr already terminating */
	}
	list_del_init(&lgr->list);
	spin_unlock_bh(lgr_lock);
	schedule_work(&lgr->terminate_work);
}

/* Called when IB port is terminated */
void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport)
{
	struct smc_link_group *lgr, *l;
	LIST_HEAD(lgr_free_list);
	int i;

	spin_lock_bh(&smc_lgr_list.lock);
	list_for_each_entry_safe(lgr, l, &smc_lgr_list.list, list) {
		if (lgr->is_smcd)
			continue;
		/* tbd - terminate only when no more links are active */
		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
			if (!smc_link_usable(&lgr->lnk[i]))
				continue;
			if (lgr->lnk[i].smcibdev == smcibdev &&
			    lgr->lnk[i].ibport == ibport) {
				list_move(&lgr->list, &lgr_free_list);
				lgr->freeing = 1;
			}
		}
	}
	spin_unlock_bh(&smc_lgr_list.lock);

	list_for_each_entry_safe(lgr, l, &lgr_free_list, list) {
		list_del_init(&lgr->list);
		__smc_lgr_terminate(lgr, false);
	}
}

/* Called when peer lgr shutdown (regularly or abnormally) is received */
void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan)
{
	struct smc_link_group *lgr, *l;
	LIST_HEAD(lgr_free_list);

	/* run common cleanup function and build free list */
	spin_lock_bh(&dev->lgr_lock);
	list_for_each_entry_safe(lgr, l, &dev->lgr_list, list) {
		if ((!peer_gid || lgr->peer_gid == peer_gid) &&
		    (vlan == VLAN_VID_MASK || lgr->vlan_id == vlan)) {
			if (peer_gid) /* peer triggered termination */
				lgr->peer_shutdown = 1;
			list_move(&lgr->list, &lgr_free_list);
		}
	}
	spin_unlock_bh(&dev->lgr_lock);

	/* cancel the regular free workers and actually free lgrs */
	list_for_each_entry_safe(lgr, l, &lgr_free_list, list) {
		list_del_init(&lgr->list);
		schedule_work(&lgr->terminate_work);
	}
}

/* Called when an SMCD device is removed or the smc module is unloaded */
void smc_smcd_terminate_all(struct smcd_dev *smcd)
{
	struct smc_link_group *lgr, *lg;
	LIST_HEAD(lgr_free_list);

	spin_lock_bh(&smcd->lgr_lock);
	list_splice_init(&smcd->lgr_list, &lgr_free_list);
	list_for_each_entry(lgr, &lgr_free_list, list)
		lgr->freeing = 1;
	spin_unlock_bh(&smcd->lgr_lock);

	list_for_each_entry_safe(lgr, lg, &lgr_free_list, list) {
		list_del_init(&lgr->list);
		__smc_lgr_terminate(lgr, false);
	}

	if (atomic_read(&smcd->lgr_cnt))
		wait_event(smcd->lgrs_deleted, !atomic_read(&smcd->lgr_cnt));
}

/* Called when an SMCR device is removed or the smc module is unloaded.
 * If smcibdev is given, all SMCR link groups using this device are terminated.
 * If smcibdev is NULL, all SMCR link groups are terminated.
 */
void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
{
	struct smc_link_group *lgr, *lg;
	LIST_HEAD(lgr_free_list);
	int i;

	spin_lock_bh(&smc_lgr_list.lock);
	if (!smcibdev) {
		list_splice_init(&smc_lgr_list.list, &lgr_free_list);
		list_for_each_entry(lgr, &lgr_free_list, list)
			lgr->freeing = 1;
	} else {
		list_for_each_entry_safe(lgr, lg, &smc_lgr_list.list, list) {
			for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
				if (lgr->lnk[i].smcibdev == smcibdev) {
					list_move(&lgr->list, &lgr_free_list);
					lgr->freeing = 1;
					break;
				}
			}
		}
	}
	spin_unlock_bh(&smc_lgr_list.lock);

	list_for_each_entry_safe(lgr, lg, &lgr_free_list, list) {
		list_del_init(&lgr->list);
		__smc_lgr_terminate(lgr, false);
	}

	if (smcibdev) {
		if (atomic_read(&smcibdev->lnk_cnt))
			wait_event(smcibdev->lnks_deleted,
				   !atomic_read(&smcibdev->lnk_cnt));
	} else {
		if (atomic_read(&lgr_cnt))
			wait_event(lgrs_deleted, !atomic_read(&lgr_cnt));
	}
}

/* Determine vlan of internal TCP socket.
 * The determined vlan id is stored in @ini->vlan_id.
 */
int smc_vlan_by_tcpsk(struct socket *clcsock, struct smc_init_info *ini)
{
	struct dst_entry *dst = sk_dst_get(clcsock->sk);
	struct net_device *ndev;
	int i, nest_lvl, rc = 0;

	ini->vlan_id = 0;
	if (!dst) {
		rc = -ENOTCONN;
		goto out;
	}
	if (!dst->dev) {
		rc = -ENODEV;
		goto out_rel;
	}

	ndev = dst->dev;
	if (is_vlan_dev(ndev)) {
		ini->vlan_id = vlan_dev_vlan_id(ndev);
		goto out_rel;
	}

	rtnl_lock();
	nest_lvl = ndev->lower_level;
	for (i = 0; i < nest_lvl; i++) {
		struct list_head *lower = &ndev->adj_list.lower;

		if (list_empty(lower))
			break;
		lower = lower->next;
		ndev = (struct net_device *)netdev_lower_get_next(ndev, &lower);
		if (is_vlan_dev(ndev)) {
			ini->vlan_id = vlan_dev_vlan_id(ndev);
			break;
		}
	}
	rtnl_unlock();

out_rel:
	dst_release(dst);
out:
	return rc;
}

static bool smcr_lgr_match(struct smc_link_group *lgr,
			   struct smc_clc_msg_local *lcl,
			   enum smc_lgr_role role, u32 clcqpn)
{
	int i;

	if (memcmp(lgr->peer_systemid, lcl->id_for_peer, SMC_SYSTEMID_LEN) ||
	    lgr->role != role)
		return false;

	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		if (lgr->lnk[i].state != SMC_LNK_ACTIVE)
			continue;
		if ((lgr->role == SMC_SERV || lgr->lnk[i].peer_qpn == clcqpn) &&
		    !memcmp(lgr->lnk[i].peer_gid, &lcl->gid, SMC_GID_SIZE) &&
		    !memcmp(lgr->lnk[i].peer_mac, lcl->mac, sizeof(lcl->mac)))
			return true;
	}
	return false;
}

static bool smcd_lgr_match(struct smc_link_group *lgr,
			   struct smcd_dev *smcismdev, u64 peer_gid)
{
	return lgr->peer_gid == peer_gid && lgr->smcd == smcismdev;
}

/* create a new SMC connection (and a new link group if necessary) */
int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
{
	struct smc_connection *conn = &smc->conn;
	struct list_head *lgr_list;
	struct smc_link_group *lgr;
	enum smc_lgr_role role;
	spinlock_t *lgr_lock;
	int rc = 0;

	lgr_list = ini->is_smcd ? &ini->ism_dev->lgr_list : &smc_lgr_list.list;
	lgr_lock = ini->is_smcd ? &ini->ism_dev->lgr_lock : &smc_lgr_list.lock;
	ini->cln_first_contact = SMC_FIRST_CONTACT;
	role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
	if (role == SMC_CLNT && ini->srv_first_contact)
		/* create new link group as well */
		goto create;

	/* determine if an existing link group can be reused */
	spin_lock_bh(lgr_lock);
	list_for_each_entry(lgr, lgr_list, list) {
		write_lock_bh(&lgr->conns_lock);
		if ((ini->is_smcd ?
		     smcd_lgr_match(lgr, ini->ism_dev, ini->ism_gid) :
		     smcr_lgr_match(lgr, ini->ib_lcl, role, ini->ib_clcqpn)) &&
		    !lgr->sync_err &&
		    lgr->vlan_id == ini->vlan_id &&
		    (role == SMC_CLNT ||
		     lgr->conns_num < SMC_RMBS_PER_LGR_MAX)) {
			/* link group found */
			ini->cln_first_contact = SMC_REUSE_CONTACT;
			conn->lgr = lgr;
			rc = smc_lgr_register_conn(conn); /* add conn to lgr */
			write_unlock_bh(&lgr->conns_lock);
			if (!rc && delayed_work_pending(&lgr->free_work))
				cancel_delayed_work(&lgr->free_work);
			break;
		}
		write_unlock_bh(&lgr->conns_lock);
	}
	spin_unlock_bh(lgr_lock);
	if (rc)
		return rc;

	if (role == SMC_CLNT && !ini->srv_first_contact &&
	    ini->cln_first_contact == SMC_FIRST_CONTACT) {
		/* Server reuses a link group, but Client wants to start
		 * a new one
		 * send out_of_sync decline, reason synchr. error
		 */
		return SMC_CLC_DECL_SYNCERR;
	}

create:
	if (ini->cln_first_contact == SMC_FIRST_CONTACT) {
		rc = smc_lgr_create(smc, ini);
		if (rc)
			goto out;
		lgr = conn->lgr;
		write_lock_bh(&lgr->conns_lock);
		rc = smc_lgr_register_conn(conn); /* add smc conn to lgr */
		write_unlock_bh(&lgr->conns_lock);
		if (rc)
			goto out;
	}
	conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE;
	conn->local_tx_ctrl.len = SMC_WR_TX_SIZE;
	conn->urg_state = SMC_URG_READ;
	if (ini->is_smcd) {
		conn->rx_off = sizeof(struct smcd_cdc_msg);
		smcd_cdc_rx_init(conn); /* init tasklet for this conn */
	}
#ifndef KERNEL_HAS_ATOMIC64
	spin_lock_init(&conn->acurs_lock);
#endif

out:
	return rc;
}

/* convert the RMB size into the compressed notation - minimum 16K.
 * In contrast to plain ilog2, this rounds towards the next power of 2,
 * so the socket application gets at least its desired sndbuf / rcvbuf size.
 */
static u8 smc_compress_bufsize(int size)
{
	u8 compressed;

	if (size <= SMC_BUF_MIN_SIZE)
		return 0;

	size = (size - 1) >> 14;
	compressed = ilog2(size) + 1;
	if (compressed >= SMC_RMBE_SIZES)
		compressed = SMC_RMBE_SIZES - 1;
	return compressed;
}

/* convert the RMB size from compressed notation into integer */
int smc_uncompress_bufsize(u8 compressed)
{
	u32 size;

	size = 0x00000001 << (((int)compressed) + 14);
	return (int)size;
}
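
/* Worked example of the compressed notation, assuming SMC_BUF_MIN_SIZE is the
 * 16K minimum described above: a request for 20000 bytes gives
 * (20000 - 1) >> 14 = 1, so smc_compress_bufsize() returns ilog2(1) + 1 = 1,
 * and smc_uncompress_bufsize(1) yields 1 << 15 = 32768 - the next power of
 * two at or above the request. Any request of 16K or less maps to value 0,
 * i.e. a 16K buffer.
 */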

/* try to reuse a sndbuf or rmb description slot for a certain
 * buffer size; if not available, return NULL
 */
static struct smc_buf_desc *smc_buf_get_slot(int compressed_bufsize,
					     struct mutex *lock,
					     struct list_head *buf_list)
{
	struct smc_buf_desc *buf_slot;

	mutex_lock(lock);
	list_for_each_entry(buf_slot, buf_list, list) {
		if (cmpxchg(&buf_slot->used, 0, 1) == 0) {
			mutex_unlock(lock);
			return buf_slot;
		}
	}
	mutex_unlock(lock);
	return NULL;
}

/* one of the conditions for announcing a receiver's current window size is
 * that it "results in a minimum increase in the window size of 10% of the
 * receive buffer space" [RFC7609]
 */
static inline int smc_rmb_wnd_update_limit(int rmbe_size)
{
	return min_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2);
}
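
/* Example: for a 64 KB RMB this evaluates to min(6553, SOCK_MIN_SNDBUF / 2)
 * bytes. The result is stored in conn->rmbe_update_limit when the RMB is
 * created below and acts as the threshold for announcing freed receive
 * buffer space to the peer, per the 10% rule from RFC 7609 quoted above.
 */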

/* map an rmb buf to a link */
static int smcr_buf_map_link(struct smc_buf_desc *buf_desc, bool is_rmb,
			     struct smc_link *lnk)
{
	int rc;

	if (buf_desc->is_map_ib[lnk->link_idx])
		return 0;

	rc = sg_alloc_table(&buf_desc->sgt[lnk->link_idx], 1, GFP_KERNEL);
	if (rc)
		return rc;
	sg_set_buf(buf_desc->sgt[lnk->link_idx].sgl,
		   buf_desc->cpu_addr, buf_desc->len);

	/* map sg table to DMA address */
	rc = smc_ib_buf_map_sg(lnk, buf_desc,
			       is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
	/* SMC protocol depends on mapping to one DMA address only */
	if (rc != 1) {
		rc = -EAGAIN;
		goto free_table;
	}

	/* create a new memory region for the RMB */
	if (is_rmb) {
		rc = smc_ib_get_memory_region(lnk->roce_pd,
					      IB_ACCESS_REMOTE_WRITE |
					      IB_ACCESS_LOCAL_WRITE,
					      buf_desc, lnk->link_idx);
		if (rc)
			goto buf_unmap;
		smc_ib_sync_sg_for_device(lnk, buf_desc, DMA_FROM_DEVICE);
	}
	buf_desc->is_map_ib[lnk->link_idx] = true;
	return 0;

buf_unmap:
	smc_ib_buf_unmap_sg(lnk, buf_desc,
			    is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
free_table:
	sg_free_table(&buf_desc->sgt[lnk->link_idx]);
	return rc;
}

static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr,
						bool is_rmb, int bufsize)
{
	struct smc_buf_desc *buf_desc;

	/* try to alloc a new buffer */
	buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
	if (!buf_desc)
		return ERR_PTR(-ENOMEM);

	buf_desc->order = get_order(bufsize);
	buf_desc->pages = alloc_pages(GFP_KERNEL | __GFP_NOWARN |
				      __GFP_NOMEMALLOC | __GFP_COMP |
				      __GFP_NORETRY | __GFP_ZERO,
				      buf_desc->order);
	if (!buf_desc->pages) {
		kfree(buf_desc);
		return ERR_PTR(-EAGAIN);
	}
	buf_desc->cpu_addr = (void *)page_address(buf_desc->pages);
	buf_desc->len = bufsize;
	return buf_desc;
}

/* map buf_desc on all usable links,
 * unused buffers stay mapped as long as the link is up
 */
static int smcr_buf_map_usable_links(struct smc_link_group *lgr,
				     struct smc_buf_desc *buf_desc, bool is_rmb)
{
	int i, rc = 0;

	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		struct smc_link *lnk = &lgr->lnk[i];

		if (!smc_link_usable(lnk))
			continue;
		if (smcr_buf_map_link(buf_desc, is_rmb, lnk)) {
			rc = -ENOMEM;
			goto out;
		}
	}
out:
	return rc;
}

#define SMCD_DMBE_SIZES		7 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */

static struct smc_buf_desc *smcd_new_buf_create(struct smc_link_group *lgr,
						bool is_dmb, int bufsize)
{
	struct smc_buf_desc *buf_desc;
	int rc;

	if (smc_compress_bufsize(bufsize) > SMCD_DMBE_SIZES)
		return ERR_PTR(-EAGAIN);

	/* try to alloc a new DMB */
	buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
	if (!buf_desc)
		return ERR_PTR(-ENOMEM);
	if (is_dmb) {
		rc = smc_ism_register_dmb(lgr, bufsize, buf_desc);
		if (rc) {
			kfree(buf_desc);
			return ERR_PTR(-EAGAIN);
		}
		buf_desc->pages = virt_to_page(buf_desc->cpu_addr);
		/* CDC header stored in buf. So, pretend it was smaller */
		buf_desc->len = bufsize - sizeof(struct smcd_cdc_msg);
	} else {
		buf_desc->cpu_addr = kzalloc(bufsize, GFP_KERNEL |
					     __GFP_NOWARN | __GFP_NORETRY |
					     __GFP_NOMEMALLOC);
		if (!buf_desc->cpu_addr) {
			kfree(buf_desc);
			return ERR_PTR(-EAGAIN);
		}
		buf_desc->len = bufsize;
	}
	return buf_desc;
}

static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
{
	struct smc_buf_desc *buf_desc = ERR_PTR(-ENOMEM);
	struct smc_connection *conn = &smc->conn;
	struct smc_link_group *lgr = conn->lgr;
	struct list_head *buf_list;
	int bufsize, bufsize_short;
	struct mutex *lock;	/* lock buffer list */
	int sk_buf_size;

	if (is_rmb)
		/* use socket recv buffer size (w/o overhead) as start value */
		sk_buf_size = smc->sk.sk_rcvbuf / 2;
	else
		/* use socket send buffer size (w/o overhead) as start value */
		sk_buf_size = smc->sk.sk_sndbuf / 2;

	for (bufsize_short = smc_compress_bufsize(sk_buf_size);
	     bufsize_short >= 0; bufsize_short--) {

		if (is_rmb) {
			lock = &lgr->rmbs_lock;
			buf_list = &lgr->rmbs[bufsize_short];
		} else {
			lock = &lgr->sndbufs_lock;
			buf_list = &lgr->sndbufs[bufsize_short];
		}
		bufsize = smc_uncompress_bufsize(bufsize_short);
		if ((1 << get_order(bufsize)) > SG_MAX_SINGLE_ALLOC)
			continue;

		/* check for reusable slot in the link group */
		buf_desc = smc_buf_get_slot(bufsize_short, lock, buf_list);
		if (buf_desc) {
			memset(buf_desc->cpu_addr, 0, bufsize);
			break; /* found reusable slot */
		}

		if (is_smcd)
			buf_desc = smcd_new_buf_create(lgr, is_rmb, bufsize);
		else
			buf_desc = smcr_new_buf_create(lgr, is_rmb, bufsize);

		if (PTR_ERR(buf_desc) == -ENOMEM)
			break;
		if (IS_ERR(buf_desc))
			continue;

		buf_desc->used = 1;
		mutex_lock(lock);
		list_add(&buf_desc->list, buf_list);
		mutex_unlock(lock);
		break; /* found */
	}

	if (IS_ERR(buf_desc))
		return -ENOMEM;

	if (!is_smcd) {
		if (smcr_buf_map_usable_links(lgr, buf_desc, is_rmb)) {
			smcr_buf_unuse(buf_desc, lgr);
			return -ENOMEM;
		}
	}

	if (is_rmb) {
		conn->rmb_desc = buf_desc;
		conn->rmbe_size_short = bufsize_short;
		smc->sk.sk_rcvbuf = bufsize * 2;
		atomic_set(&conn->bytes_to_rcv, 0);
		conn->rmbe_update_limit =
			smc_rmb_wnd_update_limit(buf_desc->len);
		if (is_smcd)
			smc_ism_set_conn(conn); /* map RMB/smcd_dev to conn */
	} else {
		conn->sndbuf_desc = buf_desc;
		smc->sk.sk_sndbuf = bufsize * 2;
		atomic_set(&conn->sndbuf_space, bufsize);
	}
	return 0;
}
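
/* Illustration of the size fallback above: with sk_rcvbuf set to 128 KB the
 * RMB search starts at the compressed index for 64 KB (sk_rcvbuf / 2). If no
 * reusable slot exists and a fresh allocation fails with -EAGAIN, the loop
 * retries with 32 KB, 16 KB, and so on; only -ENOMEM aborts the search.
 */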

void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn)
{
	if (!conn->lgr || conn->lgr->is_smcd || !smc_link_usable(conn->lnk))
		return;
	smc_ib_sync_sg_for_cpu(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE);
}

void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn)
{
	if (!conn->lgr || conn->lgr->is_smcd || !smc_link_usable(conn->lnk))
		return;
	smc_ib_sync_sg_for_device(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE);
}

void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn)
{
	int i;

	if (!conn->lgr || conn->lgr->is_smcd)
		return;
	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		if (!smc_link_usable(&conn->lgr->lnk[i]))
			continue;
		smc_ib_sync_sg_for_cpu(&conn->lgr->lnk[i], conn->rmb_desc,
				       DMA_FROM_DEVICE);
	}
}

void smc_rmb_sync_sg_for_device(struct smc_connection *conn)
{
	int i;

	if (!conn->lgr || conn->lgr->is_smcd)
		return;
	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		if (!smc_link_usable(&conn->lgr->lnk[i]))
			continue;
		smc_ib_sync_sg_for_device(&conn->lgr->lnk[i], conn->rmb_desc,
					  DMA_FROM_DEVICE);
	}
}

/* create the send and receive buffer for an SMC socket;
 * receive buffers are called RMBs;
 * (even though the SMC protocol allows more than one RMB-element per RMB,
 * the Linux implementation uses just one RMB-element per RMB, i.e. uses an
 * extra RMB for every connection in a link group)
 */
int smc_buf_create(struct smc_sock *smc, bool is_smcd)
{
	int rc;

	/* create send buffer */
	rc = __smc_buf_create(smc, is_smcd, false);
	if (rc)
		return rc;
	/* create rmb */
	rc = __smc_buf_create(smc, is_smcd, true);
	if (rc)
		smc_buf_free(smc->conn.lgr, false, smc->conn.sndbuf_desc);
	return rc;
}

static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr)
{
	int i;

	for_each_clear_bit(i, lgr->rtokens_used_mask, SMC_RMBS_PER_LGR_MAX) {
		if (!test_and_set_bit(i, lgr->rtokens_used_mask))
			return i;
	}
	return -ENOSPC;
}

static int smc_rtoken_find_by_link(struct smc_link_group *lgr, int lnk_idx,
				   u32 rkey)
{
	int i;

	for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
		if (test_bit(i, lgr->rtokens_used_mask) &&
		    lgr->rtokens[i][lnk_idx].rkey == rkey)
			return i;
	}
	return -ENOENT;
}

/* set rtoken for a new link to an existing rmb */
void smc_rtoken_set(struct smc_link_group *lgr, int link_idx, int link_idx_new,
		    __be32 nw_rkey_known, __be64 nw_vaddr, __be32 nw_rkey)
{
	int rtok_idx;

	rtok_idx = smc_rtoken_find_by_link(lgr, link_idx, ntohl(nw_rkey_known));
	if (rtok_idx == -ENOENT)
		return;
	lgr->rtokens[rtok_idx][link_idx_new].rkey = ntohl(nw_rkey);
	lgr->rtokens[rtok_idx][link_idx_new].dma_addr = be64_to_cpu(nw_vaddr);
}

/* set rtoken for a new link whose link_id is given */
void smc_rtoken_set2(struct smc_link_group *lgr, int rtok_idx, int link_id,
		     __be64 nw_vaddr, __be32 nw_rkey)
{
	u64 dma_addr = be64_to_cpu(nw_vaddr);
	u32 rkey = ntohl(nw_rkey);
	bool found = false;
	int link_idx;

	for (link_idx = 0; link_idx < SMC_LINKS_PER_LGR_MAX; link_idx++) {
		if (lgr->lnk[link_idx].link_id == link_id) {
			found = true;
			break;
		}
	}
	if (!found)
		return;
	lgr->rtokens[rtok_idx][link_idx].rkey = rkey;
	lgr->rtokens[rtok_idx][link_idx].dma_addr = dma_addr;
}

/* add a new rtoken from peer */
int smc_rtoken_add(struct smc_link *lnk, __be64 nw_vaddr, __be32 nw_rkey)
{
	struct smc_link_group *lgr = smc_get_lgr(lnk);
	u64 dma_addr = be64_to_cpu(nw_vaddr);
	u32 rkey = ntohl(nw_rkey);
	int i;

	for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
		if (lgr->rtokens[i][lnk->link_idx].rkey == rkey &&
		    lgr->rtokens[i][lnk->link_idx].dma_addr == dma_addr &&
		    test_bit(i, lgr->rtokens_used_mask)) {
			/* already in list */
			return i;
		}
	}
	i = smc_rmb_reserve_rtoken_idx(lgr);
	if (i < 0)
		return i;
	lgr->rtokens[i][lnk->link_idx].rkey = rkey;
	lgr->rtokens[i][lnk->link_idx].dma_addr = dma_addr;
	return i;
}

/* delete an rtoken from all links */
int smc_rtoken_delete(struct smc_link *lnk, __be32 nw_rkey)
{
	struct smc_link_group *lgr = smc_get_lgr(lnk);
	u32 rkey = ntohl(nw_rkey);
	int i, j;

	for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
		if (lgr->rtokens[i][lnk->link_idx].rkey == rkey &&
		    test_bit(i, lgr->rtokens_used_mask)) {
			for (j = 0; j < SMC_LINKS_PER_LGR_MAX; j++) {
				lgr->rtokens[i][j].rkey = 0;
				lgr->rtokens[i][j].dma_addr = 0;
			}
			clear_bit(i, lgr->rtokens_used_mask);
			return 0;
		}
	}
	return -ENOENT;
}

/* save rkey and dma_addr received from peer during clc handshake */
int smc_rmb_rtoken_handling(struct smc_connection *conn,
			    struct smc_link *lnk,
			    struct smc_clc_msg_accept_confirm *clc)
{
	conn->rtoken_idx = smc_rtoken_add(lnk, clc->rmb_dma_addr,
					  clc->rmb_rkey);
	if (conn->rtoken_idx < 0)
		return conn->rtoken_idx;
	return 0;
}

static void smc_core_going_away(void)
{
	struct smc_ib_device *smcibdev;
	struct smcd_dev *smcd;

	spin_lock(&smc_ib_devices.lock);
	list_for_each_entry(smcibdev, &smc_ib_devices.list, list) {
		int i;

		for (i = 0; i < SMC_MAX_PORTS; i++)
			set_bit(i, smcibdev->ports_going_away);
	}
	spin_unlock(&smc_ib_devices.lock);

	spin_lock(&smcd_dev_list.lock);
	list_for_each_entry(smcd, &smcd_dev_list.list, list) {
		smcd->going_away = 1;
	}
	spin_unlock(&smcd_dev_list.lock);
}

/* Clean up all SMC link groups */
static void smc_lgrs_shutdown(void)
{
	struct smcd_dev *smcd;

	smc_core_going_away();

	smc_smcr_terminate_all(NULL);

	spin_lock(&smcd_dev_list.lock);
	list_for_each_entry(smcd, &smcd_dev_list.list, list)
		smc_smcd_terminate_all(smcd);
	spin_unlock(&smcd_dev_list.lock);
}

static int smc_core_reboot_event(struct notifier_block *this,
				 unsigned long event, void *ptr)
{
	smc_lgrs_shutdown();
	smc_ib_unregister_client();
	return 0;
}

static struct notifier_block smc_reboot_notifier = {
	.notifier_call = smc_core_reboot_event,
};

int __init smc_core_init(void)
{
	return register_reboot_notifier(&smc_reboot_notifier);
}

/* Called (from smc_exit) when module is removed */
void smc_core_exit(void)
{
	unregister_reboot_notifier(&smc_reboot_notifier);
	smc_lgrs_shutdown();
}