// SPDX-License-Identifier: GPL-2.0
/*
 * Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 * Basic Transport Functions exploiting Infiniband API
 *
 * Copyright IBM Corp. 2016
 *
 * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com>
 */

#include <linux/socket.h>
#include <linux/if_vlan.h>
#include <linux/random.h>
#include <linux/workqueue.h>
#include <linux/wait.h>
#include <linux/reboot.h>
#include <net/tcp.h>
#include <net/sock.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_cache.h>

#include "smc.h"
#include "smc_clc.h"
#include "smc_core.h"
#include "smc_ib.h"
#include "smc_wr.h"
#include "smc_llc.h"
#include "smc_cdc.h"
#include "smc_close.h"
#include "smc_ism.h"

#define SMC_LGR_NUM_INCR	256
#define SMC_LGR_FREE_DELAY_SERV	(600 * HZ)
#define SMC_LGR_FREE_DELAY_CLNT	(SMC_LGR_FREE_DELAY_SERV + 10 * HZ)
#define SMC_LGR_FREE_DELAY_FAST	(8 * HZ)

static struct smc_lgr_list smc_lgr_list = {	/* established link groups */
	.lock = __SPIN_LOCK_UNLOCKED(smc_lgr_list.lock),
	.list = LIST_HEAD_INIT(smc_lgr_list.list),
	.num = 0,
};

static atomic_t lgr_cnt = ATOMIC_INIT(0); /* number of existing link groups */
static DECLARE_WAIT_QUEUE_HEAD(lgrs_deleted);

struct smc_ib_up_work {
	struct work_struct	work;
	struct smc_link_group	*lgr;
	struct smc_ib_device	*smcibdev;
	u8			ibport;
};

static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
			 struct smc_buf_desc *buf_desc);
static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft);

static void smc_link_up_work(struct work_struct *work);
static void smc_link_down_work(struct work_struct *work);

/* return head of link group list and its lock for a given link group */
static inline struct list_head *smc_lgr_list_head(struct smc_link_group *lgr,
						  spinlock_t **lgr_lock)
{
	if (lgr->is_smcd) {
		*lgr_lock = &lgr->smcd->lgr_lock;
		return &lgr->smcd->lgr_list;
	}

	*lgr_lock = &smc_lgr_list.lock;
	return &smc_lgr_list.list;
}

static void smc_lgr_schedule_free_work(struct smc_link_group *lgr)
{
	/* client link group creation always follows the server link group
	 * creation. For client use a somewhat higher removal delay time,
	 * otherwise there is a risk of out-of-sync link groups.
	 */
	if (!lgr->freeing && !lgr->freefast) {
		mod_delayed_work(system_wq, &lgr->free_work,
				 (!lgr->is_smcd && lgr->role == SMC_CLNT) ?
						SMC_LGR_FREE_DELAY_CLNT :
						SMC_LGR_FREE_DELAY_SERV);
	}
}

void smc_lgr_schedule_free_work_fast(struct smc_link_group *lgr)
{
	if (!lgr->freeing && !lgr->freefast) {
		lgr->freefast = 1;
		mod_delayed_work(system_wq, &lgr->free_work,
				 SMC_LGR_FREE_DELAY_FAST);
	}
}

/* Register connection's alert token in our lookup structure.
 * To use rbtrees we have to implement our own insert core.
 * Requires @conns_lock
 * @smc		connection to register
 * Returns 0 on success, != 0 otherwise.
 */
static void smc_lgr_add_alert_token(struct smc_connection *conn)
{
	struct rb_node **link, *parent = NULL;
	u32 token = conn->alert_token_local;

	link = &conn->lgr->conns_all.rb_node;
	while (*link) {
		struct smc_connection *cur = rb_entry(*link,
					struct smc_connection, alert_node);

		parent = *link;
		if (cur->alert_token_local > token)
			link = &parent->rb_left;
		else
			link = &parent->rb_right;
	}
	/* Put the new node there */
	rb_link_node(&conn->alert_node, parent, link);
	rb_insert_color(&conn->alert_node, &conn->lgr->conns_all);
}

/* assign an SMC-R link to the connection */
static int smcr_lgr_conn_assign_link(struct smc_connection *conn, bool first)
{
	enum smc_link_state expected = first ? SMC_LNK_ACTIVATING :
				       SMC_LNK_ACTIVE;
	int i, j;

	/* do link balancing */
	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		struct smc_link *lnk = &conn->lgr->lnk[i];

		if (lnk->state != expected || lnk->link_is_asym)
			continue;
		if (conn->lgr->role == SMC_CLNT) {
			conn->lnk = lnk; /* temporary, SMC server assigns link */
			break;
		}
		if (conn->lgr->conns_num % 2) {
			for (j = i + 1; j < SMC_LINKS_PER_LGR_MAX; j++) {
				struct smc_link *lnk2;

				lnk2 = &conn->lgr->lnk[j];
				if (lnk2->state == expected &&
				    !lnk2->link_is_asym) {
					conn->lnk = lnk2;
					break;
				}
			}
		}
		if (!conn->lnk)
			conn->lnk = lnk;
		break;
	}
	if (!conn->lnk)
		return SMC_CLC_DECL_NOACTLINK;
	return 0;
}

/* Register connection in link group by assigning an alert token
 * registered in a search tree.
 * Requires @conns_lock
 * Note that '0' is a reserved value and not assigned.
 */
static int smc_lgr_register_conn(struct smc_connection *conn, bool first)
{
	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
	static atomic_t nexttoken = ATOMIC_INIT(0);
	int rc;

	if (!conn->lgr->is_smcd) {
		rc = smcr_lgr_conn_assign_link(conn, first);
		if (rc)
			return rc;
	}
	/* find a new alert_token_local value not yet used by some connection
	 * in this link group
	 */
	sock_hold(&smc->sk); /* sock_put in smc_lgr_unregister_conn() */
	while (!conn->alert_token_local) {
		conn->alert_token_local = atomic_inc_return(&nexttoken);
		if (smc_lgr_find_conn(conn->alert_token_local, conn->lgr))
			conn->alert_token_local = 0;
	}
	smc_lgr_add_alert_token(conn);
	conn->lgr->conns_num++;
	return 0;
}

/* Unregister connection and reset the alert token of the given connection
 */
static void __smc_lgr_unregister_conn(struct smc_connection *conn)
{
	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
	struct smc_link_group *lgr = conn->lgr;

	rb_erase(&conn->alert_node, &lgr->conns_all);
	lgr->conns_num--;
	conn->alert_token_local = 0;
	sock_put(&smc->sk); /* sock_hold in smc_lgr_register_conn() */
}

/* Unregister connection from lgr
 */
static void smc_lgr_unregister_conn(struct smc_connection *conn)
{
	struct smc_link_group *lgr = conn->lgr;

	if (!lgr)
		return;
	write_lock_bh(&lgr->conns_lock);
	if (conn->alert_token_local) {
		__smc_lgr_unregister_conn(conn);
	}
	write_unlock_bh(&lgr->conns_lock);
	conn->lgr = NULL;
}

void smc_lgr_cleanup_early(struct smc_connection *conn)
{
	struct smc_link_group *lgr = conn->lgr;
	struct list_head *lgr_list;
	spinlock_t *lgr_lock;

	if (!lgr)
		return;

	smc_conn_free(conn);
	lgr_list = smc_lgr_list_head(lgr, &lgr_lock);
	spin_lock_bh(lgr_lock);
	/* do not use this link group for new connections */
	if (!list_empty(lgr_list))
		list_del_init(lgr_list);
	spin_unlock_bh(lgr_lock);
	smc_lgr_schedule_free_work_fast(lgr);
}

static void smcr_lgr_link_deactivate_all(struct smc_link_group *lgr)
{
	int i;

	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		struct smc_link *lnk = &lgr->lnk[i];

		if (smc_link_usable(lnk))
			lnk->state = SMC_LNK_INACTIVE;
	}
	wake_up_interruptible_all(&lgr->llc_waiter);
}

static void smc_lgr_free(struct smc_link_group *lgr);

static void smc_lgr_free_work(struct work_struct *work)
{
	struct smc_link_group *lgr = container_of(to_delayed_work(work),
						  struct smc_link_group,
						  free_work);
	spinlock_t *lgr_lock;
	bool conns;

	smc_lgr_list_head(lgr, &lgr_lock);
	spin_lock_bh(lgr_lock);
	if (lgr->freeing) {
		spin_unlock_bh(lgr_lock);
		return;
	}
	read_lock_bh(&lgr->conns_lock);
	conns = RB_EMPTY_ROOT(&lgr->conns_all);
	read_unlock_bh(&lgr->conns_lock);
	if (!conns) { /* number of lgr connections is no longer zero */
		spin_unlock_bh(lgr_lock);
		return;
	}
	list_del_init(&lgr->list); /* remove from smc_lgr_list */
	lgr->freeing = 1; /* this instance does the freeing, no new schedule */
	spin_unlock_bh(lgr_lock);
	cancel_delayed_work(&lgr->free_work);

	if (!lgr->is_smcd && !lgr->terminating)
		smc_llc_send_link_delete_all(lgr, true,
					     SMC_LLC_DEL_PROG_INIT_TERM);
	if (lgr->is_smcd && !lgr->terminating)
		smc_ism_signal_shutdown(lgr);
	if (!lgr->is_smcd)
		smcr_lgr_link_deactivate_all(lgr);
	smc_lgr_free(lgr);
}

static void smc_lgr_terminate_work(struct work_struct *work)
{
	struct smc_link_group *lgr = container_of(work, struct smc_link_group,
						  terminate_work);

	__smc_lgr_terminate(lgr, true);
}

/* return next unique link id for the lgr */
static u8 smcr_next_link_id(struct smc_link_group *lgr)
{
	u8 link_id;
	int i;

	while (1) {
again:
		link_id = ++lgr->next_link_id;
		if (!link_id)	/* skip zero as link_id */
			link_id = ++lgr->next_link_id;
		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
			if (smc_link_usable(&lgr->lnk[i]) &&
			    lgr->lnk[i].link_id == link_id)
				goto again;	/* link_id already in use */
		}
		break;
	}
	return link_id;
}

int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
		   u8 link_idx, struct smc_init_info *ini)
{
	u8 rndvec[3];
	int rc;

	get_device(&ini->ib_dev->ibdev->dev);
	atomic_inc(&ini->ib_dev->lnk_cnt);
	lnk->state = SMC_LNK_ACTIVATING;
	lnk->link_id = smcr_next_link_id(lgr);
	lnk->lgr = lgr;
	lnk->link_idx = link_idx;
	lnk->smcibdev = ini->ib_dev;
	lnk->ibport = ini->ib_port;
	lnk->path_mtu = ini->ib_dev->pattr[ini->ib_port - 1].active_mtu;
	smc_llc_link_set_uid(lnk);
	INIT_WORK(&lnk->link_down_wrk, smc_link_down_work);
	if (!ini->ib_dev->initialized) {
		rc = (int)smc_ib_setup_per_ibdev(ini->ib_dev);
		if (rc)
			goto out;
	}
	get_random_bytes(rndvec, sizeof(rndvec));
	lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) +
		(rndvec[2] << 16);
	rc = smc_ib_determine_gid(lnk->smcibdev, lnk->ibport,
				  ini->vlan_id, lnk->gid, &lnk->sgid_index);
	if (rc)
		goto out;
	rc = smc_llc_link_init(lnk);
	if (rc)
		goto out;
	rc = smc_wr_alloc_link_mem(lnk);
	if (rc)
		goto clear_llc_lnk;
	rc = smc_ib_create_protection_domain(lnk);
	if (rc)
		goto free_link_mem;
	rc = smc_ib_create_queue_pair(lnk);
	if (rc)
		goto dealloc_pd;
	rc = smc_wr_create_link(lnk);
	if (rc)
		goto destroy_qp;
	return 0;

destroy_qp:
	smc_ib_destroy_queue_pair(lnk);
dealloc_pd:
	smc_ib_dealloc_protection_domain(lnk);
free_link_mem:
	smc_wr_free_link_mem(lnk);
clear_llc_lnk:
	smc_llc_link_clear(lnk, false);
out:
	put_device(&ini->ib_dev->ibdev->dev);
	memset(lnk, 0, sizeof(struct smc_link));
	lnk->state = SMC_LNK_UNUSED;
	if (!atomic_dec_return(&ini->ib_dev->lnk_cnt))
		wake_up(&ini->ib_dev->lnks_deleted);
	return rc;
}

/* create a new SMC link group */
static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
{
	struct smc_link_group *lgr;
	struct list_head *lgr_list;
	struct smc_link *lnk;
	spinlock_t *lgr_lock;
	u8 link_idx;
	int rc = 0;
	int i;

	if (ini->is_smcd && ini->vlan_id) {
		if (smc_ism_get_vlan(ini->ism_dev, ini->vlan_id)) {
			rc = SMC_CLC_DECL_ISMVLANERR;
			goto out;
		}
	}

	lgr = kzalloc(sizeof(*lgr), GFP_KERNEL);
	if (!lgr) {
		rc = SMC_CLC_DECL_MEM;
		goto ism_put_vlan;
	}
	lgr->is_smcd = ini->is_smcd;
	lgr->sync_err = 0;
	lgr->terminating = 0;
	lgr->freefast = 0;
	lgr->freeing = 0;
	lgr->vlan_id = ini->vlan_id;
	mutex_init(&lgr->sndbufs_lock);
	mutex_init(&lgr->rmbs_lock);
	rwlock_init(&lgr->conns_lock);
	for (i = 0; i < SMC_RMBE_SIZES; i++) {
		INIT_LIST_HEAD(&lgr->sndbufs[i]);
		INIT_LIST_HEAD(&lgr->rmbs[i]);
	}
	lgr->next_link_id = 0;
	smc_lgr_list.num += SMC_LGR_NUM_INCR;
	memcpy(&lgr->id, (u8 *)&smc_lgr_list.num, SMC_LGR_ID_SIZE);
	INIT_DELAYED_WORK(&lgr->free_work, smc_lgr_free_work);
	INIT_WORK(&lgr->terminate_work, smc_lgr_terminate_work);
	lgr->conns_all = RB_ROOT;
	if (ini->is_smcd) {
		/* SMC-D specific settings */
		get_device(&ini->ism_dev->dev);
		lgr->peer_gid = ini->ism_gid;
		lgr->smcd = ini->ism_dev;
		lgr_list = &ini->ism_dev->lgr_list;
		lgr_lock = &lgr->smcd->lgr_lock;
		lgr->peer_shutdown = 0;
		atomic_inc(&ini->ism_dev->lgr_cnt);
	} else {
		/* SMC-R specific settings */
		lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
		memcpy(lgr->peer_systemid, ini->ib_lcl->id_for_peer,
		       SMC_SYSTEMID_LEN);
		memcpy(lgr->pnet_id, ini->ib_dev->pnetid[ini->ib_port - 1],
		       SMC_MAX_PNETID_LEN);
		smc_llc_lgr_init(lgr, smc);

		link_idx = SMC_SINGLE_LINK;
		lnk = &lgr->lnk[link_idx];
		rc = smcr_link_init(lgr, lnk, link_idx, ini);
		if (rc)
			goto free_lgr;
		lgr_list = &smc_lgr_list.list;
		lgr_lock = &smc_lgr_list.lock;
		atomic_inc(&lgr_cnt);
	}
	smc->conn.lgr = lgr;
	spin_lock_bh(lgr_lock);
	list_add(&lgr->list, lgr_list);
	spin_unlock_bh(lgr_lock);
	return 0;

free_lgr:
	kfree(lgr);
ism_put_vlan:
	if (ini->is_smcd && ini->vlan_id)
		smc_ism_put_vlan(ini->ism_dev, ini->vlan_id);
out:
	if (rc < 0) {
		if (rc == -ENOMEM)
			rc = SMC_CLC_DECL_MEM;
		else
			rc = SMC_CLC_DECL_INTERR;
	}
	return rc;
}

static int smc_write_space(struct smc_connection *conn)
{
	int buffer_len = conn->peer_rmbe_size;
	union smc_host_cursor prod;
	union smc_host_cursor cons;
	int space;

	smc_curs_copy(&prod, &conn->local_tx_ctrl.prod, conn);
	smc_curs_copy(&cons, &conn->local_rx_ctrl.cons, conn);
	/* determine rx_buf space */
	space = buffer_len - smc_curs_diff(buffer_len, &cons, &prod);
	return space;
}

static int smc_switch_cursor(struct smc_sock *smc)
{
	struct smc_connection *conn = &smc->conn;
	union smc_host_cursor cons, fin;
	int rc = 0;
	int diff;

	smc_curs_copy(&conn->tx_curs_sent, &conn->tx_curs_fin, conn);
	smc_curs_copy(&fin, &conn->local_tx_ctrl_fin, conn);
	/* set prod cursor to old state, enforce tx_rdma_writes() */
	smc_curs_copy(&conn->local_tx_ctrl.prod, &fin, conn);
	smc_curs_copy(&cons, &conn->local_rx_ctrl.cons, conn);

	if (smc_curs_comp(conn->peer_rmbe_size, &cons, &fin) < 0) {
		/* cons cursor advanced more than fin, and prod was set to
		 * fin above, so now prod is smaller than cons. Fix that.
		 */
		diff = smc_curs_diff(conn->peer_rmbe_size, &fin, &cons);
		smc_curs_add(conn->sndbuf_desc->len,
			     &conn->tx_curs_sent, diff);
		smc_curs_add(conn->sndbuf_desc->len,
			     &conn->tx_curs_fin, diff);

		smp_mb__before_atomic();
		atomic_add(diff, &conn->sndbuf_space);
		smp_mb__after_atomic();

		smc_curs_add(conn->peer_rmbe_size,
			     &conn->local_tx_ctrl.prod, diff);
		smc_curs_add(conn->peer_rmbe_size,
			     &conn->local_tx_ctrl_fin, diff);
	}
	/* recalculate, value is used by tx_rdma_writes() */
	atomic_set(&smc->conn.peer_rmbe_space, smc_write_space(conn));

	if (smc->sk.sk_state != SMC_INIT &&
	    smc->sk.sk_state != SMC_CLOSED) {
		rc = smcr_cdc_msg_send_validation(conn);
		if (!rc) {
			schedule_delayed_work(&conn->tx_work, 0);
			smc->sk.sk_data_ready(&smc->sk);
		}
	}
	return rc;
}

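/* Switch all connections still using from_lnk over to another active link of
 * the link group. Returns the link the connections were moved to, or NULL if
 * no alternate active link is available (link group termination is then
 * scheduled) or if switching the send cursors of a connection failed. With
 * is_dev_err set, links on the same device and port as from_lnk are skipped.
 */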
struct smc_link *smc_switch_conns(struct smc_link_group *lgr,
				  struct smc_link *from_lnk, bool is_dev_err)
{
	struct smc_link *to_lnk = NULL;
	struct smc_connection *conn;
	struct smc_sock *smc;
	struct rb_node *node;
	int i, rc = 0;

	/* link is inactive, wake up tx waiters */
	smc_wr_wakeup_tx_wait(from_lnk);

	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		if (lgr->lnk[i].state != SMC_LNK_ACTIVE ||
		    i == from_lnk->link_idx)
			continue;
		if (is_dev_err && from_lnk->smcibdev == lgr->lnk[i].smcibdev &&
		    from_lnk->ibport == lgr->lnk[i].ibport) {
			continue;
		}
		to_lnk = &lgr->lnk[i];
		break;
	}
	if (!to_lnk) {
		smc_lgr_terminate_sched(lgr);
		return NULL;
	}
again:
	read_lock_bh(&lgr->conns_lock);
	for (node = rb_first(&lgr->conns_all); node; node = rb_next(node)) {
		conn = rb_entry(node, struct smc_connection, alert_node);
		if (conn->lnk != from_lnk)
			continue;
		smc = container_of(conn, struct smc_sock, conn);
		/* conn->lnk not yet set in SMC_INIT state */
		if (smc->sk.sk_state == SMC_INIT)
			continue;
		if (smc->sk.sk_state == SMC_CLOSED ||
		    smc->sk.sk_state == SMC_PEERCLOSEWAIT1 ||
		    smc->sk.sk_state == SMC_PEERCLOSEWAIT2 ||
		    smc->sk.sk_state == SMC_APPFINCLOSEWAIT ||
		    smc->sk.sk_state == SMC_APPCLOSEWAIT1 ||
		    smc->sk.sk_state == SMC_APPCLOSEWAIT2 ||
		    smc->sk.sk_state == SMC_PEERFINCLOSEWAIT ||
		    smc->sk.sk_state == SMC_PEERABORTWAIT ||
		    smc->sk.sk_state == SMC_PROCESSABORT) {
			spin_lock_bh(&conn->send_lock);
			conn->lnk = to_lnk;
			spin_unlock_bh(&conn->send_lock);
			continue;
		}
		sock_hold(&smc->sk);
		read_unlock_bh(&lgr->conns_lock);
		/* avoid race with smcr_tx_sndbuf_nonempty() */
		spin_lock_bh(&conn->send_lock);
		conn->lnk = to_lnk;
		rc = smc_switch_cursor(smc);
		spin_unlock_bh(&conn->send_lock);
		sock_put(&smc->sk);
		if (rc) {
			smcr_link_down_cond_sched(to_lnk);
			return NULL;
		}
		goto again;
	}
	read_unlock_bh(&lgr->conns_lock);
	return to_lnk;
}

static void smcr_buf_unuse(struct smc_buf_desc *rmb_desc,
			   struct smc_link_group *lgr)
{
	int rc;

	if (rmb_desc->is_conf_rkey && !list_empty(&lgr->list)) {
		/* unregister rmb with peer */
		rc = smc_llc_flow_initiate(lgr, SMC_LLC_FLOW_RKEY);
		if (!rc) {
			/* protect against smc_llc_cli_rkey_exchange() */
			mutex_lock(&lgr->llc_conf_mutex);
			smc_llc_do_delete_rkey(lgr, rmb_desc);
			rmb_desc->is_conf_rkey = false;
			mutex_unlock(&lgr->llc_conf_mutex);
			smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
		}
	}

	if (rmb_desc->is_reg_err) {
		/* buf registration failed, reuse not possible */
		mutex_lock(&lgr->rmbs_lock);
		list_del(&rmb_desc->list);
		mutex_unlock(&lgr->rmbs_lock);

		smc_buf_free(lgr, true, rmb_desc);
	} else {
		rmb_desc->used = 0;
	}
}

static void smc_buf_unuse(struct smc_connection *conn,
			  struct smc_link_group *lgr)
{
	if (conn->sndbuf_desc)
		conn->sndbuf_desc->used = 0;
	if (conn->rmb_desc && lgr->is_smcd)
		conn->rmb_desc->used = 0;
	else if (conn->rmb_desc)
		smcr_buf_unuse(conn->rmb_desc, lgr);
}

/* remove a finished connection from its link group */
void smc_conn_free(struct smc_connection *conn)
{
	struct smc_link_group *lgr = conn->lgr;

	if (!lgr)
		return;
	if (lgr->is_smcd) {
		if (!list_empty(&lgr->list))
			smc_ism_unset_conn(conn);
		tasklet_kill(&conn->rx_tsklet);
	} else {
		smc_cdc_tx_dismiss_slots(conn);
		if (current_work() != &conn->abort_work)
			cancel_work_sync(&conn->abort_work);
	}
	if (!list_empty(&lgr->list)) {
		smc_lgr_unregister_conn(conn);
		smc_buf_unuse(conn, lgr); /* allow buffer reuse */
	}

	if (!lgr->conns_num)
		smc_lgr_schedule_free_work(lgr);
}

/* unregister a link from a buf_desc */
static void smcr_buf_unmap_link(struct smc_buf_desc *buf_desc, bool is_rmb,
				struct smc_link *lnk)
{
	if (is_rmb)
		buf_desc->is_reg_mr[lnk->link_idx] = false;
	if (!buf_desc->is_map_ib[lnk->link_idx])
		return;
	if (is_rmb) {
		if (buf_desc->mr_rx[lnk->link_idx]) {
			smc_ib_put_memory_region(
					buf_desc->mr_rx[lnk->link_idx]);
			buf_desc->mr_rx[lnk->link_idx] = NULL;
		}
		smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_FROM_DEVICE);
	} else {
		smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_TO_DEVICE);
	}
	sg_free_table(&buf_desc->sgt[lnk->link_idx]);
	buf_desc->is_map_ib[lnk->link_idx] = false;
}

/* unmap all buffers of lgr for a deleted link */
static void smcr_buf_unmap_lgr(struct smc_link *lnk)
{
	struct smc_link_group *lgr = lnk->lgr;
	struct smc_buf_desc *buf_desc, *bf;
	int i;

	for (i = 0; i < SMC_RMBE_SIZES; i++) {
		mutex_lock(&lgr->rmbs_lock);
		list_for_each_entry_safe(buf_desc, bf, &lgr->rmbs[i], list)
			smcr_buf_unmap_link(buf_desc, true, lnk);
		mutex_unlock(&lgr->rmbs_lock);
		mutex_lock(&lgr->sndbufs_lock);
		list_for_each_entry_safe(buf_desc, bf, &lgr->sndbufs[i],
					 list)
			smcr_buf_unmap_link(buf_desc, false, lnk);
		mutex_unlock(&lgr->sndbufs_lock);
	}
}

static void smcr_rtoken_clear_link(struct smc_link *lnk)
{
	struct smc_link_group *lgr = lnk->lgr;
	int i;

	for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
		lgr->rtokens[i][lnk->link_idx].rkey = 0;
		lgr->rtokens[i][lnk->link_idx].dma_addr = 0;
	}
}

/* must be called under lgr->llc_conf_mutex lock */
void smcr_link_clear(struct smc_link *lnk, bool log)
{
	struct smc_ib_device *smcibdev;

	if (!lnk->lgr || lnk->state == SMC_LNK_UNUSED)
		return;
	lnk->peer_qpn = 0;
	smc_llc_link_clear(lnk, log);
	smcr_buf_unmap_lgr(lnk);
	smcr_rtoken_clear_link(lnk);
	smc_ib_modify_qp_reset(lnk);
	smc_wr_free_link(lnk);
	smc_ib_destroy_queue_pair(lnk);
	smc_ib_dealloc_protection_domain(lnk);
	smc_wr_free_link_mem(lnk);
	put_device(&lnk->smcibdev->ibdev->dev);
	smcibdev = lnk->smcibdev;
	memset(lnk, 0, sizeof(struct smc_link));
	lnk->state = SMC_LNK_UNUSED;
	if (!atomic_dec_return(&smcibdev->lnk_cnt))
		wake_up(&smcibdev->lnks_deleted);
}

static void smcr_buf_free(struct smc_link_group *lgr, bool is_rmb,
			  struct smc_buf_desc *buf_desc)
{
	int i;

	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++)
		smcr_buf_unmap_link(buf_desc, is_rmb, &lgr->lnk[i]);

	if (buf_desc->pages)
		__free_pages(buf_desc->pages, buf_desc->order);
	kfree(buf_desc);
}

static void smcd_buf_free(struct smc_link_group *lgr, bool is_dmb,
			  struct smc_buf_desc *buf_desc)
{
	if (is_dmb) {
		/* restore original buf len */
		buf_desc->len += sizeof(struct smcd_cdc_msg);
		smc_ism_unregister_dmb(lgr->smcd, buf_desc);
	} else {
		kfree(buf_desc->cpu_addr);
	}
	kfree(buf_desc);
}

static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
			 struct smc_buf_desc *buf_desc)
{
	if (lgr->is_smcd)
		smcd_buf_free(lgr, is_rmb, buf_desc);
	else
		smcr_buf_free(lgr, is_rmb, buf_desc);
}

static void __smc_lgr_free_bufs(struct smc_link_group *lgr, bool is_rmb)
{
	struct smc_buf_desc *buf_desc, *bf_desc;
	struct list_head *buf_list;
	int i;

	for (i = 0; i < SMC_RMBE_SIZES; i++) {
		if (is_rmb)
			buf_list = &lgr->rmbs[i];
		else
			buf_list = &lgr->sndbufs[i];
		list_for_each_entry_safe(buf_desc, bf_desc, buf_list,
					 list) {
			list_del(&buf_desc->list);
			smc_buf_free(lgr, is_rmb, buf_desc);
		}
	}
}

static void smc_lgr_free_bufs(struct smc_link_group *lgr)
{
	/* free send buffers */
	__smc_lgr_free_bufs(lgr, false);
	/* free rmbs */
	__smc_lgr_free_bufs(lgr, true);
}

/* remove a link group */
static void smc_lgr_free(struct smc_link_group *lgr)
{
	int i;

	if (!lgr->is_smcd) {
		mutex_lock(&lgr->llc_conf_mutex);
		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
			if (lgr->lnk[i].state != SMC_LNK_UNUSED)
				smcr_link_clear(&lgr->lnk[i], false);
		}
		mutex_unlock(&lgr->llc_conf_mutex);
		smc_llc_lgr_clear(lgr);
	}

	smc_lgr_free_bufs(lgr);
	if (lgr->is_smcd) {
		if (!lgr->terminating) {
			smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
			put_device(&lgr->smcd->dev);
		}
		if (!atomic_dec_return(&lgr->smcd->lgr_cnt))
			wake_up(&lgr->smcd->lgrs_deleted);
	} else {
		if (!atomic_dec_return(&lgr_cnt))
			wake_up(&lgrs_deleted);
	}
	kfree(lgr);
}

static void smcd_unregister_all_dmbs(struct smc_link_group *lgr)
{
	int i;

	for (i = 0; i < SMC_RMBE_SIZES; i++) {
		struct smc_buf_desc *buf_desc;

		list_for_each_entry(buf_desc, &lgr->rmbs[i], list) {
			buf_desc->len += sizeof(struct smcd_cdc_msg);
			smc_ism_unregister_dmb(lgr->smcd, buf_desc);
		}
	}
}

static void smc_sk_wake_ups(struct smc_sock *smc)
{
	smc->sk.sk_write_space(&smc->sk);
	smc->sk.sk_data_ready(&smc->sk);
	smc->sk.sk_state_change(&smc->sk);
}

/* kill a connection */
static void smc_conn_kill(struct smc_connection *conn, bool soft)
{
	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);

	if (conn->lgr->is_smcd && conn->lgr->peer_shutdown)
		conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
	else
		smc_close_abort(conn);
	conn->killed = 1;
	smc->sk.sk_err = ECONNABORTED;
	smc_sk_wake_ups(smc);
	if (conn->lgr->is_smcd) {
		smc_ism_unset_conn(conn);
		if (soft)
			tasklet_kill(&conn->rx_tsklet);
		else
			tasklet_unlock_wait(&conn->rx_tsklet);
	} else {
		smc_cdc_tx_dismiss_slots(conn);
	}
	smc_lgr_unregister_conn(conn);
	smc_close_active_abort(smc);
}

static void smc_lgr_cleanup(struct smc_link_group *lgr)
{
	if (lgr->is_smcd) {
		smc_ism_signal_shutdown(lgr);
		smcd_unregister_all_dmbs(lgr);
		smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
		put_device(&lgr->smcd->dev);
	} else {
		u32 rsn = lgr->llc_termination_rsn;

		if (!rsn)
			rsn = SMC_LLC_DEL_PROG_INIT_TERM;
		smc_llc_send_link_delete_all(lgr, false, rsn);
		smcr_lgr_link_deactivate_all(lgr);
	}
}

/* terminate link group
 * @soft: true if link group shutdown can take its time
 *	  false if immediate link group shutdown is required
 */
static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft)
{
	struct smc_connection *conn;
	struct smc_sock *smc;
	struct rb_node *node;

	if (lgr->terminating)
		return;	/* lgr already terminating */
	/* cancel free_work sync, will terminate when lgr->freeing is set */
	cancel_delayed_work_sync(&lgr->free_work);
	lgr->terminating = 1;

	/* kill remaining link group connections */
	read_lock_bh(&lgr->conns_lock);
	node = rb_first(&lgr->conns_all);
	while (node) {
		read_unlock_bh(&lgr->conns_lock);
		conn = rb_entry(node, struct smc_connection, alert_node);
		smc = container_of(conn, struct smc_sock, conn);
		sock_hold(&smc->sk); /* sock_put below */
		lock_sock(&smc->sk);
		smc_conn_kill(conn, soft);
		release_sock(&smc->sk);
		sock_put(&smc->sk); /* sock_hold above */
		read_lock_bh(&lgr->conns_lock);
		node = rb_first(&lgr->conns_all);
	}
	read_unlock_bh(&lgr->conns_lock);
	smc_lgr_cleanup(lgr);
	smc_lgr_free(lgr);
}

/* unlink link group and schedule termination */
void smc_lgr_terminate_sched(struct smc_link_group *lgr)
{
	spinlock_t *lgr_lock;

	smc_lgr_list_head(lgr, &lgr_lock);
	spin_lock_bh(lgr_lock);
	if (list_empty(&lgr->list) || lgr->terminating || lgr->freeing) {
		spin_unlock_bh(lgr_lock);
		return;	/* lgr already terminating */
	}
	list_del_init(&lgr->list);
	lgr->freeing = 1;
	spin_unlock_bh(lgr_lock);
	schedule_work(&lgr->terminate_work);
}

/* Called when peer lgr shutdown (regularly or abnormally) is received */
void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan)
{
	struct smc_link_group *lgr, *l;
	LIST_HEAD(lgr_free_list);

	/* run common cleanup function and build free list */
	spin_lock_bh(&dev->lgr_lock);
	list_for_each_entry_safe(lgr, l, &dev->lgr_list, list) {
		if ((!peer_gid || lgr->peer_gid == peer_gid) &&
		    (vlan == VLAN_VID_MASK || lgr->vlan_id == vlan)) {
			if (peer_gid) /* peer triggered termination */
				lgr->peer_shutdown = 1;
			list_move(&lgr->list, &lgr_free_list);
			lgr->freeing = 1;
		}
	}
	spin_unlock_bh(&dev->lgr_lock);

	/* cancel the regular free workers and actually free lgrs */
	list_for_each_entry_safe(lgr, l, &lgr_free_list, list) {
		list_del_init(&lgr->list);
		schedule_work(&lgr->terminate_work);
	}
}

/* Called when an SMCD device is removed or the smc module is unloaded */
void smc_smcd_terminate_all(struct smcd_dev *smcd)
{
	struct smc_link_group *lgr, *lg;
	LIST_HEAD(lgr_free_list);

	spin_lock_bh(&smcd->lgr_lock);
	list_splice_init(&smcd->lgr_list, &lgr_free_list);
	list_for_each_entry(lgr, &lgr_free_list, list)
		lgr->freeing = 1;
	spin_unlock_bh(&smcd->lgr_lock);

	list_for_each_entry_safe(lgr, lg, &lgr_free_list, list) {
		list_del_init(&lgr->list);
		__smc_lgr_terminate(lgr, false);
	}

	if (atomic_read(&smcd->lgr_cnt))
		wait_event(smcd->lgrs_deleted, !atomic_read(&smcd->lgr_cnt));
}

/* Called when an SMCR device is removed or the smc module is unloaded.
 * If smcibdev is given, all SMCR link groups using this device are terminated.
 * If smcibdev is NULL, all SMCR link groups are terminated.
 */
void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
{
	struct smc_link_group *lgr, *lg;
	LIST_HEAD(lgr_free_list);
	int i;

	spin_lock_bh(&smc_lgr_list.lock);
	if (!smcibdev) {
		list_splice_init(&smc_lgr_list.list, &lgr_free_list);
		list_for_each_entry(lgr, &lgr_free_list, list)
			lgr->freeing = 1;
	} else {
		list_for_each_entry_safe(lgr, lg, &smc_lgr_list.list, list) {
			for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
				if (lgr->lnk[i].smcibdev == smcibdev)
					smcr_link_down_cond_sched(&lgr->lnk[i]);
			}
		}
	}
	spin_unlock_bh(&smc_lgr_list.lock);

	list_for_each_entry_safe(lgr, lg, &lgr_free_list, list) {
		list_del_init(&lgr->list);
		smc_llc_set_termination_rsn(lgr, SMC_LLC_DEL_OP_INIT_TERM);
		__smc_lgr_terminate(lgr, false);
	}

	if (smcibdev) {
		if (atomic_read(&smcibdev->lnk_cnt))
			wait_event(smcibdev->lnks_deleted,
				   !atomic_read(&smcibdev->lnk_cnt));
	} else {
		if (atomic_read(&lgr_cnt))
			wait_event(lgrs_deleted, !atomic_read(&lgr_cnt));
	}
}

/* set new lgr type and clear all asymmetric link tagging */
void smcr_lgr_set_type(struct smc_link_group *lgr, enum smc_lgr_type new_type)
{
	char *lgr_type = "";
	int i;

	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++)
		if (smc_link_usable(&lgr->lnk[i]))
			lgr->lnk[i].link_is_asym = false;
	if (lgr->type == new_type)
		return;
	lgr->type = new_type;

	switch (lgr->type) {
	case SMC_LGR_NONE:
		lgr_type = "NONE";
		break;
	case SMC_LGR_SINGLE:
		lgr_type = "SINGLE";
		break;
	case SMC_LGR_SYMMETRIC:
		lgr_type = "SYMMETRIC";
		break;
	case SMC_LGR_ASYMMETRIC_PEER:
		lgr_type = "ASYMMETRIC_PEER";
		break;
	case SMC_LGR_ASYMMETRIC_LOCAL:
		lgr_type = "ASYMMETRIC_LOCAL";
		break;
	}
	pr_warn_ratelimited("smc: SMC-R lg %*phN state changed: "
			    "%s, pnetid %.16s\n", SMC_LGR_ID_SIZE, &lgr->id,
			    lgr_type, lgr->pnet_id);
}

/* set new lgr type and tag a link as asymmetric */
void smcr_lgr_set_type_asym(struct smc_link_group *lgr,
			    enum smc_lgr_type new_type, int asym_lnk_idx)
{
	smcr_lgr_set_type(lgr, new_type);
	lgr->lnk[asym_lnk_idx].link_is_asym = true;
}

/* abort connection, abort_work scheduled from tasklet context */
static void smc_conn_abort_work(struct work_struct *work)
{
	struct smc_connection *conn = container_of(work,
						   struct smc_connection,
						   abort_work);
	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);

	smc_conn_kill(conn, true);
	sock_put(&smc->sk); /* sock_hold done by schedulers of abort_work */
}

/* link is up - establish alternate link if applicable */
static void smcr_link_up(struct smc_link_group *lgr,
			 struct smc_ib_device *smcibdev, u8 ibport)
{
	struct smc_link *link = NULL;

	if (list_empty(&lgr->list) ||
	    lgr->type == SMC_LGR_SYMMETRIC ||
	    lgr->type == SMC_LGR_ASYMMETRIC_PEER)
		return;

	if (lgr->role == SMC_SERV) {
		/* trigger local add link processing */
		link = smc_llc_usable_link(lgr);
		if (!link)
			return;
		smc_llc_srv_add_link_local(link);
	} else {
		/* invite server to start add link processing */
		u8 gid[SMC_GID_SIZE];

		if (smc_ib_determine_gid(smcibdev, ibport, lgr->vlan_id, gid,
					 NULL))
			return;
		if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) {
			/* some other llc task is ongoing */
			wait_event_interruptible_timeout(lgr->llc_waiter,
				(lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE),
				SMC_LLC_WAIT_TIME);
		}
		if (list_empty(&lgr->list) ||
		    !smc_ib_port_active(smcibdev, ibport))
			return; /* lgr or device no longer active */
		link = smc_llc_usable_link(lgr);
		if (!link)
			return;
		smc_llc_send_add_link(link, smcibdev->mac[ibport - 1], gid,
				      NULL, SMC_LLC_REQ);
	}
}

void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport)
{
	struct smc_ib_up_work *ib_work;
	struct smc_link_group *lgr, *n;

	list_for_each_entry_safe(lgr, n, &smc_lgr_list.list, list) {
		if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id,
			    SMC_MAX_PNETID_LEN) ||
		    lgr->type == SMC_LGR_SYMMETRIC ||
		    lgr->type == SMC_LGR_ASYMMETRIC_PEER)
			continue;
		ib_work = kmalloc(sizeof(*ib_work), GFP_KERNEL);
		if (!ib_work)
			continue;
		INIT_WORK(&ib_work->work, smc_link_up_work);
		ib_work->lgr = lgr;
		ib_work->smcibdev = smcibdev;
		ib_work->ibport = ibport;
		schedule_work(&ib_work->work);
	}
}

/* link is down - switch connections to alternate link,
 * must be called under lgr->llc_conf_mutex lock
 */
static void smcr_link_down(struct smc_link *lnk)
{
	struct smc_link_group *lgr = lnk->lgr;
	struct smc_link *to_lnk;
	int del_link_id;

	if (!lgr || lnk->state == SMC_LNK_UNUSED || list_empty(&lgr->list))
		return;

	smc_ib_modify_qp_reset(lnk);
	to_lnk = smc_switch_conns(lgr, lnk, true);
	if (!to_lnk) { /* no backup link available */
		smcr_link_clear(lnk, true);
		return;
	}
	smcr_lgr_set_type(lgr, SMC_LGR_SINGLE);
	del_link_id = lnk->link_id;

	if (lgr->role == SMC_SERV) {
		/* trigger local delete link processing */
		smc_llc_srv_delete_link_local(to_lnk, del_link_id);
	} else {
		if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) {
			/* another llc task is ongoing */
			mutex_unlock(&lgr->llc_conf_mutex);
			wait_event_interruptible_timeout(lgr->llc_waiter,
				(lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE),
				SMC_LLC_WAIT_TIME);
			mutex_lock(&lgr->llc_conf_mutex);
		}
		smc_llc_send_delete_link(to_lnk, del_link_id, SMC_LLC_REQ, true,
					 SMC_LLC_DEL_LOST_PATH);
	}
}

/* must be called under lgr->llc_conf_mutex lock */
void smcr_link_down_cond(struct smc_link *lnk)
{
	if (smc_link_downing(&lnk->state))
		smcr_link_down(lnk);
}

/* will get the lgr->llc_conf_mutex lock */
void smcr_link_down_cond_sched(struct smc_link *lnk)
{
	if (smc_link_downing(&lnk->state))
		schedule_work(&lnk->link_down_wrk);
}

void smcr_port_err(struct smc_ib_device *smcibdev, u8 ibport)
{
	struct smc_link_group *lgr, *n;
	int i;

	list_for_each_entry_safe(lgr, n, &smc_lgr_list.list, list) {
		if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id,
			    SMC_MAX_PNETID_LEN))
			continue; /* lgr is not affected */
		if (list_empty(&lgr->list))
			continue;
		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
			struct smc_link *lnk = &lgr->lnk[i];

			if (smc_link_usable(lnk) &&
			    lnk->smcibdev == smcibdev && lnk->ibport == ibport)
				smcr_link_down_cond_sched(lnk);
		}
	}
}

static void smc_link_up_work(struct work_struct *work)
{
	struct smc_ib_up_work *ib_work = container_of(work,
						      struct smc_ib_up_work,
						      work);
	struct smc_link_group *lgr = ib_work->lgr;

	if (list_empty(&lgr->list))
		goto out;
	smcr_link_up(lgr, ib_work->smcibdev, ib_work->ibport);
out:
	kfree(ib_work);
}

static void smc_link_down_work(struct work_struct *work)
{
	struct smc_link *link = container_of(work, struct smc_link,
					     link_down_wrk);
	struct smc_link_group *lgr = link->lgr;

	if (list_empty(&lgr->list))
		return;
	wake_up_interruptible_all(&lgr->llc_waiter);
	mutex_lock(&lgr->llc_conf_mutex);
	smcr_link_down(link);
	mutex_unlock(&lgr->llc_conf_mutex);
}

/* Determine vlan of internal TCP socket.
 * @ini: the determined vlan id is stored in ini->vlan_id
 */
int smc_vlan_by_tcpsk(struct socket *clcsock, struct smc_init_info *ini)
{
	struct dst_entry *dst = sk_dst_get(clcsock->sk);
	struct net_device *ndev;
	int i, nest_lvl, rc = 0;

	ini->vlan_id = 0;
	if (!dst) {
		rc = -ENOTCONN;
		goto out;
	}
	if (!dst->dev) {
		rc = -ENODEV;
		goto out_rel;
	}

	ndev = dst->dev;
	if (is_vlan_dev(ndev)) {
		ini->vlan_id = vlan_dev_vlan_id(ndev);
		goto out_rel;
	}

	rtnl_lock();
	nest_lvl = ndev->lower_level;
	for (i = 0; i < nest_lvl; i++) {
		struct list_head *lower = &ndev->adj_list.lower;

		if (list_empty(lower))
			break;
		lower = lower->next;
		ndev = (struct net_device *)netdev_lower_get_next(ndev, &lower);
		if (is_vlan_dev(ndev)) {
			ini->vlan_id = vlan_dev_vlan_id(ndev);
			break;
		}
	}
	rtnl_unlock();

out_rel:
	dst_release(dst);
out:
	return rc;
}

static bool smcr_lgr_match(struct smc_link_group *lgr,
			   struct smc_clc_msg_local *lcl,
			   enum smc_lgr_role role, u32 clcqpn)
{
	int i;

	if (memcmp(lgr->peer_systemid, lcl->id_for_peer, SMC_SYSTEMID_LEN) ||
	    lgr->role != role)
		return false;

	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		if (lgr->lnk[i].state != SMC_LNK_ACTIVE)
			continue;
		if ((lgr->role == SMC_SERV || lgr->lnk[i].peer_qpn == clcqpn) &&
		    !memcmp(lgr->lnk[i].peer_gid, &lcl->gid, SMC_GID_SIZE) &&
		    !memcmp(lgr->lnk[i].peer_mac, lcl->mac, sizeof(lcl->mac)))
			return true;
	}
	return false;
}

static bool smcd_lgr_match(struct smc_link_group *lgr,
			   struct smcd_dev *smcismdev, u64 peer_gid)
{
	return lgr->peer_gid == peer_gid && lgr->smcd == smcismdev;
}

/* create a new SMC connection (and a new link group if necessary) */
int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
{
	struct smc_connection *conn = &smc->conn;
	struct list_head *lgr_list;
	struct smc_link_group *lgr;
	enum smc_lgr_role role;
	spinlock_t *lgr_lock;
	int rc = 0;

	lgr_list = ini->is_smcd ? &ini->ism_dev->lgr_list : &smc_lgr_list.list;
	lgr_lock = ini->is_smcd ? &ini->ism_dev->lgr_lock : &smc_lgr_list.lock;
	ini->cln_first_contact = SMC_FIRST_CONTACT;
	role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
	if (role == SMC_CLNT && ini->srv_first_contact)
		/* create new link group as well */
		goto create;

	/* determine if an existing link group can be reused */
	spin_lock_bh(lgr_lock);
	list_for_each_entry(lgr, lgr_list, list) {
		write_lock_bh(&lgr->conns_lock);
		if ((ini->is_smcd ?
		     smcd_lgr_match(lgr, ini->ism_dev, ini->ism_gid) :
		     smcr_lgr_match(lgr, ini->ib_lcl, role, ini->ib_clcqpn)) &&
		    !lgr->sync_err &&
		    lgr->vlan_id == ini->vlan_id &&
		    (role == SMC_CLNT ||
		     lgr->conns_num < SMC_RMBS_PER_LGR_MAX)) {
			/* link group found */
			ini->cln_first_contact = SMC_REUSE_CONTACT;
			conn->lgr = lgr;
			rc = smc_lgr_register_conn(conn, false);
			write_unlock_bh(&lgr->conns_lock);
			if (!rc && delayed_work_pending(&lgr->free_work))
				cancel_delayed_work(&lgr->free_work);
			break;
		}
		write_unlock_bh(&lgr->conns_lock);
	}
	spin_unlock_bh(lgr_lock);
	if (rc)
		return rc;

	if (role == SMC_CLNT && !ini->srv_first_contact &&
	    ini->cln_first_contact == SMC_FIRST_CONTACT) {
		/* Server reuses a link group, but Client wants to start
		 * a new one
		 * send out_of_sync decline, reason synchr. error
		 */
		return SMC_CLC_DECL_SYNCERR;
	}

create:
	if (ini->cln_first_contact == SMC_FIRST_CONTACT) {
		rc = smc_lgr_create(smc, ini);
		if (rc)
			goto out;
		lgr = conn->lgr;
		write_lock_bh(&lgr->conns_lock);
		rc = smc_lgr_register_conn(conn, true);
		write_unlock_bh(&lgr->conns_lock);
		if (rc)
			goto out;
	}
	conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE;
	conn->local_tx_ctrl.len = SMC_WR_TX_SIZE;
	conn->urg_state = SMC_URG_READ;
	INIT_WORK(&smc->conn.abort_work, smc_conn_abort_work);
	if (ini->is_smcd) {
		conn->rx_off = sizeof(struct smcd_cdc_msg);
		smcd_cdc_rx_init(conn); /* init tasklet for this conn */
	}
#ifndef KERNEL_HAS_ATOMIC64
	spin_lock_init(&conn->acurs_lock);
#endif

out:
	return rc;
}

/* convert the RMB size into the compressed notation - minimum 16K.
 * In contrast to plain ilog2, this rounds towards the next power of 2,
 * so the socket application gets at least its desired sndbuf / rcvbuf size.
 */
static u8 smc_compress_bufsize(int size)
{
	u8 compressed;

	if (size <= SMC_BUF_MIN_SIZE)
		return 0;

	size = (size - 1) >> 14;
	compressed = ilog2(size) + 1;
	if (compressed >= SMC_RMBE_SIZES)
		compressed = SMC_RMBE_SIZES - 1;
	return compressed;
}

/* convert the RMB size from compressed notation into integer */
int smc_uncompress_bufsize(u8 compressed)
{
	u32 size;

	size = 0x00000001 << (((int)compressed) + 14);
	return (int)size;
}

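/* Worked example of the compressed size mapping, assuming SMC_BUF_MIN_SIZE
 * is 16KB: a requested size of up to 16KB compresses to 0, 16KB+1..32KB to 1,
 * 32KB+1..64KB to 2, and so on. smc_uncompress_bufsize() returns the upper
 * bound of the bucket, e.g. smc_uncompress_bufsize(2) = 1 << 16 = 64KB.
 */
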
/* try to reuse a sndbuf or rmb description slot for a certain
 * buffer size; if not available, return NULL
 */
static struct smc_buf_desc *smc_buf_get_slot(int compressed_bufsize,
					     struct mutex *lock,
					     struct list_head *buf_list)
{
	struct smc_buf_desc *buf_slot;

	mutex_lock(lock);
	list_for_each_entry(buf_slot, buf_list, list) {
		if (cmpxchg(&buf_slot->used, 0, 1) == 0) {
			mutex_unlock(lock);
			return buf_slot;
		}
	}
	mutex_unlock(lock);
	return NULL;
}

/* one of the conditions for announcing a receiver's current window size is
 * that it "results in a minimum increase in the window size of 10% of the
 * receive buffer space" [RFC7609]
 */
static inline int smc_rmb_wnd_update_limit(int rmbe_size)
{
	return min_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2);
}

/* map an rmb buf to a link */
static int smcr_buf_map_link(struct smc_buf_desc *buf_desc, bool is_rmb,
			     struct smc_link *lnk)
{
	int rc;

	if (buf_desc->is_map_ib[lnk->link_idx])
		return 0;

	rc = sg_alloc_table(&buf_desc->sgt[lnk->link_idx], 1, GFP_KERNEL);
	if (rc)
		return rc;
	sg_set_buf(buf_desc->sgt[lnk->link_idx].sgl,
		   buf_desc->cpu_addr, buf_desc->len);

	/* map sg table to DMA address */
	rc = smc_ib_buf_map_sg(lnk, buf_desc,
			       is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
	/* SMC protocol depends on mapping to one DMA address only */
	if (rc != 1) {
		rc = -EAGAIN;
		goto free_table;
	}

	/* create a new memory region for the RMB */
	if (is_rmb) {
		rc = smc_ib_get_memory_region(lnk->roce_pd,
					      IB_ACCESS_REMOTE_WRITE |
					      IB_ACCESS_LOCAL_WRITE,
					      buf_desc, lnk->link_idx);
		if (rc)
			goto buf_unmap;
		smc_ib_sync_sg_for_device(lnk, buf_desc, DMA_FROM_DEVICE);
	}
	buf_desc->is_map_ib[lnk->link_idx] = true;
	return 0;

buf_unmap:
	smc_ib_buf_unmap_sg(lnk, buf_desc,
			    is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
free_table:
	sg_free_table(&buf_desc->sgt[lnk->link_idx]);
	return rc;
}

/* register a new rmb on IB device,
 * must be called under lgr->llc_conf_mutex lock
 */
int smcr_link_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc)
{
	if (list_empty(&link->lgr->list))
		return -ENOLINK;
	if (!rmb_desc->is_reg_mr[link->link_idx]) {
		/* register memory region for new rmb */
		if (smc_wr_reg_send(link, rmb_desc->mr_rx[link->link_idx])) {
			rmb_desc->is_reg_err = true;
			return -EFAULT;
		}
		rmb_desc->is_reg_mr[link->link_idx] = true;
	}
	return 0;
}

static int _smcr_buf_map_lgr(struct smc_link *lnk, struct mutex *lock,
			     struct list_head *lst, bool is_rmb)
{
	struct smc_buf_desc *buf_desc, *bf;
	int rc = 0;

	mutex_lock(lock);
	list_for_each_entry_safe(buf_desc, bf, lst, list) {
		if (!buf_desc->used)
			continue;
		rc = smcr_buf_map_link(buf_desc, is_rmb, lnk);
		if (rc)
			goto out;
	}
out:
	mutex_unlock(lock);
	return rc;
}

/* map all used buffers of lgr for a new link */
int smcr_buf_map_lgr(struct smc_link *lnk)
{
	struct smc_link_group *lgr = lnk->lgr;
	int i, rc = 0;

	for (i = 0; i < SMC_RMBE_SIZES; i++) {
		rc = _smcr_buf_map_lgr(lnk, &lgr->rmbs_lock,
				       &lgr->rmbs[i], true);
		if (rc)
			return rc;
		rc = _smcr_buf_map_lgr(lnk, &lgr->sndbufs_lock,
				       &lgr->sndbufs[i], false);
		if (rc)
			return rc;
	}
	return 0;
}

/* register all used buffers of lgr for a new link,
 * must be called under lgr->llc_conf_mutex lock
 */
int smcr_buf_reg_lgr(struct smc_link *lnk)
{
	struct smc_link_group *lgr = lnk->lgr;
	struct smc_buf_desc *buf_desc, *bf;
	int i, rc = 0;

	mutex_lock(&lgr->rmbs_lock);
	for (i = 0; i < SMC_RMBE_SIZES; i++) {
		list_for_each_entry_safe(buf_desc, bf, &lgr->rmbs[i], list) {
			if (!buf_desc->used)
				continue;
			rc = smcr_link_reg_rmb(lnk, buf_desc);
			if (rc)
				goto out;
		}
	}
out:
	mutex_unlock(&lgr->rmbs_lock);
	return rc;
}

static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr,
						bool is_rmb, int bufsize)
{
	struct smc_buf_desc *buf_desc;

	/* try to alloc a new buffer */
	buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
	if (!buf_desc)
		return ERR_PTR(-ENOMEM);

	buf_desc->order = get_order(bufsize);
	buf_desc->pages = alloc_pages(GFP_KERNEL | __GFP_NOWARN |
				      __GFP_NOMEMALLOC | __GFP_COMP |
				      __GFP_NORETRY | __GFP_ZERO,
				      buf_desc->order);
	if (!buf_desc->pages) {
		kfree(buf_desc);
		return ERR_PTR(-EAGAIN);
	}
	buf_desc->cpu_addr = (void *)page_address(buf_desc->pages);
	buf_desc->len = bufsize;
	return buf_desc;
}

/* map buf_desc on all usable links,
 * unused buffers stay mapped as long as the link is up
 */
static int smcr_buf_map_usable_links(struct smc_link_group *lgr,
				     struct smc_buf_desc *buf_desc, bool is_rmb)
{
	int i, rc = 0;

	/* protect against parallel link reconfiguration */
	mutex_lock(&lgr->llc_conf_mutex);
	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		struct smc_link *lnk = &lgr->lnk[i];

		if (!smc_link_usable(lnk))
			continue;
		if (smcr_buf_map_link(buf_desc, is_rmb, lnk)) {
			rc = -ENOMEM;
			goto out;
		}
	}
out:
	mutex_unlock(&lgr->llc_conf_mutex);
	return rc;
}

#define SMCD_DMBE_SIZES		7 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */

static struct smc_buf_desc *smcd_new_buf_create(struct smc_link_group *lgr,
						bool is_dmb, int bufsize)
{
	struct smc_buf_desc *buf_desc;
	int rc;

	if (smc_compress_bufsize(bufsize) > SMCD_DMBE_SIZES)
		return ERR_PTR(-EAGAIN);

	/* try to alloc a new DMB */
	buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
	if (!buf_desc)
		return ERR_PTR(-ENOMEM);
	if (is_dmb) {
		rc = smc_ism_register_dmb(lgr, bufsize, buf_desc);
		if (rc) {
			kfree(buf_desc);
			return ERR_PTR(-EAGAIN);
		}
		buf_desc->pages = virt_to_page(buf_desc->cpu_addr);
		/* CDC header stored in buf. So, pretend it was smaller */
		buf_desc->len = bufsize - sizeof(struct smcd_cdc_msg);
	} else {
		buf_desc->cpu_addr = kzalloc(bufsize, GFP_KERNEL |
					     __GFP_NOWARN | __GFP_NORETRY |
					     __GFP_NOMEMALLOC);
		if (!buf_desc->cpu_addr) {
			kfree(buf_desc);
			return ERR_PTR(-EAGAIN);
		}
		buf_desc->len = bufsize;
	}
	return buf_desc;
}

static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
{
	struct smc_buf_desc *buf_desc = ERR_PTR(-ENOMEM);
	struct smc_connection *conn = &smc->conn;
	struct smc_link_group *lgr = conn->lgr;
	struct list_head *buf_list;
	int bufsize, bufsize_short;
	struct mutex *lock;	/* lock buffer list */
	int sk_buf_size;

	if (is_rmb)
		/* use socket recv buffer size (w/o overhead) as start value */
		sk_buf_size = smc->sk.sk_rcvbuf / 2;
	else
		/* use socket send buffer size (w/o overhead) as start value */
		sk_buf_size = smc->sk.sk_sndbuf / 2;

	for (bufsize_short = smc_compress_bufsize(sk_buf_size);
	     bufsize_short >= 0; bufsize_short--) {

		if (is_rmb) {
			lock = &lgr->rmbs_lock;
			buf_list = &lgr->rmbs[bufsize_short];
		} else {
			lock = &lgr->sndbufs_lock;
			buf_list = &lgr->sndbufs[bufsize_short];
		}
		bufsize = smc_uncompress_bufsize(bufsize_short);
		if ((1 << get_order(bufsize)) > SG_MAX_SINGLE_ALLOC)
			continue;

		/* check for reusable slot in the link group */
		buf_desc = smc_buf_get_slot(bufsize_short, lock, buf_list);
		if (buf_desc) {
			memset(buf_desc->cpu_addr, 0, bufsize);
			break; /* found reusable slot */
		}

		if (is_smcd)
			buf_desc = smcd_new_buf_create(lgr, is_rmb, bufsize);
		else
			buf_desc = smcr_new_buf_create(lgr, is_rmb, bufsize);

		if (PTR_ERR(buf_desc) == -ENOMEM)
			break;
		if (IS_ERR(buf_desc))
			continue;

		buf_desc->used = 1;
		mutex_lock(lock);
		list_add(&buf_desc->list, buf_list);
		mutex_unlock(lock);
		break; /* found */
	}

	if (IS_ERR(buf_desc))
		return -ENOMEM;

	if (!is_smcd) {
		if (smcr_buf_map_usable_links(lgr, buf_desc, is_rmb)) {
			smcr_buf_unuse(buf_desc, lgr);
			return -ENOMEM;
		}
	}

	if (is_rmb) {
		conn->rmb_desc = buf_desc;
		conn->rmbe_size_short = bufsize_short;
		smc->sk.sk_rcvbuf = bufsize * 2;
		atomic_set(&conn->bytes_to_rcv, 0);
		conn->rmbe_update_limit =
			smc_rmb_wnd_update_limit(buf_desc->len);
		if (is_smcd)
			smc_ism_set_conn(conn); /* map RMB/smcd_dev to conn */
	} else {
		conn->sndbuf_desc = buf_desc;
		smc->sk.sk_sndbuf = bufsize * 2;
		atomic_set(&conn->sndbuf_space, bufsize);
	}
	return 0;
}

void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn)
{
	if (!conn->lgr || conn->lgr->is_smcd || !smc_link_usable(conn->lnk))
		return;
	smc_ib_sync_sg_for_cpu(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE);
}

void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn)
{
	if (!conn->lgr || conn->lgr->is_smcd || !smc_link_usable(conn->lnk))
		return;
	smc_ib_sync_sg_for_device(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE);
}

void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn)
{
	int i;

	if (!conn->lgr || conn->lgr->is_smcd)
		return;
	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		if (!smc_link_usable(&conn->lgr->lnk[i]))
			continue;
		smc_ib_sync_sg_for_cpu(&conn->lgr->lnk[i], conn->rmb_desc,
				       DMA_FROM_DEVICE);
	}
}

void smc_rmb_sync_sg_for_device(struct smc_connection *conn)
{
	int i;

	if (!conn->lgr || conn->lgr->is_smcd)
		return;
	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		if (!smc_link_usable(&conn->lgr->lnk[i]))
			continue;
		smc_ib_sync_sg_for_device(&conn->lgr->lnk[i], conn->rmb_desc,
					  DMA_FROM_DEVICE);
	}
}

/* create the send and receive buffer for an SMC socket;
 * receive buffers are called RMBs;
 * (even though the SMC protocol allows more than one RMB-element per RMB,
 * the Linux implementation uses just one RMB-element per RMB, i.e. uses an
 * extra RMB for every connection in a link group
 */
int smc_buf_create(struct smc_sock *smc, bool is_smcd)
{
	int rc;

	/* create send buffer */
	rc = __smc_buf_create(smc, is_smcd, false);
	if (rc)
		return rc;
	/* create rmb */
	rc = __smc_buf_create(smc, is_smcd, true);
	if (rc)
		smc_buf_free(smc->conn.lgr, false, smc->conn.sndbuf_desc);
	return rc;
}

static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr)
{
	int i;

	for_each_clear_bit(i, lgr->rtokens_used_mask, SMC_RMBS_PER_LGR_MAX) {
		if (!test_and_set_bit(i, lgr->rtokens_used_mask))
			return i;
	}
	return -ENOSPC;
}

static int smc_rtoken_find_by_link(struct smc_link_group *lgr, int lnk_idx,
				   u32 rkey)
{
	int i;

	for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
		if (test_bit(i, lgr->rtokens_used_mask) &&
		    lgr->rtokens[i][lnk_idx].rkey == rkey)
			return i;
	}
	return -ENOENT;
}

/* set rtoken for a new link to an existing rmb */
void smc_rtoken_set(struct smc_link_group *lgr, int link_idx, int link_idx_new,
		    __be32 nw_rkey_known, __be64 nw_vaddr, __be32 nw_rkey)
{
	int rtok_idx;

	rtok_idx = smc_rtoken_find_by_link(lgr, link_idx, ntohl(nw_rkey_known));
	if (rtok_idx == -ENOENT)
		return;
	lgr->rtokens[rtok_idx][link_idx_new].rkey = ntohl(nw_rkey);
	lgr->rtokens[rtok_idx][link_idx_new].dma_addr = be64_to_cpu(nw_vaddr);
}

/* set rtoken for a new link whose link_id is given */
void smc_rtoken_set2(struct smc_link_group *lgr, int rtok_idx, int link_id,
		     __be64 nw_vaddr, __be32 nw_rkey)
{
	u64 dma_addr = be64_to_cpu(nw_vaddr);
	u32 rkey = ntohl(nw_rkey);
	bool found = false;
	int link_idx;

	for (link_idx = 0; link_idx < SMC_LINKS_PER_LGR_MAX; link_idx++) {
		if (lgr->lnk[link_idx].link_id == link_id) {
			found = true;
			break;
		}
	}
	if (!found)
		return;
	lgr->rtokens[rtok_idx][link_idx].rkey = rkey;
	lgr->rtokens[rtok_idx][link_idx].dma_addr = dma_addr;
}

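/* Note: lgr->rtokens[] is indexed by rtoken slot (one slot per remote RMB,
 * tracked in rtokens_used_mask) and by link index, so a connection's
 * rtoken_idx stays valid on every link of the link group; only the per-link
 * rkey/dma_addr pair differs.
 */
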
/* add a new rtoken from peer */
int smc_rtoken_add(struct smc_link *lnk, __be64 nw_vaddr, __be32 nw_rkey)
{
	struct smc_link_group *lgr = smc_get_lgr(lnk);
	u64 dma_addr = be64_to_cpu(nw_vaddr);
	u32 rkey = ntohl(nw_rkey);
	int i;

	for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
		if (lgr->rtokens[i][lnk->link_idx].rkey == rkey &&
		    lgr->rtokens[i][lnk->link_idx].dma_addr == dma_addr &&
		    test_bit(i, lgr->rtokens_used_mask)) {
			/* already in list */
			return i;
		}
	}
	i = smc_rmb_reserve_rtoken_idx(lgr);
	if (i < 0)
		return i;
	lgr->rtokens[i][lnk->link_idx].rkey = rkey;
	lgr->rtokens[i][lnk->link_idx].dma_addr = dma_addr;
	return i;
}

/* delete an rtoken from all links */
int smc_rtoken_delete(struct smc_link *lnk, __be32 nw_rkey)
{
	struct smc_link_group *lgr = smc_get_lgr(lnk);
	u32 rkey = ntohl(nw_rkey);
	int i, j;

	for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
		if (lgr->rtokens[i][lnk->link_idx].rkey == rkey &&
		    test_bit(i, lgr->rtokens_used_mask)) {
			for (j = 0; j < SMC_LINKS_PER_LGR_MAX; j++) {
				lgr->rtokens[i][j].rkey = 0;
				lgr->rtokens[i][j].dma_addr = 0;
			}
			clear_bit(i, lgr->rtokens_used_mask);
			return 0;
		}
	}
	return -ENOENT;
}

/* save rkey and dma_addr received from peer during clc handshake */
int smc_rmb_rtoken_handling(struct smc_connection *conn,
			    struct smc_link *lnk,
			    struct smc_clc_msg_accept_confirm *clc)
{
	conn->rtoken_idx = smc_rtoken_add(lnk, clc->rmb_dma_addr,
					  clc->rmb_rkey);
	if (conn->rtoken_idx < 0)
		return conn->rtoken_idx;
	return 0;
}

static void smc_core_going_away(void)
{
	struct smc_ib_device *smcibdev;
	struct smcd_dev *smcd;

	spin_lock(&smc_ib_devices.lock);
	list_for_each_entry(smcibdev, &smc_ib_devices.list, list) {
		int i;

		for (i = 0; i < SMC_MAX_PORTS; i++)
			set_bit(i, smcibdev->ports_going_away);
	}
	spin_unlock(&smc_ib_devices.lock);

	spin_lock(&smcd_dev_list.lock);
	list_for_each_entry(smcd, &smcd_dev_list.list, list) {
		smcd->going_away = 1;
	}
	spin_unlock(&smcd_dev_list.lock);
}

/* Clean up all SMC link groups */
static void smc_lgrs_shutdown(void)
{
	struct smcd_dev *smcd;

	smc_core_going_away();

	smc_smcr_terminate_all(NULL);

	spin_lock(&smcd_dev_list.lock);
	list_for_each_entry(smcd, &smcd_dev_list.list, list)
		smc_smcd_terminate_all(smcd);
	spin_unlock(&smcd_dev_list.lock);
}

static int smc_core_reboot_event(struct notifier_block *this,
				 unsigned long event, void *ptr)
{
	smc_lgrs_shutdown();
	smc_ib_unregister_client();
	return 0;
}

static struct notifier_block smc_reboot_notifier = {
	.notifier_call = smc_core_reboot_event,
};

int __init smc_core_init(void)
{
	return register_reboot_notifier(&smc_reboot_notifier);
}

/* Called (from smc_exit) when module is removed */
void smc_core_exit(void)
{
	unregister_reboot_notifier(&smc_reboot_notifier);
	smc_lgrs_shutdown();
}