1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Shared Memory Communications over RDMA (SMC-R) and RoCE 4 * 5 * Basic Transport Functions exploiting Infiniband API 6 * 7 * Copyright IBM Corp. 2016 8 * 9 * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com> 10 */ 11 12 #include <linux/socket.h> 13 #include <linux/if_vlan.h> 14 #include <linux/random.h> 15 #include <linux/workqueue.h> 16 #include <net/tcp.h> 17 #include <net/sock.h> 18 #include <rdma/ib_verbs.h> 19 20 #include "smc.h" 21 #include "smc_clc.h" 22 #include "smc_core.h" 23 #include "smc_ib.h" 24 #include "smc_wr.h" 25 #include "smc_llc.h" 26 #include "smc_cdc.h" 27 #include "smc_close.h" 28 29 #define SMC_LGR_NUM_INCR 256 30 #define SMC_LGR_FREE_DELAY_SERV (600 * HZ) 31 #define SMC_LGR_FREE_DELAY_CLNT (SMC_LGR_FREE_DELAY_SERV + 10) 32 33 static u32 smc_lgr_num; /* unique link group number */ 34 35 static void smc_buf_free(struct smc_buf_desc *buf_desc, struct smc_link *lnk, 36 bool is_rmb); 37 38 static void smc_lgr_schedule_free_work(struct smc_link_group *lgr) 39 { 40 /* client link group creation always follows the server link group 41 * creation. For client use a somewhat higher removal delay time, 42 * otherwise there is a risk of out-of-sync link groups. 43 */ 44 mod_delayed_work(system_wq, &lgr->free_work, 45 lgr->role == SMC_CLNT ? SMC_LGR_FREE_DELAY_CLNT : 46 SMC_LGR_FREE_DELAY_SERV); 47 } 48 49 /* Register connection's alert token in our lookup structure. 50 * To use rbtrees we have to implement our own insert core. 51 * Requires @conns_lock 52 * @smc connection to register 53 * Returns 0 on success, != otherwise. 54 */ 55 static void smc_lgr_add_alert_token(struct smc_connection *conn) 56 { 57 struct rb_node **link, *parent = NULL; 58 u32 token = conn->alert_token_local; 59 60 link = &conn->lgr->conns_all.rb_node; 61 while (*link) { 62 struct smc_connection *cur = rb_entry(*link, 63 struct smc_connection, alert_node); 64 65 parent = *link; 66 if (cur->alert_token_local > token) 67 link = &parent->rb_left; 68 else 69 link = &parent->rb_right; 70 } 71 /* Put the new node there */ 72 rb_link_node(&conn->alert_node, parent, link); 73 rb_insert_color(&conn->alert_node, &conn->lgr->conns_all); 74 } 75 76 /* Register connection in link group by assigning an alert token 77 * registered in a search tree. 78 * Requires @conns_lock 79 * Note that '0' is a reserved value and not assigned. 80 */ 81 static void smc_lgr_register_conn(struct smc_connection *conn) 82 { 83 struct smc_sock *smc = container_of(conn, struct smc_sock, conn); 84 static atomic_t nexttoken = ATOMIC_INIT(0); 85 86 /* find a new alert_token_local value not yet used by some connection 87 * in this link group 88 */ 89 sock_hold(&smc->sk); /* sock_put in smc_lgr_unregister_conn() */ 90 while (!conn->alert_token_local) { 91 conn->alert_token_local = atomic_inc_return(&nexttoken); 92 if (smc_lgr_find_conn(conn->alert_token_local, conn->lgr)) 93 conn->alert_token_local = 0; 94 } 95 smc_lgr_add_alert_token(conn); 96 conn->lgr->conns_num++; 97 } 98 99 /* Unregister connection and reset the alert token of the given connection< 100 */ 101 static void __smc_lgr_unregister_conn(struct smc_connection *conn) 102 { 103 struct smc_sock *smc = container_of(conn, struct smc_sock, conn); 104 struct smc_link_group *lgr = conn->lgr; 105 106 rb_erase(&conn->alert_node, &lgr->conns_all); 107 lgr->conns_num--; 108 conn->alert_token_local = 0; 109 conn->lgr = NULL; 110 sock_put(&smc->sk); /* sock_hold in smc_lgr_register_conn() */ 111 } 112 113 /* Unregister connection and trigger lgr freeing if applicable 114 */ 115 static void smc_lgr_unregister_conn(struct smc_connection *conn) 116 { 117 struct smc_link_group *lgr = conn->lgr; 118 int reduced = 0; 119 120 write_lock_bh(&lgr->conns_lock); 121 if (conn->alert_token_local) { 122 reduced = 1; 123 __smc_lgr_unregister_conn(conn); 124 } 125 write_unlock_bh(&lgr->conns_lock); 126 if (!reduced || lgr->conns_num) 127 return; 128 smc_lgr_schedule_free_work(lgr); 129 } 130 131 static void smc_lgr_free_work(struct work_struct *work) 132 { 133 struct smc_link_group *lgr = container_of(to_delayed_work(work), 134 struct smc_link_group, 135 free_work); 136 bool conns; 137 138 spin_lock_bh(&smc_lgr_list.lock); 139 if (list_empty(&lgr->list)) 140 goto free; 141 read_lock_bh(&lgr->conns_lock); 142 conns = RB_EMPTY_ROOT(&lgr->conns_all); 143 read_unlock_bh(&lgr->conns_lock); 144 if (!conns) { /* number of lgr connections is no longer zero */ 145 spin_unlock_bh(&smc_lgr_list.lock); 146 return; 147 } 148 list_del_init(&lgr->list); /* remove from smc_lgr_list */ 149 free: 150 spin_unlock_bh(&smc_lgr_list.lock); 151 if (!delayed_work_pending(&lgr->free_work)) 152 smc_lgr_free(lgr); 153 } 154 155 /* create a new SMC link group */ 156 static int smc_lgr_create(struct smc_sock *smc, 157 struct smc_ib_device *smcibdev, u8 ibport, 158 char *peer_systemid, unsigned short vlan_id) 159 { 160 struct smc_link_group *lgr; 161 struct smc_link *lnk; 162 u8 rndvec[3]; 163 int rc = 0; 164 int i; 165 166 lgr = kzalloc(sizeof(*lgr), GFP_KERNEL); 167 if (!lgr) { 168 rc = -ENOMEM; 169 goto out; 170 } 171 lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT; 172 lgr->sync_err = false; 173 memcpy(lgr->peer_systemid, peer_systemid, SMC_SYSTEMID_LEN); 174 lgr->vlan_id = vlan_id; 175 rwlock_init(&lgr->sndbufs_lock); 176 rwlock_init(&lgr->rmbs_lock); 177 for (i = 0; i < SMC_RMBE_SIZES; i++) { 178 INIT_LIST_HEAD(&lgr->sndbufs[i]); 179 INIT_LIST_HEAD(&lgr->rmbs[i]); 180 } 181 smc_lgr_num += SMC_LGR_NUM_INCR; 182 memcpy(&lgr->id, (u8 *)&smc_lgr_num, SMC_LGR_ID_SIZE); 183 INIT_DELAYED_WORK(&lgr->free_work, smc_lgr_free_work); 184 lgr->conns_all = RB_ROOT; 185 186 lnk = &lgr->lnk[SMC_SINGLE_LINK]; 187 /* initialize link */ 188 lnk->state = SMC_LNK_ACTIVATING; 189 lnk->link_id = SMC_SINGLE_LINK; 190 lnk->smcibdev = smcibdev; 191 lnk->ibport = ibport; 192 lnk->path_mtu = smcibdev->pattr[ibport - 1].active_mtu; 193 if (!smcibdev->initialized) 194 smc_ib_setup_per_ibdev(smcibdev); 195 get_random_bytes(rndvec, sizeof(rndvec)); 196 lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) + (rndvec[2] << 16); 197 rc = smc_wr_alloc_link_mem(lnk); 198 if (rc) 199 goto free_lgr; 200 rc = smc_ib_create_protection_domain(lnk); 201 if (rc) 202 goto free_link_mem; 203 rc = smc_ib_create_queue_pair(lnk); 204 if (rc) 205 goto dealloc_pd; 206 rc = smc_wr_create_link(lnk); 207 if (rc) 208 goto destroy_qp; 209 init_completion(&lnk->llc_confirm); 210 init_completion(&lnk->llc_confirm_resp); 211 init_completion(&lnk->llc_add); 212 init_completion(&lnk->llc_add_resp); 213 214 smc->conn.lgr = lgr; 215 rwlock_init(&lgr->conns_lock); 216 spin_lock_bh(&smc_lgr_list.lock); 217 list_add(&lgr->list, &smc_lgr_list.list); 218 spin_unlock_bh(&smc_lgr_list.lock); 219 return 0; 220 221 destroy_qp: 222 smc_ib_destroy_queue_pair(lnk); 223 dealloc_pd: 224 smc_ib_dealloc_protection_domain(lnk); 225 free_link_mem: 226 smc_wr_free_link_mem(lnk); 227 free_lgr: 228 kfree(lgr); 229 out: 230 return rc; 231 } 232 233 static void smc_buf_unuse(struct smc_connection *conn) 234 { 235 if (conn->sndbuf_desc) { 236 conn->sndbuf_desc->used = 0; 237 conn->sndbuf_size = 0; 238 } 239 if (conn->rmb_desc) { 240 if (!conn->rmb_desc->regerr) { 241 conn->rmb_desc->reused = 1; 242 conn->rmb_desc->used = 0; 243 conn->rmbe_size = 0; 244 } else { 245 /* buf registration failed, reuse not possible */ 246 struct smc_link_group *lgr = conn->lgr; 247 struct smc_link *lnk; 248 249 write_lock_bh(&lgr->rmbs_lock); 250 list_del(&conn->rmb_desc->list); 251 write_unlock_bh(&lgr->rmbs_lock); 252 253 lnk = &lgr->lnk[SMC_SINGLE_LINK]; 254 smc_buf_free(conn->rmb_desc, lnk, true); 255 } 256 } 257 } 258 259 /* remove a finished connection from its link group */ 260 void smc_conn_free(struct smc_connection *conn) 261 { 262 if (!conn->lgr) 263 return; 264 smc_cdc_tx_dismiss_slots(conn); 265 smc_lgr_unregister_conn(conn); 266 smc_buf_unuse(conn); 267 } 268 269 static void smc_link_clear(struct smc_link *lnk) 270 { 271 lnk->peer_qpn = 0; 272 smc_ib_modify_qp_reset(lnk); 273 smc_wr_free_link(lnk); 274 smc_ib_destroy_queue_pair(lnk); 275 smc_ib_dealloc_protection_domain(lnk); 276 smc_wr_free_link_mem(lnk); 277 } 278 279 static void smc_buf_free(struct smc_buf_desc *buf_desc, struct smc_link *lnk, 280 bool is_rmb) 281 { 282 if (is_rmb) { 283 if (buf_desc->mr_rx[SMC_SINGLE_LINK]) 284 smc_ib_put_memory_region( 285 buf_desc->mr_rx[SMC_SINGLE_LINK]); 286 smc_ib_buf_unmap_sg(lnk->smcibdev, buf_desc, 287 DMA_FROM_DEVICE); 288 } else { 289 smc_ib_buf_unmap_sg(lnk->smcibdev, buf_desc, 290 DMA_TO_DEVICE); 291 } 292 sg_free_table(&buf_desc->sgt[SMC_SINGLE_LINK]); 293 if (buf_desc->pages) 294 __free_pages(buf_desc->pages, buf_desc->order); 295 kfree(buf_desc); 296 } 297 298 static void __smc_lgr_free_bufs(struct smc_link_group *lgr, bool is_rmb) 299 { 300 struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK]; 301 struct smc_buf_desc *buf_desc, *bf_desc; 302 struct list_head *buf_list; 303 int i; 304 305 for (i = 0; i < SMC_RMBE_SIZES; i++) { 306 if (is_rmb) 307 buf_list = &lgr->rmbs[i]; 308 else 309 buf_list = &lgr->sndbufs[i]; 310 list_for_each_entry_safe(buf_desc, bf_desc, buf_list, 311 list) { 312 list_del(&buf_desc->list); 313 smc_buf_free(buf_desc, lnk, is_rmb); 314 } 315 } 316 } 317 318 static void smc_lgr_free_bufs(struct smc_link_group *lgr) 319 { 320 /* free send buffers */ 321 __smc_lgr_free_bufs(lgr, false); 322 /* free rmbs */ 323 __smc_lgr_free_bufs(lgr, true); 324 } 325 326 /* remove a link group */ 327 void smc_lgr_free(struct smc_link_group *lgr) 328 { 329 smc_llc_link_flush(&lgr->lnk[SMC_SINGLE_LINK]); 330 smc_lgr_free_bufs(lgr); 331 smc_link_clear(&lgr->lnk[SMC_SINGLE_LINK]); 332 kfree(lgr); 333 } 334 335 void smc_lgr_forget(struct smc_link_group *lgr) 336 { 337 spin_lock_bh(&smc_lgr_list.lock); 338 /* do not use this link group for new connections */ 339 if (!list_empty(&lgr->list)) 340 list_del_init(&lgr->list); 341 spin_unlock_bh(&smc_lgr_list.lock); 342 } 343 344 /* terminate linkgroup abnormally */ 345 void smc_lgr_terminate(struct smc_link_group *lgr) 346 { 347 struct smc_connection *conn; 348 struct smc_sock *smc; 349 struct rb_node *node; 350 351 smc_lgr_forget(lgr); 352 smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]); 353 354 write_lock_bh(&lgr->conns_lock); 355 node = rb_first(&lgr->conns_all); 356 while (node) { 357 conn = rb_entry(node, struct smc_connection, alert_node); 358 smc = container_of(conn, struct smc_sock, conn); 359 sock_hold(&smc->sk); /* sock_put in close work */ 360 conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1; 361 __smc_lgr_unregister_conn(conn); 362 write_unlock_bh(&lgr->conns_lock); 363 if (!schedule_work(&conn->close_work)) 364 sock_put(&smc->sk); 365 write_lock_bh(&lgr->conns_lock); 366 node = rb_first(&lgr->conns_all); 367 } 368 write_unlock_bh(&lgr->conns_lock); 369 wake_up(&lgr->lnk[SMC_SINGLE_LINK].wr_reg_wait); 370 smc_lgr_schedule_free_work(lgr); 371 } 372 373 /* Determine vlan of internal TCP socket. 374 * @vlan_id: address to store the determined vlan id into 375 */ 376 static int smc_vlan_by_tcpsk(struct socket *clcsock, unsigned short *vlan_id) 377 { 378 struct dst_entry *dst = sk_dst_get(clcsock->sk); 379 struct net_device *ndev; 380 int i, nest_lvl, rc = 0; 381 382 *vlan_id = 0; 383 if (!dst) { 384 rc = -ENOTCONN; 385 goto out; 386 } 387 if (!dst->dev) { 388 rc = -ENODEV; 389 goto out_rel; 390 } 391 392 ndev = dst->dev; 393 if (is_vlan_dev(ndev)) { 394 *vlan_id = vlan_dev_vlan_id(ndev); 395 goto out_rel; 396 } 397 398 rtnl_lock(); 399 nest_lvl = dev_get_nest_level(ndev); 400 for (i = 0; i < nest_lvl; i++) { 401 struct list_head *lower = &ndev->adj_list.lower; 402 403 if (list_empty(lower)) 404 break; 405 lower = lower->next; 406 ndev = (struct net_device *)netdev_lower_get_next(ndev, &lower); 407 if (is_vlan_dev(ndev)) { 408 *vlan_id = vlan_dev_vlan_id(ndev); 409 break; 410 } 411 } 412 rtnl_unlock(); 413 414 out_rel: 415 dst_release(dst); 416 out: 417 return rc; 418 } 419 420 /* determine the link gid matching the vlan id of the link group */ 421 static int smc_link_determine_gid(struct smc_link_group *lgr) 422 { 423 struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK]; 424 struct ib_gid_attr gattr; 425 union ib_gid gid; 426 int i; 427 428 if (!lgr->vlan_id) { 429 lnk->gid = lnk->smcibdev->gid[lnk->ibport - 1]; 430 return 0; 431 } 432 433 for (i = 0; i < lnk->smcibdev->pattr[lnk->ibport - 1].gid_tbl_len; 434 i++) { 435 if (ib_query_gid(lnk->smcibdev->ibdev, lnk->ibport, i, &gid, 436 &gattr)) 437 continue; 438 if (gattr.ndev) { 439 if (is_vlan_dev(gattr.ndev) && 440 vlan_dev_vlan_id(gattr.ndev) == lgr->vlan_id) { 441 lnk->gid = gid; 442 dev_put(gattr.ndev); 443 return 0; 444 } 445 dev_put(gattr.ndev); 446 } 447 } 448 return -ENODEV; 449 } 450 451 /* create a new SMC connection (and a new link group if necessary) */ 452 int smc_conn_create(struct smc_sock *smc, 453 struct smc_ib_device *smcibdev, u8 ibport, 454 struct smc_clc_msg_local *lcl, int srv_first_contact) 455 { 456 struct smc_connection *conn = &smc->conn; 457 struct smc_link_group *lgr; 458 unsigned short vlan_id; 459 enum smc_lgr_role role; 460 int local_contact = SMC_FIRST_CONTACT; 461 int rc = 0; 462 463 role = smc->listen_smc ? SMC_SERV : SMC_CLNT; 464 rc = smc_vlan_by_tcpsk(smc->clcsock, &vlan_id); 465 if (rc) 466 return rc; 467 468 if ((role == SMC_CLNT) && srv_first_contact) 469 /* create new link group as well */ 470 goto create; 471 472 /* determine if an existing link group can be reused */ 473 spin_lock_bh(&smc_lgr_list.lock); 474 list_for_each_entry(lgr, &smc_lgr_list.list, list) { 475 write_lock_bh(&lgr->conns_lock); 476 if (!memcmp(lgr->peer_systemid, lcl->id_for_peer, 477 SMC_SYSTEMID_LEN) && 478 !memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_gid, &lcl->gid, 479 SMC_GID_SIZE) && 480 !memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_mac, lcl->mac, 481 sizeof(lcl->mac)) && 482 !lgr->sync_err && 483 (lgr->role == role) && 484 (lgr->vlan_id == vlan_id) && 485 ((role == SMC_CLNT) || 486 (lgr->conns_num < SMC_RMBS_PER_LGR_MAX))) { 487 /* link group found */ 488 local_contact = SMC_REUSE_CONTACT; 489 conn->lgr = lgr; 490 smc_lgr_register_conn(conn); /* add smc conn to lgr */ 491 write_unlock_bh(&lgr->conns_lock); 492 break; 493 } 494 write_unlock_bh(&lgr->conns_lock); 495 } 496 spin_unlock_bh(&smc_lgr_list.lock); 497 498 if (role == SMC_CLNT && !srv_first_contact && 499 (local_contact == SMC_FIRST_CONTACT)) { 500 /* Server reuses a link group, but Client wants to start 501 * a new one 502 * send out_of_sync decline, reason synchr. error 503 */ 504 return -ENOLINK; 505 } 506 507 create: 508 if (local_contact == SMC_FIRST_CONTACT) { 509 rc = smc_lgr_create(smc, smcibdev, ibport, 510 lcl->id_for_peer, vlan_id); 511 if (rc) 512 goto out; 513 smc_lgr_register_conn(conn); /* add smc conn to lgr */ 514 rc = smc_link_determine_gid(conn->lgr); 515 } 516 conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE; 517 conn->local_tx_ctrl.len = SMC_WR_TX_SIZE; 518 #ifndef KERNEL_HAS_ATOMIC64 519 spin_lock_init(&conn->acurs_lock); 520 #endif 521 522 out: 523 return rc ? rc : local_contact; 524 } 525 526 /* try to reuse a sndbuf or rmb description slot for a certain 527 * buffer size; if not available, return NULL 528 */ 529 static inline 530 struct smc_buf_desc *smc_buf_get_slot(struct smc_link_group *lgr, 531 int compressed_bufsize, 532 rwlock_t *lock, 533 struct list_head *buf_list) 534 { 535 struct smc_buf_desc *buf_slot; 536 537 read_lock_bh(lock); 538 list_for_each_entry(buf_slot, buf_list, list) { 539 if (cmpxchg(&buf_slot->used, 0, 1) == 0) { 540 read_unlock_bh(lock); 541 return buf_slot; 542 } 543 } 544 read_unlock_bh(lock); 545 return NULL; 546 } 547 548 /* one of the conditions for announcing a receiver's current window size is 549 * that it "results in a minimum increase in the window size of 10% of the 550 * receive buffer space" [RFC7609] 551 */ 552 static inline int smc_rmb_wnd_update_limit(int rmbe_size) 553 { 554 return min_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2); 555 } 556 557 static struct smc_buf_desc *smc_new_buf_create(struct smc_link_group *lgr, 558 bool is_rmb, int bufsize) 559 { 560 struct smc_buf_desc *buf_desc; 561 struct smc_link *lnk; 562 int rc; 563 564 /* try to alloc a new buffer */ 565 buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL); 566 if (!buf_desc) 567 return ERR_PTR(-ENOMEM); 568 569 buf_desc->order = get_order(bufsize); 570 buf_desc->pages = alloc_pages(GFP_KERNEL | __GFP_NOWARN | 571 __GFP_NOMEMALLOC | __GFP_COMP | 572 __GFP_NORETRY | __GFP_ZERO, 573 buf_desc->order); 574 if (!buf_desc->pages) { 575 kfree(buf_desc); 576 return ERR_PTR(-EAGAIN); 577 } 578 buf_desc->cpu_addr = (void *)page_address(buf_desc->pages); 579 580 /* build the sg table from the pages */ 581 lnk = &lgr->lnk[SMC_SINGLE_LINK]; 582 rc = sg_alloc_table(&buf_desc->sgt[SMC_SINGLE_LINK], 1, 583 GFP_KERNEL); 584 if (rc) { 585 smc_buf_free(buf_desc, lnk, is_rmb); 586 return ERR_PTR(rc); 587 } 588 sg_set_buf(buf_desc->sgt[SMC_SINGLE_LINK].sgl, 589 buf_desc->cpu_addr, bufsize); 590 591 /* map sg table to DMA address */ 592 rc = smc_ib_buf_map_sg(lnk->smcibdev, buf_desc, 593 is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE); 594 /* SMC protocol depends on mapping to one DMA address only */ 595 if (rc != 1) { 596 smc_buf_free(buf_desc, lnk, is_rmb); 597 return ERR_PTR(-EAGAIN); 598 } 599 600 /* create a new memory region for the RMB */ 601 if (is_rmb) { 602 rc = smc_ib_get_memory_region(lnk->roce_pd, 603 IB_ACCESS_REMOTE_WRITE | 604 IB_ACCESS_LOCAL_WRITE, 605 buf_desc); 606 if (rc) { 607 smc_buf_free(buf_desc, lnk, is_rmb); 608 return ERR_PTR(rc); 609 } 610 } 611 612 return buf_desc; 613 } 614 615 static int __smc_buf_create(struct smc_sock *smc, bool is_rmb) 616 { 617 struct smc_connection *conn = &smc->conn; 618 struct smc_link_group *lgr = conn->lgr; 619 struct smc_buf_desc *buf_desc = ERR_PTR(-ENOMEM); 620 struct list_head *buf_list; 621 int bufsize, bufsize_short; 622 int sk_buf_size; 623 rwlock_t *lock; 624 625 if (is_rmb) 626 /* use socket recv buffer size (w/o overhead) as start value */ 627 sk_buf_size = smc->sk.sk_rcvbuf / 2; 628 else 629 /* use socket send buffer size (w/o overhead) as start value */ 630 sk_buf_size = smc->sk.sk_sndbuf / 2; 631 632 for (bufsize_short = smc_compress_bufsize(sk_buf_size); 633 bufsize_short >= 0; bufsize_short--) { 634 635 if (is_rmb) { 636 lock = &lgr->rmbs_lock; 637 buf_list = &lgr->rmbs[bufsize_short]; 638 } else { 639 lock = &lgr->sndbufs_lock; 640 buf_list = &lgr->sndbufs[bufsize_short]; 641 } 642 bufsize = smc_uncompress_bufsize(bufsize_short); 643 if ((1 << get_order(bufsize)) > SG_MAX_SINGLE_ALLOC) 644 continue; 645 646 /* check for reusable slot in the link group */ 647 buf_desc = smc_buf_get_slot(lgr, bufsize_short, lock, buf_list); 648 if (buf_desc) { 649 memset(buf_desc->cpu_addr, 0, bufsize); 650 break; /* found reusable slot */ 651 } 652 653 buf_desc = smc_new_buf_create(lgr, is_rmb, bufsize); 654 if (PTR_ERR(buf_desc) == -ENOMEM) 655 break; 656 if (IS_ERR(buf_desc)) 657 continue; 658 659 buf_desc->used = 1; 660 write_lock_bh(lock); 661 list_add(&buf_desc->list, buf_list); 662 write_unlock_bh(lock); 663 break; /* found */ 664 } 665 666 if (IS_ERR(buf_desc)) 667 return -ENOMEM; 668 669 if (is_rmb) { 670 conn->rmb_desc = buf_desc; 671 conn->rmbe_size = bufsize; 672 conn->rmbe_size_short = bufsize_short; 673 smc->sk.sk_rcvbuf = bufsize * 2; 674 atomic_set(&conn->bytes_to_rcv, 0); 675 conn->rmbe_update_limit = smc_rmb_wnd_update_limit(bufsize); 676 } else { 677 conn->sndbuf_desc = buf_desc; 678 conn->sndbuf_size = bufsize; 679 smc->sk.sk_sndbuf = bufsize * 2; 680 atomic_set(&conn->sndbuf_space, bufsize); 681 } 682 return 0; 683 } 684 685 void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn) 686 { 687 struct smc_link_group *lgr = conn->lgr; 688 689 smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev, 690 conn->sndbuf_desc, DMA_TO_DEVICE); 691 } 692 693 void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn) 694 { 695 struct smc_link_group *lgr = conn->lgr; 696 697 smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev, 698 conn->sndbuf_desc, DMA_TO_DEVICE); 699 } 700 701 void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn) 702 { 703 struct smc_link_group *lgr = conn->lgr; 704 705 smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev, 706 conn->rmb_desc, DMA_FROM_DEVICE); 707 } 708 709 void smc_rmb_sync_sg_for_device(struct smc_connection *conn) 710 { 711 struct smc_link_group *lgr = conn->lgr; 712 713 smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev, 714 conn->rmb_desc, DMA_FROM_DEVICE); 715 } 716 717 /* create the send and receive buffer for an SMC socket; 718 * receive buffers are called RMBs; 719 * (even though the SMC protocol allows more than one RMB-element per RMB, 720 * the Linux implementation uses just one RMB-element per RMB, i.e. uses an 721 * extra RMB for every connection in a link group 722 */ 723 int smc_buf_create(struct smc_sock *smc) 724 { 725 int rc; 726 727 /* create send buffer */ 728 rc = __smc_buf_create(smc, false); 729 if (rc) 730 return rc; 731 /* create rmb */ 732 rc = __smc_buf_create(smc, true); 733 if (rc) 734 smc_buf_free(smc->conn.sndbuf_desc, 735 &smc->conn.lgr->lnk[SMC_SINGLE_LINK], false); 736 return rc; 737 } 738 739 static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr) 740 { 741 int i; 742 743 for_each_clear_bit(i, lgr->rtokens_used_mask, SMC_RMBS_PER_LGR_MAX) { 744 if (!test_and_set_bit(i, lgr->rtokens_used_mask)) 745 return i; 746 } 747 return -ENOSPC; 748 } 749 750 /* add a new rtoken from peer */ 751 int smc_rtoken_add(struct smc_link_group *lgr, __be64 nw_vaddr, __be32 nw_rkey) 752 { 753 u64 dma_addr = be64_to_cpu(nw_vaddr); 754 u32 rkey = ntohl(nw_rkey); 755 int i; 756 757 for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) { 758 if ((lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey) && 759 (lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr == dma_addr) && 760 test_bit(i, lgr->rtokens_used_mask)) { 761 /* already in list */ 762 return i; 763 } 764 } 765 i = smc_rmb_reserve_rtoken_idx(lgr); 766 if (i < 0) 767 return i; 768 lgr->rtokens[i][SMC_SINGLE_LINK].rkey = rkey; 769 lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr = dma_addr; 770 return i; 771 } 772 773 /* delete an rtoken */ 774 int smc_rtoken_delete(struct smc_link_group *lgr, __be32 nw_rkey) 775 { 776 u32 rkey = ntohl(nw_rkey); 777 int i; 778 779 for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) { 780 if (lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey && 781 test_bit(i, lgr->rtokens_used_mask)) { 782 lgr->rtokens[i][SMC_SINGLE_LINK].rkey = 0; 783 lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr = 0; 784 785 clear_bit(i, lgr->rtokens_used_mask); 786 return 0; 787 } 788 } 789 return -ENOENT; 790 } 791 792 /* save rkey and dma_addr received from peer during clc handshake */ 793 int smc_rmb_rtoken_handling(struct smc_connection *conn, 794 struct smc_clc_msg_accept_confirm *clc) 795 { 796 conn->rtoken_idx = smc_rtoken_add(conn->lgr, clc->rmb_dma_addr, 797 clc->rmb_rkey); 798 if (conn->rtoken_idx < 0) 799 return conn->rtoken_idx; 800 return 0; 801 } 802