1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Shared Memory Communications over RDMA (SMC-R) and RoCE 4 * 5 * Basic Transport Functions exploiting Infiniband API 6 * 7 * Copyright IBM Corp. 2016 8 * 9 * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com> 10 */ 11 12 #include <linux/socket.h> 13 #include <linux/if_vlan.h> 14 #include <linux/random.h> 15 #include <linux/workqueue.h> 16 #include <net/tcp.h> 17 #include <net/sock.h> 18 #include <rdma/ib_verbs.h> 19 20 #include "smc.h" 21 #include "smc_clc.h" 22 #include "smc_core.h" 23 #include "smc_ib.h" 24 #include "smc_wr.h" 25 #include "smc_llc.h" 26 #include "smc_cdc.h" 27 #include "smc_close.h" 28 29 #define SMC_LGR_NUM_INCR 256 30 #define SMC_LGR_FREE_DELAY_SERV (600 * HZ) 31 #define SMC_LGR_FREE_DELAY_CLNT (SMC_LGR_FREE_DELAY_SERV + 10) 32 33 static u32 smc_lgr_num; /* unique link group number */ 34 35 static void smc_buf_free(struct smc_buf_desc *buf_desc, struct smc_link *lnk, 36 bool is_rmb); 37 38 static void smc_lgr_schedule_free_work(struct smc_link_group *lgr) 39 { 40 /* client link group creation always follows the server link group 41 * creation. For client use a somewhat higher removal delay time, 42 * otherwise there is a risk of out-of-sync link groups. 43 */ 44 mod_delayed_work(system_wq, &lgr->free_work, 45 lgr->role == SMC_CLNT ? SMC_LGR_FREE_DELAY_CLNT : 46 SMC_LGR_FREE_DELAY_SERV); 47 } 48 49 /* Register connection's alert token in our lookup structure. 50 * To use rbtrees we have to implement our own insert core. 51 * Requires @conns_lock 52 * @smc connection to register 53 * Returns 0 on success, != otherwise. 54 */ 55 static void smc_lgr_add_alert_token(struct smc_connection *conn) 56 { 57 struct rb_node **link, *parent = NULL; 58 u32 token = conn->alert_token_local; 59 60 link = &conn->lgr->conns_all.rb_node; 61 while (*link) { 62 struct smc_connection *cur = rb_entry(*link, 63 struct smc_connection, alert_node); 64 65 parent = *link; 66 if (cur->alert_token_local > token) 67 link = &parent->rb_left; 68 else 69 link = &parent->rb_right; 70 } 71 /* Put the new node there */ 72 rb_link_node(&conn->alert_node, parent, link); 73 rb_insert_color(&conn->alert_node, &conn->lgr->conns_all); 74 } 75 76 /* Register connection in link group by assigning an alert token 77 * registered in a search tree. 78 * Requires @conns_lock 79 * Note that '0' is a reserved value and not assigned. 80 */ 81 static void smc_lgr_register_conn(struct smc_connection *conn) 82 { 83 struct smc_sock *smc = container_of(conn, struct smc_sock, conn); 84 static atomic_t nexttoken = ATOMIC_INIT(0); 85 86 /* find a new alert_token_local value not yet used by some connection 87 * in this link group 88 */ 89 sock_hold(&smc->sk); /* sock_put in smc_lgr_unregister_conn() */ 90 while (!conn->alert_token_local) { 91 conn->alert_token_local = atomic_inc_return(&nexttoken); 92 if (smc_lgr_find_conn(conn->alert_token_local, conn->lgr)) 93 conn->alert_token_local = 0; 94 } 95 smc_lgr_add_alert_token(conn); 96 conn->lgr->conns_num++; 97 } 98 99 /* Unregister connection and reset the alert token of the given connection< 100 */ 101 static void __smc_lgr_unregister_conn(struct smc_connection *conn) 102 { 103 struct smc_sock *smc = container_of(conn, struct smc_sock, conn); 104 struct smc_link_group *lgr = conn->lgr; 105 106 rb_erase(&conn->alert_node, &lgr->conns_all); 107 lgr->conns_num--; 108 conn->alert_token_local = 0; 109 conn->lgr = NULL; 110 sock_put(&smc->sk); /* sock_hold in smc_lgr_register_conn() */ 111 } 112 113 /* Unregister connection and trigger lgr freeing if applicable 114 */ 115 static void smc_lgr_unregister_conn(struct smc_connection *conn) 116 { 117 struct smc_link_group *lgr = conn->lgr; 118 int reduced = 0; 119 120 write_lock_bh(&lgr->conns_lock); 121 if (conn->alert_token_local) { 122 reduced = 1; 123 __smc_lgr_unregister_conn(conn); 124 } 125 write_unlock_bh(&lgr->conns_lock); 126 if (!reduced || lgr->conns_num) 127 return; 128 smc_lgr_schedule_free_work(lgr); 129 } 130 131 static void smc_lgr_free_work(struct work_struct *work) 132 { 133 struct smc_link_group *lgr = container_of(to_delayed_work(work), 134 struct smc_link_group, 135 free_work); 136 bool conns; 137 138 spin_lock_bh(&smc_lgr_list.lock); 139 if (list_empty(&lgr->list)) 140 goto free; 141 read_lock_bh(&lgr->conns_lock); 142 conns = RB_EMPTY_ROOT(&lgr->conns_all); 143 read_unlock_bh(&lgr->conns_lock); 144 if (!conns) { /* number of lgr connections is no longer zero */ 145 spin_unlock_bh(&smc_lgr_list.lock); 146 return; 147 } 148 list_del_init(&lgr->list); /* remove from smc_lgr_list */ 149 free: 150 spin_unlock_bh(&smc_lgr_list.lock); 151 if (!delayed_work_pending(&lgr->free_work)) { 152 if (lgr->lnk[SMC_SINGLE_LINK].state != SMC_LNK_INACTIVE) 153 smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]); 154 smc_lgr_free(lgr); 155 } 156 } 157 158 /* create a new SMC link group */ 159 static int smc_lgr_create(struct smc_sock *smc, 160 struct smc_ib_device *smcibdev, u8 ibport, 161 char *peer_systemid, unsigned short vlan_id) 162 { 163 struct smc_link_group *lgr; 164 struct smc_link *lnk; 165 u8 rndvec[3]; 166 int rc = 0; 167 int i; 168 169 lgr = kzalloc(sizeof(*lgr), GFP_KERNEL); 170 if (!lgr) { 171 rc = -ENOMEM; 172 goto out; 173 } 174 lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT; 175 lgr->sync_err = 0; 176 memcpy(lgr->peer_systemid, peer_systemid, SMC_SYSTEMID_LEN); 177 lgr->vlan_id = vlan_id; 178 rwlock_init(&lgr->sndbufs_lock); 179 rwlock_init(&lgr->rmbs_lock); 180 for (i = 0; i < SMC_RMBE_SIZES; i++) { 181 INIT_LIST_HEAD(&lgr->sndbufs[i]); 182 INIT_LIST_HEAD(&lgr->rmbs[i]); 183 } 184 smc_lgr_num += SMC_LGR_NUM_INCR; 185 memcpy(&lgr->id, (u8 *)&smc_lgr_num, SMC_LGR_ID_SIZE); 186 INIT_DELAYED_WORK(&lgr->free_work, smc_lgr_free_work); 187 lgr->conns_all = RB_ROOT; 188 189 lnk = &lgr->lnk[SMC_SINGLE_LINK]; 190 /* initialize link */ 191 lnk->state = SMC_LNK_ACTIVATING; 192 lnk->link_id = SMC_SINGLE_LINK; 193 lnk->smcibdev = smcibdev; 194 lnk->ibport = ibport; 195 lnk->path_mtu = smcibdev->pattr[ibport - 1].active_mtu; 196 if (!smcibdev->initialized) 197 smc_ib_setup_per_ibdev(smcibdev); 198 get_random_bytes(rndvec, sizeof(rndvec)); 199 lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) + (rndvec[2] << 16); 200 rc = smc_llc_link_init(lnk); 201 if (rc) 202 goto free_lgr; 203 rc = smc_wr_alloc_link_mem(lnk); 204 if (rc) 205 goto clear_llc_lnk; 206 rc = smc_ib_create_protection_domain(lnk); 207 if (rc) 208 goto free_link_mem; 209 rc = smc_ib_create_queue_pair(lnk); 210 if (rc) 211 goto dealloc_pd; 212 rc = smc_wr_create_link(lnk); 213 if (rc) 214 goto destroy_qp; 215 216 smc->conn.lgr = lgr; 217 rwlock_init(&lgr->conns_lock); 218 spin_lock_bh(&smc_lgr_list.lock); 219 list_add(&lgr->list, &smc_lgr_list.list); 220 spin_unlock_bh(&smc_lgr_list.lock); 221 return 0; 222 223 destroy_qp: 224 smc_ib_destroy_queue_pair(lnk); 225 dealloc_pd: 226 smc_ib_dealloc_protection_domain(lnk); 227 free_link_mem: 228 smc_wr_free_link_mem(lnk); 229 clear_llc_lnk: 230 smc_llc_link_clear(lnk); 231 free_lgr: 232 kfree(lgr); 233 out: 234 return rc; 235 } 236 237 static void smc_buf_unuse(struct smc_connection *conn) 238 { 239 if (conn->sndbuf_desc) { 240 conn->sndbuf_desc->used = 0; 241 conn->sndbuf_size = 0; 242 } 243 if (conn->rmb_desc) { 244 if (!conn->rmb_desc->regerr) { 245 conn->rmb_desc->reused = 1; 246 conn->rmb_desc->used = 0; 247 conn->rmbe_size = 0; 248 } else { 249 /* buf registration failed, reuse not possible */ 250 struct smc_link_group *lgr = conn->lgr; 251 struct smc_link *lnk; 252 253 write_lock_bh(&lgr->rmbs_lock); 254 list_del(&conn->rmb_desc->list); 255 write_unlock_bh(&lgr->rmbs_lock); 256 257 lnk = &lgr->lnk[SMC_SINGLE_LINK]; 258 smc_buf_free(conn->rmb_desc, lnk, true); 259 } 260 } 261 } 262 263 /* remove a finished connection from its link group */ 264 void smc_conn_free(struct smc_connection *conn) 265 { 266 if (!conn->lgr) 267 return; 268 smc_cdc_tx_dismiss_slots(conn); 269 smc_lgr_unregister_conn(conn); 270 smc_buf_unuse(conn); 271 } 272 273 static void smc_link_clear(struct smc_link *lnk) 274 { 275 lnk->peer_qpn = 0; 276 smc_llc_link_clear(lnk); 277 smc_ib_modify_qp_reset(lnk); 278 smc_wr_free_link(lnk); 279 smc_ib_destroy_queue_pair(lnk); 280 smc_ib_dealloc_protection_domain(lnk); 281 smc_wr_free_link_mem(lnk); 282 } 283 284 static void smc_buf_free(struct smc_buf_desc *buf_desc, struct smc_link *lnk, 285 bool is_rmb) 286 { 287 if (is_rmb) { 288 if (buf_desc->mr_rx[SMC_SINGLE_LINK]) 289 smc_ib_put_memory_region( 290 buf_desc->mr_rx[SMC_SINGLE_LINK]); 291 smc_ib_buf_unmap_sg(lnk->smcibdev, buf_desc, 292 DMA_FROM_DEVICE); 293 } else { 294 smc_ib_buf_unmap_sg(lnk->smcibdev, buf_desc, 295 DMA_TO_DEVICE); 296 } 297 sg_free_table(&buf_desc->sgt[SMC_SINGLE_LINK]); 298 if (buf_desc->pages) 299 __free_pages(buf_desc->pages, buf_desc->order); 300 kfree(buf_desc); 301 } 302 303 static void __smc_lgr_free_bufs(struct smc_link_group *lgr, bool is_rmb) 304 { 305 struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK]; 306 struct smc_buf_desc *buf_desc, *bf_desc; 307 struct list_head *buf_list; 308 int i; 309 310 for (i = 0; i < SMC_RMBE_SIZES; i++) { 311 if (is_rmb) 312 buf_list = &lgr->rmbs[i]; 313 else 314 buf_list = &lgr->sndbufs[i]; 315 list_for_each_entry_safe(buf_desc, bf_desc, buf_list, 316 list) { 317 list_del(&buf_desc->list); 318 smc_buf_free(buf_desc, lnk, is_rmb); 319 } 320 } 321 } 322 323 static void smc_lgr_free_bufs(struct smc_link_group *lgr) 324 { 325 /* free send buffers */ 326 __smc_lgr_free_bufs(lgr, false); 327 /* free rmbs */ 328 __smc_lgr_free_bufs(lgr, true); 329 } 330 331 /* remove a link group */ 332 void smc_lgr_free(struct smc_link_group *lgr) 333 { 334 smc_lgr_free_bufs(lgr); 335 smc_link_clear(&lgr->lnk[SMC_SINGLE_LINK]); 336 kfree(lgr); 337 } 338 339 void smc_lgr_forget(struct smc_link_group *lgr) 340 { 341 spin_lock_bh(&smc_lgr_list.lock); 342 /* do not use this link group for new connections */ 343 if (!list_empty(&lgr->list)) 344 list_del_init(&lgr->list); 345 spin_unlock_bh(&smc_lgr_list.lock); 346 } 347 348 /* terminate linkgroup abnormally */ 349 void smc_lgr_terminate(struct smc_link_group *lgr) 350 { 351 struct smc_connection *conn; 352 struct smc_sock *smc; 353 struct rb_node *node; 354 355 if (lgr->terminating) 356 return; /* lgr already terminating */ 357 lgr->terminating = 1; 358 smc_lgr_forget(lgr); 359 smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]); 360 361 write_lock_bh(&lgr->conns_lock); 362 node = rb_first(&lgr->conns_all); 363 while (node) { 364 conn = rb_entry(node, struct smc_connection, alert_node); 365 smc = container_of(conn, struct smc_sock, conn); 366 sock_hold(&smc->sk); /* sock_put in close work */ 367 conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1; 368 __smc_lgr_unregister_conn(conn); 369 write_unlock_bh(&lgr->conns_lock); 370 if (!schedule_work(&conn->close_work)) 371 sock_put(&smc->sk); 372 write_lock_bh(&lgr->conns_lock); 373 node = rb_first(&lgr->conns_all); 374 } 375 write_unlock_bh(&lgr->conns_lock); 376 wake_up(&lgr->lnk[SMC_SINGLE_LINK].wr_reg_wait); 377 smc_lgr_schedule_free_work(lgr); 378 } 379 380 /* Determine vlan of internal TCP socket. 381 * @vlan_id: address to store the determined vlan id into 382 */ 383 static int smc_vlan_by_tcpsk(struct socket *clcsock, unsigned short *vlan_id) 384 { 385 struct dst_entry *dst = sk_dst_get(clcsock->sk); 386 struct net_device *ndev; 387 int i, nest_lvl, rc = 0; 388 389 *vlan_id = 0; 390 if (!dst) { 391 rc = -ENOTCONN; 392 goto out; 393 } 394 if (!dst->dev) { 395 rc = -ENODEV; 396 goto out_rel; 397 } 398 399 ndev = dst->dev; 400 if (is_vlan_dev(ndev)) { 401 *vlan_id = vlan_dev_vlan_id(ndev); 402 goto out_rel; 403 } 404 405 rtnl_lock(); 406 nest_lvl = dev_get_nest_level(ndev); 407 for (i = 0; i < nest_lvl; i++) { 408 struct list_head *lower = &ndev->adj_list.lower; 409 410 if (list_empty(lower)) 411 break; 412 lower = lower->next; 413 ndev = (struct net_device *)netdev_lower_get_next(ndev, &lower); 414 if (is_vlan_dev(ndev)) { 415 *vlan_id = vlan_dev_vlan_id(ndev); 416 break; 417 } 418 } 419 rtnl_unlock(); 420 421 out_rel: 422 dst_release(dst); 423 out: 424 return rc; 425 } 426 427 /* determine the link gid matching the vlan id of the link group */ 428 static int smc_link_determine_gid(struct smc_link_group *lgr) 429 { 430 struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK]; 431 struct ib_gid_attr gattr; 432 union ib_gid gid; 433 int i; 434 435 if (!lgr->vlan_id) { 436 lnk->gid = lnk->smcibdev->gid[lnk->ibport - 1]; 437 return 0; 438 } 439 440 for (i = 0; i < lnk->smcibdev->pattr[lnk->ibport - 1].gid_tbl_len; 441 i++) { 442 if (ib_query_gid(lnk->smcibdev->ibdev, lnk->ibport, i, &gid, 443 &gattr)) 444 continue; 445 if (gattr.ndev) { 446 if (is_vlan_dev(gattr.ndev) && 447 vlan_dev_vlan_id(gattr.ndev) == lgr->vlan_id) { 448 lnk->gid = gid; 449 dev_put(gattr.ndev); 450 return 0; 451 } 452 dev_put(gattr.ndev); 453 } 454 } 455 return -ENODEV; 456 } 457 458 /* create a new SMC connection (and a new link group if necessary) */ 459 int smc_conn_create(struct smc_sock *smc, 460 struct smc_ib_device *smcibdev, u8 ibport, 461 struct smc_clc_msg_local *lcl, int srv_first_contact) 462 { 463 struct smc_connection *conn = &smc->conn; 464 struct smc_link_group *lgr; 465 unsigned short vlan_id; 466 enum smc_lgr_role role; 467 int local_contact = SMC_FIRST_CONTACT; 468 int rc = 0; 469 470 role = smc->listen_smc ? SMC_SERV : SMC_CLNT; 471 rc = smc_vlan_by_tcpsk(smc->clcsock, &vlan_id); 472 if (rc) 473 return rc; 474 475 if ((role == SMC_CLNT) && srv_first_contact) 476 /* create new link group as well */ 477 goto create; 478 479 /* determine if an existing link group can be reused */ 480 spin_lock_bh(&smc_lgr_list.lock); 481 list_for_each_entry(lgr, &smc_lgr_list.list, list) { 482 write_lock_bh(&lgr->conns_lock); 483 if (!memcmp(lgr->peer_systemid, lcl->id_for_peer, 484 SMC_SYSTEMID_LEN) && 485 !memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_gid, &lcl->gid, 486 SMC_GID_SIZE) && 487 !memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_mac, lcl->mac, 488 sizeof(lcl->mac)) && 489 !lgr->sync_err && 490 (lgr->role == role) && 491 (lgr->vlan_id == vlan_id) && 492 ((role == SMC_CLNT) || 493 (lgr->conns_num < SMC_RMBS_PER_LGR_MAX))) { 494 /* link group found */ 495 local_contact = SMC_REUSE_CONTACT; 496 conn->lgr = lgr; 497 smc_lgr_register_conn(conn); /* add smc conn to lgr */ 498 write_unlock_bh(&lgr->conns_lock); 499 break; 500 } 501 write_unlock_bh(&lgr->conns_lock); 502 } 503 spin_unlock_bh(&smc_lgr_list.lock); 504 505 if (role == SMC_CLNT && !srv_first_contact && 506 (local_contact == SMC_FIRST_CONTACT)) { 507 /* Server reuses a link group, but Client wants to start 508 * a new one 509 * send out_of_sync decline, reason synchr. error 510 */ 511 return -ENOLINK; 512 } 513 514 create: 515 if (local_contact == SMC_FIRST_CONTACT) { 516 rc = smc_lgr_create(smc, smcibdev, ibport, 517 lcl->id_for_peer, vlan_id); 518 if (rc) 519 goto out; 520 smc_lgr_register_conn(conn); /* add smc conn to lgr */ 521 rc = smc_link_determine_gid(conn->lgr); 522 } 523 conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE; 524 conn->local_tx_ctrl.len = SMC_WR_TX_SIZE; 525 #ifndef KERNEL_HAS_ATOMIC64 526 spin_lock_init(&conn->acurs_lock); 527 #endif 528 529 out: 530 return rc ? rc : local_contact; 531 } 532 533 /* try to reuse a sndbuf or rmb description slot for a certain 534 * buffer size; if not available, return NULL 535 */ 536 static inline 537 struct smc_buf_desc *smc_buf_get_slot(struct smc_link_group *lgr, 538 int compressed_bufsize, 539 rwlock_t *lock, 540 struct list_head *buf_list) 541 { 542 struct smc_buf_desc *buf_slot; 543 544 read_lock_bh(lock); 545 list_for_each_entry(buf_slot, buf_list, list) { 546 if (cmpxchg(&buf_slot->used, 0, 1) == 0) { 547 read_unlock_bh(lock); 548 return buf_slot; 549 } 550 } 551 read_unlock_bh(lock); 552 return NULL; 553 } 554 555 /* one of the conditions for announcing a receiver's current window size is 556 * that it "results in a minimum increase in the window size of 10% of the 557 * receive buffer space" [RFC7609] 558 */ 559 static inline int smc_rmb_wnd_update_limit(int rmbe_size) 560 { 561 return min_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2); 562 } 563 564 static struct smc_buf_desc *smc_new_buf_create(struct smc_link_group *lgr, 565 bool is_rmb, int bufsize) 566 { 567 struct smc_buf_desc *buf_desc; 568 struct smc_link *lnk; 569 int rc; 570 571 /* try to alloc a new buffer */ 572 buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL); 573 if (!buf_desc) 574 return ERR_PTR(-ENOMEM); 575 576 buf_desc->order = get_order(bufsize); 577 buf_desc->pages = alloc_pages(GFP_KERNEL | __GFP_NOWARN | 578 __GFP_NOMEMALLOC | __GFP_COMP | 579 __GFP_NORETRY | __GFP_ZERO, 580 buf_desc->order); 581 if (!buf_desc->pages) { 582 kfree(buf_desc); 583 return ERR_PTR(-EAGAIN); 584 } 585 buf_desc->cpu_addr = (void *)page_address(buf_desc->pages); 586 587 /* build the sg table from the pages */ 588 lnk = &lgr->lnk[SMC_SINGLE_LINK]; 589 rc = sg_alloc_table(&buf_desc->sgt[SMC_SINGLE_LINK], 1, 590 GFP_KERNEL); 591 if (rc) { 592 smc_buf_free(buf_desc, lnk, is_rmb); 593 return ERR_PTR(rc); 594 } 595 sg_set_buf(buf_desc->sgt[SMC_SINGLE_LINK].sgl, 596 buf_desc->cpu_addr, bufsize); 597 598 /* map sg table to DMA address */ 599 rc = smc_ib_buf_map_sg(lnk->smcibdev, buf_desc, 600 is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE); 601 /* SMC protocol depends on mapping to one DMA address only */ 602 if (rc != 1) { 603 smc_buf_free(buf_desc, lnk, is_rmb); 604 return ERR_PTR(-EAGAIN); 605 } 606 607 /* create a new memory region for the RMB */ 608 if (is_rmb) { 609 rc = smc_ib_get_memory_region(lnk->roce_pd, 610 IB_ACCESS_REMOTE_WRITE | 611 IB_ACCESS_LOCAL_WRITE, 612 buf_desc); 613 if (rc) { 614 smc_buf_free(buf_desc, lnk, is_rmb); 615 return ERR_PTR(rc); 616 } 617 } 618 619 return buf_desc; 620 } 621 622 static int __smc_buf_create(struct smc_sock *smc, bool is_rmb) 623 { 624 struct smc_connection *conn = &smc->conn; 625 struct smc_link_group *lgr = conn->lgr; 626 struct smc_buf_desc *buf_desc = ERR_PTR(-ENOMEM); 627 struct list_head *buf_list; 628 int bufsize, bufsize_short; 629 int sk_buf_size; 630 rwlock_t *lock; 631 632 if (is_rmb) 633 /* use socket recv buffer size (w/o overhead) as start value */ 634 sk_buf_size = smc->sk.sk_rcvbuf / 2; 635 else 636 /* use socket send buffer size (w/o overhead) as start value */ 637 sk_buf_size = smc->sk.sk_sndbuf / 2; 638 639 for (bufsize_short = smc_compress_bufsize(sk_buf_size); 640 bufsize_short >= 0; bufsize_short--) { 641 642 if (is_rmb) { 643 lock = &lgr->rmbs_lock; 644 buf_list = &lgr->rmbs[bufsize_short]; 645 } else { 646 lock = &lgr->sndbufs_lock; 647 buf_list = &lgr->sndbufs[bufsize_short]; 648 } 649 bufsize = smc_uncompress_bufsize(bufsize_short); 650 if ((1 << get_order(bufsize)) > SG_MAX_SINGLE_ALLOC) 651 continue; 652 653 /* check for reusable slot in the link group */ 654 buf_desc = smc_buf_get_slot(lgr, bufsize_short, lock, buf_list); 655 if (buf_desc) { 656 memset(buf_desc->cpu_addr, 0, bufsize); 657 break; /* found reusable slot */ 658 } 659 660 buf_desc = smc_new_buf_create(lgr, is_rmb, bufsize); 661 if (PTR_ERR(buf_desc) == -ENOMEM) 662 break; 663 if (IS_ERR(buf_desc)) 664 continue; 665 666 buf_desc->used = 1; 667 write_lock_bh(lock); 668 list_add(&buf_desc->list, buf_list); 669 write_unlock_bh(lock); 670 break; /* found */ 671 } 672 673 if (IS_ERR(buf_desc)) 674 return -ENOMEM; 675 676 if (is_rmb) { 677 conn->rmb_desc = buf_desc; 678 conn->rmbe_size = bufsize; 679 conn->rmbe_size_short = bufsize_short; 680 smc->sk.sk_rcvbuf = bufsize * 2; 681 atomic_set(&conn->bytes_to_rcv, 0); 682 conn->rmbe_update_limit = smc_rmb_wnd_update_limit(bufsize); 683 } else { 684 conn->sndbuf_desc = buf_desc; 685 conn->sndbuf_size = bufsize; 686 smc->sk.sk_sndbuf = bufsize * 2; 687 atomic_set(&conn->sndbuf_space, bufsize); 688 } 689 return 0; 690 } 691 692 void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn) 693 { 694 struct smc_link_group *lgr = conn->lgr; 695 696 smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev, 697 conn->sndbuf_desc, DMA_TO_DEVICE); 698 } 699 700 void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn) 701 { 702 struct smc_link_group *lgr = conn->lgr; 703 704 smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev, 705 conn->sndbuf_desc, DMA_TO_DEVICE); 706 } 707 708 void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn) 709 { 710 struct smc_link_group *lgr = conn->lgr; 711 712 smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev, 713 conn->rmb_desc, DMA_FROM_DEVICE); 714 } 715 716 void smc_rmb_sync_sg_for_device(struct smc_connection *conn) 717 { 718 struct smc_link_group *lgr = conn->lgr; 719 720 smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev, 721 conn->rmb_desc, DMA_FROM_DEVICE); 722 } 723 724 /* create the send and receive buffer for an SMC socket; 725 * receive buffers are called RMBs; 726 * (even though the SMC protocol allows more than one RMB-element per RMB, 727 * the Linux implementation uses just one RMB-element per RMB, i.e. uses an 728 * extra RMB for every connection in a link group 729 */ 730 int smc_buf_create(struct smc_sock *smc) 731 { 732 int rc; 733 734 /* create send buffer */ 735 rc = __smc_buf_create(smc, false); 736 if (rc) 737 return rc; 738 /* create rmb */ 739 rc = __smc_buf_create(smc, true); 740 if (rc) 741 smc_buf_free(smc->conn.sndbuf_desc, 742 &smc->conn.lgr->lnk[SMC_SINGLE_LINK], false); 743 return rc; 744 } 745 746 static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr) 747 { 748 int i; 749 750 for_each_clear_bit(i, lgr->rtokens_used_mask, SMC_RMBS_PER_LGR_MAX) { 751 if (!test_and_set_bit(i, lgr->rtokens_used_mask)) 752 return i; 753 } 754 return -ENOSPC; 755 } 756 757 /* add a new rtoken from peer */ 758 int smc_rtoken_add(struct smc_link_group *lgr, __be64 nw_vaddr, __be32 nw_rkey) 759 { 760 u64 dma_addr = be64_to_cpu(nw_vaddr); 761 u32 rkey = ntohl(nw_rkey); 762 int i; 763 764 for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) { 765 if ((lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey) && 766 (lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr == dma_addr) && 767 test_bit(i, lgr->rtokens_used_mask)) { 768 /* already in list */ 769 return i; 770 } 771 } 772 i = smc_rmb_reserve_rtoken_idx(lgr); 773 if (i < 0) 774 return i; 775 lgr->rtokens[i][SMC_SINGLE_LINK].rkey = rkey; 776 lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr = dma_addr; 777 return i; 778 } 779 780 /* delete an rtoken */ 781 int smc_rtoken_delete(struct smc_link_group *lgr, __be32 nw_rkey) 782 { 783 u32 rkey = ntohl(nw_rkey); 784 int i; 785 786 for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) { 787 if (lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey && 788 test_bit(i, lgr->rtokens_used_mask)) { 789 lgr->rtokens[i][SMC_SINGLE_LINK].rkey = 0; 790 lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr = 0; 791 792 clear_bit(i, lgr->rtokens_used_mask); 793 return 0; 794 } 795 } 796 return -ENOENT; 797 } 798 799 /* save rkey and dma_addr received from peer during clc handshake */ 800 int smc_rmb_rtoken_handling(struct smc_connection *conn, 801 struct smc_clc_msg_accept_confirm *clc) 802 { 803 conn->rtoken_idx = smc_rtoken_add(conn->lgr, clc->rmb_dma_addr, 804 clc->rmb_rkey); 805 if (conn->rtoken_idx < 0) 806 return conn->rtoken_idx; 807 return 0; 808 } 809