// SPDX-License-Identifier: GPL-2.0
/*
 * Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 * Basic Transport Functions exploiting Infiniband API
 *
 * Copyright IBM Corp. 2016
 *
 * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com>
 */

#include <linux/socket.h>
#include <linux/if_vlan.h>
#include <linux/random.h>
#include <linux/workqueue.h>
#include <net/tcp.h>
#include <net/sock.h>
#include <rdma/ib_verbs.h>

#include "smc.h"
#include "smc_clc.h"
#include "smc_core.h"
#include "smc_ib.h"
#include "smc_wr.h"
#include "smc_llc.h"
#include "smc_cdc.h"
#include "smc_close.h"

#define SMC_LGR_NUM_INCR	256
#define SMC_LGR_FREE_DELAY_SERV	(600 * HZ)
#define SMC_LGR_FREE_DELAY_CLNT	(SMC_LGR_FREE_DELAY_SERV + 10 * HZ)

static struct smc_lgr_list smc_lgr_list = {	/* established link groups */
	.lock = __SPIN_LOCK_UNLOCKED(smc_lgr_list.lock),
	.list = LIST_HEAD_INIT(smc_lgr_list.list),
	.num = 0,
};

static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
			 struct smc_buf_desc *buf_desc);

static void smc_lgr_schedule_free_work(struct smc_link_group *lgr)
{
	/* client link group creation always follows the server link group
	 * creation. For client use a somewhat higher removal delay time,
	 * otherwise there is a risk of out-of-sync link groups.
	 */
	mod_delayed_work(system_wq, &lgr->free_work,
			 lgr->role == SMC_CLNT ? SMC_LGR_FREE_DELAY_CLNT :
						 SMC_LGR_FREE_DELAY_SERV);
}

/* Register connection's alert token in our lookup structure.
 * To use rbtrees we have to implement our own insert core.
 * Requires @conns_lock
 * @smc		connection to register
 * Returns 0 on success, != 0 otherwise.
 */
static void smc_lgr_add_alert_token(struct smc_connection *conn)
{
	struct rb_node **link, *parent = NULL;
	u32 token = conn->alert_token_local;

	link = &conn->lgr->conns_all.rb_node;
	while (*link) {
		struct smc_connection *cur = rb_entry(*link,
					struct smc_connection, alert_node);

		parent = *link;
		if (cur->alert_token_local > token)
			link = &parent->rb_left;
		else
			link = &parent->rb_right;
	}
	/* Put the new node there */
	rb_link_node(&conn->alert_node, parent, link);
	rb_insert_color(&conn->alert_node, &conn->lgr->conns_all);
}

/* Register connection in link group by assigning an alert token
 * registered in a search tree.
 * Requires @conns_lock
 * Note that '0' is a reserved value and not assigned.
 */
static void smc_lgr_register_conn(struct smc_connection *conn)
{
	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
	static atomic_t nexttoken = ATOMIC_INIT(0);

	/* find a new alert_token_local value not yet used by some connection
	 * in this link group
	 */
	sock_hold(&smc->sk); /* sock_put in smc_lgr_unregister_conn() */
	while (!conn->alert_token_local) {
		conn->alert_token_local = atomic_inc_return(&nexttoken);
		if (smc_lgr_find_conn(conn->alert_token_local, conn->lgr))
			conn->alert_token_local = 0;
	}
	smc_lgr_add_alert_token(conn);
	conn->lgr->conns_num++;
}

/* Unregister connection and reset the alert token of the given connection
 */
static void __smc_lgr_unregister_conn(struct smc_connection *conn)
{
	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
	struct smc_link_group *lgr = conn->lgr;

	rb_erase(&conn->alert_node, &lgr->conns_all);
	lgr->conns_num--;
	conn->alert_token_local = 0;
	conn->lgr = NULL;
	sock_put(&smc->sk); /* sock_hold in smc_lgr_register_conn() */
}

/* Unregister connection and trigger lgr freeing if applicable
 */
static void smc_lgr_unregister_conn(struct smc_connection *conn)
{
	struct smc_link_group *lgr = conn->lgr;
	int reduced = 0;

	write_lock_bh(&lgr->conns_lock);
	if (conn->alert_token_local) {
		reduced = 1;
		__smc_lgr_unregister_conn(conn);
	}
	write_unlock_bh(&lgr->conns_lock);
	if (!reduced || lgr->conns_num)
		return;
	smc_lgr_schedule_free_work(lgr);
}
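
/* Delayed worker that frees an unused link group once its removal delay has
 * expired. The link group is kept if new connections were registered in the
 * meantime or if the freeing work has been rescheduled.
 */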
static void smc_lgr_free_work(struct work_struct *work)
{
	struct smc_link_group *lgr = container_of(to_delayed_work(work),
						  struct smc_link_group,
						  free_work);
	bool conns;

	spin_lock_bh(&smc_lgr_list.lock);
	if (list_empty(&lgr->list))
		goto free;
	read_lock_bh(&lgr->conns_lock);
	conns = RB_EMPTY_ROOT(&lgr->conns_all);
	read_unlock_bh(&lgr->conns_lock);
	if (!conns) { /* number of lgr connections is no longer zero */
		spin_unlock_bh(&smc_lgr_list.lock);
		return;
	}
	list_del_init(&lgr->list); /* remove from smc_lgr_list */
free:
	spin_unlock_bh(&smc_lgr_list.lock);
	if (!delayed_work_pending(&lgr->free_work)) {
		if (lgr->lnk[SMC_SINGLE_LINK].state != SMC_LNK_INACTIVE)
			smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]);
		smc_lgr_free(lgr);
	}
}

/* create a new SMC link group */
static int smc_lgr_create(struct smc_sock *smc,
			  struct smc_ib_device *smcibdev, u8 ibport,
			  char *peer_systemid, unsigned short vlan_id)
{
	struct smc_link_group *lgr;
	struct smc_link *lnk;
	u8 rndvec[3];
	int rc = 0;
	int i;

	lgr = kzalloc(sizeof(*lgr), GFP_KERNEL);
	if (!lgr) {
		rc = -ENOMEM;
		goto out;
	}
	lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
	lgr->sync_err = 0;
	memcpy(lgr->peer_systemid, peer_systemid, SMC_SYSTEMID_LEN);
	lgr->vlan_id = vlan_id;
	rwlock_init(&lgr->sndbufs_lock);
	rwlock_init(&lgr->rmbs_lock);
	for (i = 0; i < SMC_RMBE_SIZES; i++) {
		INIT_LIST_HEAD(&lgr->sndbufs[i]);
		INIT_LIST_HEAD(&lgr->rmbs[i]);
	}
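	/* the link group id is derived from a global counter that advances
	 * by SMC_LGR_NUM_INCR for every new link group
	 */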
	smc_lgr_list.num += SMC_LGR_NUM_INCR;
	memcpy(&lgr->id, (u8 *)&smc_lgr_list.num, SMC_LGR_ID_SIZE);
	INIT_DELAYED_WORK(&lgr->free_work, smc_lgr_free_work);
	lgr->conns_all = RB_ROOT;

	lnk = &lgr->lnk[SMC_SINGLE_LINK];
	/* initialize link */
	lnk->state = SMC_LNK_ACTIVATING;
	lnk->link_id = SMC_SINGLE_LINK;
	lnk->smcibdev = smcibdev;
	lnk->ibport = ibport;
	lnk->path_mtu = smcibdev->pattr[ibport - 1].active_mtu;
	if (!smcibdev->initialized)
		smc_ib_setup_per_ibdev(smcibdev);
	get_random_bytes(rndvec, sizeof(rndvec));
	lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) + (rndvec[2] << 16);
	rc = smc_llc_link_init(lnk);
	if (rc)
		goto free_lgr;
	rc = smc_wr_alloc_link_mem(lnk);
	if (rc)
		goto clear_llc_lnk;
	rc = smc_ib_create_protection_domain(lnk);
	if (rc)
		goto free_link_mem;
	rc = smc_ib_create_queue_pair(lnk);
	if (rc)
		goto dealloc_pd;
	rc = smc_wr_create_link(lnk);
	if (rc)
		goto destroy_qp;

	smc->conn.lgr = lgr;
	rwlock_init(&lgr->conns_lock);
	spin_lock_bh(&smc_lgr_list.lock);
	list_add(&lgr->list, &smc_lgr_list.list);
	spin_unlock_bh(&smc_lgr_list.lock);
	return 0;

destroy_qp:
	smc_ib_destroy_queue_pair(lnk);
dealloc_pd:
	smc_ib_dealloc_protection_domain(lnk);
free_link_mem:
	smc_wr_free_link_mem(lnk);
clear_llc_lnk:
	smc_llc_link_clear(lnk);
free_lgr:
	kfree(lgr);
out:
	return rc;
}

static void smc_buf_unuse(struct smc_connection *conn)
{
	if (conn->sndbuf_desc)
		conn->sndbuf_desc->used = 0;
	if (conn->rmb_desc) {
		if (!conn->rmb_desc->regerr) {
			conn->rmb_desc->reused = 1;
			conn->rmb_desc->used = 0;
		} else {
			/* buf registration failed, reuse not possible */
			struct smc_link_group *lgr = conn->lgr;

			write_lock_bh(&lgr->rmbs_lock);
			list_del(&conn->rmb_desc->list);
			write_unlock_bh(&lgr->rmbs_lock);

			smc_buf_free(lgr, true, conn->rmb_desc);
		}
	}
}

/* remove a finished connection from its link group */
void smc_conn_free(struct smc_connection *conn)
{
	if (!conn->lgr)
		return;
	smc_cdc_tx_dismiss_slots(conn);
	smc_lgr_unregister_conn(conn);
	smc_buf_unuse(conn);
}

static void smc_link_clear(struct smc_link *lnk)
{
	lnk->peer_qpn = 0;
	smc_llc_link_clear(lnk);
	smc_ib_modify_qp_reset(lnk);
	smc_wr_free_link(lnk);
	smc_ib_destroy_queue_pair(lnk);
	smc_ib_dealloc_protection_domain(lnk);
	smc_wr_free_link_mem(lnk);
}
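
/* unmap a send buffer or RMB from the IB device, put the RMB's memory
 * region, and free the scatterlist, the backing pages and the descriptor
 */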
static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
			 struct smc_buf_desc *buf_desc)
{
	struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];

	if (is_rmb) {
		if (buf_desc->mr_rx[SMC_SINGLE_LINK])
			smc_ib_put_memory_region(
					buf_desc->mr_rx[SMC_SINGLE_LINK]);
		smc_ib_buf_unmap_sg(lnk->smcibdev, buf_desc,
				    DMA_FROM_DEVICE);
	} else {
		smc_ib_buf_unmap_sg(lnk->smcibdev, buf_desc,
				    DMA_TO_DEVICE);
	}
	sg_free_table(&buf_desc->sgt[SMC_SINGLE_LINK]);
	if (buf_desc->pages)
		__free_pages(buf_desc->pages, buf_desc->order);
	kfree(buf_desc);
}

static void __smc_lgr_free_bufs(struct smc_link_group *lgr, bool is_rmb)
{
	struct smc_buf_desc *buf_desc, *bf_desc;
	struct list_head *buf_list;
	int i;

	for (i = 0; i < SMC_RMBE_SIZES; i++) {
		if (is_rmb)
			buf_list = &lgr->rmbs[i];
		else
			buf_list = &lgr->sndbufs[i];
		list_for_each_entry_safe(buf_desc, bf_desc, buf_list,
					 list) {
			list_del(&buf_desc->list);
			smc_buf_free(lgr, is_rmb, buf_desc);
		}
	}
}

static void smc_lgr_free_bufs(struct smc_link_group *lgr)
{
	/* free send buffers */
	__smc_lgr_free_bufs(lgr, false);
	/* free rmbs */
	__smc_lgr_free_bufs(lgr, true);
}

/* remove a link group */
void smc_lgr_free(struct smc_link_group *lgr)
{
	smc_lgr_free_bufs(lgr);
	smc_link_clear(&lgr->lnk[SMC_SINGLE_LINK]);
	kfree(lgr);
}

void smc_lgr_forget(struct smc_link_group *lgr)
{
	spin_lock_bh(&smc_lgr_list.lock);
	/* do not use this link group for new connections */
	if (!list_empty(&lgr->list))
		list_del_init(&lgr->list);
	spin_unlock_bh(&smc_lgr_list.lock);
}

/* terminate link group abnormally */
static void __smc_lgr_terminate(struct smc_link_group *lgr)
{
	struct smc_connection *conn;
	struct smc_sock *smc;
	struct rb_node *node;

	if (lgr->terminating)
		return;	/* lgr already terminating */
	lgr->terminating = 1;
	if (!list_empty(&lgr->list)) /* forget lgr */
		list_del_init(&lgr->list);
	smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]);

	write_lock_bh(&lgr->conns_lock);
	node = rb_first(&lgr->conns_all);
	while (node) {
		conn = rb_entry(node, struct smc_connection, alert_node);
		smc = container_of(conn, struct smc_sock, conn);
		sock_hold(&smc->sk); /* sock_put in close work */
		conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
		__smc_lgr_unregister_conn(conn);
		write_unlock_bh(&lgr->conns_lock);
		if (!schedule_work(&conn->close_work))
			sock_put(&smc->sk);
		write_lock_bh(&lgr->conns_lock);
		node = rb_first(&lgr->conns_all);
	}
	write_unlock_bh(&lgr->conns_lock);
	wake_up(&lgr->lnk[SMC_SINGLE_LINK].wr_reg_wait);
	smc_lgr_schedule_free_work(lgr);
}

void smc_lgr_terminate(struct smc_link_group *lgr)
{
	spin_lock_bh(&smc_lgr_list.lock);
	__smc_lgr_terminate(lgr);
	spin_unlock_bh(&smc_lgr_list.lock);
}

/* Called when IB port is terminated */
void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport)
{
	struct smc_link_group *lgr, *l;

	spin_lock_bh(&smc_lgr_list.lock);
	list_for_each_entry_safe(lgr, l, &smc_lgr_list.list, list) {
		if (lgr->lnk[SMC_SINGLE_LINK].smcibdev == smcibdev &&
		    lgr->lnk[SMC_SINGLE_LINK].ibport == ibport)
			__smc_lgr_terminate(lgr);
	}
	spin_unlock_bh(&smc_lgr_list.lock);
}

/* Determine vlan of internal TCP socket.
 * @vlan_id: address to store the determined vlan id into
 */
static int smc_vlan_by_tcpsk(struct socket *clcsock, unsigned short *vlan_id)
{
	struct dst_entry *dst = sk_dst_get(clcsock->sk);
	struct net_device *ndev;
	int i, nest_lvl, rc = 0;

	*vlan_id = 0;
	if (!dst) {
		rc = -ENOTCONN;
		goto out;
	}
	if (!dst->dev) {
		rc = -ENODEV;
		goto out_rel;
	}

	ndev = dst->dev;
	if (is_vlan_dev(ndev)) {
		*vlan_id = vlan_dev_vlan_id(ndev);
		goto out_rel;
	}

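	/* dst->dev is not a VLAN device itself; search its lower devices for
	 * a VLAN device (e.g. when the traffic runs over a device stacked on
	 * top of a VLAN device) and use that device's VLAN id
	 */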
	rtnl_lock();
	nest_lvl = dev_get_nest_level(ndev);
	for (i = 0; i < nest_lvl; i++) {
		struct list_head *lower = &ndev->adj_list.lower;

		if (list_empty(lower))
			break;
		lower = lower->next;
		ndev = (struct net_device *)netdev_lower_get_next(ndev, &lower);
		if (is_vlan_dev(ndev)) {
			*vlan_id = vlan_dev_vlan_id(ndev);
			break;
		}
	}
	rtnl_unlock();

out_rel:
	dst_release(dst);
out:
	return rc;
}

/* determine the link gid matching the vlan id of the link group */
static int smc_link_determine_gid(struct smc_link_group *lgr)
{
	struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];
	struct ib_gid_attr gattr;
	union ib_gid gid;
	int i;

	if (!lgr->vlan_id) {
		lnk->gid = lnk->smcibdev->gid[lnk->ibport - 1];
		return 0;
	}

	for (i = 0; i < lnk->smcibdev->pattr[lnk->ibport - 1].gid_tbl_len;
	     i++) {
		if (ib_query_gid(lnk->smcibdev->ibdev, lnk->ibport, i, &gid,
				 &gattr))
			continue;
		if (gattr.ndev) {
			if (is_vlan_dev(gattr.ndev) &&
			    vlan_dev_vlan_id(gattr.ndev) == lgr->vlan_id) {
				lnk->gid = gid;
				dev_put(gattr.ndev);
				return 0;
			}
			dev_put(gattr.ndev);
		}
	}
	return -ENODEV;
}

/* create a new SMC connection (and a new link group if necessary) */
int smc_conn_create(struct smc_sock *smc,
		    struct smc_ib_device *smcibdev, u8 ibport,
		    struct smc_clc_msg_local *lcl, int srv_first_contact)
{
	struct smc_connection *conn = &smc->conn;
	int local_contact = SMC_FIRST_CONTACT;
	struct smc_link_group *lgr;
	unsigned short vlan_id;
	enum smc_lgr_role role;
	int rc = 0;

	role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
	rc = smc_vlan_by_tcpsk(smc->clcsock, &vlan_id);
	if (rc)
		return rc;

	if ((role == SMC_CLNT) && srv_first_contact)
		/* create new link group as well */
		goto create;

	/* determine if an existing link group can be reused */
	spin_lock_bh(&smc_lgr_list.lock);
	list_for_each_entry(lgr, &smc_lgr_list.list, list) {
		write_lock_bh(&lgr->conns_lock);
		if (!memcmp(lgr->peer_systemid, lcl->id_for_peer,
			    SMC_SYSTEMID_LEN) &&
		    !memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_gid, &lcl->gid,
			    SMC_GID_SIZE) &&
		    !memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_mac, lcl->mac,
			    sizeof(lcl->mac)) &&
		    !lgr->sync_err &&
		    (lgr->role == role) &&
		    (lgr->vlan_id == vlan_id) &&
		    ((role == SMC_CLNT) ||
		     (lgr->conns_num < SMC_RMBS_PER_LGR_MAX))) {
			/* link group found */
			local_contact = SMC_REUSE_CONTACT;
			conn->lgr = lgr;
			smc_lgr_register_conn(conn); /* add smc conn to lgr */
			write_unlock_bh(&lgr->conns_lock);
			break;
		}
		write_unlock_bh(&lgr->conns_lock);
	}
	spin_unlock_bh(&smc_lgr_list.lock);

	if (role == SMC_CLNT && !srv_first_contact &&
	    (local_contact == SMC_FIRST_CONTACT)) {
		/* Server reuses a link group, but Client wants to start
		 * a new one
		 * send out_of_sync decline, reason synchr. error
		 */
		return -ENOLINK;
	}

create:
	if (local_contact == SMC_FIRST_CONTACT) {
		rc = smc_lgr_create(smc, smcibdev, ibport,
				    lcl->id_for_peer, vlan_id);
		if (rc)
			goto out;
		smc_lgr_register_conn(conn); /* add smc conn to lgr */
		rc = smc_link_determine_gid(conn->lgr);
	}
	conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE;
	conn->local_tx_ctrl.len = SMC_WR_TX_SIZE;
	conn->urg_state = SMC_URG_READ;
#ifndef KERNEL_HAS_ATOMIC64
	spin_lock_init(&conn->acurs_lock);
#endif

out:
	return rc ? rc : local_contact;
}

/* convert the RMB size into the compressed notation - minimum 16K.
 * In contrast to plain ilog2, this rounds towards the next power of 2,
 * so the socket application gets at least its desired sndbuf / rcvbuf size.
 */
static u8 smc_compress_bufsize(int size)
{
	u8 compressed;

	if (size <= SMC_BUF_MIN_SIZE)
		return 0;

	size = (size - 1) >> 14;
	compressed = ilog2(size) + 1;
	if (compressed >= SMC_RMBE_SIZES)
		compressed = SMC_RMBE_SIZES - 1;
	return compressed;
}

/* convert the RMB size from compressed notation into integer */
int smc_uncompress_bufsize(u8 compressed)
{
	u32 size;

	size = 0x00000001 << (((int)compressed) + 14);
	return (int)size;
}
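
/* Example: with the 16KB minimum, compressed value 0 maps to a 16KB buffer,
 * 1 to 32KB, 2 to 64KB, and so on. A requested size of 40000 bytes is
 * rounded up by smc_compress_bufsize() to compressed value 2, i.e. 64KB.
 */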

/* try to reuse a sndbuf or rmb description slot for a certain
 * buffer size; if not available, return NULL
 */
static struct smc_buf_desc *smc_buf_get_slot(int compressed_bufsize,
					     rwlock_t *lock,
					     struct list_head *buf_list)
{
	struct smc_buf_desc *buf_slot;

	read_lock_bh(lock);
	list_for_each_entry(buf_slot, buf_list, list) {
		if (cmpxchg(&buf_slot->used, 0, 1) == 0) {
			read_unlock_bh(lock);
			return buf_slot;
		}
	}
	read_unlock_bh(lock);
	return NULL;
}

/* one of the conditions for announcing a receiver's current window size is
 * that it "results in a minimum increase in the window size of 10% of the
 * receive buffer space" [RFC7609]
 */
static inline int smc_rmb_wnd_update_limit(int rmbe_size)
{
	return min_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2);
}

static struct smc_buf_desc *smc_new_buf_create(struct smc_link_group *lgr,
					       bool is_rmb, int bufsize)
{
	struct smc_buf_desc *buf_desc;
	struct smc_link *lnk;
	int rc;

	/* try to alloc a new buffer */
	buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
	if (!buf_desc)
		return ERR_PTR(-ENOMEM);

	buf_desc->order = get_order(bufsize);
	buf_desc->pages = alloc_pages(GFP_KERNEL | __GFP_NOWARN |
				      __GFP_NOMEMALLOC | __GFP_COMP |
				      __GFP_NORETRY | __GFP_ZERO,
				      buf_desc->order);
	if (!buf_desc->pages) {
		kfree(buf_desc);
		return ERR_PTR(-EAGAIN);
	}
	buf_desc->cpu_addr = (void *)page_address(buf_desc->pages);

	/* build the sg table from the pages */
	lnk = &lgr->lnk[SMC_SINGLE_LINK];
	rc = sg_alloc_table(&buf_desc->sgt[SMC_SINGLE_LINK], 1,
			    GFP_KERNEL);
	if (rc) {
		smc_buf_free(lgr, is_rmb, buf_desc);
		return ERR_PTR(rc);
	}
	sg_set_buf(buf_desc->sgt[SMC_SINGLE_LINK].sgl,
		   buf_desc->cpu_addr, bufsize);

	/* map sg table to DMA address */
	rc = smc_ib_buf_map_sg(lnk->smcibdev, buf_desc,
			       is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
	/* SMC protocol depends on mapping to one DMA address only */
	if (rc != 1) {
		smc_buf_free(lgr, is_rmb, buf_desc);
		return ERR_PTR(-EAGAIN);
	}

	/* create a new memory region for the RMB */
	if (is_rmb) {
		rc = smc_ib_get_memory_region(lnk->roce_pd,
					      IB_ACCESS_REMOTE_WRITE |
					      IB_ACCESS_LOCAL_WRITE,
					      buf_desc);
		if (rc) {
			smc_buf_free(lgr, is_rmb, buf_desc);
			return ERR_PTR(rc);
		}
	}

	buf_desc->len = bufsize;
	return buf_desc;
}
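
/* create a send buffer or RMB for a connection: start from the size implied
 * by the socket buffer setting and fall back to smaller sizes, reusing an
 * unused buffer of the link group whenever one is available
 */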
static int __smc_buf_create(struct smc_sock *smc, bool is_rmb)
{
	struct smc_buf_desc *buf_desc = ERR_PTR(-ENOMEM);
	struct smc_connection *conn = &smc->conn;
	struct smc_link_group *lgr = conn->lgr;
	struct list_head *buf_list;
	int bufsize, bufsize_short;
	int sk_buf_size;
	rwlock_t *lock;

	if (is_rmb)
		/* use socket recv buffer size (w/o overhead) as start value */
		sk_buf_size = smc->sk.sk_rcvbuf / 2;
	else
		/* use socket send buffer size (w/o overhead) as start value */
		sk_buf_size = smc->sk.sk_sndbuf / 2;

	for (bufsize_short = smc_compress_bufsize(sk_buf_size);
	     bufsize_short >= 0; bufsize_short--) {

		if (is_rmb) {
			lock = &lgr->rmbs_lock;
			buf_list = &lgr->rmbs[bufsize_short];
		} else {
			lock = &lgr->sndbufs_lock;
			buf_list = &lgr->sndbufs[bufsize_short];
		}
		bufsize = smc_uncompress_bufsize(bufsize_short);
		if ((1 << get_order(bufsize)) > SG_MAX_SINGLE_ALLOC)
			continue;

		/* check for reusable slot in the link group */
		buf_desc = smc_buf_get_slot(bufsize_short, lock, buf_list);
		if (buf_desc) {
			memset(buf_desc->cpu_addr, 0, bufsize);
			break; /* found reusable slot */
		}

		buf_desc = smc_new_buf_create(lgr, is_rmb, bufsize);
		if (PTR_ERR(buf_desc) == -ENOMEM)
			break;
		if (IS_ERR(buf_desc))
			continue;

		buf_desc->used = 1;
		write_lock_bh(lock);
		list_add(&buf_desc->list, buf_list);
		write_unlock_bh(lock);
		break; /* found */
	}

	if (IS_ERR(buf_desc))
		return -ENOMEM;

	if (is_rmb) {
		conn->rmb_desc = buf_desc;
		conn->rmbe_size_short = bufsize_short;
		smc->sk.sk_rcvbuf = bufsize * 2;
		atomic_set(&conn->bytes_to_rcv, 0);
		conn->rmbe_update_limit = smc_rmb_wnd_update_limit(bufsize);
	} else {
		conn->sndbuf_desc = buf_desc;
		smc->sk.sk_sndbuf = bufsize * 2;
		atomic_set(&conn->sndbuf_space, bufsize);
	}
	return 0;
}

void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn)
{
	struct smc_link_group *lgr = conn->lgr;

	smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
			       conn->sndbuf_desc, DMA_TO_DEVICE);
}

void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn)
{
	struct smc_link_group *lgr = conn->lgr;

	smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
				  conn->sndbuf_desc, DMA_TO_DEVICE);
}

void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn)
{
	struct smc_link_group *lgr = conn->lgr;

	smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
			       conn->rmb_desc, DMA_FROM_DEVICE);
}

void smc_rmb_sync_sg_for_device(struct smc_connection *conn)
{
	struct smc_link_group *lgr = conn->lgr;

	smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
				  conn->rmb_desc, DMA_FROM_DEVICE);
}

/* create the send and receive buffer for an SMC socket;
 * receive buffers are called RMBs;
 * (even though the SMC protocol allows more than one RMB-element per RMB,
 * the Linux implementation uses just one RMB-element per RMB, i.e. uses an
 * extra RMB for every connection in a link group)
 */
int smc_buf_create(struct smc_sock *smc)
{
	int rc;

	/* create send buffer */
	rc = __smc_buf_create(smc, false);
	if (rc)
		return rc;
	/* create rmb */
	rc = __smc_buf_create(smc, true);
	if (rc)
		smc_buf_free(smc->conn.lgr, false, smc->conn.sndbuf_desc);
	return rc;
}
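
/* reserve an unused slot in the link group's rtoken array;
 * returns the slot index, or -ENOSPC if all slots are in use
 */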
static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr)
{
	int i;

	for_each_clear_bit(i, lgr->rtokens_used_mask, SMC_RMBS_PER_LGR_MAX) {
		if (!test_and_set_bit(i, lgr->rtokens_used_mask))
			return i;
	}
	return -ENOSPC;
}

/* add a new rtoken from peer */
int smc_rtoken_add(struct smc_link_group *lgr, __be64 nw_vaddr, __be32 nw_rkey)
{
	u64 dma_addr = be64_to_cpu(nw_vaddr);
	u32 rkey = ntohl(nw_rkey);
	int i;

	for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
		if ((lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey) &&
		    (lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr == dma_addr) &&
		    test_bit(i, lgr->rtokens_used_mask)) {
			/* already in list */
			return i;
		}
	}
	i = smc_rmb_reserve_rtoken_idx(lgr);
	if (i < 0)
		return i;
	lgr->rtokens[i][SMC_SINGLE_LINK].rkey = rkey;
	lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr = dma_addr;
	return i;
}

/* delete an rtoken */
int smc_rtoken_delete(struct smc_link_group *lgr, __be32 nw_rkey)
{
	u32 rkey = ntohl(nw_rkey);
	int i;

	for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
		if (lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey &&
		    test_bit(i, lgr->rtokens_used_mask)) {
			lgr->rtokens[i][SMC_SINGLE_LINK].rkey = 0;
			lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr = 0;

			clear_bit(i, lgr->rtokens_used_mask);
			return 0;
		}
	}
	return -ENOENT;
}

/* save rkey and dma_addr received from peer during clc handshake */
int smc_rmb_rtoken_handling(struct smc_connection *conn,
			    struct smc_clc_msg_accept_confirm *clc)
{
	conn->rtoken_idx = smc_rtoken_add(conn->lgr, clc->rmb_dma_addr,
					  clc->rmb_rkey);
	if (conn->rtoken_idx < 0)
		return conn->rtoken_idx;
	return 0;
}

/* Called (from smc_exit) when module is removed */
void smc_core_exit(void)
{
	struct smc_link_group *lgr, *lg;
	LIST_HEAD(lgr_freeing_list);

	spin_lock_bh(&smc_lgr_list.lock);
	if (!list_empty(&smc_lgr_list.list))
		list_splice_init(&smc_lgr_list.list, &lgr_freeing_list);
	spin_unlock_bh(&smc_lgr_list.lock);
	list_for_each_entry_safe(lgr, lg, &lgr_freeing_list, list) {
		list_del_init(&lgr->list);
		smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]);
		cancel_delayed_work_sync(&lgr->free_work);
		smc_lgr_free(lgr);	/* free link group */
	}
}