/*
 * Copyright (c) 2004, 2005 Intel Corporation. All rights reserved.
 * Copyright (c) 2004 Topspin Corporation. All rights reserved.
 * Copyright (c) 2004, 2005 Voltaire Corporation. All rights reserved.
 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
 * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
 * Copyright (c) 2005 Network Appliance, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
36 * 37 */ 38 #include <linux/dma-mapping.h> 39 #include <linux/err.h> 40 #include <linux/idr.h> 41 #include <linux/interrupt.h> 42 #include <linux/rbtree.h> 43 #include <linux/sched.h> 44 #include <linux/spinlock.h> 45 #include <linux/workqueue.h> 46 #include <linux/completion.h> 47 #include <linux/slab.h> 48 #include <linux/module.h> 49 #include <linux/sysctl.h> 50 51 #include <rdma/iw_cm.h> 52 #include <rdma/ib_addr.h> 53 #include <rdma/iw_portmap.h> 54 #include <rdma/rdma_netlink.h> 55 56 #include "iwcm.h" 57 58 MODULE_AUTHOR("Tom Tucker"); 59 MODULE_DESCRIPTION("iWARP CM"); 60 MODULE_LICENSE("Dual BSD/GPL"); 61 62 static const char * const iwcm_rej_reason_strs[] = { 63 [ECONNRESET] = "reset by remote host", 64 [ECONNREFUSED] = "refused by remote application", 65 [ETIMEDOUT] = "setup timeout", 66 }; 67 68 const char *__attribute_const__ iwcm_reject_msg(int reason) 69 { 70 size_t index; 71 72 /* iWARP uses negative errnos */ 73 index = -reason; 74 75 if (index < ARRAY_SIZE(iwcm_rej_reason_strs) && 76 iwcm_rej_reason_strs[index]) 77 return iwcm_rej_reason_strs[index]; 78 else 79 return "unrecognized reason"; 80 } 81 EXPORT_SYMBOL(iwcm_reject_msg); 82 83 static struct rdma_nl_cbs iwcm_nl_cb_table[RDMA_NL_IWPM_NUM_OPS] = { 84 [RDMA_NL_IWPM_REG_PID] = {.dump = iwpm_register_pid_cb}, 85 [RDMA_NL_IWPM_ADD_MAPPING] = {.dump = iwpm_add_mapping_cb}, 86 [RDMA_NL_IWPM_QUERY_MAPPING] = {.dump = iwpm_add_and_query_mapping_cb}, 87 [RDMA_NL_IWPM_REMOTE_INFO] = {.dump = iwpm_remote_info_cb}, 88 [RDMA_NL_IWPM_HANDLE_ERR] = {.dump = iwpm_mapping_error_cb}, 89 [RDMA_NL_IWPM_MAPINFO] = {.dump = iwpm_mapping_info_cb}, 90 [RDMA_NL_IWPM_MAPINFO_NUM] = {.dump = iwpm_ack_mapping_info_cb}, 91 [RDMA_NL_IWPM_HELLO] = {.dump = iwpm_hello_cb} 92 }; 93 94 static struct workqueue_struct *iwcm_wq; 95 struct iwcm_work { 96 struct work_struct work; 97 struct iwcm_id_private *cm_id; 98 struct iw_cm_event event; 99 struct list_head free_list; 100 }; 101 102 static unsigned int default_backlog = 
256; 103 104 static struct ctl_table_header *iwcm_ctl_table_hdr; 105 static struct ctl_table iwcm_ctl_table[] = { 106 { 107 .procname = "default_backlog", 108 .data = &default_backlog, 109 .maxlen = sizeof(default_backlog), 110 .mode = 0644, 111 .proc_handler = proc_dointvec_minmax, 112 .extra1 = SYSCTL_ZERO, 113 .extra2 = SYSCTL_INT_MAX, 114 }, 115 }; 116 117 /* 118 * The following services provide a mechanism for pre-allocating iwcm_work 119 * elements. The design pre-allocates them based on the cm_id type: 120 * LISTENING IDS: Get enough elements preallocated to handle the 121 * listen backlog. 122 * ACTIVE IDS: 4: CONNECT_REPLY, ESTABLISHED, DISCONNECT, CLOSE 123 * PASSIVE IDS: 3: ESTABLISHED, DISCONNECT, CLOSE 124 * 125 * Allocating them in connect and listen avoids having to deal 126 * with allocation failures on the event upcall from the provider (which 127 * is called in the interrupt context). 128 * 129 * One exception is when creating the cm_id for incoming connection requests. 130 * There are two cases: 131 * 1) in the event upcall, cm_event_handler(), for a listening cm_id. If 132 * the backlog is exceeded, then no more connection request events will 133 * be processed. cm_event_handler() returns -ENOMEM in this case. Its up 134 * to the provider to reject the connection request. 135 * 2) in the connection request workqueue handler, cm_conn_req_handler(). 136 * If work elements cannot be allocated for the new connect request cm_id, 137 * then IWCM will call the provider reject method. This is ok since 138 * cm_conn_req_handler() runs in the workqueue thread context. 
139 */ 140 141 static struct iwcm_work *get_work(struct iwcm_id_private *cm_id_priv) 142 { 143 struct iwcm_work *work; 144 145 if (list_empty(&cm_id_priv->work_free_list)) 146 return NULL; 147 work = list_first_entry(&cm_id_priv->work_free_list, struct iwcm_work, 148 free_list); 149 list_del_init(&work->free_list); 150 return work; 151 } 152 153 static void put_work(struct iwcm_work *work) 154 { 155 list_add(&work->free_list, &work->cm_id->work_free_list); 156 } 157 158 static void dealloc_work_entries(struct iwcm_id_private *cm_id_priv) 159 { 160 struct list_head *e, *tmp; 161 162 list_for_each_safe(e, tmp, &cm_id_priv->work_free_list) { 163 list_del(e); 164 kfree(list_entry(e, struct iwcm_work, free_list)); 165 } 166 } 167 168 static int alloc_work_entries(struct iwcm_id_private *cm_id_priv, int count) 169 { 170 struct iwcm_work *work; 171 172 BUG_ON(!list_empty(&cm_id_priv->work_free_list)); 173 while (count--) { 174 work = kmalloc(sizeof(struct iwcm_work), GFP_KERNEL); 175 if (!work) { 176 dealloc_work_entries(cm_id_priv); 177 return -ENOMEM; 178 } 179 work->cm_id = cm_id_priv; 180 put_work(work); 181 } 182 return 0; 183 } 184 185 /* 186 * Save private data from incoming connection requests to 187 * iw_cm_event, so the low level driver doesn't have to. Adjust 188 * the event ptr to point to the local copy. 189 */ 190 static int copy_private_data(struct iw_cm_event *event) 191 { 192 void *p; 193 194 p = kmemdup(event->private_data, event->private_data_len, GFP_ATOMIC); 195 if (!p) 196 return -ENOMEM; 197 event->private_data = p; 198 return 0; 199 } 200 201 static void free_cm_id(struct iwcm_id_private *cm_id_priv) 202 { 203 dealloc_work_entries(cm_id_priv); 204 kfree(cm_id_priv); 205 } 206 207 /* 208 * Release a reference on cm_id. If the last reference is being 209 * released, free the cm_id and return 'true'. 
210 */ 211 static bool iwcm_deref_id(struct iwcm_id_private *cm_id_priv) 212 { 213 if (refcount_dec_and_test(&cm_id_priv->refcount)) { 214 free_cm_id(cm_id_priv); 215 return true; 216 } 217 218 return false; 219 } 220 221 static void add_ref(struct iw_cm_id *cm_id) 222 { 223 struct iwcm_id_private *cm_id_priv; 224 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 225 refcount_inc(&cm_id_priv->refcount); 226 } 227 228 static void rem_ref(struct iw_cm_id *cm_id) 229 { 230 struct iwcm_id_private *cm_id_priv; 231 232 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 233 234 (void)iwcm_deref_id(cm_id_priv); 235 } 236 237 static int cm_event_handler(struct iw_cm_id *cm_id, struct iw_cm_event *event); 238 239 struct iw_cm_id *iw_create_cm_id(struct ib_device *device, 240 iw_cm_handler cm_handler, 241 void *context) 242 { 243 struct iwcm_id_private *cm_id_priv; 244 245 cm_id_priv = kzalloc(sizeof(*cm_id_priv), GFP_KERNEL); 246 if (!cm_id_priv) 247 return ERR_PTR(-ENOMEM); 248 249 cm_id_priv->state = IW_CM_STATE_IDLE; 250 cm_id_priv->id.device = device; 251 cm_id_priv->id.cm_handler = cm_handler; 252 cm_id_priv->id.context = context; 253 cm_id_priv->id.event_handler = cm_event_handler; 254 cm_id_priv->id.add_ref = add_ref; 255 cm_id_priv->id.rem_ref = rem_ref; 256 spin_lock_init(&cm_id_priv->lock); 257 refcount_set(&cm_id_priv->refcount, 1); 258 init_waitqueue_head(&cm_id_priv->connect_wait); 259 init_completion(&cm_id_priv->destroy_comp); 260 INIT_LIST_HEAD(&cm_id_priv->work_free_list); 261 262 return &cm_id_priv->id; 263 } 264 EXPORT_SYMBOL(iw_create_cm_id); 265 266 267 static int iwcm_modify_qp_err(struct ib_qp *qp) 268 { 269 struct ib_qp_attr qp_attr; 270 271 if (!qp) 272 return -EINVAL; 273 274 qp_attr.qp_state = IB_QPS_ERR; 275 return ib_modify_qp(qp, &qp_attr, IB_QP_STATE); 276 } 277 278 /* 279 * This is really the RDMAC CLOSING state. It is most similar to the 280 * IB SQD QP state. 
281 */ 282 static int iwcm_modify_qp_sqd(struct ib_qp *qp) 283 { 284 struct ib_qp_attr qp_attr; 285 286 BUG_ON(qp == NULL); 287 qp_attr.qp_state = IB_QPS_SQD; 288 return ib_modify_qp(qp, &qp_attr, IB_QP_STATE); 289 } 290 291 /* 292 * CM_ID <-- CLOSING 293 * 294 * Block if a passive or active connection is currently being processed. Then 295 * process the event as follows: 296 * - If we are ESTABLISHED, move to CLOSING and modify the QP state 297 * based on the abrupt flag 298 * - If the connection is already in the CLOSING or IDLE state, the peer is 299 * disconnecting concurrently with us and we've already seen the 300 * DISCONNECT event -- ignore the request and return 0 301 * - Disconnect on a listening endpoint returns -EINVAL 302 */ 303 int iw_cm_disconnect(struct iw_cm_id *cm_id, int abrupt) 304 { 305 struct iwcm_id_private *cm_id_priv; 306 unsigned long flags; 307 int ret = 0; 308 struct ib_qp *qp = NULL; 309 310 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 311 /* Wait if we're currently in a connect or accept downcall */ 312 wait_event(cm_id_priv->connect_wait, 313 !test_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags)); 314 315 spin_lock_irqsave(&cm_id_priv->lock, flags); 316 switch (cm_id_priv->state) { 317 case IW_CM_STATE_ESTABLISHED: 318 cm_id_priv->state = IW_CM_STATE_CLOSING; 319 320 /* QP could be <nul> for user-mode client */ 321 if (cm_id_priv->qp) 322 qp = cm_id_priv->qp; 323 else 324 ret = -EINVAL; 325 break; 326 case IW_CM_STATE_LISTEN: 327 ret = -EINVAL; 328 break; 329 case IW_CM_STATE_CLOSING: 330 /* remote peer closed first */ 331 case IW_CM_STATE_IDLE: 332 /* accept or connect returned !0 */ 333 break; 334 case IW_CM_STATE_CONN_RECV: 335 /* 336 * App called disconnect before/without calling accept after 337 * connect_request event delivered. 
338 */ 339 break; 340 case IW_CM_STATE_CONN_SENT: 341 /* Can only get here if wait above fails */ 342 default: 343 BUG(); 344 } 345 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 346 347 if (qp) { 348 if (abrupt) 349 ret = iwcm_modify_qp_err(qp); 350 else 351 ret = iwcm_modify_qp_sqd(qp); 352 353 /* 354 * If both sides are disconnecting the QP could 355 * already be in ERR or SQD states 356 */ 357 ret = 0; 358 } 359 360 return ret; 361 } 362 EXPORT_SYMBOL(iw_cm_disconnect); 363 364 /* 365 * CM_ID <-- DESTROYING 366 * 367 * Clean up all resources associated with the connection. 368 */ 369 static void destroy_cm_id(struct iw_cm_id *cm_id) 370 { 371 struct iwcm_id_private *cm_id_priv; 372 struct ib_qp *qp; 373 unsigned long flags; 374 375 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 376 /* 377 * Wait if we're currently in a connect or accept downcall. A 378 * listening endpoint should never block here. 379 */ 380 wait_event(cm_id_priv->connect_wait, 381 !test_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags)); 382 383 /* 384 * Since we're deleting the cm_id, drop any events that 385 * might arrive before the last dereference. 
386 */ 387 set_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags); 388 389 spin_lock_irqsave(&cm_id_priv->lock, flags); 390 qp = cm_id_priv->qp; 391 cm_id_priv->qp = NULL; 392 393 switch (cm_id_priv->state) { 394 case IW_CM_STATE_LISTEN: 395 cm_id_priv->state = IW_CM_STATE_DESTROYING; 396 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 397 /* destroy the listening endpoint */ 398 cm_id->device->ops.iw_destroy_listen(cm_id); 399 spin_lock_irqsave(&cm_id_priv->lock, flags); 400 break; 401 case IW_CM_STATE_ESTABLISHED: 402 cm_id_priv->state = IW_CM_STATE_DESTROYING; 403 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 404 /* Abrupt close of the connection */ 405 (void)iwcm_modify_qp_err(qp); 406 spin_lock_irqsave(&cm_id_priv->lock, flags); 407 break; 408 case IW_CM_STATE_IDLE: 409 case IW_CM_STATE_CLOSING: 410 cm_id_priv->state = IW_CM_STATE_DESTROYING; 411 break; 412 case IW_CM_STATE_CONN_RECV: 413 /* 414 * App called destroy before/without calling accept after 415 * receiving connection request event notification or 416 * returned non zero from the event callback function. 417 * In either case, must tell the provider to reject. 418 */ 419 cm_id_priv->state = IW_CM_STATE_DESTROYING; 420 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 421 cm_id->device->ops.iw_reject(cm_id, NULL, 0); 422 spin_lock_irqsave(&cm_id_priv->lock, flags); 423 break; 424 case IW_CM_STATE_CONN_SENT: 425 case IW_CM_STATE_DESTROYING: 426 default: 427 BUG(); 428 break; 429 } 430 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 431 if (qp) 432 cm_id_priv->id.device->ops.iw_rem_ref(qp); 433 434 if (cm_id->mapped) { 435 iwpm_remove_mapinfo(&cm_id->local_addr, &cm_id->m_local_addr); 436 iwpm_remove_mapping(&cm_id->local_addr, RDMA_NL_IWCM); 437 } 438 } 439 440 /* 441 * Destroy cm_id. If the cm_id still has other references, wait for all 442 * references to be released on the cm_id and then release the initial 443 * reference taken by iw_create_cm_id. 
444 */ 445 void iw_destroy_cm_id(struct iw_cm_id *cm_id) 446 { 447 struct iwcm_id_private *cm_id_priv; 448 449 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 450 destroy_cm_id(cm_id); 451 if (refcount_read(&cm_id_priv->refcount) > 1) 452 flush_workqueue(iwcm_wq); 453 iwcm_deref_id(cm_id_priv); 454 } 455 EXPORT_SYMBOL(iw_destroy_cm_id); 456 457 /** 458 * iw_cm_check_wildcard - If IP address is 0 then use original 459 * @pm_addr: sockaddr containing the ip to check for wildcard 460 * @cm_addr: sockaddr containing the actual IP address 461 * @cm_outaddr: sockaddr to set IP addr which leaving port 462 * 463 * Checks the pm_addr for wildcard and then sets cm_outaddr's 464 * IP to the actual (cm_addr). 465 */ 466 static void iw_cm_check_wildcard(struct sockaddr_storage *pm_addr, 467 struct sockaddr_storage *cm_addr, 468 struct sockaddr_storage *cm_outaddr) 469 { 470 if (pm_addr->ss_family == AF_INET) { 471 struct sockaddr_in *pm4_addr = (struct sockaddr_in *)pm_addr; 472 473 if (pm4_addr->sin_addr.s_addr == htonl(INADDR_ANY)) { 474 struct sockaddr_in *cm4_addr = 475 (struct sockaddr_in *)cm_addr; 476 struct sockaddr_in *cm4_outaddr = 477 (struct sockaddr_in *)cm_outaddr; 478 479 cm4_outaddr->sin_addr = cm4_addr->sin_addr; 480 } 481 } else { 482 struct sockaddr_in6 *pm6_addr = (struct sockaddr_in6 *)pm_addr; 483 484 if (ipv6_addr_type(&pm6_addr->sin6_addr) == IPV6_ADDR_ANY) { 485 struct sockaddr_in6 *cm6_addr = 486 (struct sockaddr_in6 *)cm_addr; 487 struct sockaddr_in6 *cm6_outaddr = 488 (struct sockaddr_in6 *)cm_outaddr; 489 490 cm6_outaddr->sin6_addr = cm6_addr->sin6_addr; 491 } 492 } 493 } 494 495 /** 496 * iw_cm_map - Use portmapper to map the ports 497 * @cm_id: connection manager pointer 498 * @active: Indicates the active side when true 499 * returns nonzero for error only if iwpm_create_mapinfo() fails 500 * 501 * Tries to add a mapping for a port using the Portmapper. 
If 502 * successful in mapping the IP/Port it will check the remote 503 * mapped IP address for a wildcard IP address and replace the 504 * zero IP address with the remote_addr. 505 */ 506 static int iw_cm_map(struct iw_cm_id *cm_id, bool active) 507 { 508 const char *devname = dev_name(&cm_id->device->dev); 509 const char *ifname = cm_id->device->iw_ifname; 510 struct iwpm_dev_data pm_reg_msg = {}; 511 struct iwpm_sa_data pm_msg; 512 int status; 513 514 if (strlen(devname) >= sizeof(pm_reg_msg.dev_name) || 515 strlen(ifname) >= sizeof(pm_reg_msg.if_name)) 516 return -EINVAL; 517 518 cm_id->m_local_addr = cm_id->local_addr; 519 cm_id->m_remote_addr = cm_id->remote_addr; 520 521 strcpy(pm_reg_msg.dev_name, devname); 522 strcpy(pm_reg_msg.if_name, ifname); 523 524 if (iwpm_register_pid(&pm_reg_msg, RDMA_NL_IWCM) || 525 !iwpm_valid_pid()) 526 return 0; 527 528 cm_id->mapped = true; 529 pm_msg.loc_addr = cm_id->local_addr; 530 pm_msg.rem_addr = cm_id->remote_addr; 531 pm_msg.flags = (cm_id->device->iw_driver_flags & IW_F_NO_PORT_MAP) ? 532 IWPM_FLAGS_NO_PORT_MAP : 0; 533 if (active) 534 status = iwpm_add_and_query_mapping(&pm_msg, 535 RDMA_NL_IWCM); 536 else 537 status = iwpm_add_mapping(&pm_msg, RDMA_NL_IWCM); 538 539 if (!status) { 540 cm_id->m_local_addr = pm_msg.mapped_loc_addr; 541 if (active) { 542 cm_id->m_remote_addr = pm_msg.mapped_rem_addr; 543 iw_cm_check_wildcard(&pm_msg.mapped_rem_addr, 544 &cm_id->remote_addr, 545 &cm_id->m_remote_addr); 546 } 547 } 548 549 return iwpm_create_mapinfo(&cm_id->local_addr, 550 &cm_id->m_local_addr, 551 RDMA_NL_IWCM, pm_msg.flags); 552 } 553 554 /* 555 * CM_ID <-- LISTEN 556 * 557 * Start listening for connect requests. Generates one CONNECT_REQUEST 558 * event for each inbound connect request. 
559 */ 560 int iw_cm_listen(struct iw_cm_id *cm_id, int backlog) 561 { 562 struct iwcm_id_private *cm_id_priv; 563 unsigned long flags; 564 int ret; 565 566 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 567 568 if (!backlog) 569 backlog = default_backlog; 570 571 ret = alloc_work_entries(cm_id_priv, backlog); 572 if (ret) 573 return ret; 574 575 spin_lock_irqsave(&cm_id_priv->lock, flags); 576 switch (cm_id_priv->state) { 577 case IW_CM_STATE_IDLE: 578 cm_id_priv->state = IW_CM_STATE_LISTEN; 579 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 580 ret = iw_cm_map(cm_id, false); 581 if (!ret) 582 ret = cm_id->device->ops.iw_create_listen(cm_id, 583 backlog); 584 if (ret) 585 cm_id_priv->state = IW_CM_STATE_IDLE; 586 spin_lock_irqsave(&cm_id_priv->lock, flags); 587 break; 588 default: 589 ret = -EINVAL; 590 } 591 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 592 593 return ret; 594 } 595 EXPORT_SYMBOL(iw_cm_listen); 596 597 /* 598 * CM_ID <-- IDLE 599 * 600 * Rejects an inbound connection request. No events are generated. 
601 */ 602 int iw_cm_reject(struct iw_cm_id *cm_id, 603 const void *private_data, 604 u8 private_data_len) 605 { 606 struct iwcm_id_private *cm_id_priv; 607 unsigned long flags; 608 int ret; 609 610 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 611 set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 612 613 spin_lock_irqsave(&cm_id_priv->lock, flags); 614 if (cm_id_priv->state != IW_CM_STATE_CONN_RECV) { 615 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 616 clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 617 wake_up_all(&cm_id_priv->connect_wait); 618 return -EINVAL; 619 } 620 cm_id_priv->state = IW_CM_STATE_IDLE; 621 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 622 623 ret = cm_id->device->ops.iw_reject(cm_id, private_data, 624 private_data_len); 625 626 clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 627 wake_up_all(&cm_id_priv->connect_wait); 628 629 return ret; 630 } 631 EXPORT_SYMBOL(iw_cm_reject); 632 633 /* 634 * CM_ID <-- ESTABLISHED 635 * 636 * Accepts an inbound connection request and generates an ESTABLISHED 637 * event. Callers of iw_cm_disconnect and iw_destroy_cm_id will block 638 * until the ESTABLISHED event is received from the provider. 
639 */ 640 int iw_cm_accept(struct iw_cm_id *cm_id, 641 struct iw_cm_conn_param *iw_param) 642 { 643 struct iwcm_id_private *cm_id_priv; 644 struct ib_qp *qp; 645 unsigned long flags; 646 int ret; 647 648 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 649 set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 650 651 spin_lock_irqsave(&cm_id_priv->lock, flags); 652 if (cm_id_priv->state != IW_CM_STATE_CONN_RECV) { 653 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 654 clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 655 wake_up_all(&cm_id_priv->connect_wait); 656 return -EINVAL; 657 } 658 /* Get the ib_qp given the QPN */ 659 qp = cm_id->device->ops.iw_get_qp(cm_id->device, iw_param->qpn); 660 if (!qp) { 661 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 662 clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 663 wake_up_all(&cm_id_priv->connect_wait); 664 return -EINVAL; 665 } 666 cm_id->device->ops.iw_add_ref(qp); 667 cm_id_priv->qp = qp; 668 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 669 670 ret = cm_id->device->ops.iw_accept(cm_id, iw_param); 671 if (ret) { 672 /* An error on accept precludes provider events */ 673 BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV); 674 cm_id_priv->state = IW_CM_STATE_IDLE; 675 spin_lock_irqsave(&cm_id_priv->lock, flags); 676 qp = cm_id_priv->qp; 677 cm_id_priv->qp = NULL; 678 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 679 if (qp) 680 cm_id->device->ops.iw_rem_ref(qp); 681 clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 682 wake_up_all(&cm_id_priv->connect_wait); 683 } 684 685 return ret; 686 } 687 EXPORT_SYMBOL(iw_cm_accept); 688 689 /* 690 * Active Side: CM_ID <-- CONN_SENT 691 * 692 * If successful, results in the generation of a CONNECT_REPLY 693 * event. iw_cm_disconnect and iw_cm_destroy will block until the 694 * CONNECT_REPLY event is received from the provider. 
695 */ 696 int iw_cm_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param) 697 { 698 struct iwcm_id_private *cm_id_priv; 699 int ret; 700 unsigned long flags; 701 struct ib_qp *qp = NULL; 702 703 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 704 705 ret = alloc_work_entries(cm_id_priv, 4); 706 if (ret) 707 return ret; 708 709 set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 710 spin_lock_irqsave(&cm_id_priv->lock, flags); 711 712 if (cm_id_priv->state != IW_CM_STATE_IDLE) { 713 ret = -EINVAL; 714 goto err; 715 } 716 717 /* Get the ib_qp given the QPN */ 718 qp = cm_id->device->ops.iw_get_qp(cm_id->device, iw_param->qpn); 719 if (!qp) { 720 ret = -EINVAL; 721 goto err; 722 } 723 cm_id->device->ops.iw_add_ref(qp); 724 cm_id_priv->qp = qp; 725 cm_id_priv->state = IW_CM_STATE_CONN_SENT; 726 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 727 728 ret = iw_cm_map(cm_id, true); 729 if (!ret) 730 ret = cm_id->device->ops.iw_connect(cm_id, iw_param); 731 if (!ret) 732 return 0; /* success */ 733 734 spin_lock_irqsave(&cm_id_priv->lock, flags); 735 qp = cm_id_priv->qp; 736 cm_id_priv->qp = NULL; 737 cm_id_priv->state = IW_CM_STATE_IDLE; 738 err: 739 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 740 if (qp) 741 cm_id->device->ops.iw_rem_ref(qp); 742 clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 743 wake_up_all(&cm_id_priv->connect_wait); 744 return ret; 745 } 746 EXPORT_SYMBOL(iw_cm_connect); 747 748 /* 749 * Passive Side: new CM_ID <-- CONN_RECV 750 * 751 * Handles an inbound connect request. The function creates a new 752 * iw_cm_id to represent the new connection and inherits the client 753 * callback function and other attributes from the listening parent. 754 * 755 * The work item contains a pointer to the listen_cm_id and the event. The 756 * listen_cm_id contains the client cm_handler, context and 757 * device. These are copied when the device is cloned. The event 758 * contains the new four tuple. 
759 * 760 * An error on the child should not affect the parent, so this 761 * function does not return a value. 762 */ 763 static void cm_conn_req_handler(struct iwcm_id_private *listen_id_priv, 764 struct iw_cm_event *iw_event) 765 { 766 unsigned long flags; 767 struct iw_cm_id *cm_id; 768 struct iwcm_id_private *cm_id_priv; 769 int ret; 770 771 /* 772 * The provider should never generate a connection request 773 * event with a bad status. 774 */ 775 BUG_ON(iw_event->status); 776 777 cm_id = iw_create_cm_id(listen_id_priv->id.device, 778 listen_id_priv->id.cm_handler, 779 listen_id_priv->id.context); 780 /* If the cm_id could not be created, ignore the request */ 781 if (IS_ERR(cm_id)) 782 goto out; 783 784 cm_id->provider_data = iw_event->provider_data; 785 cm_id->m_local_addr = iw_event->local_addr; 786 cm_id->m_remote_addr = iw_event->remote_addr; 787 cm_id->local_addr = listen_id_priv->id.local_addr; 788 789 ret = iwpm_get_remote_info(&listen_id_priv->id.m_local_addr, 790 &iw_event->remote_addr, 791 &cm_id->remote_addr, 792 RDMA_NL_IWCM); 793 if (ret) { 794 cm_id->remote_addr = iw_event->remote_addr; 795 } else { 796 iw_cm_check_wildcard(&listen_id_priv->id.m_local_addr, 797 &iw_event->local_addr, 798 &cm_id->local_addr); 799 iw_event->local_addr = cm_id->local_addr; 800 iw_event->remote_addr = cm_id->remote_addr; 801 } 802 803 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 804 cm_id_priv->state = IW_CM_STATE_CONN_RECV; 805 806 /* 807 * We could be destroying the listening id. If so, ignore this 808 * upcall. 
809 */ 810 spin_lock_irqsave(&listen_id_priv->lock, flags); 811 if (listen_id_priv->state != IW_CM_STATE_LISTEN) { 812 spin_unlock_irqrestore(&listen_id_priv->lock, flags); 813 iw_cm_reject(cm_id, NULL, 0); 814 iw_destroy_cm_id(cm_id); 815 goto out; 816 } 817 spin_unlock_irqrestore(&listen_id_priv->lock, flags); 818 819 ret = alloc_work_entries(cm_id_priv, 3); 820 if (ret) { 821 iw_cm_reject(cm_id, NULL, 0); 822 iw_destroy_cm_id(cm_id); 823 goto out; 824 } 825 826 /* Call the client CM handler */ 827 ret = cm_id->cm_handler(cm_id, iw_event); 828 if (ret) { 829 iw_cm_reject(cm_id, NULL, 0); 830 iw_destroy_cm_id(cm_id); 831 } 832 833 out: 834 if (iw_event->private_data_len) 835 kfree(iw_event->private_data); 836 } 837 838 /* 839 * Passive Side: CM_ID <-- ESTABLISHED 840 * 841 * The provider generated an ESTABLISHED event which means that 842 * the MPA negotion has completed successfully and we are now in MPA 843 * FPDU mode. 844 * 845 * This event can only be received in the CONN_RECV state. If the 846 * remote peer closed, the ESTABLISHED event would be received followed 847 * by the CLOSE event. If the app closes, it will block until we wake 848 * it up after processing this event. 849 */ 850 static int cm_conn_est_handler(struct iwcm_id_private *cm_id_priv, 851 struct iw_cm_event *iw_event) 852 { 853 unsigned long flags; 854 int ret; 855 856 spin_lock_irqsave(&cm_id_priv->lock, flags); 857 858 /* 859 * We clear the CONNECT_WAIT bit here to allow the callback 860 * function to call iw_cm_disconnect. Calling iw_destroy_cm_id 861 * from a callback handler is not allowed. 
862 */ 863 clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 864 BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV); 865 cm_id_priv->state = IW_CM_STATE_ESTABLISHED; 866 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 867 ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event); 868 wake_up_all(&cm_id_priv->connect_wait); 869 870 return ret; 871 } 872 873 /* 874 * Active Side: CM_ID <-- ESTABLISHED 875 * 876 * The app has called connect and is waiting for the established event to 877 * post it's requests to the server. This event will wake up anyone 878 * blocked in iw_cm_disconnect or iw_destroy_id. 879 */ 880 static int cm_conn_rep_handler(struct iwcm_id_private *cm_id_priv, 881 struct iw_cm_event *iw_event) 882 { 883 struct ib_qp *qp = NULL; 884 unsigned long flags; 885 int ret; 886 887 spin_lock_irqsave(&cm_id_priv->lock, flags); 888 /* 889 * Clear the connect wait bit so a callback function calling 890 * iw_cm_disconnect will not wait and deadlock this thread 891 */ 892 clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 893 BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_SENT); 894 if (iw_event->status == 0) { 895 cm_id_priv->id.m_local_addr = iw_event->local_addr; 896 cm_id_priv->id.m_remote_addr = iw_event->remote_addr; 897 iw_event->local_addr = cm_id_priv->id.local_addr; 898 iw_event->remote_addr = cm_id_priv->id.remote_addr; 899 cm_id_priv->state = IW_CM_STATE_ESTABLISHED; 900 } else { 901 /* REJECTED or RESET */ 902 qp = cm_id_priv->qp; 903 cm_id_priv->qp = NULL; 904 cm_id_priv->state = IW_CM_STATE_IDLE; 905 } 906 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 907 if (qp) 908 cm_id_priv->id.device->ops.iw_rem_ref(qp); 909 ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event); 910 911 if (iw_event->private_data_len) 912 kfree(iw_event->private_data); 913 914 /* Wake up waiters on connect complete */ 915 wake_up_all(&cm_id_priv->connect_wait); 916 917 return ret; 918 } 919 920 /* 921 * CM_ID <-- CLOSING 922 * 923 * If in the ESTABLISHED state, 
move to CLOSING. 924 */ 925 static void cm_disconnect_handler(struct iwcm_id_private *cm_id_priv, 926 struct iw_cm_event *iw_event) 927 { 928 unsigned long flags; 929 930 spin_lock_irqsave(&cm_id_priv->lock, flags); 931 if (cm_id_priv->state == IW_CM_STATE_ESTABLISHED) 932 cm_id_priv->state = IW_CM_STATE_CLOSING; 933 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 934 } 935 936 /* 937 * CM_ID <-- IDLE 938 * 939 * If in the ESTBLISHED or CLOSING states, the QP will have have been 940 * moved by the provider to the ERR state. Disassociate the CM_ID from 941 * the QP, move to IDLE, and remove the 'connected' reference. 942 * 943 * If in some other state, the cm_id was destroyed asynchronously. 944 * This is the last reference that will result in waking up 945 * the app thread blocked in iw_destroy_cm_id. 946 */ 947 static int cm_close_handler(struct iwcm_id_private *cm_id_priv, 948 struct iw_cm_event *iw_event) 949 { 950 struct ib_qp *qp; 951 unsigned long flags; 952 int ret = 0, notify_event = 0; 953 spin_lock_irqsave(&cm_id_priv->lock, flags); 954 qp = cm_id_priv->qp; 955 cm_id_priv->qp = NULL; 956 957 switch (cm_id_priv->state) { 958 case IW_CM_STATE_ESTABLISHED: 959 case IW_CM_STATE_CLOSING: 960 cm_id_priv->state = IW_CM_STATE_IDLE; 961 notify_event = 1; 962 break; 963 case IW_CM_STATE_DESTROYING: 964 break; 965 default: 966 BUG(); 967 } 968 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 969 970 if (qp) 971 cm_id_priv->id.device->ops.iw_rem_ref(qp); 972 if (notify_event) 973 ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event); 974 return ret; 975 } 976 977 static int process_event(struct iwcm_id_private *cm_id_priv, 978 struct iw_cm_event *iw_event) 979 { 980 int ret = 0; 981 982 switch (iw_event->event) { 983 case IW_CM_EVENT_CONNECT_REQUEST: 984 cm_conn_req_handler(cm_id_priv, iw_event); 985 break; 986 case IW_CM_EVENT_CONNECT_REPLY: 987 ret = cm_conn_rep_handler(cm_id_priv, iw_event); 988 break; 989 case IW_CM_EVENT_ESTABLISHED: 990 ret = 
cm_conn_est_handler(cm_id_priv, iw_event); 991 break; 992 case IW_CM_EVENT_DISCONNECT: 993 cm_disconnect_handler(cm_id_priv, iw_event); 994 break; 995 case IW_CM_EVENT_CLOSE: 996 ret = cm_close_handler(cm_id_priv, iw_event); 997 break; 998 default: 999 BUG(); 1000 } 1001 1002 return ret; 1003 } 1004 1005 /* 1006 * Process events for the cm_id. If the callback function requests 1007 * that the cm_id be deleted, a flag is set in the cm_id flags to 1008 * indicate that when the last reference is removed, the cm_id is 1009 * to be destroyed. This is necessary to distinguish between an 1010 * object that will be destroyed by the app thread asleep on the 1011 * destroy_comp list vs. an object destroyed here synchronously 1012 * when the last reference is removed. 1013 */ 1014 static void cm_work_handler(struct work_struct *_work) 1015 { 1016 struct iwcm_work *work = container_of(_work, struct iwcm_work, work); 1017 struct iw_cm_event levent; 1018 struct iwcm_id_private *cm_id_priv = work->cm_id; 1019 unsigned long flags; 1020 int ret = 0; 1021 1022 spin_lock_irqsave(&cm_id_priv->lock, flags); 1023 levent = work->event; 1024 put_work(work); 1025 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 1026 1027 if (!test_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags)) { 1028 ret = process_event(cm_id_priv, &levent); 1029 if (ret) { 1030 destroy_cm_id(&cm_id_priv->id); 1031 WARN_ON_ONCE(iwcm_deref_id(cm_id_priv)); 1032 } 1033 } else 1034 pr_debug("dropping event %d\n", levent.event); 1035 if (iwcm_deref_id(cm_id_priv)) 1036 return; 1037 } 1038 1039 /* 1040 * This function is called on interrupt context. Schedule events on 1041 * the iwcm_wq thread to allow callback functions to downcall into 1042 * the CM and/or block. 1043 * 1044 * Each event holds a reference on the cm_id. Until the last posted 1045 * event has been delivered and processed, the cm_id cannot be 1046 * deleted. 1047 * 1048 * Returns: 1049 * 0 - the event was handled. 
1050 * -ENOMEM - the event was not handled due to lack of resources. 1051 */ 1052 static int cm_event_handler(struct iw_cm_id *cm_id, 1053 struct iw_cm_event *iw_event) 1054 { 1055 struct iwcm_work *work; 1056 struct iwcm_id_private *cm_id_priv; 1057 unsigned long flags; 1058 int ret = 0; 1059 1060 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 1061 1062 spin_lock_irqsave(&cm_id_priv->lock, flags); 1063 work = get_work(cm_id_priv); 1064 if (!work) { 1065 ret = -ENOMEM; 1066 goto out; 1067 } 1068 1069 INIT_WORK(&work->work, cm_work_handler); 1070 work->cm_id = cm_id_priv; 1071 work->event = *iw_event; 1072 1073 if ((work->event.event == IW_CM_EVENT_CONNECT_REQUEST || 1074 work->event.event == IW_CM_EVENT_CONNECT_REPLY) && 1075 work->event.private_data_len) { 1076 ret = copy_private_data(&work->event); 1077 if (ret) { 1078 put_work(work); 1079 goto out; 1080 } 1081 } 1082 1083 refcount_inc(&cm_id_priv->refcount); 1084 queue_work(iwcm_wq, &work->work); 1085 out: 1086 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 1087 return ret; 1088 } 1089 1090 static int iwcm_init_qp_init_attr(struct iwcm_id_private *cm_id_priv, 1091 struct ib_qp_attr *qp_attr, 1092 int *qp_attr_mask) 1093 { 1094 unsigned long flags; 1095 int ret; 1096 1097 spin_lock_irqsave(&cm_id_priv->lock, flags); 1098 switch (cm_id_priv->state) { 1099 case IW_CM_STATE_IDLE: 1100 case IW_CM_STATE_CONN_SENT: 1101 case IW_CM_STATE_CONN_RECV: 1102 case IW_CM_STATE_ESTABLISHED: 1103 *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS; 1104 qp_attr->qp_access_flags = IB_ACCESS_REMOTE_WRITE| 1105 IB_ACCESS_REMOTE_READ; 1106 ret = 0; 1107 break; 1108 default: 1109 ret = -EINVAL; 1110 break; 1111 } 1112 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 1113 return ret; 1114 } 1115 1116 static int iwcm_init_qp_rts_attr(struct iwcm_id_private *cm_id_priv, 1117 struct ib_qp_attr *qp_attr, 1118 int *qp_attr_mask) 1119 { 1120 unsigned long flags; 1121 int ret; 1122 1123 spin_lock_irqsave(&cm_id_priv->lock, 
flags); 1124 switch (cm_id_priv->state) { 1125 case IW_CM_STATE_IDLE: 1126 case IW_CM_STATE_CONN_SENT: 1127 case IW_CM_STATE_CONN_RECV: 1128 case IW_CM_STATE_ESTABLISHED: 1129 *qp_attr_mask = 0; 1130 ret = 0; 1131 break; 1132 default: 1133 ret = -EINVAL; 1134 break; 1135 } 1136 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 1137 return ret; 1138 } 1139 1140 int iw_cm_init_qp_attr(struct iw_cm_id *cm_id, 1141 struct ib_qp_attr *qp_attr, 1142 int *qp_attr_mask) 1143 { 1144 struct iwcm_id_private *cm_id_priv; 1145 int ret; 1146 1147 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 1148 switch (qp_attr->qp_state) { 1149 case IB_QPS_INIT: 1150 case IB_QPS_RTR: 1151 ret = iwcm_init_qp_init_attr(cm_id_priv, 1152 qp_attr, qp_attr_mask); 1153 break; 1154 case IB_QPS_RTS: 1155 ret = iwcm_init_qp_rts_attr(cm_id_priv, 1156 qp_attr, qp_attr_mask); 1157 break; 1158 default: 1159 ret = -EINVAL; 1160 break; 1161 } 1162 return ret; 1163 } 1164 EXPORT_SYMBOL(iw_cm_init_qp_attr); 1165 1166 static int __init iw_cm_init(void) 1167 { 1168 int ret; 1169 1170 ret = iwpm_init(RDMA_NL_IWCM); 1171 if (ret) 1172 return ret; 1173 1174 iwcm_wq = alloc_ordered_workqueue("iw_cm_wq", WQ_MEM_RECLAIM); 1175 if (!iwcm_wq) 1176 goto err_alloc; 1177 1178 iwcm_ctl_table_hdr = register_net_sysctl(&init_net, "net/iw_cm", 1179 iwcm_ctl_table); 1180 if (!iwcm_ctl_table_hdr) { 1181 pr_err("iw_cm: couldn't register sysctl paths\n"); 1182 goto err_sysctl; 1183 } 1184 1185 rdma_nl_register(RDMA_NL_IWCM, iwcm_nl_cb_table); 1186 return 0; 1187 1188 err_sysctl: 1189 destroy_workqueue(iwcm_wq); 1190 err_alloc: 1191 iwpm_exit(RDMA_NL_IWCM); 1192 return -ENOMEM; 1193 } 1194 1195 static void __exit iw_cm_cleanup(void) 1196 { 1197 rdma_nl_unregister(RDMA_NL_IWCM); 1198 unregister_net_sysctl_table(iwcm_ctl_table_hdr); 1199 destroy_workqueue(iwcm_wq); 1200 iwpm_exit(RDMA_NL_IWCM); 1201 } 1202 1203 MODULE_ALIAS_RDMA_NETLINK(RDMA_NL_IWCM, 2); 1204 1205 module_init(iw_cm_init); 1206 
module_exit(iw_cm_cleanup); 1207