1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. 23 */ 24 25 #ifndef _RDSV3_RDSV3_H 26 #define _RDSV3_RDSV3_H 27 28 /* 29 * The name of this file is rds.h in ofed. 30 */ 31 32 #ifdef __cplusplus 33 extern "C" { 34 #endif 35 36 #include <sys/sunndi.h> 37 #include <netinet/in.h> 38 #include <sys/synch.h> 39 #include <sys/stropts.h> 40 #include <sys/socket.h> 41 #include <sys/socketvar.h> 42 #include <inet/ip.h> 43 #include <sys/avl.h> 44 #include <sys/param.h> 45 #include <sys/time.h> 46 #include <sys/rds.h> 47 48 #include <sys/ib/ibtl/ibti.h> 49 #include <sys/ib/clients/of/rdma/ib_verbs.h> 50 #include <sys/ib/clients/of/rdma/ib_addr.h> 51 #include <sys/ib/clients/of/rdma/rdma_cm.h> 52 #include <sys/ib/clients/rdsv3/rdsv3_impl.h> 53 #include <sys/ib/clients/rdsv3/info.h> 54 55 #define NIPQUAD(addr) \ 56 (unsigned char)((ntohl(addr) >> 24) & 0xFF), \ 57 (unsigned char)((ntohl(addr) >> 16) & 0xFF), \ 58 (unsigned char)((ntohl(addr) >> 8) & 0xFF), \ 59 (unsigned char)(ntohl(addr) & 0xFF) 60 61 /* 62 * RDS Network protocol version 63 */ 64 #define RDS_PROTOCOL_3_0 0x0300 65 #define RDS_PROTOCOL_3_1 0x0301 66 #define RDS_PROTOCOL_VERSION RDS_PROTOCOL_3_1 67 #define RDS_PROTOCOL_MAJOR(v) ((v) >> 8) 68 #define RDS_PROTOCOL_MINOR(v) ((v) & 255) 69 #define RDS_PROTOCOL(maj, min) (((maj) << 8) | min) 70 71 /* 72 * XXX randomly chosen, but at least seems to be unused: 73 * # 18464-18768 Unassigned 74 * We should do better. We want a reserved port to discourage unpriv'ed 75 * userspace from listening. 76 * 77 * port 18633 was the version that had ack frames on the wire. 78 */ 79 #define RDSV3_PORT 18634 80 81 #define RDSV3_REAPER_WAIT_SECS (5*60) 82 #define RDSV3_REAPER_WAIT_JIFFIES SEC_TO_TICK(RDSV3_REAPER_WAIT_SECS) 83 84 /* 85 * This is the sad making. Some kernels have a bug in the per_cpu() api which 86 * makes DEFINE_PER_CPU trigger an oops on insmod because the per-cpu section 87 * in the module is not cacheline-aligned. As much as we'd like to tell users 88 * with older kernels to stuff it, that's not reasonable. We'll roll our own 89 * until this doesn't have to build against older kernels. 90 */ 91 #define RDSV3_DEFINE_PER_CPU(type, var) type var[NR_CPUS] 92 #define RDSV3_DECLARE_PER_CPU(type, var) extern type var[NR_CPUS] 93 #define rdsv3_per_cpu(var, cpu) var[cpu] 94 95 static inline ulong_t 96 ceil(ulong_t x, ulong_t y) 97 { 98 return ((x + y - 1) / y); 99 } 100 101 #define RDSV3_FRAG_SHIFT 12 102 #define RDSV3_FRAG_SIZE ((unsigned int)(1 << RDSV3_FRAG_SHIFT)) 103 104 #define RDSV3_CONG_MAP_BYTES (65536 / 8) 105 #define RDSV3_CONG_MAP_LONGS (RDSV3_CONG_MAP_BYTES / sizeof (unsigned long)) 106 #define RDSV3_CONG_MAP_PAGES (RDSV3_CONG_MAP_BYTES / PAGE_SIZE) 107 #define RDSV3_CONG_MAP_PAGE_BITS (PAGE_SIZE * 8) 108 109 struct rdsv3_cong_map { 110 struct avl_node m_rb_node; 111 uint32_be_t m_addr; 112 rdsv3_wait_queue_t m_waitq; 113 struct list m_conn_list; 114 unsigned long m_page_addrs[RDSV3_CONG_MAP_PAGES]; 115 }; 116 117 /* 118 * This is how we will track the connection state: 119 * A connection is always in one of the following 120 * states. Updates to the state are atomic and imply 121 * a memory barrier. 122 */ 123 enum { 124 RDSV3_CONN_DOWN = 0, 125 RDSV3_CONN_CONNECTING, 126 RDSV3_CONN_DISCONNECTING, 127 RDSV3_CONN_UP, 128 RDSV3_CONN_ERROR, 129 }; 130 131 /* Bits for c_flags */ 132 #define RDSV3_LL_SEND_FULL 0 133 #define RDSV3_RECONNECT_PENDING 1 134 135 struct rdsv3_connection { 136 struct avl_node c_hash_node; 137 uint32_be_t c_laddr; 138 uint32_be_t c_faddr; 139 unsigned int c_loopback:1; 140 struct rdsv3_connection *c_passive; 141 142 struct rdsv3_cong_map *c_lcong; 143 struct rdsv3_cong_map *c_fcong; 144 145 struct mutex c_send_lock; /* protect send ring */ 146 atomic_t c_send_generation; 147 atomic_t c_senders; 148 149 struct rdsv3_message *c_xmit_rm; 150 unsigned long c_xmit_sg; 151 unsigned int c_xmit_hdr_off; 152 unsigned int c_xmit_data_off; 153 unsigned int c_xmit_rdma_sent; 154 155 kmutex_t c_lock; /* protect msg queues */ 156 uint64_t c_next_tx_seq; 157 struct list c_send_queue; 158 struct list c_retrans; 159 160 uint64_t c_next_rx_seq; 161 162 struct rdsv3_transport *c_trans; 163 void *c_transport_data; 164 165 atomic_t c_state; 166 unsigned long c_flags; 167 unsigned long c_reconnect_jiffies; 168 clock_t c_last_connect_jiffies; 169 170 struct rdsv3_delayed_work_s c_send_w; 171 struct rdsv3_delayed_work_s c_recv_w; 172 struct rdsv3_delayed_work_s c_conn_w; 173 struct rdsv3_delayed_work_s c_reap_w; 174 struct rdsv3_work_s c_down_w; 175 struct mutex c_cm_lock; /* protect conn state & cm */ 176 177 struct list_node c_map_item; 178 unsigned long c_map_queued; 179 unsigned long c_map_offset; 180 unsigned long c_map_bytes; 181 182 unsigned int c_unacked_packets; 183 unsigned int c_unacked_bytes; 184 185 /* Protocol version */ 186 unsigned int c_version; 187 }; 188 189 #define RDSV3_FLAG_CONG_BITMAP 0x01 190 #define RDSV3_FLAG_ACK_REQUIRED 0x02 191 #define RDSV3_FLAG_RETRANSMITTED 0x04 192 #define RDSV3_MAX_ADV_CREDIT 127 193 194 /* 195 * Maximum space available for extension headers. 196 */ 197 #define RDSV3_HEADER_EXT_SPACE 16 198 199 struct rdsv3_header { 200 uint64_be_t h_sequence; 201 uint64_be_t h_ack; 202 uint32_be_t h_len; 203 uint16_be_t h_sport; 204 uint16_be_t h_dport; 205 uint8_t h_flags; 206 uint8_t h_credit; 207 uint8_t h_padding[4]; 208 uint16_be_t h_csum; 209 210 uint8_t h_exthdr[RDSV3_HEADER_EXT_SPACE]; 211 }; 212 213 /* Reserved - indicates end of extensions */ 214 #define RDSV3_EXTHDR_NONE 0 215 216 /* 217 * This extension header is included in the very 218 * first message that is sent on a new connection, 219 * and identifies the protocol level. This will help 220 * rolling updates if a future change requires breaking 221 * the protocol. 222 */ 223 #define RDSV3_EXTHDR_VERSION 1 224 struct rdsv3_ext_header_version { 225 uint32_be_t h_version; 226 }; 227 228 /* 229 * This extension header is included in the RDS message 230 * chasing an RDMA operation. 231 */ 232 #define RDSV3_EXTHDR_RDMA 2 233 struct rdsv3_ext_header_rdma { 234 uint32_be_t h_rdma_rkey; 235 }; 236 237 /* 238 * This extension header tells the peer about the 239 * destination <R_Key,offset> of the requested RDMA 240 * operation. 241 */ 242 #define RDSV3_EXTHDR_RDMA_DEST 3 243 struct rdsv3_ext_header_rdma_dest { 244 uint32_be_t h_rdma_rkey; 245 uint32_be_t h_rdma_offset; 246 }; 247 248 #define __RDSV3_EXTHDR_MAX 16 /* for now */ 249 250 struct rdsv3_incoming { 251 atomic_t i_refcount; 252 struct list_node i_item; 253 struct rdsv3_connection *i_conn; 254 struct rdsv3_header i_hdr; 255 unsigned long i_rx_jiffies; 256 uint32_be_t i_saddr; 257 258 rdsv3_rdma_cookie_t i_rdma_cookie; 259 }; 260 261 /* 262 * m_sock_item and m_conn_item are on lists that are serialized under 263 * conn->c_lock. m_sock_item has additional meaning in that once it is empty 264 * the message will not be put back on the retransmit list after being sent. 265 * messages that are canceled while being sent rely on this. 266 * 267 * m_inc is used by loopback so that it can pass an incoming message straight 268 * back up into the rx path. It embeds a wire header which is also used by 269 * the send path, which is kind of awkward. 270 * 271 * m_sock_item indicates the message's presence on a socket's send or receive 272 * queue. m_rs will point to that socket. 273 * 274 * m_daddr is used by cancellation to prune messages to a given destination. 275 * 276 * The RDS_MSG_ON_SOCK and RDS_MSG_ON_CONN flags are used to avoid lock 277 * nesting. As paths iterate over messages on a sock, or conn, they must 278 * also lock the conn, or sock, to remove the message from those lists too. 279 * Testing the flag to determine if the message is still on the lists lets 280 * us avoid testing the list_head directly. That means each path can use 281 * the message's list_head to keep it on a local list while juggling locks 282 * without confusing the other path. 283 * 284 * m_ack_seq is an optional field set by transports who need a different 285 * sequence number range to invalidate. They can use this in a callback 286 * that they pass to rdsv3_send_drop_acked() to see if each message has been 287 * acked. The HAS_ACK_SEQ flag can be used to detect messages which haven't 288 * had ack_seq set yet. 289 */ 290 #define RDSV3_MSG_ON_SOCK 1 291 #define RDSV3_MSG_ON_CONN 2 292 #define RDSV3_MSG_HAS_ACK_SEQ 3 293 #define RDSV3_MSG_ACK_REQUIRED 4 294 #define RDSV3_MSG_RETRANSMITTED 5 295 #define RDSV3_MSG_MAPPED 6 296 #define RDSV3_MSG_PAGEVEC 7 297 298 struct rdsv3_message { 299 atomic_t m_refcount; 300 struct list_node m_sock_item; 301 struct list_node m_conn_item; 302 struct rdsv3_incoming m_inc; 303 uint64_t m_ack_seq; 304 uint32_be_t m_daddr; 305 unsigned long m_flags; 306 307 /* 308 * Never access m_rs without holding m_rs_lock. 309 * Lock nesting is 310 * rm->m_rs_lock 311 * -> rs->rs_lock 312 */ 313 kmutex_t m_rs_lock; 314 rdsv3_wait_queue_t m_flush_wait; 315 316 struct rdsv3_sock *m_rs; 317 struct rdsv3_rdma_op *m_rdma_op; 318 rdsv3_rdma_cookie_t m_rdma_cookie; 319 struct rdsv3_mr *m_rdma_mr; 320 unsigned int m_nents; 321 unsigned int m_count; 322 struct rdsv3_scatterlist m_sg[1]; 323 }; 324 325 /* 326 * The RDS notifier is used (optionally) to tell the application about 327 * completed RDMA operations. Rather than keeping the whole rds message 328 * around on the queue, we allocate a small notifier that is put on the 329 * socket's notifier_list. Notifications are delivered to the application 330 * through control messages. 331 */ 332 struct rdsv3_notifier { 333 list_node_t n_list; 334 uint64_t n_user_token; 335 int n_status; 336 }; 337 338 /* 339 * struct rdsv3_transport - transport specific behavioural hooks 340 * 341 * @xmit: .xmit is called by rdsv3_send_xmit() to tell the transport to send 342 * part of a message. The caller serializes on the send_sem so this 343 * doesn't need to be reentrant for a given conn. The header must be 344 * sent before the data payload. .xmit must be prepared to send a 345 * message with no data payload. .xmit should return the number of 346 * bytes that were sent down the connection, including header bytes. 347 * Returning 0 tells the caller that it doesn't need to perform any 348 * additional work now. This is usually the case when the transport has 349 * filled the sending queue for its connection and will handle 350 * triggering the rds thread to continue the send when space becomes 351 * available. Returning -EAGAIN tells the caller to retry the send 352 * immediately. Returning -ENOMEM tells the caller to retry the send at 353 * some point in the future. 354 * 355 * @conn_shutdown: conn_shutdown stops traffic on the given connection. Once 356 * it returns the connection can not call rdsv3_recv_incoming(). 357 * This will only be called once after conn_connect returns 358 * non-zero success and will The caller serializes this with 359 * the send and connecting paths (xmit_* and conn_*). The 360 * transport is responsible for other serialization, including 361 * rdsv3_recv_incoming(). This is called in process context but 362 * should try hard not to block. 363 * 364 * @xmit_cong_map: This asks the transport to send the local bitmap down the 365 * given connection. XXX get a better story about the bitmap 366 * flag and header. 367 */ 368 369 #define RDS_TRANS_IB 0 370 #define RDS_TRANS_IWARP 1 371 #define RDS_TRANS_TCP 2 372 #define RDS_TRANS_COUNT 3 373 374 struct rdsv3_transport { 375 char t_name[TRANSNAMSIZ]; 376 struct list_node t_item; 377 unsigned int t_type; 378 unsigned int t_prefer_loopback:1; 379 380 int (*laddr_check)(uint32_be_t addr); 381 int (*conn_alloc)(struct rdsv3_connection *conn, int gfp); 382 void (*conn_free)(void *data); 383 int (*conn_connect)(struct rdsv3_connection *conn); 384 void (*conn_shutdown)(struct rdsv3_connection *conn); 385 void (*xmit_prepare)(struct rdsv3_connection *conn); 386 void (*xmit_complete)(struct rdsv3_connection *conn); 387 int (*xmit)(struct rdsv3_connection *conn, struct rdsv3_message *rm, 388 unsigned int hdr_off, unsigned int sg, unsigned int off); 389 int (*xmit_cong_map)(struct rdsv3_connection *conn, 390 struct rdsv3_cong_map *map, unsigned long offset); 391 int (*xmit_rdma)(struct rdsv3_connection *conn, 392 struct rdsv3_rdma_op *op); 393 int (*recv)(struct rdsv3_connection *conn); 394 int (*inc_copy_to_user)(struct rdsv3_incoming *inc, uio_t *uio, 395 size_t size); 396 void (*inc_free)(struct rdsv3_incoming *inc); 397 398 int (*cm_handle_connect)(struct rdma_cm_id *cm_id, 399 struct rdma_cm_event *event); 400 int (*cm_initiate_connect)(struct rdma_cm_id *cm_id); 401 void (*cm_connect_complete)(struct rdsv3_connection *conn, 402 struct rdma_cm_event *event); 403 404 unsigned int (*stats_info_copy)(struct rdsv3_info_iterator *iter, 405 unsigned int avail); 406 void (*exit)(void); 407 void *(*get_mr)(struct rdsv3_iovec *sg, unsigned long nr_sg, 408 struct rdsv3_sock *rs, uint32_t *key_ret); 409 void (*sync_mr)(void *trans_private, int direction); 410 void (*free_mr)(void *trans_private, int invalidate); 411 void (*flush_mrs)(void); 412 }; 413 414 struct rdsv3_sock { 415 struct rsock *rs_sk; 416 uint64_t rs_user_addr; 417 uint64_t rs_user_bytes; 418 419 /* 420 * bound_addr used for both incoming and outgoing, no INADDR_ANY 421 * support. 422 */ 423 struct avl_node rs_bound_node; 424 uint32_be_t rs_bound_addr; 425 uint32_be_t rs_conn_addr; 426 uint16_be_t rs_bound_port; 427 uint16_be_t rs_conn_port; 428 429 /* 430 * This is only used to communicate the transport between bind and 431 * initiating connections. All other trans use is referenced through 432 * the connection. 433 */ 434 struct rdsv3_transport *rs_transport; 435 436 /* 437 * rdsv3_sendmsg caches the conn it used the last time around. 438 * This helps avoid costly lookups. 439 */ 440 struct rdsv3_connection *rs_conn; 441 kmutex_t rs_conn_lock; 442 443 /* flag indicating we were congested or not */ 444 int rs_congested; 445 /* seen congestion (ENOBUFS) when sending? */ 446 int rs_seen_congestion; 447 kmutex_t rs_congested_lock; 448 kcondvar_t rs_congested_cv; 449 450 /* rs_lock protects all these adjacent members before the newline */ 451 kmutex_t rs_lock; 452 struct list rs_send_queue; 453 uint32_t rs_snd_bytes; 454 int rs_rcv_bytes; 455 /* currently used for failed RDMAs */ 456 struct list rs_notify_queue; 457 458 /* 459 * Congestion wake_up. If rs_cong_monitor is set, we use cong_mask 460 * to decide whether the application should be woken up. 461 * If not set, we use rs_cong_track to find out whether a cong map 462 * update arrived. 463 */ 464 uint64_t rs_cong_mask; 465 uint64_t rs_cong_notify; 466 struct list_node rs_cong_list; 467 unsigned long rs_cong_track; 468 469 /* 470 * rs_recv_lock protects the receive queue, and is 471 * used to serialize with rdsv3_release. 472 */ 473 krwlock_t rs_recv_lock; 474 struct list rs_recv_queue; 475 476 /* just for stats reporting */ 477 struct list_node rs_item; 478 479 /* these have their own lock */ 480 kmutex_t rs_rdma_lock; 481 struct avl_tree rs_rdma_keys; 482 483 /* Socket options - in case there will be more */ 484 unsigned char rs_recverr, 485 rs_cong_monitor; 486 487 cred_t *rs_cred; 488 zoneid_t rs_zoneid; 489 }; 490 491 static inline struct rdsv3_sock * 492 rdsv3_sk_to_rs(const struct rsock *sk) 493 { 494 return ((struct rdsv3_sock *)sk->sk_protinfo); 495 } 496 497 static inline struct rsock * 498 rdsv3_rs_to_sk(const struct rdsv3_sock *rs) 499 { 500 return ((struct rsock *)rs->rs_sk); 501 } 502 503 /* 504 * The stack assigns sk_sndbuf and sk_rcvbuf to twice the specified value 505 * to account for overhead. We don't account for overhead, we just apply 506 * the number of payload bytes to the specified value. 507 */ 508 static inline int 509 rdsv3_sk_sndbuf(struct rdsv3_sock *rs) 510 { 511 /* XXX */ 512 return (rdsv3_rs_to_sk(rs)->sk_sndbuf); 513 } 514 515 static inline int 516 rdsv3_sk_rcvbuf(struct rdsv3_sock *rs) 517 { 518 /* XXX */ 519 return (rdsv3_rs_to_sk(rs)->sk_rcvbuf); 520 } 521 522 struct rdsv3_statistics { 523 uint64_t s_conn_reset; 524 uint64_t s_recv_drop_bad_checksum; 525 uint64_t s_recv_drop_old_seq; 526 uint64_t s_recv_drop_no_sock; 527 uint64_t s_recv_drop_dead_sock; 528 uint64_t s_recv_deliver_raced; 529 uint64_t s_recv_delivered; 530 uint64_t s_recv_queued; 531 uint64_t s_recv_immediate_retry; 532 uint64_t s_recv_delayed_retry; 533 uint64_t s_recv_ack_required; 534 uint64_t s_recv_rdma_bytes; 535 uint64_t s_recv_ping; 536 uint64_t s_send_queue_empty; 537 uint64_t s_send_queue_full; 538 uint64_t s_send_sem_contention; 539 uint64_t s_send_sem_queue_raced; 540 uint64_t s_send_immediate_retry; 541 uint64_t s_send_delayed_retry; 542 uint64_t s_send_drop_acked; 543 uint64_t s_send_ack_required; 544 uint64_t s_send_queued; 545 uint64_t s_send_rdma; 546 uint64_t s_send_rdma_bytes; 547 uint64_t s_send_pong; 548 uint64_t s_page_remainder_hit; 549 uint64_t s_page_remainder_miss; 550 uint64_t s_copy_to_user; 551 uint64_t s_copy_from_user; 552 uint64_t s_cong_update_queued; 553 uint64_t s_cong_update_received; 554 uint64_t s_cong_send_error; 555 uint64_t s_cong_send_blocked; 556 }; 557 558 /* af_rds.c */ 559 void rdsv3_sock_addref(struct rdsv3_sock *rs); 560 void rdsv3_sock_put(struct rdsv3_sock *rs); 561 void rdsv3_wake_sk_sleep(struct rdsv3_sock *rs); 562 void __rdsv3_wake_sk_sleep(struct rsock *sk); 563 564 /* bind.c */ 565 int rdsv3_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa, 566 socklen_t len, cred_t *cr); 567 void rdsv3_remove_bound(struct rdsv3_sock *rs); 568 struct rdsv3_sock *rdsv3_find_bound(uint32_be_t addr, uint16_be_t port); 569 570 /* conn.c */ 571 int rdsv3_conn_init(void); 572 void rdsv3_conn_exit(void); 573 struct rdsv3_connection *rdsv3_conn_create(uint32_be_t laddr, uint32_be_t faddr, 574 struct rdsv3_transport *trans, int gfp); 575 struct rdsv3_connection *rdsv3_conn_create_outgoing(uint32_be_t laddr, 576 uint32_be_t faddr, 577 struct rdsv3_transport *trans, int gfp); 578 void rdsv3_conn_shutdown(struct rdsv3_connection *conn); 579 void rdsv3_conn_destroy(struct rdsv3_connection *conn); 580 void rdsv3_conn_reset(struct rdsv3_connection *conn); 581 void rdsv3_conn_drop(struct rdsv3_connection *conn); 582 void rdsv3_for_each_conn_info(struct rsock *sock, unsigned int len, 583 struct rdsv3_info_iterator *iter, 584 struct rdsv3_info_lengths *lens, 585 int (*visitor)(struct rdsv3_connection *, void *), 586 size_t item_len); 587 588 static inline int 589 rdsv3_conn_transition(struct rdsv3_connection *conn, int old, int new) 590 { 591 return (atomic_cmpxchg(&conn->c_state, old, new) == old); 592 } 593 594 static inline int 595 rdsv3_conn_state(struct rdsv3_connection *conn) 596 { 597 return (atomic_get(&conn->c_state)); 598 } 599 600 static inline int 601 rdsv3_conn_up(struct rdsv3_connection *conn) 602 { 603 return (atomic_get(&conn->c_state) == RDSV3_CONN_UP); 604 } 605 606 static inline int 607 rdsv3_conn_connecting(struct rdsv3_connection *conn) 608 { 609 return (atomic_get(&conn->c_state) == RDSV3_CONN_CONNECTING); 610 } 611 612 /* recv.c */ 613 void rdsv3_inc_init(struct rdsv3_incoming *inc, struct rdsv3_connection *conn, 614 uint32_be_t saddr); 615 void rdsv3_inc_addref(struct rdsv3_incoming *inc); 616 void rdsv3_inc_put(struct rdsv3_incoming *inc); 617 void rdsv3_recv_incoming(struct rdsv3_connection *conn, uint32_be_t saddr, 618 uint32_be_t daddr, 619 struct rdsv3_incoming *inc, int gfp); 620 int rdsv3_recvmsg(struct rdsv3_sock *rs, uio_t *uio, 621 struct msghdr *msg, size_t size, int msg_flags); 622 void rdsv3_clear_recv_queue(struct rdsv3_sock *rs); 623 int rdsv3_notify_queue_get(struct rdsv3_sock *rs, struct msghdr *msg); 624 void rdsv3_inc_info_copy(struct rdsv3_incoming *inc, 625 struct rdsv3_info_iterator *iter, 626 uint32_be_t saddr, uint32_be_t daddr, int flip); 627 628 /* page.c */ 629 int rdsv3_page_remainder_alloc(struct rdsv3_scatterlist *scat, 630 unsigned long bytes, int gfp); 631 632 /* send.c */ 633 int rdsv3_sendmsg(struct rdsv3_sock *rs, uio_t *uio, struct nmsghdr *msg, 634 size_t payload_len); 635 void rdsv3_send_reset(struct rdsv3_connection *conn); 636 int rdsv3_send_xmit(struct rdsv3_connection *conn); 637 struct sockaddr_in; 638 void rdsv3_send_drop_to(struct rdsv3_sock *rs, struct sockaddr_in *dest); 639 typedef int (*is_acked_func)(struct rdsv3_message *rm, uint64_t ack); 640 void rdsv3_send_drop_acked(struct rdsv3_connection *conn, uint64_t ack, 641 is_acked_func is_acked); 642 int rdsv3_send_acked_before(struct rdsv3_connection *conn, uint64_t seq); 643 void rdsv3_send_remove_from_sock(struct list *messages, int status); 644 int rdsv3_send_pong(struct rdsv3_connection *conn, uint16_be_t dport); 645 struct rdsv3_message *rdsv3_send_get_message(struct rdsv3_connection *, 646 struct rdsv3_rdma_op *); 647 648 /* rdma.c */ 649 void rdsv3_rdma_unuse(struct rdsv3_sock *rs, uint32_t r_key, int force); 650 651 /* cong.c */ 652 void rdsv3_cong_init(void); 653 int rdsv3_cong_get_maps(struct rdsv3_connection *conn); 654 void rdsv3_cong_add_conn(struct rdsv3_connection *conn); 655 void rdsv3_cong_remove_conn(struct rdsv3_connection *conn); 656 void rdsv3_cong_set_bit(struct rdsv3_cong_map *map, uint16_be_t port); 657 void rdsv3_cong_clear_bit(struct rdsv3_cong_map *map, uint16_be_t port); 658 int rdsv3_cong_wait(struct rdsv3_cong_map *map, uint16_be_t port, int nonblock, 659 struct rdsv3_sock *rs); 660 void rdsv3_cong_queue_updates(struct rdsv3_cong_map *map); 661 void rdsv3_cong_map_updated(struct rdsv3_cong_map *map, uint64_t); 662 int rdsv3_cong_updated_since(unsigned long *recent); 663 void rdsv3_cong_add_socket(struct rdsv3_sock *); 664 void rdsv3_cong_remove_socket(struct rdsv3_sock *); 665 void rdsv3_cong_exit(void); 666 struct rdsv3_message *rdsv3_cong_update_alloc(struct rdsv3_connection *conn); 667 668 /* stats.c */ 669 RDSV3_DECLARE_PER_CPU(struct rdsv3_statistics, rdsv3_stats); 670 #define rdsv3_stats_inc_which(which, member) do { \ 671 rdsv3_per_cpu(which, get_cpu()).member++; \ 672 put_cpu(); \ 673 } while (0) 674 #define rdsv3_stats_inc(member) rdsv3_stats_inc_which(rdsv3_stats, member) 675 #define rdsv3_stats_add_which(which, member, count) do { \ 676 rdsv3_per_cpu(which, get_cpu()).member += count; \ 677 put_cpu(); \ 678 } while (0) 679 #define rdsv3_stats_add(member, count) \ 680 rdsv3_stats_add_which(rdsv3_stats, member, count) 681 int rdsv3_stats_init(void); 682 void rdsv3_stats_exit(void); 683 void rdsv3_stats_info_copy(struct rdsv3_info_iterator *iter, 684 uint64_t *values, char **names, size_t nr); 685 686 687 /* sysctl.c */ 688 int rdsv3_sysctl_init(void); 689 void rdsv3_sysctl_exit(void); 690 extern unsigned long rdsv3_sysctl_sndbuf_min; 691 extern unsigned long rdsv3_sysctl_sndbuf_default; 692 extern unsigned long rdsv3_sysctl_sndbuf_max; 693 extern unsigned long rdsv3_sysctl_reconnect_min_jiffies; 694 extern unsigned long rdsv3_sysctl_reconnect_max_jiffies; 695 extern unsigned int rdsv3_sysctl_max_unacked_packets; 696 extern unsigned int rdsv3_sysctl_max_unacked_bytes; 697 extern unsigned int rdsv3_sysctl_ping_enable; 698 extern unsigned long rdsv3_sysctl_trace_flags; 699 extern unsigned int rdsv3_sysctl_trace_level; 700 701 /* threads.c */ 702 int rdsv3_threads_init(); 703 void rdsv3_threads_exit(void); 704 extern struct rdsv3_workqueue_struct_s *rdsv3_wq; 705 void rdsv3_queue_reconnect(struct rdsv3_connection *conn); 706 void rdsv3_connect_worker(struct rdsv3_work_s *); 707 void rdsv3_shutdown_worker(struct rdsv3_work_s *); 708 void rdsv3_send_worker(struct rdsv3_work_s *); 709 void rdsv3_recv_worker(struct rdsv3_work_s *); 710 void rdsv3_reaper_worker(struct rdsv3_work_s *); 711 void rdsv3_connect_complete(struct rdsv3_connection *conn); 712 713 /* transport.c */ 714 int rdsv3_trans_register(struct rdsv3_transport *trans); 715 void rdsv3_trans_unregister(struct rdsv3_transport *trans); 716 struct rdsv3_transport *rdsv3_trans_get_preferred(uint32_be_t addr); 717 unsigned int rdsv3_trans_stats_info_copy(struct rdsv3_info_iterator *iter, 718 unsigned int avail); 719 void rdsv3_trans_exit(void); 720 721 /* message.c */ 722 struct rdsv3_message *rdsv3_message_alloc(unsigned int nents, int gfp); 723 struct rdsv3_message *rdsv3_message_copy_from_user(struct uio *uiop, 724 size_t total_len); 725 struct rdsv3_message *rdsv3_message_map_pages(unsigned long *page_addrs, 726 unsigned int total_len); 727 void rdsv3_message_populate_header(struct rdsv3_header *hdr, uint16_be_t sport, 728 uint16_be_t dport, uint64_t seq); 729 int rdsv3_message_add_extension(struct rdsv3_header *hdr, 730 unsigned int type, const void *data, unsigned int len); 731 int rdsv3_message_next_extension(struct rdsv3_header *hdr, 732 unsigned int *pos, void *buf, unsigned int *buflen); 733 int rdsv3_message_add_version_extension(struct rdsv3_header *hdr, 734 unsigned int version); 735 int rdsv3_message_get_version_extension(struct rdsv3_header *hdr, 736 unsigned int *version); 737 int rdsv3_message_add_rdma_dest_extension(struct rdsv3_header *hdr, 738 uint32_t r_key, uint32_t offset); 739 int rdsv3_message_inc_copy_to_user(struct rdsv3_incoming *inc, 740 uio_t *uio, size_t size); 741 void rdsv3_message_inc_free(struct rdsv3_incoming *inc); 742 void rdsv3_message_addref(struct rdsv3_message *rm); 743 void rdsv3_message_put(struct rdsv3_message *rm); 744 void rdsv3_message_wait(struct rdsv3_message *rm); 745 void rdsv3_message_unmapped(struct rdsv3_message *rm); 746 747 static inline void 748 rdsv3_message_make_checksum(struct rdsv3_header *hdr) 749 { 750 hdr->h_csum = 0; 751 hdr->h_csum = 752 rdsv3_ip_fast_csum((void *)hdr, sizeof (*hdr) >> 2); 753 } 754 755 static inline int 756 rdsv3_message_verify_checksum(const struct rdsv3_header *hdr) 757 { 758 return (!hdr->h_csum || 759 rdsv3_ip_fast_csum((void *)hdr, sizeof (*hdr) >> 2) == 0); 760 } 761 762 /* rdsv3_sc.c */ 763 extern boolean_t rdsv3_if_lookup_by_name(char *if_name); 764 extern int rdsv3_sc_path_lookup(ipaddr_t *localip, ipaddr_t *remip); 765 extern ipaddr_t rdsv3_scaddr_to_ibaddr(ipaddr_t addr); 766 767 #ifdef __cplusplus 768 } 769 #endif 770 771 #endif /* _RDSV3_RDSV3_H */ 772