/*
 * This file contains definitions imported from the OFED rds header rds.h.
 * Oracle elects to have and use the contents of rds.h under and
 * governed by the OpenIB.org BSD license.
 */

/*
 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
 */

#ifndef _RDSV3_RDSV3_H
#define	_RDSV3_RDSV3_H

/*
 * The name of this file is rds.h in ofed.
 */

#ifdef __cplusplus
extern "C" {
#endif

#include <sys/sunndi.h>
#include <netinet/in.h>
#include <sys/synch.h>
#include <sys/stropts.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <inet/ip.h>
#include <sys/avl.h>
#include <sys/param.h>
#include <sys/time.h>
#include <sys/rds.h>

#include <sys/ib/ibtl/ibti.h>
#include <sys/ib/clients/of/rdma/ib_verbs.h>
#include <sys/ib/clients/of/rdma/ib_addr.h>
#include <sys/ib/clients/of/rdma/rdma_cm.h>
#include <sys/ib/clients/rdsv3/rdsv3_impl.h>
#include <sys/ib/clients/rdsv3/info.h>

#include <sys/cpuvar.h>
#include <sys/disp.h>

#define	NIPQUAD(addr)					\
	(unsigned char)((ntohl(addr) >> 24) & 0xFF),	\
	(unsigned char)((ntohl(addr) >> 16) & 0xFF),	\
	(unsigned char)((ntohl(addr) >> 8) & 0xFF),	\
	(unsigned char)(ntohl(addr) & 0xFF)

/*
 * RDS Network protocol version
 */
#define	RDS_PROTOCOL_3_0	0x0300
#define	RDS_PROTOCOL_3_1	0x0301
#define	RDS_PROTOCOL_VERSION	RDS_PROTOCOL_3_1
#define	RDS_PROTOCOL_MAJOR(v)	((v) >> 8)
#define	RDS_PROTOCOL_MINOR(v)	((v) & 255)
#define	RDS_PROTOCOL(maj, min)	(((maj) << 8) | (min))

/*
 * XXX randomly chosen, but at least seems to be unused:
 * # 18464-18768 Unassigned
 * We should do better.  We want a reserved port to discourage unprivileged
 * userspace from listening.
 *
 * Port 18633 was the version that had ack frames on the wire.
 */
#define	RDSV3_PORT	18634

#define	RDSV3_REAPER_WAIT_SECS		(5*60)
#define	RDSV3_REAPER_WAIT_JIFFIES	SEC_TO_TICK(RDSV3_REAPER_WAIT_SECS)

static inline ulong_t
ceil(ulong_t x, ulong_t y)
{
	return ((x + y - 1) / y);
}

#define	RDSV3_FRAG_SHIFT	12
#define	RDSV3_FRAG_SIZE		((unsigned int)(1 << RDSV3_FRAG_SHIFT))

#define	RDSV3_CONG_MAP_BYTES	(65536 / 8)
#define	RDSV3_CONG_MAP_LONGS	(RDSV3_CONG_MAP_BYTES / sizeof (unsigned long))
#define	RDSV3_CONG_MAP_PAGES	(RDSV3_CONG_MAP_BYTES / PAGE_SIZE)
#define	RDSV3_CONG_MAP_PAGE_BITS	(PAGE_SIZE * 8)

struct rdsv3_cong_map {
	struct avl_node		m_rb_node;
	uint32_be_t		m_addr;
	rdsv3_wait_queue_t	m_waitq;
	struct list		m_conn_list;
	unsigned long		m_page_addrs[RDSV3_CONG_MAP_PAGES];
};
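/*
 * Sizing note, worked out from the macros above (no additional protocol
 * state is implied): the congestion map carries one bit per 16-bit port
 * number, so RDSV3_CONG_MAP_BYTES is 65536 / 8 = 8192 bytes.  Assuming
 * 4 KB pages (PAGE_SIZE is platform dependent), that is
 * RDSV3_CONG_MAP_PAGES = 2 pages of RDSV3_CONG_MAP_PAGE_BITS = 32768 bits
 * each, stored in m_page_addrs[].  Ports are marked and cleared through
 * rdsv3_cong_set_bit()/rdsv3_cong_clear_bit(), declared later in this
 * header.
 */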
/*
 * This is how we will track the connection state:
 * A connection is always in one of the following
 * states.  Updates to the state are atomic and imply
 * a memory barrier.
 */
enum {
	RDSV3_CONN_DOWN = 0,
	RDSV3_CONN_CONNECTING,
	RDSV3_CONN_DISCONNECTING,
	RDSV3_CONN_UP,
	RDSV3_CONN_ERROR,
};

/* Bits for c_flags */
#define	RDSV3_LL_SEND_FULL	0
#define	RDSV3_RECONNECT_PENDING	1

struct rdsv3_connection {
	struct avl_node		c_hash_node;
	struct rdsv3_ip_bucket	*c_bucketp;
	uint32_be_t		c_laddr;
	uint32_be_t		c_faddr;
	unsigned int		c_loopback:1;
	struct rdsv3_connection *c_passive;

	struct rdsv3_cong_map	*c_lcong;
	struct rdsv3_cong_map	*c_fcong;

	struct mutex		c_send_lock;	/* protect send ring */
	atomic_t		c_send_generation;
	atomic_t		c_senders;

	struct rdsv3_message	*c_xmit_rm;
	unsigned long		c_xmit_sg;
	unsigned int		c_xmit_hdr_off;
	unsigned int		c_xmit_data_off;
	unsigned int		c_xmit_rdma_sent;

	kmutex_t		c_lock;		/* protect msg queues */
	uint64_t		c_next_tx_seq;
	struct list		c_send_queue;
	struct list		c_retrans;

	uint64_t		c_next_rx_seq;

	struct rdsv3_transport	*c_trans;
	void			*c_transport_data;

	atomic_t		c_state;
	unsigned long		c_flags;
	unsigned long		c_reconnect_jiffies;
	clock_t			c_last_connect_jiffies;

	struct rdsv3_delayed_work_s	c_send_w;
	struct rdsv3_delayed_work_s	c_recv_w;
	struct rdsv3_delayed_work_s	c_conn_w;
	struct rdsv3_delayed_work_s	c_reap_w;
	struct rdsv3_work_s	c_down_w;
	struct mutex		c_cm_lock;	/* protect conn state & cm */

	struct list_node	c_map_item;
	unsigned long		c_map_queued;
	unsigned long		c_map_offset;
	unsigned long		c_map_bytes;

	unsigned int		c_unacked_packets;
	unsigned int		c_unacked_bytes;

	/* Protocol version */
	unsigned int		c_version;
};

#define	RDSV3_FLAG_CONG_BITMAP		0x01
#define	RDSV3_FLAG_ACK_REQUIRED		0x02
#define	RDSV3_FLAG_RETRANSMITTED	0x04
#define	RDSV3_MAX_ADV_CREDIT		127

/*
 * Maximum space available for extension headers.
 */
#define	RDSV3_HEADER_EXT_SPACE	16

struct rdsv3_header {
	uint64_be_t	h_sequence;
	uint64_be_t	h_ack;
	uint32_be_t	h_len;
	uint16_be_t	h_sport;
	uint16_be_t	h_dport;
	uint8_t		h_flags;
	uint8_t		h_credit;
	uint8_t		h_padding[4];
	uint16_be_t	h_csum;

	uint8_t		h_exthdr[RDSV3_HEADER_EXT_SPACE];
};

/* Reserved - indicates end of extensions */
#define	RDSV3_EXTHDR_NONE	0

/*
 * This extension header is included in the very
 * first message that is sent on a new connection,
 * and identifies the protocol level.  This will help
 * rolling updates if a future change requires breaking
 * the protocol.
 */
#define	RDSV3_EXTHDR_VERSION	1
struct rdsv3_ext_header_version {
	uint32_be_t	h_version;
};

/*
 * This extension header is included in the RDS message
 * chasing an RDMA operation.
 */
#define	RDSV3_EXTHDR_RDMA	2
struct rdsv3_ext_header_rdma {
	uint32_be_t	h_rdma_rkey;
};
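/*
 * Illustrative sketch (not part of the wire definitions above): the
 * extension headers are attached to and parsed from h_exthdr through the
 * rdsv3_message_*_extension() helpers declared later in this file.  For the
 * version extension, a sender and receiver might do roughly the following;
 * error handling is elided and the surrounding code is hypothetical:
 *
 *	(void) rdsv3_message_add_version_extension(&rm->m_inc.i_hdr,
 *	    RDS_PROTOCOL_VERSION);
 *
 *	unsigned int version;
 *	if (rdsv3_message_get_version_extension(&inc->i_hdr, &version))
 *		conn->c_version = version;
 */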
/*
 * This extension header tells the peer about the
 * destination <R_Key,offset> of the requested RDMA
 * operation.
 */
#define	RDSV3_EXTHDR_RDMA_DEST	3
struct rdsv3_ext_header_rdma_dest {
	uint32_be_t	h_rdma_rkey;
	uint32_be_t	h_rdma_offset;
};

#define	__RDSV3_EXTHDR_MAX	16 /* for now */

struct rdsv3_incoming {
	atomic_t		i_refcount;
	struct list_node	i_item;
	struct rdsv3_connection *i_conn;
	struct rdsv3_header	i_hdr;
	unsigned long		i_rx_jiffies;
	uint32_be_t		i_saddr;

	rds_rdma_cookie_t	i_rdma_cookie;
};

/*
 * m_sock_item and m_conn_item are on lists that are serialized under
 * conn->c_lock.  m_sock_item has additional meaning in that once it is empty
 * the message will not be put back on the retransmit list after being sent.
 * messages that are canceled while being sent rely on this.
 *
 * m_inc is used by loopback so that it can pass an incoming message straight
 * back up into the rx path.  It embeds a wire header which is also used by
 * the send path, which is kind of awkward.
 *
 * m_sock_item indicates the message's presence on a socket's send or receive
 * queue.  m_rs will point to that socket.
 *
 * m_daddr is used by cancellation to prune messages to a given destination.
 *
 * The RDS_MSG_ON_SOCK and RDS_MSG_ON_CONN flags are used to avoid lock
 * nesting.  As paths iterate over messages on a sock, or conn, they must
 * also lock the conn, or sock, to remove the message from those lists too.
 * Testing the flag to determine if the message is still on the lists lets
 * us avoid testing the list_head directly.  That means each path can use
 * the message's list_head to keep it on a local list while juggling locks
 * without confusing the other path.
 *
 * m_ack_seq is an optional field set by transports who need a different
 * sequence number range to invalidate.  They can use this in a callback
 * that they pass to rdsv3_send_drop_acked() to see if each message has been
 * acked.  The HAS_ACK_SEQ flag can be used to detect messages which haven't
 * had ack_seq set yet.
 */
#define	RDSV3_MSG_ON_SOCK		1
#define	RDSV3_MSG_ON_CONN		2
#define	RDSV3_MSG_HAS_ACK_SEQ		3
#define	RDSV3_MSG_ACK_REQUIRED		4
#define	RDSV3_MSG_RETRANSMITTED		5
#define	RDSV3_MSG_MAPPED		6
#define	RDSV3_MSG_PAGEVEC		7

struct rdsv3_message {
	atomic_t		m_refcount;
	struct list_node	m_sock_item;
	struct list_node	m_conn_item;
	struct rdsv3_incoming	m_inc;
	uint64_t		m_ack_seq;
	uint32_be_t		m_daddr;
	unsigned long		m_flags;

	/*
	 * Never access m_rs without holding m_rs_lock.
	 * Lock nesting is
	 *	rm->m_rs_lock
	 *	 -> rs->rs_lock
	 */
	kmutex_t		m_rs_lock;
	rdsv3_wait_queue_t	m_flush_wait;

	struct rdsv3_sock	*m_rs;
	struct rdsv3_rdma_op	*m_rdma_op;
	rds_rdma_cookie_t	m_rdma_cookie;
	struct rdsv3_mr		*m_rdma_mr;
	unsigned int		m_nents;
	unsigned int		m_count;
	struct rdsv3_scatterlist	m_sg[1];
};
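/*
 * Minimal sketch of the m_ack_seq usage described above (assumes a transport
 * that fills in m_ack_seq and sets RDSV3_MSG_HAS_ACK_SEQ; flag and error
 * checks are elided, and the names are hypothetical).  The callback type
 * and rdsv3_send_drop_acked() are declared later in this header.
 *
 *	static int
 *	my_is_acked(struct rdsv3_message *rm, uint64_t ack)
 *	{
 *		return (rm->m_ack_seq <= ack);
 *	}
 *
 *	rdsv3_send_drop_acked(conn, last_acked_seq, my_is_acked);
 */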
/*
 * The RDS notifier is used (optionally) to tell the application about
 * completed RDMA operations.  Rather than keeping the whole rds message
 * around on the queue, we allocate a small notifier that is put on the
 * socket's notifier_list.  Notifications are delivered to the application
 * through control messages.
 */
struct rdsv3_notifier {
	list_node_t	n_list;
	uint64_t	n_user_token;
	int		n_status;
};

/*
 * struct rdsv3_transport - transport specific behavioural hooks
 *
 * @xmit: .xmit is called by rdsv3_send_xmit() to tell the transport to send
 *        part of a message.  The caller serializes on the send_sem so this
 *        doesn't need to be reentrant for a given conn.  The header must be
 *        sent before the data payload.  .xmit must be prepared to send a
 *        message with no data payload.  .xmit should return the number of
 *        bytes that were sent down the connection, including header bytes.
 *        Returning 0 tells the caller that it doesn't need to perform any
 *        additional work now.  This is usually the case when the transport
 *        has filled the sending queue for its connection and will handle
 *        triggering the rds thread to continue the send when space becomes
 *        available.  Returning -EAGAIN tells the caller to retry the send
 *        immediately.  Returning -ENOMEM tells the caller to retry the send
 *        at some point in the future.
 *
 * @conn_shutdown: conn_shutdown stops traffic on the given connection.  Once
 *        it returns the connection can not call rdsv3_recv_incoming().
 *        This will only be called once after conn_connect returns
 *        non-zero success.  The caller serializes this with the send and
 *        connecting paths (xmit_* and conn_*).  The transport is responsible
 *        for other serialization, including rdsv3_recv_incoming().  This is
 *        called in process context but should try hard not to block.
 *
 * @xmit_cong_map: This asks the transport to send the local bitmap down the
 *        given connection.  XXX get a better story about the bitmap flag
 *        and header.
 */

#define	RDS_TRANS_IB	0
#define	RDS_TRANS_IWARP	1
#define	RDS_TRANS_TCP	2
#define	RDS_TRANS_COUNT	3

struct rdsv3_transport {
	char			t_name[TRANSNAMSIZ];
	struct list_node	t_item;
	unsigned int		t_type;
	unsigned int		t_prefer_loopback:1;

	int (*laddr_check)(uint32_be_t addr);
	int (*conn_alloc)(struct rdsv3_connection *conn, int gfp);
	void (*conn_free)(void *data);
	int (*conn_connect)(struct rdsv3_connection *conn);
	void (*conn_shutdown)(struct rdsv3_connection *conn);
	void (*xmit_prepare)(struct rdsv3_connection *conn);
	void (*xmit_complete)(struct rdsv3_connection *conn);
	int (*xmit)(struct rdsv3_connection *conn, struct rdsv3_message *rm,
	    unsigned int hdr_off, unsigned int sg, unsigned int off);
	int (*xmit_cong_map)(struct rdsv3_connection *conn,
	    struct rdsv3_cong_map *map, unsigned long offset);
	int (*xmit_rdma)(struct rdsv3_connection *conn,
	    struct rdsv3_rdma_op *op);
	int (*recv)(struct rdsv3_connection *conn);
	int (*inc_copy_to_user)(struct rdsv3_incoming *inc, uio_t *uio,
	    size_t size);
	void (*inc_free)(struct rdsv3_incoming *inc);

	int (*cm_handle_connect)(struct rdma_cm_id *cm_id,
	    struct rdma_cm_event *event);
	int (*cm_initiate_connect)(struct rdma_cm_id *cm_id);
	void (*cm_connect_complete)(struct rdsv3_connection *conn,
	    struct rdma_cm_event *event);

	unsigned int (*stats_info_copy)(struct rdsv3_info_iterator *iter,
	    unsigned int avail);
	void (*exit)(void);
	void *(*get_mr)(struct rds_iovec *sg, unsigned long nr_sg,
	    struct rdsv3_sock *rs, uint32_t *key_ret);
	void (*sync_mr)(void *trans_private, int direction);
	void (*free_mr)(void *trans_private, int invalidate);
	void (*flush_mrs)(void);
};
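/*
 * Hedged sketch of the .xmit return-value contract documented above, as a
 * caller such as rdsv3_send_xmit() might interpret it; the control flow
 * below is illustrative only, not the actual send loop:
 *
 *	ret = conn->c_trans->xmit(conn, rm, hdr_off, sg, off);
 *	if (ret > 0)
 *		advance the send state by ret bytes (header included);
 *	else if (ret == 0)
 *		stop; the transport will re-trigger the send thread;
 *	else if (ret == -EAGAIN)
 *		retry the send immediately;
 *	else if (ret == -ENOMEM)
 *		back off and retry the send later;
 */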
struct rdsv3_sock {
	struct rsock		*rs_sk;
	uint64_t		rs_user_addr;
	uint64_t		rs_user_bytes;

	/*
	 * bound_addr used for both incoming and outgoing, no INADDR_ANY
	 * support.
	 */
	struct avl_node		rs_bound_node;
	uint32_be_t		rs_bound_addr;
	uint32_be_t		rs_conn_addr;
	uint16_be_t		rs_bound_port;
	uint16_be_t		rs_conn_port;

	/*
	 * This is only used to communicate the transport between bind and
	 * initiating connections.  All other trans use is referenced through
	 * the connection.
	 */
	struct rdsv3_transport	*rs_transport;

	/*
	 * rdsv3_sendmsg caches the conn it used the last time around.
	 * This helps avoid costly lookups.
	 */
	struct rdsv3_connection	*rs_conn;
	kmutex_t		rs_conn_lock;

	/* flag indicating we were congested or not */
	int			rs_congested;
	/* seen congestion (ENOBUFS) when sending? */
	int			rs_seen_congestion;
	kmutex_t		rs_congested_lock;
	kcondvar_t		rs_congested_cv;

	/* rs_lock protects all these adjacent members before the newline */
	kmutex_t		rs_lock;
	struct list		rs_send_queue;
	uint32_t		rs_snd_bytes;
	int			rs_rcv_bytes;
	/* currently used for failed RDMAs */
	struct list		rs_notify_queue;

	/*
	 * Congestion wake_up.  If rs_cong_monitor is set, we use cong_mask
	 * to decide whether the application should be woken up.
	 * If not set, we use rs_cong_track to find out whether a cong map
	 * update arrived.
	 */
	uint64_t		rs_cong_mask;
	uint64_t		rs_cong_notify;
	struct list_node	rs_cong_list;
	unsigned long		rs_cong_track;

	/*
	 * rs_recv_lock protects the receive queue, and is
	 * used to serialize with rdsv3_release.
	 */
	krwlock_t		rs_recv_lock;
	struct list		rs_recv_queue;

	/* just for stats reporting */
	struct list_node	rs_item;

	/* these have their own lock */
	kmutex_t		rs_rdma_lock;
	struct avl_tree		rs_rdma_keys;

	/* Socket options - in case there will be more */
	unsigned char		rs_recverr,
				rs_cong_monitor;

	cred_t			*rs_cred;
	zoneid_t		rs_zoneid;
};

static inline struct rdsv3_sock *
rdsv3_sk_to_rs(const struct rsock *sk)
{
	return ((struct rdsv3_sock *)sk->sk_protinfo);
}

static inline struct rsock *
rdsv3_rs_to_sk(const struct rdsv3_sock *rs)
{
	return ((struct rsock *)rs->rs_sk);
}
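/*
 * Note (an assumption about how the socket layer wires these up, not stated
 * in this header): sk->sk_protinfo is expected to hold the rdsv3_sock and
 * rs->rs_sk the owning rsock, so the two converters above are inverses:
 *
 *	rdsv3_sk_to_rs(rdsv3_rs_to_sk(rs)) == rs
 */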
/*
 * The stack assigns sk_sndbuf and sk_rcvbuf to twice the specified value
 * to account for overhead.  We don't account for overhead, we just apply
 * the number of payload bytes to the specified value.
 */
static inline int
rdsv3_sk_sndbuf(struct rdsv3_sock *rs)
{
	/* XXX */
	return (rdsv3_rs_to_sk(rs)->sk_sndbuf);
}

static inline int
rdsv3_sk_rcvbuf(struct rdsv3_sock *rs)
{
	/* XXX */
	return (rdsv3_rs_to_sk(rs)->sk_rcvbuf);
}

struct rdsv3_statistics {
	uint64_t	s_conn_reset;
	uint64_t	s_recv_drop_bad_checksum;
	uint64_t	s_recv_drop_old_seq;
	uint64_t	s_recv_drop_no_sock;
	uint64_t	s_recv_drop_dead_sock;
	uint64_t	s_recv_deliver_raced;
	uint64_t	s_recv_delivered;
	uint64_t	s_recv_queued;
	uint64_t	s_recv_immediate_retry;
	uint64_t	s_recv_delayed_retry;
	uint64_t	s_recv_ack_required;
	uint64_t	s_recv_rdma_bytes;
	uint64_t	s_recv_ping;
	uint64_t	s_send_queue_empty;
	uint64_t	s_send_queue_full;
	uint64_t	s_send_sem_contention;
	uint64_t	s_send_sem_queue_raced;
	uint64_t	s_send_immediate_retry;
	uint64_t	s_send_delayed_retry;
	uint64_t	s_send_drop_acked;
	uint64_t	s_send_ack_required;
	uint64_t	s_send_queued;
	uint64_t	s_send_rdma;
	uint64_t	s_send_rdma_bytes;
	uint64_t	s_send_pong;
	uint64_t	s_page_remainder_hit;
	uint64_t	s_page_remainder_miss;
	uint64_t	s_copy_to_user;
	uint64_t	s_copy_from_user;
	uint64_t	s_cong_update_queued;
	uint64_t	s_cong_update_received;
	uint64_t	s_cong_send_error;
	uint64_t	s_cong_send_blocked;
};

/* af_rds.c */
void rdsv3_sock_addref(struct rdsv3_sock *rs);
void rdsv3_sock_put(struct rdsv3_sock *rs);
void rdsv3_wake_sk_sleep(struct rdsv3_sock *rs);
void __rdsv3_wake_sk_sleep(struct rsock *sk);

/* bind.c */
int rdsv3_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa,
    socklen_t len, cred_t *cr);
void rdsv3_remove_bound(struct rdsv3_sock *rs);
struct rdsv3_sock *rdsv3_find_bound(struct rdsv3_connection *conn,
    uint16_be_t port);
struct rdsv3_ip_bucket *rdsv3_find_ip_bucket(ipaddr_t, zoneid_t);

/* conn.c */
int rdsv3_conn_init(void);
void rdsv3_conn_exit(void);
struct rdsv3_connection *rdsv3_conn_create(uint32_be_t laddr, uint32_be_t faddr,
    struct rdsv3_transport *trans, int gfp);
struct rdsv3_connection *rdsv3_conn_create_outgoing(uint32_be_t laddr,
    uint32_be_t faddr, struct rdsv3_transport *trans, int gfp);
void rdsv3_conn_shutdown(struct rdsv3_connection *conn);
void rdsv3_conn_destroy(struct rdsv3_connection *conn);
void rdsv3_conn_reset(struct rdsv3_connection *conn);
void rdsv3_conn_drop(struct rdsv3_connection *conn);
void rdsv3_for_each_conn_info(struct rsock *sock, unsigned int len,
    struct rdsv3_info_iterator *iter,
    struct rdsv3_info_lengths *lens,
    int (*visitor)(struct rdsv3_connection *, void *),
    size_t item_len);

static inline int
rdsv3_conn_transition(struct rdsv3_connection *conn, int old, int new)
{
	return (atomic_cmpxchg(&conn->c_state, old, new) == old);
}

static inline int
rdsv3_conn_state(struct rdsv3_connection *conn)
{
	return (atomic_get(&conn->c_state));
}

static inline int
rdsv3_conn_up(struct rdsv3_connection *conn)
{
	return (atomic_get(&conn->c_state) == RDSV3_CONN_UP);
}

static inline int
rdsv3_conn_connecting(struct rdsv3_connection *conn)
{
	return (atomic_get(&conn->c_state) == RDSV3_CONN_CONNECTING);
}
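/*
 * Minimal sketch of how the compare-and-swap transition helper above is
 * meant to be used (illustrative only; the real connect path lives in the
 * .c files): only one caller wins the DOWN -> CONNECTING transition, so
 * concurrent connect attempts on the same connection collapse to one.
 *
 *	if (rdsv3_conn_transition(conn, RDSV3_CONN_DOWN,
 *	    RDSV3_CONN_CONNECTING)) {
 *		this caller owns the connect attempt;
 *	}
 */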
/* recv.c */
void rdsv3_inc_init(struct rdsv3_incoming *inc, struct rdsv3_connection *conn,
    uint32_be_t saddr);
void rdsv3_inc_addref(struct rdsv3_incoming *inc);
void rdsv3_inc_put(struct rdsv3_incoming *inc);
void rdsv3_recv_incoming(struct rdsv3_connection *conn, uint32_be_t saddr,
    uint32_be_t daddr, struct rdsv3_incoming *inc, int gfp);
int rdsv3_recvmsg(struct rdsv3_sock *rs, uio_t *uio,
    struct msghdr *msg, size_t size, int msg_flags);
void rdsv3_clear_recv_queue(struct rdsv3_sock *rs);
int rdsv3_notify_queue_get(struct rdsv3_sock *rs, struct msghdr *msg);
void rdsv3_inc_info_copy(struct rdsv3_incoming *inc,
    struct rdsv3_info_iterator *iter,
    uint32_be_t saddr, uint32_be_t daddr, int flip);

/* page.c */
int rdsv3_page_remainder_alloc(struct rdsv3_scatterlist *scat,
    unsigned long bytes, int gfp);

/* send.c */
int rdsv3_sendmsg(struct rdsv3_sock *rs, uio_t *uio, struct nmsghdr *msg,
    size_t payload_len);
void rdsv3_send_reset(struct rdsv3_connection *conn);
int rdsv3_send_xmit(struct rdsv3_connection *conn);
struct sockaddr_in;
void rdsv3_send_drop_to(struct rdsv3_sock *rs, struct sockaddr_in *dest);
typedef int (*is_acked_func)(struct rdsv3_message *rm, uint64_t ack);
void rdsv3_send_drop_acked(struct rdsv3_connection *conn, uint64_t ack,
    is_acked_func is_acked);
int rdsv3_send_acked_before(struct rdsv3_connection *conn, uint64_t seq);
void rdsv3_send_remove_from_sock(struct list *messages, int status);
int rdsv3_send_pong(struct rdsv3_connection *conn, uint16_be_t dport);
struct rdsv3_message *rdsv3_send_get_message(struct rdsv3_connection *,
    struct rdsv3_rdma_op *);

/* rdma.c */
void rdsv3_rdma_unuse(struct rdsv3_sock *rs, uint32_t r_key, int force);

/* cong.c */
void rdsv3_cong_init(void);
int rdsv3_cong_get_maps(struct rdsv3_connection *conn);
void rdsv3_cong_add_conn(struct rdsv3_connection *conn);
void rdsv3_cong_remove_conn(struct rdsv3_connection *conn);
void rdsv3_cong_set_bit(struct rdsv3_cong_map *map, uint16_be_t port);
void rdsv3_cong_clear_bit(struct rdsv3_cong_map *map, uint16_be_t port);
int rdsv3_cong_wait(struct rdsv3_cong_map *map, uint16_be_t port, int nonblock,
    struct rdsv3_sock *rs);
void rdsv3_cong_queue_updates(struct rdsv3_cong_map *map);
void rdsv3_cong_map_updated(struct rdsv3_cong_map *map, uint64_t);
int rdsv3_cong_updated_since(unsigned long *recent);
void rdsv3_cong_add_socket(struct rdsv3_sock *);
void rdsv3_cong_remove_socket(struct rdsv3_sock *);
void rdsv3_cong_exit(void);
struct rdsv3_message *rdsv3_cong_update_alloc(struct rdsv3_connection *conn);

/* stats.c */
extern uint_t nr_cpus;
extern struct rdsv3_statistics *rdsv3_stats;
#define	rdsv3_per_cpu(var, cpu)	var[cpu]
#define	rdsv3_stats_add_which(which, member, count) do {		\
	rdsv3_per_cpu(which, CPU->cpu_seqid).member += count;		\
} while (0)
#define	rdsv3_stats_inc(member)	\
	rdsv3_stats_add_which(rdsv3_stats, member, 1)
#define	rdsv3_stats_add(member, count)	\
	rdsv3_stats_add_which(rdsv3_stats, member, count)
int rdsv3_stats_init(void);
void rdsv3_stats_exit(void);
void rdsv3_stats_info_copy(struct rdsv3_info_iterator *iter,
    uint64_t *values, char **names, size_t nr);
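/*
 * Example expansion of the per-CPU stats macros above (the rdsv3_stats array
 * is assumed to be allocated with nr_cpus entries by rdsv3_stats_init()):
 *
 *	rdsv3_stats_inc(s_recv_queued);
 *
 * expands to
 *
 *	do {
 *		rdsv3_stats[CPU->cpu_seqid].s_recv_queued += 1;
 *	} while (0);
 */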
/* sysctl.c */
int rdsv3_sysctl_init(void);
void rdsv3_sysctl_exit(void);
extern unsigned long rdsv3_sysctl_sndbuf_min;
extern unsigned long rdsv3_sysctl_sndbuf_default;
extern unsigned long rdsv3_sysctl_sndbuf_max;
extern unsigned long rdsv3_sysctl_reconnect_min_jiffies;
extern unsigned long rdsv3_sysctl_reconnect_max_jiffies;
extern unsigned int rdsv3_sysctl_max_unacked_packets;
extern unsigned int rdsv3_sysctl_max_unacked_bytes;
extern unsigned int rdsv3_sysctl_ping_enable;
extern unsigned long rdsv3_sysctl_trace_flags;
extern unsigned int rdsv3_sysctl_trace_level;

/* threads.c */
int rdsv3_threads_init(void);
void rdsv3_threads_exit(void);
extern struct rdsv3_workqueue_struct_s *rdsv3_wq;
void rdsv3_queue_reconnect(struct rdsv3_connection *conn);
void rdsv3_connect_worker(struct rdsv3_work_s *);
void rdsv3_shutdown_worker(struct rdsv3_work_s *);
void rdsv3_send_worker(struct rdsv3_work_s *);
void rdsv3_recv_worker(struct rdsv3_work_s *);
void rdsv3_reaper_worker(struct rdsv3_work_s *);
void rdsv3_connect_complete(struct rdsv3_connection *conn);

/* transport.c */
int rdsv3_trans_register(struct rdsv3_transport *trans);
void rdsv3_trans_unregister(struct rdsv3_transport *trans);
struct rdsv3_transport *rdsv3_trans_get_preferred(uint32_be_t addr);
unsigned int rdsv3_trans_stats_info_copy(struct rdsv3_info_iterator *iter,
    unsigned int avail);
void rdsv3_trans_exit(void);

/* message.c */
struct rdsv3_message *rdsv3_message_alloc(unsigned int nents, int gfp);
struct rdsv3_message *rdsv3_message_copy_from_user(struct uio *uiop,
    size_t total_len);
struct rdsv3_message *rdsv3_message_map_pages(unsigned long *page_addrs,
    unsigned int total_len);
void rdsv3_message_populate_header(struct rdsv3_header *hdr, uint16_be_t sport,
    uint16_be_t dport, uint64_t seq);
int rdsv3_message_add_extension(struct rdsv3_header *hdr,
    unsigned int type, const void *data, unsigned int len);
int rdsv3_message_next_extension(struct rdsv3_header *hdr,
    unsigned int *pos, void *buf, unsigned int *buflen);
int rdsv3_message_add_version_extension(struct rdsv3_header *hdr,
    unsigned int version);
int rdsv3_message_get_version_extension(struct rdsv3_header *hdr,
    unsigned int *version);
int rdsv3_message_add_rdma_dest_extension(struct rdsv3_header *hdr,
    uint32_t r_key, uint32_t offset);
int rdsv3_message_inc_copy_to_user(struct rdsv3_incoming *inc,
    uio_t *uio, size_t size);
void rdsv3_message_inc_free(struct rdsv3_incoming *inc);
void rdsv3_message_addref(struct rdsv3_message *rm);
void rdsv3_message_put(struct rdsv3_message *rm);
void rdsv3_message_wait(struct rdsv3_message *rm);
void rdsv3_message_unmapped(struct rdsv3_message *rm);

static inline void
rdsv3_message_make_checksum(struct rdsv3_header *hdr)
{
	hdr->h_csum = 0;
	hdr->h_csum = rdsv3_ip_fast_csum((void *)hdr, sizeof (*hdr) >> 2);
}

static inline int
rdsv3_message_verify_checksum(const struct rdsv3_header *hdr)
{
	return (!hdr->h_csum ||
	    rdsv3_ip_fast_csum((void *)hdr, sizeof (*hdr) >> 2) == 0);
}

/* rdsv3_sc.c */
extern boolean_t rdsv3_if_lookup_by_name(char *if_name);
extern int rdsv3_sc_path_lookup(ipaddr_t *localip, ipaddr_t *remip);
extern ipaddr_t rdsv3_scaddr_to_ibaddr(ipaddr_t addr);

#ifdef __cplusplus
}
#endif

#endif /* _RDSV3_RDSV3_H */