1 /* 2 * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. 3 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. 4 * Copyright (c) 2004 Voltaire, Inc. All rights reserved. 5 * 6 * This software is available to you under a choice of one of two 7 * licenses. You may choose to be licensed under the terms of the GNU 8 * General Public License (GPL) Version 2, available from the file 9 * COPYING in the main directory of this source tree, or the 10 * OpenIB.org BSD license below: 11 * 12 * Redistribution and use in source and binary forms, with or 13 * without modification, are permitted provided that the following 14 * conditions are met: 15 * 16 * - Redistributions of source code must retain the above 17 * copyright notice, this list of conditions and the following 18 * disclaimer. 19 * 20 * - Redistributions in binary form must reproduce the above 21 * copyright notice, this list of conditions and the following 22 * disclaimer in the documentation and/or other materials 23 * provided with the distribution. 24 * 25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 26 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 27 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 28 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 29 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 32 * SOFTWARE. 33 * 34 * $Id: ipoib.h 1358 2004-12-17 22:00:11Z roland $ 35 */ 36 37 #ifndef _IPOIB_H 38 #define _IPOIB_H 39 40 #include <linux/list.h> 41 #include <linux/skbuff.h> 42 #include <linux/netdevice.h> 43 #include <linux/workqueue.h> 44 #include <linux/pci.h> 45 #include <linux/kref.h> 46 #include <linux/if_infiniband.h> 47 #include <linux/mutex.h> 48 49 #include <net/neighbour.h> 50 51 #include <asm/atomic.h> 52 53 #include <rdma/ib_verbs.h> 54 #include <rdma/ib_pack.h> 55 #include <rdma/ib_sa.h> 56 57 /* constants */ 58 59 enum { 60 IPOIB_PACKET_SIZE = 2048, 61 IPOIB_BUF_SIZE = IPOIB_PACKET_SIZE + IB_GRH_BYTES, 62 63 IPOIB_ENCAP_LEN = 4, 64 65 IPOIB_CM_MTU = 0x10000 - 0x10, /* padding to align header to 16 */ 66 IPOIB_CM_BUF_SIZE = IPOIB_CM_MTU + IPOIB_ENCAP_LEN, 67 IPOIB_CM_HEAD_SIZE = IPOIB_CM_BUF_SIZE % PAGE_SIZE, 68 IPOIB_CM_RX_SG = ALIGN(IPOIB_CM_BUF_SIZE, PAGE_SIZE) / PAGE_SIZE, 69 IPOIB_RX_RING_SIZE = 128, 70 IPOIB_TX_RING_SIZE = 64, 71 IPOIB_MAX_QUEUE_SIZE = 8192, 72 IPOIB_MIN_QUEUE_SIZE = 2, 73 74 IPOIB_NUM_WC = 4, 75 76 IPOIB_MAX_PATH_REC_QUEUE = 3, 77 IPOIB_MAX_MCAST_QUEUE = 3, 78 79 IPOIB_FLAG_OPER_UP = 0, 80 IPOIB_FLAG_INITIALIZED = 1, 81 IPOIB_FLAG_ADMIN_UP = 2, 82 IPOIB_PKEY_ASSIGNED = 3, 83 IPOIB_PKEY_STOP = 4, 84 IPOIB_FLAG_SUBINTERFACE = 5, 85 IPOIB_MCAST_RUN = 6, 86 IPOIB_STOP_REAPER = 7, 87 IPOIB_MCAST_STARTED = 8, 88 IPOIB_FLAG_NETIF_STOPPED = 9, 89 IPOIB_FLAG_ADMIN_CM = 10, 90 91 IPOIB_MAX_BACKOFF_SECONDS = 16, 92 93 IPOIB_MCAST_FLAG_FOUND = 0, /* used in set_multicast_list */ 94 IPOIB_MCAST_FLAG_SENDONLY = 1, 95 IPOIB_MCAST_FLAG_BUSY = 2, /* joining or already joined */ 96 IPOIB_MCAST_FLAG_ATTACHED = 3, 97 }; 98 99 #define IPOIB_OP_RECV (1ul << 31) 100 #ifdef CONFIG_INFINIBAND_IPOIB_CM 101 #define IPOIB_CM_OP_SRQ (1ul << 30) 102 #else 103 #define IPOIB_CM_OP_SRQ (0) 104 #endif 105 106 /* structs */ 107 108 struct ipoib_header { 109 __be16 proto; 110 u16 reserved; 111 }; 112 113 struct ipoib_pseudoheader { 114 u8 hwaddr[INFINIBAND_ALEN]; 115 }; 116 117 struct ipoib_mcast; 118 119 struct ipoib_rx_buf { 120 struct sk_buff *skb; 121 u64 mapping; 122 }; 123 124 struct ipoib_tx_buf { 125 struct sk_buff *skb; 126 u64 mapping; 127 }; 128 129 struct ib_cm_id; 130 131 struct ipoib_cm_data { 132 __be32 qpn; /* High byte MUST be ignored on receive */ 133 __be32 mtu; 134 }; 135 136 struct ipoib_cm_rx { 137 struct ib_cm_id *id; 138 struct ib_qp *qp; 139 struct list_head list; 140 struct net_device *dev; 141 unsigned long jiffies; 142 }; 143 144 struct ipoib_cm_tx { 145 struct ib_cm_id *id; 146 struct ib_cq *cq; 147 struct ib_qp *qp; 148 struct list_head list; 149 struct net_device *dev; 150 struct ipoib_neigh *neigh; 151 struct ipoib_path *path; 152 struct ipoib_tx_buf *tx_ring; 153 unsigned tx_head; 154 unsigned tx_tail; 155 unsigned long flags; 156 u32 mtu; 157 struct ib_wc ibwc[IPOIB_NUM_WC]; 158 }; 159 160 struct ipoib_cm_rx_buf { 161 struct sk_buff *skb; 162 u64 mapping[IPOIB_CM_RX_SG]; 163 }; 164 165 struct ipoib_cm_dev_priv { 166 struct ib_srq *srq; 167 struct ipoib_cm_rx_buf *srq_ring; 168 struct ib_cm_id *id; 169 struct list_head passive_ids; 170 struct work_struct start_task; 171 struct work_struct reap_task; 172 struct work_struct skb_task; 173 struct delayed_work stale_task; 174 struct sk_buff_head skb_queue; 175 struct list_head start_list; 176 struct list_head reap_list; 177 struct ib_wc ibwc[IPOIB_NUM_WC]; 178 struct ib_sge rx_sge[IPOIB_CM_RX_SG]; 179 struct ib_recv_wr rx_wr; 180 }; 181 182 /* 183 * Device private locking: tx_lock protects members used in TX fast 184 * path (and we use LLTX so upper layers don't do extra locking). 185 * lock protects everything else. lock nests inside of tx_lock (ie 186 * tx_lock must be acquired first if needed). 187 */ 188 struct ipoib_dev_priv { 189 spinlock_t lock; 190 191 struct net_device *dev; 192 193 unsigned long flags; 194 195 struct mutex mcast_mutex; 196 struct mutex vlan_mutex; 197 198 struct rb_root path_tree; 199 struct list_head path_list; 200 201 struct ipoib_mcast *broadcast; 202 struct list_head multicast_list; 203 struct rb_root multicast_tree; 204 205 struct delayed_work pkey_task; 206 struct delayed_work mcast_task; 207 struct work_struct flush_task; 208 struct work_struct restart_task; 209 struct delayed_work ah_reap_task; 210 211 struct ib_device *ca; 212 u8 port; 213 u16 pkey; 214 struct ib_pd *pd; 215 struct ib_mr *mr; 216 struct ib_cq *cq; 217 struct ib_qp *qp; 218 u32 qkey; 219 220 union ib_gid local_gid; 221 u16 local_lid; 222 223 unsigned int admin_mtu; 224 unsigned int mcast_mtu; 225 226 struct ipoib_rx_buf *rx_ring; 227 228 spinlock_t tx_lock; 229 struct ipoib_tx_buf *tx_ring; 230 unsigned tx_head; 231 unsigned tx_tail; 232 struct ib_sge tx_sge; 233 struct ib_send_wr tx_wr; 234 235 struct ib_wc ibwc[IPOIB_NUM_WC]; 236 237 struct list_head dead_ahs; 238 239 struct ib_event_handler event_handler; 240 241 struct net_device_stats stats; 242 243 struct net_device *parent; 244 struct list_head child_intfs; 245 struct list_head list; 246 247 #ifdef CONFIG_INFINIBAND_IPOIB_CM 248 struct ipoib_cm_dev_priv cm; 249 #endif 250 251 #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG 252 struct list_head fs_list; 253 struct dentry *mcg_dentry; 254 struct dentry *path_dentry; 255 #endif 256 }; 257 258 struct ipoib_ah { 259 struct net_device *dev; 260 struct ib_ah *ah; 261 struct list_head list; 262 struct kref ref; 263 unsigned last_send; 264 }; 265 266 struct ipoib_path { 267 struct net_device *dev; 268 struct ib_sa_path_rec pathrec; 269 struct ipoib_ah *ah; 270 struct sk_buff_head queue; 271 272 struct list_head neigh_list; 273 274 int query_id; 275 struct ib_sa_query *query; 276 struct completion done; 277 278 struct rb_node rb_node; 279 struct list_head list; 280 }; 281 282 struct ipoib_neigh { 283 struct ipoib_ah *ah; 284 #ifdef CONFIG_INFINIBAND_IPOIB_CM 285 struct ipoib_cm_tx *cm; 286 #endif 287 union ib_gid dgid; 288 struct sk_buff_head queue; 289 290 struct neighbour *neighbour; 291 292 struct list_head list; 293 }; 294 295 /* 296 * We stash a pointer to our private neighbour information after our 297 * hardware address in neigh->ha. The ALIGN() expression here makes 298 * sure that this pointer is stored aligned so that an unaligned 299 * load is not needed to dereference it. 300 */ 301 static inline struct ipoib_neigh **to_ipoib_neigh(struct neighbour *neigh) 302 { 303 return (void*) neigh + ALIGN(offsetof(struct neighbour, ha) + 304 INFINIBAND_ALEN, sizeof(void *)); 305 } 306 307 struct ipoib_neigh *ipoib_neigh_alloc(struct neighbour *neigh); 308 void ipoib_neigh_free(struct net_device *dev, struct ipoib_neigh *neigh); 309 310 extern struct workqueue_struct *ipoib_workqueue; 311 312 /* functions */ 313 314 void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr); 315 316 struct ipoib_ah *ipoib_create_ah(struct net_device *dev, 317 struct ib_pd *pd, struct ib_ah_attr *attr); 318 void ipoib_free_ah(struct kref *kref); 319 static inline void ipoib_put_ah(struct ipoib_ah *ah) 320 { 321 kref_put(&ah->ref, ipoib_free_ah); 322 } 323 324 int ipoib_open(struct net_device *dev); 325 int ipoib_add_pkey_attr(struct net_device *dev); 326 327 void ipoib_send(struct net_device *dev, struct sk_buff *skb, 328 struct ipoib_ah *address, u32 qpn); 329 void ipoib_reap_ah(struct work_struct *work); 330 331 void ipoib_flush_paths(struct net_device *dev); 332 struct ipoib_dev_priv *ipoib_intf_alloc(const char *format); 333 334 int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port); 335 void ipoib_ib_dev_flush(struct work_struct *work); 336 void ipoib_ib_dev_cleanup(struct net_device *dev); 337 338 int ipoib_ib_dev_open(struct net_device *dev); 339 int ipoib_ib_dev_up(struct net_device *dev); 340 int ipoib_ib_dev_down(struct net_device *dev, int flush); 341 int ipoib_ib_dev_stop(struct net_device *dev); 342 343 int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port); 344 void ipoib_dev_cleanup(struct net_device *dev); 345 346 void ipoib_mcast_join_task(struct work_struct *work); 347 void ipoib_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb); 348 349 void ipoib_mcast_restart_task(struct work_struct *work); 350 int ipoib_mcast_start_thread(struct net_device *dev); 351 int ipoib_mcast_stop_thread(struct net_device *dev, int flush); 352 353 void ipoib_mcast_dev_down(struct net_device *dev); 354 void ipoib_mcast_dev_flush(struct net_device *dev); 355 356 #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG 357 struct ipoib_mcast_iter *ipoib_mcast_iter_init(struct net_device *dev); 358 int ipoib_mcast_iter_next(struct ipoib_mcast_iter *iter); 359 void ipoib_mcast_iter_read(struct ipoib_mcast_iter *iter, 360 union ib_gid *gid, 361 unsigned long *created, 362 unsigned int *queuelen, 363 unsigned int *complete, 364 unsigned int *send_only); 365 366 struct ipoib_path_iter *ipoib_path_iter_init(struct net_device *dev); 367 int ipoib_path_iter_next(struct ipoib_path_iter *iter); 368 void ipoib_path_iter_read(struct ipoib_path_iter *iter, 369 struct ipoib_path *path); 370 #endif 371 372 int ipoib_mcast_attach(struct net_device *dev, u16 mlid, 373 union ib_gid *mgid); 374 int ipoib_mcast_detach(struct net_device *dev, u16 mlid, 375 union ib_gid *mgid); 376 377 int ipoib_init_qp(struct net_device *dev); 378 int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca); 379 void ipoib_transport_dev_cleanup(struct net_device *dev); 380 381 void ipoib_event(struct ib_event_handler *handler, 382 struct ib_event *record); 383 384 int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey); 385 int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey); 386 387 void ipoib_pkey_poll(struct work_struct *work); 388 int ipoib_pkey_dev_delay_open(struct net_device *dev); 389 390 #ifdef CONFIG_INFINIBAND_IPOIB_CM 391 392 #define IPOIB_FLAGS_RC 0x80 393 #define IPOIB_FLAGS_UC 0x40 394 395 /* We don't support UC connections at the moment */ 396 #define IPOIB_CM_SUPPORTED(ha) (ha[0] & (IPOIB_FLAGS_RC)) 397 398 static inline int ipoib_cm_admin_enabled(struct net_device *dev) 399 { 400 struct ipoib_dev_priv *priv = netdev_priv(dev); 401 return IPOIB_CM_SUPPORTED(dev->dev_addr) && 402 test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags); 403 } 404 405 static inline int ipoib_cm_enabled(struct net_device *dev, struct neighbour *n) 406 { 407 struct ipoib_dev_priv *priv = netdev_priv(dev); 408 return IPOIB_CM_SUPPORTED(n->ha) && 409 test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags); 410 } 411 412 static inline int ipoib_cm_up(struct ipoib_neigh *neigh) 413 414 { 415 return test_bit(IPOIB_FLAG_OPER_UP, &neigh->cm->flags); 416 } 417 418 static inline struct ipoib_cm_tx *ipoib_cm_get(struct ipoib_neigh *neigh) 419 { 420 return neigh->cm; 421 } 422 423 static inline void ipoib_cm_set(struct ipoib_neigh *neigh, struct ipoib_cm_tx *tx) 424 { 425 neigh->cm = tx; 426 } 427 428 void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx); 429 int ipoib_cm_dev_open(struct net_device *dev); 430 void ipoib_cm_dev_stop(struct net_device *dev); 431 int ipoib_cm_dev_init(struct net_device *dev); 432 int ipoib_cm_add_mode_attr(struct net_device *dev); 433 void ipoib_cm_dev_cleanup(struct net_device *dev); 434 struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path *path, 435 struct ipoib_neigh *neigh); 436 void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx); 437 void ipoib_cm_skb_too_long(struct net_device* dev, struct sk_buff *skb, 438 unsigned int mtu); 439 void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc); 440 #else 441 442 struct ipoib_cm_tx; 443 444 static inline int ipoib_cm_admin_enabled(struct net_device *dev) 445 { 446 return 0; 447 } 448 static inline int ipoib_cm_enabled(struct net_device *dev, struct neighbour *n) 449 450 { 451 return 0; 452 } 453 454 static inline int ipoib_cm_up(struct ipoib_neigh *neigh) 455 456 { 457 return 0; 458 } 459 460 static inline struct ipoib_cm_tx *ipoib_cm_get(struct ipoib_neigh *neigh) 461 { 462 return NULL; 463 } 464 465 static inline void ipoib_cm_set(struct ipoib_neigh *neigh, struct ipoib_cm_tx *tx) 466 { 467 } 468 469 static inline 470 void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx) 471 { 472 return; 473 } 474 475 static inline 476 int ipoib_cm_dev_open(struct net_device *dev) 477 { 478 return 0; 479 } 480 481 static inline 482 void ipoib_cm_dev_stop(struct net_device *dev) 483 { 484 return; 485 } 486 487 static inline 488 int ipoib_cm_dev_init(struct net_device *dev) 489 { 490 return -ENOSYS; 491 } 492 493 static inline 494 void ipoib_cm_dev_cleanup(struct net_device *dev) 495 { 496 return; 497 } 498 499 static inline 500 struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path *path, 501 struct ipoib_neigh *neigh) 502 { 503 return NULL; 504 } 505 506 static inline 507 void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx) 508 { 509 return; 510 } 511 512 static inline 513 int ipoib_cm_add_mode_attr(struct net_device *dev) 514 { 515 return 0; 516 } 517 518 static inline void ipoib_cm_skb_too_long(struct net_device* dev, struct sk_buff *skb, 519 unsigned int mtu) 520 { 521 dev_kfree_skb_any(skb); 522 } 523 524 static inline void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) 525 { 526 } 527 528 #endif 529 530 #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG 531 void ipoib_create_debug_files(struct net_device *dev); 532 void ipoib_delete_debug_files(struct net_device *dev); 533 int ipoib_register_debugfs(void); 534 void ipoib_unregister_debugfs(void); 535 #else 536 static inline void ipoib_create_debug_files(struct net_device *dev) { } 537 static inline void ipoib_delete_debug_files(struct net_device *dev) { } 538 static inline int ipoib_register_debugfs(void) { return 0; } 539 static inline void ipoib_unregister_debugfs(void) { } 540 #endif 541 542 543 #define ipoib_printk(level, priv, format, arg...) \ 544 printk(level "%s: " format, ((struct ipoib_dev_priv *) priv)->dev->name , ## arg) 545 #define ipoib_warn(priv, format, arg...) \ 546 ipoib_printk(KERN_WARNING, priv, format , ## arg) 547 548 extern int ipoib_sendq_size; 549 extern int ipoib_recvq_size; 550 551 extern struct ib_sa_client ipoib_sa_client; 552 553 #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG 554 extern int ipoib_debug_level; 555 556 #define ipoib_dbg(priv, format, arg...) \ 557 do { \ 558 if (ipoib_debug_level > 0) \ 559 ipoib_printk(KERN_DEBUG, priv, format , ## arg); \ 560 } while (0) 561 #define ipoib_dbg_mcast(priv, format, arg...) \ 562 do { \ 563 if (mcast_debug_level > 0) \ 564 ipoib_printk(KERN_DEBUG, priv, format , ## arg); \ 565 } while (0) 566 #else /* CONFIG_INFINIBAND_IPOIB_DEBUG */ 567 #define ipoib_dbg(priv, format, arg...) \ 568 do { (void) (priv); } while (0) 569 #define ipoib_dbg_mcast(priv, format, arg...) \ 570 do { (void) (priv); } while (0) 571 #endif /* CONFIG_INFINIBAND_IPOIB_DEBUG */ 572 573 #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG_DATA 574 #define ipoib_dbg_data(priv, format, arg...) \ 575 do { \ 576 if (data_debug_level > 0) \ 577 ipoib_printk(KERN_DEBUG, priv, format , ## arg); \ 578 } while (0) 579 #else /* CONFIG_INFINIBAND_IPOIB_DEBUG_DATA */ 580 #define ipoib_dbg_data(priv, format, arg...) \ 581 do { (void) (priv); } while (0) 582 #endif /* CONFIG_INFINIBAND_IPOIB_DEBUG_DATA */ 583 584 585 #define IPOIB_GID_FMT "%2.2x%2.2x:%2.2x%2.2x:%2.2x%2.2x:%2.2x%2.2x:" \ 586 "%2.2x%2.2x:%2.2x%2.2x:%2.2x%2.2x:%2.2x%2.2x" 587 588 #define IPOIB_GID_RAW_ARG(gid) ((u8 *)(gid))[0], \ 589 ((u8 *)(gid))[1], \ 590 ((u8 *)(gid))[2], \ 591 ((u8 *)(gid))[3], \ 592 ((u8 *)(gid))[4], \ 593 ((u8 *)(gid))[5], \ 594 ((u8 *)(gid))[6], \ 595 ((u8 *)(gid))[7], \ 596 ((u8 *)(gid))[8], \ 597 ((u8 *)(gid))[9], \ 598 ((u8 *)(gid))[10],\ 599 ((u8 *)(gid))[11],\ 600 ((u8 *)(gid))[12],\ 601 ((u8 *)(gid))[13],\ 602 ((u8 *)(gid))[14],\ 603 ((u8 *)(gid))[15] 604 605 #define IPOIB_GID_ARG(gid) IPOIB_GID_RAW_ARG((gid).raw) 606 607 #define IPOIB_QPN(ha) (be32_to_cpup((__be32 *) ha) & 0xffffff) 608 609 #endif /* _IPOIB_H */ 610