/*
 * Copyright(c) 2015, 2016 Intel Corporation.
 *
 * This file is provided under a dual BSD/GPLv2 license. When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * BSD LICENSE
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  - Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *  - Neither the name of Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

#ifndef HFI1_VERBS_H
#define HFI1_VERBS_H

#include <linux/types.h>
#include <linux/seqlock.h>
#include <linux/kernel.h>
#include <linux/interrupt.h>
#include <linux/kref.h>
#include <linux/workqueue.h>
#include <linux/kthread.h>
#include <linux/completion.h>
#include <linux/slab.h>
#include <rdma/ib_pack.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/ib_mad.h>
#include <rdma/rdma_vt.h>
#include <rdma/rdmavt_qp.h>
#include <rdma/rdmavt_cq.h>

struct hfi1_ctxtdata;
struct hfi1_pportdata;
struct hfi1_devdata;
struct hfi1_packet;

#include "iowait.h"

#define HFI1_MAX_RDMA_ATOMIC	16
#define HFI1_GUIDS_PER_PORT	5

/*
 * Increment this value if any changes that break userspace ABI
 * compatibility are made.
 */
#define HFI1_UVERBS_ABI_VERSION	2

#define IB_SEQ_NAK	(3 << 29)

/* AETH NAK opcode values */
#define IB_RNR_NAK			0x20
#define IB_NAK_PSN_ERROR		0x60
#define IB_NAK_INVALID_REQUEST		0x61
#define IB_NAK_REMOTE_ACCESS_ERROR	0x62
#define IB_NAK_REMOTE_OPERATIONAL_ERROR	0x63
#define IB_NAK_INVALID_RD_REQUEST	0x64

/* IB Performance Manager status values */
#define IB_PMA_SAMPLE_STATUS_DONE	0x00
#define IB_PMA_SAMPLE_STATUS_STARTED	0x01
#define IB_PMA_SAMPLE_STATUS_RUNNING	0x02

/* Mandatory IB performance counter select values. */
#define IB_PMA_PORT_XMIT_DATA	cpu_to_be16(0x0001)
#define IB_PMA_PORT_RCV_DATA	cpu_to_be16(0x0002)
#define IB_PMA_PORT_XMIT_PKTS	cpu_to_be16(0x0003)
#define IB_PMA_PORT_RCV_PKTS	cpu_to_be16(0x0004)
#define IB_PMA_PORT_XMIT_WAIT	cpu_to_be16(0x0005)

#define HFI1_VENDOR_IPG		cpu_to_be16(0xFFA0)

#define IB_BTH_REQ_ACK		BIT(31)
#define IB_BTH_SOLICITED	BIT(23)
#define IB_BTH_MIG_REQ		BIT(22)

#define IB_GRH_VERSION		6
#define IB_GRH_VERSION_MASK	0xF
#define IB_GRH_VERSION_SHIFT	28
#define IB_GRH_TCLASS_MASK	0xFF
#define IB_GRH_TCLASS_SHIFT	20
#define IB_GRH_FLOW_MASK	0xFFFFF
#define IB_GRH_FLOW_SHIFT	0
#define IB_GRH_NEXT_HDR		0x1B

#define IB_DEFAULT_GID_PREFIX	cpu_to_be64(0xfe80000000000000ULL)

/* flags passed by hfi1_ib_rcv() */
enum {
	HFI1_HAS_GRH = (1 << 0),
};

struct ib_reth {
	__be64 vaddr;
	__be32 rkey;
	__be32 length;
} __packed;

struct ib_atomic_eth {
	__be32 vaddr[2];	/* unaligned so access as 2 32-bit words */
	__be32 rkey;
	__be64 swap_data;
	__be64 compare_data;
} __packed;

union ib_ehdrs {
	struct {
		__be32 deth[2];
		__be32 imm_data;
	} ud;
	struct {
		struct ib_reth reth;
		__be32 imm_data;
	} rc;
	struct {
		__be32 aeth;
		__be32 atomic_ack_eth[2];
	} at;
	__be32 imm_data;
	__be32 aeth;
	__be32 ieth;
	struct ib_atomic_eth atomic_eth;
} __packed;

struct hfi1_other_headers {
	__be32 bth[3];
	union ib_ehdrs u;
} __packed;

/*
 * Note that UD packets with a GRH header are 8+40+12+8 = 68 bytes
 * long (72 w/ imm_data). Only the first 56 bytes of the IB header
 * will be in the eager header buffer. The remaining 12 or 16 bytes
 * are in the data buffer.
 */
struct hfi1_ib_header {
	__be16 lrh[4];
	union {
		struct {
			struct ib_grh grh;
			struct hfi1_other_headers oth;
		} l;
		struct hfi1_other_headers oth;
	} u;
} __packed;
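
/*
 * Illustrative sketch only; hfi1_ud_grh_hdr_len() is not part of the
 * driver. It spells out how the 68/72 byte figures in the note above
 * struct hfi1_ib_header fall out of the on-wire header pieces: LRH,
 * GRH, BTH and DETH, plus the optional immediate data word.
 */
static inline u32 hfi1_ud_grh_hdr_len(bool has_imm)
{
	u32 len = 4 * sizeof(__be16) +		/* LRH:   8 bytes */
		  sizeof(struct ib_grh) +	/* GRH:  40 bytes */
		  3 * sizeof(__be32) +		/* BTH:  12 bytes */
		  2 * sizeof(__be32);		/* DETH:  8 bytes */

	if (has_imm)
		len += sizeof(__be32);		/* imm_data: 4 bytes */

	return len;	/* 68, or 72 with immediate data */
}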

struct ahg_ib_header {
	struct sdma_engine *sde;
	u32 ahgdesc[2];
	u16 tx_flags;
	u8 ahgcount;
	u8 ahgidx;
	struct hfi1_ib_header ibh;
};

struct hfi1_pio_header {
	__le64 pbc;
	struct hfi1_ib_header hdr;
} __packed;

/*
 * hfi1 specific data structures that will be hidden from rvt after the
 * queue pair is made common
 */
struct hfi1_qp_priv {
	struct ahg_ib_header *s_hdr;		/* next header to send */
	struct sdma_engine *s_sde;		/* current sde */
	struct send_context *s_sendcontext;	/* current sendcontext */
	u8 s_sc;				/* SC[0..4] for next packet */
	u8 r_adefered;				/* number of acks deferred */
	struct iowait s_iowait;
	struct timer_list s_rnr_timer;
	struct rvt_qp *owner;
};

/*
 * This structure is used to hold commonly looked up and computed values
 * during the send engine progress.
 */
struct hfi1_pkt_state {
	struct hfi1_ibdev *dev;
	struct hfi1_ibport *ibp;
	struct hfi1_pportdata *ppd;
	struct verbs_txreq *s_txreq;
	unsigned long flags;
};

#define HFI1_PSN_CREDIT	16

struct hfi1_opcode_stats {
	u64 n_packets;	/* number of packets */
	u64 n_bytes;	/* total number of bytes */
};

struct hfi1_opcode_stats_perctx {
	struct hfi1_opcode_stats stats[256];
};

static inline void inc_opstats(
	u32 tlen,
	struct hfi1_opcode_stats *stats)
{
#ifdef CONFIG_DEBUG_FS
	stats->n_bytes += tlen;
	stats->n_packets++;
#endif
}

struct hfi1_ibport {
	struct rvt_qp __rcu *qp[2];
	struct rvt_ibport rvp;

	__be64 guids[HFI1_GUIDS_PER_PORT - 1];	/* writable GUIDs */

	/* the first 16 entries are sl_to_vl for !OPA */
	u8 sl_to_sc[32];
	u8 sc_to_sl[32];
};

struct hfi1_ibdev {
	struct rvt_dev_info rdi;	/* Must be first */

	/* QP numbers are shared by all IB ports */
	/* protect wait lists */
	seqlock_t iowait_lock;
	struct list_head txwait;	/* list for wait verbs_txreq */
	struct list_head memwait;	/* list for wait kernel memory */
	struct list_head txreq_free;
	struct kmem_cache *verbs_txreq_cache;
	struct timer_list mem_timer;

	u64 n_piowait;
	u64 n_piodrain;
	u64 n_txwait;
	u64 n_kmem_wait;

#ifdef CONFIG_DEBUG_FS
	/* per HFI debugfs */
	struct dentry *hfi1_ibdev_dbg;
	/* per HFI symlinks to above */
	struct dentry *hfi1_ibdev_link;
#endif
};

static inline struct hfi1_ibdev *to_idev(struct ib_device *ibdev)
{
	struct rvt_dev_info *rdi;

	rdi = container_of(ibdev, struct rvt_dev_info, ibdev);
	return container_of(rdi, struct hfi1_ibdev, rdi);
}

static inline struct rvt_qp *iowait_to_qp(struct iowait *s_iowait)
{
	struct hfi1_qp_priv *priv;

	priv = container_of(s_iowait, struct hfi1_qp_priv, s_iowait);
	return priv->owner;
}
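
/*
 * Illustrative sketch; iowait_to_idev() is not a helper the driver
 * defines. It shows how the two accessors above are typically composed
 * when a caller holding only a struct iowait (for example in a wakeup
 * path) needs to reach the owning device.
 */
static inline struct hfi1_ibdev *iowait_to_idev(struct iowait *wait)
{
	struct rvt_qp *qp = iowait_to_qp(wait);

	return to_idev(qp->ibqp.device);
}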

/*
 * Send if not busy or waiting for I/O and either
 * an RC response is pending or we can process send work requests.
 */
static inline int hfi1_send_ok(struct rvt_qp *qp)
{
	return !(qp->s_flags & (RVT_S_BUSY | RVT_S_ANY_WAIT_IO)) &&
		(qp->s_hdrwords || (qp->s_flags & RVT_S_RESP_PENDING) ||
		 !(qp->s_flags & RVT_S_ANY_WAIT_SEND));
}

/*
 * This must be called with s_lock held.
 */
void hfi1_bad_pqkey(struct hfi1_ibport *ibp, __be16 trap_num, u32 key, u32 sl,
		    u32 qp1, u32 qp2, u16 lid1, u16 lid2);
void hfi1_cap_mask_chg(struct rvt_dev_info *rdi, u8 port_num);
void hfi1_sys_guid_chg(struct hfi1_ibport *ibp);
void hfi1_node_desc_chg(struct hfi1_ibport *ibp);
int hfi1_process_mad(struct ib_device *ibdev, int mad_flags, u8 port,
		     const struct ib_wc *in_wc, const struct ib_grh *in_grh,
		     const struct ib_mad_hdr *in_mad, size_t in_mad_size,
		     struct ib_mad_hdr *out_mad, size_t *out_mad_size,
		     u16 *out_mad_pkey_index);

/*
 * The PSN_MASK and PSN_SHIFT allow for
 * 1) comparing two PSNs
 * 2) returning the PSN with any upper bits masked
 * 3) returning the difference between two PSNs
 *
 * The number of significant bits in the PSN must
 * necessarily be at least one bit less than
 * the container holding the PSN.
 */
#ifndef CONFIG_HFI1_VERBS_31BIT_PSN
#define PSN_MASK	0xFFFFFF
#define PSN_SHIFT	8
#else
#define PSN_MASK	0x7FFFFFFF
#define PSN_SHIFT	1
#endif
#define PSN_MODIFY_MASK	0xFFFFFF

/*
 * Compare the lower 24 bits of the msn values.
 * Returns an integer <, ==, or > than zero.
 */
static inline int cmp_msn(u32 a, u32 b)
{
	return (((int)a) - ((int)b)) << 8;
}

/*
 * Compare two PSNs
 * Returns an integer <, ==, or > than zero.
 */
static inline int cmp_psn(u32 a, u32 b)
{
	return (((int)a) - ((int)b)) << PSN_SHIFT;
}

/*
 * Return masked PSN
 */
static inline u32 mask_psn(u32 a)
{
	return a & PSN_MASK;
}

/*
 * Return delta between two PSNs
 */
static inline u32 delta_psn(u32 a, u32 b)
{
	return (((int)a - (int)b) << PSN_SHIFT) >> PSN_SHIFT;
}
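
/*
 * Illustrative sketch; psn_in_window() is a hypothetical helper, not
 * part of the driver. It shows the intended use of the helpers above:
 * because the comparisons happen in a shifted, signed space, they stay
 * correct across PSN wrap-around. For example, with the default 24-bit
 * PSNs, cmp_psn(0x000001, 0xFFFFFF) > 0 and
 * delta_psn(0x000001, 0xFFFFFF) == 2, so a window that spans the wrap
 * still tests correctly.
 */
static inline bool psn_in_window(u32 psn, u32 first, u32 last)
{
	return cmp_psn(psn, first) >= 0 && cmp_psn(psn, last) <= 0;
}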

struct verbs_txreq;
void hfi1_put_txreq(struct verbs_txreq *tx);

int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps);

void hfi1_copy_sge(struct rvt_sge_state *ss, void *data, u32 length,
		   int release, int copy_last);

void hfi1_skip_sge(struct rvt_sge_state *ss, u32 length, int release);

void hfi1_cnp_rcv(struct hfi1_packet *packet);

void hfi1_uc_rcv(struct hfi1_packet *packet);

void hfi1_rc_rcv(struct hfi1_packet *packet);

void hfi1_rc_hdrerr(
	struct hfi1_ctxtdata *rcd,
	struct hfi1_ib_header *hdr,
	u32 rcv_flags,
	struct rvt_qp *qp);

u8 ah_to_sc(struct ib_device *ibdev, struct ib_ah_attr *ah_attr);

struct ib_ah *hfi1_create_qp0_ah(struct hfi1_ibport *ibp, u16 dlid);

void hfi1_rc_rnr_retry(unsigned long arg);
void hfi1_add_rnr_timer(struct rvt_qp *qp, u32 to);
void hfi1_rc_timeout(unsigned long arg);
void hfi1_del_timers_sync(struct rvt_qp *qp);
void hfi1_stop_rc_timers(struct rvt_qp *qp);

void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_ib_header *hdr);

void hfi1_rc_error(struct rvt_qp *qp, enum ib_wc_status err);

void hfi1_ud_rcv(struct hfi1_packet *packet);

int hfi1_lookup_pkey_idx(struct hfi1_ibport *ibp, u16 pkey);

int hfi1_rvt_get_rwqe(struct rvt_qp *qp, int wr_id_only);

void hfi1_migrate_qp(struct rvt_qp *qp);

int hfi1_check_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
			 int attr_mask, struct ib_udata *udata);

void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
		    int attr_mask, struct ib_udata *udata);

int hfi1_check_send_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe);

extern const u32 rc_only_opcode;
extern const u32 uc_only_opcode;

static inline u8 get_opcode(struct hfi1_ib_header *h)
{
	u16 lnh = be16_to_cpu(h->lrh[0]) & 3;

	if (lnh == IB_LNH_IBA_LOCAL)
		return be32_to_cpu(h->u.oth.bth[0]) >> 24;
	else
		return be32_to_cpu(h->u.l.oth.bth[0]) >> 24;
}

int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_ib_header *hdr,
		       int has_grh, struct rvt_qp *qp, u32 bth0);

u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr,
		  struct ib_global_route *grh, u32 hwords, u32 nwords);

void hfi1_make_ruc_header(struct rvt_qp *qp, struct hfi1_other_headers *ohdr,
			  u32 bth0, u32 bth2, int middle,
			  struct hfi1_pkt_state *ps);

void _hfi1_do_send(struct work_struct *work);

void hfi1_do_send(struct rvt_qp *qp);

void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
			enum ib_wc_status status);

void hfi1_send_rc_ack(struct hfi1_ctxtdata *, struct rvt_qp *qp, int is_fecn);

int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps);

int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps);

int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps);

int hfi1_register_ib_device(struct hfi1_devdata *);

void hfi1_unregister_ib_device(struct hfi1_devdata *);

void hfi1_ib_rcv(struct hfi1_packet *packet);

unsigned hfi1_get_npkeys(struct hfi1_devdata *);

int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
			u64 pbc);

int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
			u64 pbc);

int hfi1_wss_init(void);
void hfi1_wss_exit(void);

/* platform specific: return the lowest level cache (llc) size, in KiB */
static inline int wss_llc_size(void)
{
	/* assume that the boot CPU value is universal for all CPUs */
	return boot_cpu_data.x86_cache_size;
}

/* platform specific: cacheless copy */
static inline void cacheless_memcpy(void *dst, void *src, size_t n)
{
	/*
	 * Use the only available X64 cacheless copy. Add a __user cast
	 * to quiet sparse. The src argument is already in the kernel so
	 * there are no security issues. The extra fault recovery machinery
	 * is not invoked.
	 */
	__copy_user_nocache(dst, (void __user *)src, n, 0);
}

extern const enum ib_wc_opcode ib_hfi1_wc_opcode[];

extern const u8 hdr_len_by_opcode[];

extern const int ib_rvt_state_ops[];

extern __be64 ib_hfi1_sys_image_guid;	/* in network order */

extern unsigned int hfi1_max_cqes;

extern unsigned int hfi1_max_cqs;

extern unsigned int hfi1_max_qp_wrs;

extern unsigned int hfi1_max_qps;

extern unsigned int hfi1_max_sges;

extern unsigned int hfi1_max_mcast_grps;

extern unsigned int hfi1_max_mcast_qp_attached;

extern unsigned int hfi1_max_srqs;

extern unsigned int hfi1_max_srq_sges;

extern unsigned int hfi1_max_srq_wrs;

extern unsigned short piothreshold;

extern const u32 ib_hfi1_rnr_table[];

#endif /* HFI1_VERBS_H */