/*
 * Copyright(c) 2015, 2016 Intel Corporation.
 *
 * This file is provided under a dual BSD/GPLv2 license.  When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * BSD LICENSE
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  - Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *  - Neither the name of Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

#ifndef HFI1_VERBS_H
#define HFI1_VERBS_H

#include <linux/types.h>
#include <linux/seqlock.h>
#include <linux/kernel.h>
#include <linux/interrupt.h>
#include <linux/kref.h>
#include <linux/workqueue.h>
#include <linux/kthread.h>
#include <linux/completion.h>
#include <linux/slab.h>
#include <rdma/ib_pack.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/ib_mad.h>
#include <rdma/rdma_vt.h>
#include <rdma/rdmavt_qp.h>
#include <rdma/rdmavt_cq.h>

struct hfi1_ctxtdata;
struct hfi1_pportdata;
struct hfi1_devdata;
struct hfi1_packet;

#include "iowait.h"

#define HFI1_MAX_RDMA_ATOMIC	16
#define HFI1_GUIDS_PER_PORT	5

/*
 * Increment this value if any changes that break userspace ABI
 * compatibility are made.
 */
#define HFI1_UVERBS_ABI_VERSION		2

#define IB_SEQ_NAK	(3 << 29)

/* AETH NAK opcode values */
#define IB_RNR_NAK			0x20
#define IB_NAK_PSN_ERROR		0x60
#define IB_NAK_INVALID_REQUEST		0x61
#define IB_NAK_REMOTE_ACCESS_ERROR	0x62
#define IB_NAK_REMOTE_OPERATIONAL_ERROR	0x63
#define IB_NAK_INVALID_RD_REQUEST	0x64

/* IB Performance Manager status values */
#define IB_PMA_SAMPLE_STATUS_DONE	0x00
#define IB_PMA_SAMPLE_STATUS_STARTED	0x01
#define IB_PMA_SAMPLE_STATUS_RUNNING	0x02

/* Mandatory IB performance counter select values. */
#define IB_PMA_PORT_XMIT_DATA	cpu_to_be16(0x0001)
#define IB_PMA_PORT_RCV_DATA	cpu_to_be16(0x0002)
#define IB_PMA_PORT_XMIT_PKTS	cpu_to_be16(0x0003)
#define IB_PMA_PORT_RCV_PKTS	cpu_to_be16(0x0004)
#define IB_PMA_PORT_XMIT_WAIT	cpu_to_be16(0x0005)

#define HFI1_VENDOR_IPG		cpu_to_be16(0xFFA0)

#define IB_BTH_REQ_ACK		BIT(31)
#define IB_BTH_SOLICITED	BIT(23)
#define IB_BTH_MIG_REQ		BIT(22)

#define IB_GRH_VERSION		6
#define IB_GRH_VERSION_MASK	0xF
#define IB_GRH_VERSION_SHIFT	28
#define IB_GRH_TCLASS_MASK	0xFF
#define IB_GRH_TCLASS_SHIFT	20
#define IB_GRH_FLOW_MASK	0xFFFFF
#define IB_GRH_FLOW_SHIFT	0
#define IB_GRH_NEXT_HDR		0x1B

#define IB_DEFAULT_GID_PREFIX	cpu_to_be64(0xfe80000000000000ULL)

/* flags passed by hfi1_ib_rcv() */
enum {
	HFI1_HAS_GRH = (1 << 0),
};

struct ib_reth {
	__be64 vaddr;
	__be32 rkey;
	__be32 length;
} __packed;

struct ib_atomic_eth {
	__be32 vaddr[2];	/* unaligned so access as 2 32-bit words */
	__be32 rkey;
	__be64 swap_data;
	__be64 compare_data;
} __packed;

union ib_ehdrs {
	struct {
		__be32 deth[2];
		__be32 imm_data;
	} ud;
	struct {
		struct ib_reth reth;
		__be32 imm_data;
	} rc;
	struct {
		__be32 aeth;
		__be32 atomic_ack_eth[2];
	} at;
	__be32 imm_data;
	__be32 aeth;
	__be32 ieth;
	struct ib_atomic_eth atomic_eth;
} __packed;

struct hfi1_other_headers {
	__be32 bth[3];
	union ib_ehdrs u;
} __packed;

/*
 * Note that UD packets with a GRH are 8+40+12+8 = 68 bytes
 * long (72 with imm_data).  Only the first 56 bytes of the IB header
 * will be in the eager header buffer.  The remaining 12 or 16 bytes
 * are in the data buffer.
 */
struct hfi1_ib_header {
	__be16 lrh[4];
	union {
		struct {
			struct ib_grh grh;
			struct hfi1_other_headers oth;
		} l;
		struct hfi1_other_headers oth;
	} u;
} __packed;
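
/*
 * Worked breakdown of the sizes quoted above (editorial illustration only,
 * derived from the structures in this file and struct ib_grh in
 * <rdma/ib_pack.h>):
 *
 *	lrh[4]       4 * sizeof(__be16)  =  8 bytes
 *	struct ib_grh                    = 40 bytes
 *	bth[3]       3 * sizeof(__be32)  = 12 bytes
 *	ud.deth[2]   2 * sizeof(__be32)  =  8 bytes
 *	                                   --------
 *	                                   68 bytes (+4 for imm_data = 72)
 *
 * With a 56-byte eager header buffer, 68 - 56 = 12 bytes (or 72 - 56 = 16
 * bytes when an immediate is present) spill into the data buffer, as the
 * comment above notes.
 */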

struct hfi1_ahg_info {
	u32 ahgdesc[2];
	u16 tx_flags;
	u8 ahgcount;
	u8 ahgidx;
};

struct hfi1_sdma_header {
	__le64 pbc;
	struct hfi1_ib_header hdr;
} __packed;

/*
 * hfi1-specific data structures that will be hidden from rvt after the
 * queue pair is made common.
 */
struct hfi1_qp_priv {
	struct hfi1_ahg_info *s_ahg;		/* ahg info for next header */
	struct sdma_engine *s_sde;		/* current sde */
	struct send_context *s_sendcontext;	/* current sendcontext */
	u8 s_sc;				/* SC[0..4] for next packet */
	u8 r_adefered;				/* number of ACKs deferred */
	struct iowait s_iowait;
	struct timer_list s_rnr_timer;
	struct rvt_qp *owner;
};

/*
 * This structure holds commonly looked-up and computed values while the
 * send engine is making progress.
 */
struct hfi1_pkt_state {
	struct hfi1_ibdev *dev;
	struct hfi1_ibport *ibp;
	struct hfi1_pportdata *ppd;
	struct verbs_txreq *s_txreq;
	unsigned long flags;
};

#define HFI1_PSN_CREDIT  16

struct hfi1_opcode_stats {
	u64 n_packets;		/* number of packets */
	u64 n_bytes;		/* total number of bytes */
};

struct hfi1_opcode_stats_perctx {
	struct hfi1_opcode_stats stats[256];
};

static inline void inc_opstats(
	u32 tlen,
	struct hfi1_opcode_stats *stats)
{
#ifdef CONFIG_DEBUG_FS
	stats->n_bytes += tlen;
	stats->n_packets++;
#endif
}
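
/*
 * Usage sketch (editorial, with hypothetical local names): a receive
 * handler holding the packet length "tlen", the parsed header "hdr", and a
 * per-context table "opstats" of type struct hfi1_opcode_stats_perctx could
 * account the packet against its BTH opcode with:
 *
 *	inc_opstats(tlen, &opstats->stats[get_opcode(hdr)]);
 *
 * stats[] has one entry per possible 8-bit opcode, and the update compiles
 * away unless CONFIG_DEBUG_FS is enabled.
 */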

struct hfi1_ibport {
	struct rvt_qp __rcu *qp[2];
	struct rvt_ibport rvp;

	__be64 guids[HFI1_GUIDS_PER_PORT - 1];	/* writable GUIDs */

	/* the first 16 entries are sl_to_vl for !OPA */
	u8 sl_to_sc[32];
	u8 sc_to_sl[32];
};

struct hfi1_ibdev {
	struct rvt_dev_info rdi; /* Must be first */

	/* QP numbers are shared by all IB ports */
	/* protect wait lists */
	seqlock_t iowait_lock;
	struct list_head txwait;	/* QPs waiting for a verbs_txreq */
	struct list_head memwait;	/* QPs waiting for kernel memory */
	struct list_head txreq_free;
	struct kmem_cache *verbs_txreq_cache;
	struct timer_list mem_timer;

	u64 n_piowait;
	u64 n_piodrain;
	u64 n_txwait;
	u64 n_kmem_wait;

#ifdef CONFIG_DEBUG_FS
	/* per HFI debugfs */
	struct dentry *hfi1_ibdev_dbg;
	/* per HFI symlinks to above */
	struct dentry *hfi1_ibdev_link;
#endif
};

static inline struct hfi1_ibdev *to_idev(struct ib_device *ibdev)
{
	struct rvt_dev_info *rdi;

	rdi = container_of(ibdev, struct rvt_dev_info, ibdev);
	return container_of(rdi, struct hfi1_ibdev, rdi);
}

static inline struct rvt_qp *iowait_to_qp(struct iowait *s_iowait)
{
	struct hfi1_qp_priv *priv;

	priv = container_of(s_iowait, struct hfi1_qp_priv, s_iowait);
	return priv->owner;
}

/*
 * Send if not busy or waiting for I/O and either
 * an RC response is pending or we can process send work requests.
 */
static inline int hfi1_send_ok(struct rvt_qp *qp)
{
	return !(qp->s_flags & (RVT_S_BUSY | RVT_S_ANY_WAIT_IO)) &&
		(qp->s_hdrwords || (qp->s_flags & RVT_S_RESP_PENDING) ||
		 !(qp->s_flags & RVT_S_ANY_WAIT_SEND));
}

/*
 * This must be called with s_lock held.
 */
void hfi1_bad_pqkey(struct hfi1_ibport *ibp, __be16 trap_num, u32 key, u32 sl,
		    u32 qp1, u32 qp2, u16 lid1, u16 lid2);
void hfi1_cap_mask_chg(struct rvt_dev_info *rdi, u8 port_num);
void hfi1_sys_guid_chg(struct hfi1_ibport *ibp);
void hfi1_node_desc_chg(struct hfi1_ibport *ibp);
int hfi1_process_mad(struct ib_device *ibdev, int mad_flags, u8 port,
		     const struct ib_wc *in_wc, const struct ib_grh *in_grh,
		     const struct ib_mad_hdr *in_mad, size_t in_mad_size,
		     struct ib_mad_hdr *out_mad, size_t *out_mad_size,
		     u16 *out_mad_pkey_index);

/*
 * PSN_MASK and PSN_SHIFT allow for
 * 1) comparing two PSNs,
 * 2) returning the PSN with any upper bits masked,
 * 3) returning the difference between two PSNs.
 *
 * The number of significant bits in the PSN must necessarily be at least
 * one bit less than the size of the container holding the PSN.
 */
#ifndef CONFIG_HFI1_VERBS_31BIT_PSN
#define PSN_MASK 0xFFFFFF
#define PSN_SHIFT 8
#else
#define PSN_MASK 0x7FFFFFFF
#define PSN_SHIFT 1
#endif
#define PSN_MODIFY_MASK 0xFFFFFF

/*
 * Compare the lower 24 bits of the MSN values.
 * Returns an integer less than, equal to, or greater than zero.
 */
static inline int cmp_msn(u32 a, u32 b)
{
	return (((int)a) - ((int)b)) << 8;
}

/*
 * Compare two PSNs.
 * Returns an integer less than, equal to, or greater than zero.
 */
static inline int cmp_psn(u32 a, u32 b)
{
	return (((int)a) - ((int)b)) << PSN_SHIFT;
}

/*
 * Return the masked PSN.
 */
static inline u32 mask_psn(u32 a)
{
	return a & PSN_MASK;
}

/*
 * Return the delta between two PSNs.
 */
static inline u32 delta_psn(u32 a, u32 b)
{
	return (((int)a - (int)b) << PSN_SHIFT) >> PSN_SHIFT;
}
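
/*
 * Worked example (editorial illustration, 24-bit PSNs with PSN_SHIFT == 8):
 * the shift pushes the 24 significant bits to the top of a signed 32-bit
 * value, so PSN wrap-around falls out of ordinary signed arithmetic:
 *
 *	cmp_psn(0x000001, 0xFFFFFF)   > 0       (PSN 1 follows 0xFFFFFF)
 *	delta_psn(0x000001, 0xFFFFFF) == 2      (two steps forward across wrap)
 *	mask_psn(0x01000005)          == 0x000005
 *
 * The 31-bit PSN configuration works the same way with PSN_SHIFT == 1.
 */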

struct verbs_txreq;
void hfi1_put_txreq(struct verbs_txreq *tx);

int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps);

void hfi1_copy_sge(struct rvt_sge_state *ss, void *data, u32 length,
		   int release, int copy_last);

void hfi1_skip_sge(struct rvt_sge_state *ss, u32 length, int release);

void hfi1_cnp_rcv(struct hfi1_packet *packet);

void hfi1_uc_rcv(struct hfi1_packet *packet);

void hfi1_rc_rcv(struct hfi1_packet *packet);

void hfi1_rc_hdrerr(
	struct hfi1_ctxtdata *rcd,
	struct hfi1_ib_header *hdr,
	u32 rcv_flags,
	struct rvt_qp *qp);

u8 ah_to_sc(struct ib_device *ibdev, struct ib_ah_attr *ah_attr);

struct ib_ah *hfi1_create_qp0_ah(struct hfi1_ibport *ibp, u16 dlid);

void hfi1_rc_rnr_retry(unsigned long arg);
void hfi1_add_rnr_timer(struct rvt_qp *qp, u32 to);
void hfi1_rc_timeout(unsigned long arg);
void hfi1_del_timers_sync(struct rvt_qp *qp);
void hfi1_stop_rc_timers(struct rvt_qp *qp);

void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_ib_header *hdr);

void hfi1_rc_error(struct rvt_qp *qp, enum ib_wc_status err);

void hfi1_ud_rcv(struct hfi1_packet *packet);

int hfi1_lookup_pkey_idx(struct hfi1_ibport *ibp, u16 pkey);

int hfi1_rvt_get_rwqe(struct rvt_qp *qp, int wr_id_only);

void hfi1_migrate_qp(struct rvt_qp *qp);

int hfi1_check_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
			 int attr_mask, struct ib_udata *udata);

void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
		    int attr_mask, struct ib_udata *udata);

int hfi1_check_send_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe);

extern const u32 rc_only_opcode;
extern const u32 uc_only_opcode;

static inline u8 get_opcode(struct hfi1_ib_header *h)
{
	u16 lnh = be16_to_cpu(h->lrh[0]) & 3;

	if (lnh == IB_LNH_IBA_LOCAL)
		return be32_to_cpu(h->u.oth.bth[0]) >> 24;
	else
		return be32_to_cpu(h->u.l.oth.bth[0]) >> 24;
}

int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_ib_header *hdr,
		       int has_grh, struct rvt_qp *qp, u32 bth0);

u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr,
		  struct ib_global_route *grh, u32 hwords, u32 nwords);

void hfi1_make_ruc_header(struct rvt_qp *qp, struct hfi1_other_headers *ohdr,
			  u32 bth0, u32 bth2, int middle,
			  struct hfi1_pkt_state *ps);

void _hfi1_do_send(struct work_struct *work);

void hfi1_do_send(struct rvt_qp *qp);

void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
			enum ib_wc_status status);

void hfi1_send_rc_ack(struct hfi1_ctxtdata *, struct rvt_qp *qp, int is_fecn);

int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps);

int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps);

int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps);

int hfi1_register_ib_device(struct hfi1_devdata *);

void hfi1_unregister_ib_device(struct hfi1_devdata *);

void hfi1_ib_rcv(struct hfi1_packet *packet);

unsigned hfi1_get_npkeys(struct hfi1_devdata *);

int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
			u64 pbc);

int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
			u64 pbc);

int hfi1_wss_init(void);
void hfi1_wss_exit(void);

/* platform specific: return the lowest level cache (llc) size, in KiB */
static inline int wss_llc_size(void)
{
	/* assume that the boot CPU value is universal for all CPUs */
	return boot_cpu_data.x86_cache_size;
}

/* platform specific: cacheless copy */
static inline void cacheless_memcpy(void *dst, void *src, size_t n)
{
	/*
	 * Use the only available X64 cacheless copy.  Add a __user cast
	 * to quiet sparse.  The src argument is already in the kernel so
	 * there are no security issues.  The extra fault recovery machinery
	 * is not invoked.
	 */
	__copy_user_nocache(dst, (void __user *)src, n, 0);
}

extern const enum ib_wc_opcode ib_hfi1_wc_opcode[];

extern const u8 hdr_len_by_opcode[];

extern const int ib_rvt_state_ops[];

extern __be64 ib_hfi1_sys_image_guid;	/* in network order */

extern unsigned int hfi1_max_cqes;

extern unsigned int hfi1_max_cqs;

extern unsigned int hfi1_max_qp_wrs;

extern unsigned int hfi1_max_qps;

extern unsigned int hfi1_max_sges;

extern unsigned int hfi1_max_mcast_grps;

extern unsigned int hfi1_max_mcast_qp_attached;

extern unsigned int hfi1_max_srqs;

extern unsigned int hfi1_max_srq_sges;

extern unsigned int hfi1_max_srq_wrs;

extern unsigned short piothreshold;

extern const u32 ib_hfi1_rnr_table[];

#endif /* HFI1_VERBS_H */