/*
 * Copyright (c) 2006-2016 Chelsio, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#ifndef __T4_H__
#define __T4_H__

#include <assert.h>
#include <errno.h>
#include <stddef.h>
#include <stdint.h>
#include <syslog.h>
#include <infiniband/types.h>
#include <infiniband/udma_barrier.h>
#include <infiniband/endian.h>

/*
 * Try to minimize the changes from the kernel code that is pulled in
 * here for kernel-bypass ops.
 */
#define u8 uint8_t
#define u16 uint16_t
#define u32 uint32_t
#define u64 uint64_t
#define DECLARE_PCI_UNMAP_ADDR(a)
#define __iomem
#define BUG_ON(c) assert(!(c))
#define ROUND_UP(x, n) (((x) + (n) - 1u) & ~((n) - 1u))
#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))

/* FIXME: Move me to a generic PCI mmio accessor */
#define cpu_to_pci32(val) htole32(val)

#define writel(v, a) do { *((volatile u32 *)(a)) = cpu_to_pci32(v); } while (0)
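
/*
 * Usage sketch (not part of the original header): writel() stores a
 * CPU-order 32-bit value to a little-endian PCI MMIO register, for
 * example when ringing a doorbell later in this file:
 *
 *         writel(QID_V(wq->sq.qid & wq->qid_mask) | PIDX_V(inc), wq->sq.udb);
 *
 * Callers are assumed to provide any required ordering themselves
 * (udma_to_device_barrier() or mmio_wc_start()/mmio_flush_writes()).
 */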

#include "t4_regs.h"
#include "t4_chip_type.h"
#include "t4fw_api.h"
#include "t4fw_ri_api.h"

#ifdef DEBUG
#define DBGLOG(s)
#define PDBG(fmt, args...) do {syslog(LOG_DEBUG, fmt, ##args); } while (0)
#else
#define DBGLOG(s)
#define PDBG(fmt, args...) do {} while (0)
#endif

#define A_PCIE_MA_SYNC 0x30b4

#define T4_MAX_READ_DEPTH 16
#define T4_QID_BASE 1024
#define T4_MAX_QIDS 256
#define T4_MAX_NUM_PD 65536
#define T4_EQ_STATUS_ENTRIES (L1_CACHE_BYTES > 64 ? 2 : 1)
#define T4_MAX_EQ_SIZE (65520 - T4_EQ_STATUS_ENTRIES)
#define T4_MAX_IQ_SIZE (65520 - 1)
#define T4_MAX_RQ_SIZE (8192 - T4_EQ_STATUS_ENTRIES)
#define T4_MAX_SQ_SIZE (T4_MAX_EQ_SIZE - 1)
#define T4_MAX_QP_DEPTH (T4_MAX_RQ_SIZE - 1)
#define T4_MAX_CQ_DEPTH (T4_MAX_IQ_SIZE - 1)
#define T4_MAX_NUM_STAG (1<<15)
#define T4_MAX_MR_SIZE (~0ULL - 1)
#define T4_PAGESIZE_MASK 0xffffffff000 /* 4KB-8TB */
#define T4_STAG_UNSET 0xffffffff
#define T4_FW_MAJ 0

struct t4_status_page {
        __be32 rsvd1;   /* flit 0 - hw owns */
        __be16 rsvd2;
        __be16 qid;
        __be16 cidx;
        __be16 pidx;
        u8 qp_err;      /* flit 1 - sw owns */
        u8 db_off;
        u8 pad;
        u16 host_wq_pidx;
        u16 host_cidx;
        u16 host_pidx;
};

#define T4_EQ_ENTRY_SIZE 64

#define T4_SQ_NUM_SLOTS 5
#define T4_SQ_NUM_BYTES (T4_EQ_ENTRY_SIZE * T4_SQ_NUM_SLOTS)
#define T4_MAX_SEND_SGE ((T4_SQ_NUM_BYTES - sizeof(struct fw_ri_send_wr) - sizeof(struct fw_ri_isgl)) / sizeof (struct fw_ri_sge))
#define T4_MAX_SEND_INLINE ((T4_SQ_NUM_BYTES - sizeof(struct fw_ri_send_wr) - sizeof(struct fw_ri_immd)))
#define T4_MAX_WRITE_INLINE ((T4_SQ_NUM_BYTES - sizeof(struct fw_ri_rdma_write_wr) - sizeof(struct fw_ri_immd)))
#define T4_MAX_WRITE_SGE ((T4_SQ_NUM_BYTES - sizeof(struct fw_ri_rdma_write_wr) - sizeof(struct fw_ri_isgl)) / sizeof (struct fw_ri_sge))
#define T4_MAX_FR_IMMD ((T4_SQ_NUM_BYTES - sizeof(struct fw_ri_fr_nsmr_wr) - sizeof(struct fw_ri_immd)))
#define T4_MAX_FR_DEPTH 255

#define T4_RQ_NUM_SLOTS 2
#define T4_RQ_NUM_BYTES (T4_EQ_ENTRY_SIZE * T4_RQ_NUM_SLOTS)
#define T4_MAX_RECV_SGE 4

union t4_wr {
        struct fw_ri_res_wr res;
        struct fw_ri_wr init;
        struct fw_ri_rdma_write_wr write;
        struct fw_ri_send_wr send;
        struct fw_ri_rdma_read_wr read;
        struct fw_ri_bind_mw_wr bind;
        struct fw_ri_fr_nsmr_wr fr;
        struct fw_ri_inv_lstag_wr inv;
        struct t4_status_page status;
        __be64 flits[T4_EQ_ENTRY_SIZE / sizeof(__be64) * T4_SQ_NUM_SLOTS];
};

union t4_recv_wr {
        struct fw_ri_recv_wr recv;
        struct t4_status_page status;
        __be64 flits[T4_EQ_ENTRY_SIZE / sizeof(__be64) * T4_RQ_NUM_SLOTS];
};

static inline void init_wr_hdr(union t4_wr *wqe, u16 wrid,
                               enum fw_wr_opcodes opcode, u8 flags, u8 len16)
{
        wqe->send.opcode = (u8)opcode;
        wqe->send.flags = flags;
        wqe->send.wrid = wrid;
        wqe->send.r1[0] = 0;
        wqe->send.r1[1] = 0;
        wqe->send.r1[2] = 0;
        wqe->send.len16 = len16;
}
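
/*
 * Usage sketch (illustrative, not part of the original header): a caller
 * fills the opcode-specific payload of the WQE and then stamps the common
 * header, roughly
 *
 *         init_wr_hdr(wqe, wrid, FW_RI_SEND_WR, FW_RI_COMPLETION_FLAG, len16);
 *
 * where len16 is the WR length in 16-byte units. The opcode and flag
 * names are assumed to come from the firmware headers included above
 * (t4fw_api.h / t4fw_ri_api.h).
 */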

/* CQE/AE status codes */
#define T4_ERR_SUCCESS                     0x0
#define T4_ERR_STAG                        0x1  /* STAG invalid: either the */
                                                /* STAG is off limit, being 0, */
                                                /* or STAG_key mismatch */
#define T4_ERR_PDID                        0x2  /* PDID mismatch */
#define T4_ERR_QPID                        0x3  /* QPID mismatch */
#define T4_ERR_ACCESS                      0x4  /* Invalid access right */
#define T4_ERR_WRAP                        0x5  /* Wrap error */
#define T4_ERR_BOUND                       0x6  /* base and bounds violation */
#define T4_ERR_INVALIDATE_SHARED_MR        0x7  /* attempt to invalidate a */
                                                /* shared memory region */
#define T4_ERR_INVALIDATE_MR_WITH_MW_BOUND 0x8  /* attempt to invalidate a */
                                                /* MR w/ MW bound */
#define T4_ERR_ECC                         0x9  /* ECC error detected */
#define T4_ERR_ECC_PSTAG                   0xA  /* ECC error detected when */
                                                /* reading PSTAG for a MW */
                                                /* Invalidate */
#define T4_ERR_PBL_ADDR_BOUND              0xB  /* pbl addr out of bounds: */
                                                /* software error */
#define T4_ERR_SWFLUSH                     0xC  /* SW FLUSHED */
#define T4_ERR_CRC                         0x10 /* CRC error */
#define T4_ERR_MARKER                      0x11 /* Marker error */
#define T4_ERR_PDU_LEN_ERR                 0x12 /* invalid PDU length */
#define T4_ERR_OUT_OF_RQE                  0x13 /* out of RQE */
#define T4_ERR_DDP_VERSION                 0x14 /* wrong DDP version */
#define T4_ERR_RDMA_VERSION                0x15 /* wrong RDMA version */
#define T4_ERR_OPCODE                      0x16 /* invalid rdma opcode */
#define T4_ERR_DDP_QUEUE_NUM               0x17 /* invalid ddp queue number */
#define T4_ERR_MSN                         0x18 /* MSN error */
#define T4_ERR_TBIT                        0x19 /* tag bit not set correctly */
#define T4_ERR_MO                          0x1A /* MO not 0 for TERMINATE */
                                                /* or READ_REQ */
#define T4_ERR_MSN_GAP                     0x1B
#define T4_ERR_MSN_RANGE                   0x1C
#define T4_ERR_IRD_OVERFLOW                0x1D
#define T4_ERR_RQE_ADDR_BOUND              0x1E /* RQE addr out of bounds: */
                                                /* software error */
#define T4_ERR_INTERNAL_ERR                0x1F /* internal error (opcode */
                                                /* mismatch) */

/*
 * CQE defs
 */
struct t4_cqe {
        __be32 header;
        __be32 len;
        union {
                struct {
                        __be32 stag;
                        __be32 msn;
                } rcqe;
                struct {
                        u32 nada1;
                        u16 nada2;
                        u16 cidx;
                } scqe;
                struct {
                        __be32 wrid_hi;
                        __be32 wrid_low;
                } gen;
        } u;
        __be64 reserved;
        __be64 bits_type_ts;
};

/* macros for flit 0 of the cqe */

#define S_CQE_QPID    12
#define M_CQE_QPID    0xFFFFF
#define G_CQE_QPID(x) ((((x) >> S_CQE_QPID)) & M_CQE_QPID)
#define V_CQE_QPID(x) ((x)<<S_CQE_QPID)

#define S_CQE_SWCQE    11
#define M_CQE_SWCQE    0x1
#define G_CQE_SWCQE(x) ((((x) >> S_CQE_SWCQE)) & M_CQE_SWCQE)
#define V_CQE_SWCQE(x) ((x)<<S_CQE_SWCQE)

#define S_CQE_STATUS    5
#define M_CQE_STATUS    0x1F
#define G_CQE_STATUS(x) ((((x) >> S_CQE_STATUS)) & M_CQE_STATUS)
#define V_CQE_STATUS(x) ((x)<<S_CQE_STATUS)

#define S_CQE_TYPE    4
#define M_CQE_TYPE    0x1
#define G_CQE_TYPE(x) ((((x) >> S_CQE_TYPE)) & M_CQE_TYPE)
#define V_CQE_TYPE(x) ((x)<<S_CQE_TYPE)

#define S_CQE_OPCODE    0
#define M_CQE_OPCODE    0xF
#define G_CQE_OPCODE(x) ((((x) >> S_CQE_OPCODE)) & M_CQE_OPCODE)
#define V_CQE_OPCODE(x) ((x)<<S_CQE_OPCODE)

#define SW_CQE(x)     (G_CQE_SWCQE(be32toh((x)->header)))
#define CQE_QPID(x)   (G_CQE_QPID(be32toh((x)->header)))
#define CQE_TYPE(x)   (G_CQE_TYPE(be32toh((x)->header)))
#define SQ_TYPE(x)    (CQE_TYPE((x)))
#define RQ_TYPE(x)    (!CQE_TYPE((x)))
#define CQE_STATUS(x) (G_CQE_STATUS(be32toh((x)->header)))
#define CQE_OPCODE(x) (G_CQE_OPCODE(be32toh((x)->header)))

#define CQE_SEND_OPCODE(x)( \
        (G_CQE_OPCODE(be32toh((x)->header)) == FW_RI_SEND) || \
        (G_CQE_OPCODE(be32toh((x)->header)) == FW_RI_SEND_WITH_SE) || \
        (G_CQE_OPCODE(be32toh((x)->header)) == FW_RI_SEND_WITH_INV) || \
        (G_CQE_OPCODE(be32toh((x)->header)) == FW_RI_SEND_WITH_SE_INV))

#define CQE_LEN(x) (be32toh((x)->len))

/* used for RQ completion processing */
#define CQE_WRID_STAG(x) (be32toh((x)->u.rcqe.stag))
#define CQE_WRID_MSN(x)  (be32toh((x)->u.rcqe.msn))

/* used for SQ completion processing */
#define CQE_WRID_SQ_IDX(x) (x)->u.scqe.cidx

/* generic accessor macros */
#define CQE_WRID_HI(x)  ((x)->u.gen.wrid_hi)
#define CQE_WRID_LOW(x) ((x)->u.gen.wrid_low)
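
/*
 * Decode sketch (illustrative, not part of the original header): the
 * accessors above are typically combined along these lines when a CQE is
 * processed:
 *
 *         if (SQ_TYPE(cqe))
 *                 idx = CQE_WRID_SQ_IDX(cqe);              // SQ completion
 *         else if (CQE_STATUS(cqe) == T4_ERR_SUCCESS)
 *                 msn = CQE_WRID_MSN(cqe);                 // RQ completion
 *
 * The real flow lives in the provider's CQ-poll code; this only shows how
 * the flit-0 fields are meant to be read.
 */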

/* macros for flit 3 of the cqe */
#define S_CQE_GENBIT    63
#define M_CQE_GENBIT    0x1
#define G_CQE_GENBIT(x) (((x) >> S_CQE_GENBIT) & M_CQE_GENBIT)
#define V_CQE_GENBIT(x) ((x)<<S_CQE_GENBIT)

#define S_CQE_OVFBIT    62
#define M_CQE_OVFBIT    0x1
#define G_CQE_OVFBIT(x) ((((x) >> S_CQE_OVFBIT)) & M_CQE_OVFBIT)

#define S_CQE_IQTYPE    60
#define M_CQE_IQTYPE    0x3
#define G_CQE_IQTYPE(x) ((((x) >> S_CQE_IQTYPE)) & M_CQE_IQTYPE)

#define M_CQE_TS 0x0fffffffffffffffULL
#define G_CQE_TS(x) ((x) & M_CQE_TS)

#define CQE_OVFBIT(x) ((unsigned)G_CQE_OVFBIT(be64toh((x)->bits_type_ts)))
#define CQE_GENBIT(x) ((unsigned)G_CQE_GENBIT(be64toh((x)->bits_type_ts)))
#define CQE_TS(x)     (G_CQE_TS(be64toh((x)->bits_type_ts)))

struct t4_swsqe {
        u64 wr_id;
        struct t4_cqe cqe;
        __be32 read_len;
        int opcode;
        int complete;
        int signaled;
        u16 idx;
        int flushed;
};

enum {
        T4_SQ_ONCHIP = (1<<0),
};

struct t4_sq {
        /* queue is either host memory or WC MMIO memory if
         * t4_sq_onchip(). */
        union t4_wr *queue;
        struct t4_swsqe *sw_sq;
        struct t4_swsqe *oldest_read;
        /* udb is either UC or WC MMIO memory depending on device version. */
        volatile u32 *udb;
        size_t memsize;
        u32 qid;
        u32 bar2_qid;
        void *ma_sync;
        u16 in_use;
        u16 size;
        u16 cidx;
        u16 pidx;
        u16 wq_pidx;
        u16 flags;
        short flush_cidx;
        int wc_reg_available;
};

struct t4_swrqe {
        u64 wr_id;
};

struct t4_rq {
        union t4_recv_wr *queue;
        struct t4_swrqe *sw_rq;
        volatile u32 *udb;
        size_t memsize;
        u32 qid;
        u32 bar2_qid;
        u32 msn;
        u32 rqt_hwaddr;
        u16 rqt_size;
        u16 in_use;
        u16 size;
        u16 cidx;
        u16 pidx;
        u16 wq_pidx;
        int wc_reg_available;
};

struct t4_wq {
        struct t4_sq sq;
        struct t4_rq rq;
        struct c4iw_rdev *rdev;
        u32 qid_mask;
        int error;
        int flushed;
        u8 *db_offp;
};

static inline int t4_rqes_posted(struct t4_wq *wq)
{
        return wq->rq.in_use;
}

static inline int t4_rq_empty(struct t4_wq *wq)
{
        return wq->rq.in_use == 0;
}

static inline int t4_rq_full(struct t4_wq *wq)
{
        return wq->rq.in_use == (wq->rq.size - 1);
}

static inline u32 t4_rq_avail(struct t4_wq *wq)
{
        return wq->rq.size - 1 - wq->rq.in_use;
}

static inline void t4_rq_produce(struct t4_wq *wq, u8 len16)
{
        wq->rq.in_use++;
        if (++wq->rq.pidx == wq->rq.size)
                wq->rq.pidx = 0;
        wq->rq.wq_pidx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE);
        if (wq->rq.wq_pidx >= wq->rq.size * T4_RQ_NUM_SLOTS)
                wq->rq.wq_pidx %= wq->rq.size * T4_RQ_NUM_SLOTS;
        if (!wq->error)
                wq->rq.queue[wq->rq.size].status.host_pidx = wq->rq.pidx;
}

static inline void t4_rq_consume(struct t4_wq *wq)
{
        wq->rq.in_use--;
        wq->rq.msn++;
        if (++wq->rq.cidx == wq->rq.size)
                wq->rq.cidx = 0;
        assert((wq->rq.cidx != wq->rq.pidx) || wq->rq.in_use == 0);
        if (!wq->error)
                wq->rq.queue[wq->rq.size].status.host_cidx = wq->rq.cidx;
}
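
/*
 * Posting sketch (illustrative, not part of the original header): the RQ
 * helpers above are used by the provider roughly as
 *
 *         if (t4_rq_avail(wq) < 1)
 *                 return -ENOMEM;                  // ring full (assumed errno)
 *         wqe = &wq->rq.queue[wq->rq.pidx];        // build the recv WR here
 *         ...
 *         t4_rq_produce(wq, len16);
 *         t4_ring_rq_db(wq, 1, is_t4, len16, wqe); // defined below
 *
 * Note that DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE) converts the WR size
 * in 16-byte units into 64-byte EQ slots (e.g. len16 = 4 -> 1 slot).
 */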

static inline int t4_sq_empty(struct t4_wq *wq)
{
        return wq->sq.in_use == 0;
}

static inline int t4_sq_full(struct t4_wq *wq)
{
        return wq->sq.in_use == (wq->sq.size - 1);
}

static inline u32 t4_sq_avail(struct t4_wq *wq)
{
        return wq->sq.size - 1 - wq->sq.in_use;
}

static inline int t4_sq_onchip(struct t4_wq *wq)
{
        return wq->sq.flags & T4_SQ_ONCHIP;
}

static inline void t4_sq_produce(struct t4_wq *wq, u8 len16)
{
        wq->sq.in_use++;
        if (++wq->sq.pidx == wq->sq.size)
                wq->sq.pidx = 0;
        wq->sq.wq_pidx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE);
        if (wq->sq.wq_pidx >= wq->sq.size * T4_SQ_NUM_SLOTS)
                wq->sq.wq_pidx %= wq->sq.size * T4_SQ_NUM_SLOTS;
        if (!wq->error) {
                /* This write is only for debugging, the value does not matter
                 * for DMA. */
                wq->sq.queue[wq->sq.size].status.host_pidx = (wq->sq.pidx);
        }
}

static inline void t4_sq_consume(struct t4_wq *wq)
{
        assert(wq->sq.in_use >= 1);
        if (wq->sq.cidx == wq->sq.flush_cidx)
                wq->sq.flush_cidx = -1;
        wq->sq.in_use--;
        if (++wq->sq.cidx == wq->sq.size)
                wq->sq.cidx = 0;
        assert((wq->sq.cidx != wq->sq.pidx) || wq->sq.in_use == 0);
        if (!wq->error) {
                /* This write is only for debugging, the value does not matter
                 * for DMA. */
                wq->sq.queue[wq->sq.size].status.host_cidx = wq->sq.cidx;
        }
}

/* Copies to WC MMIO memory */
static void copy_wqe_to_udb(volatile u32 *udb_offset, void *wqe)
{
        u64 *src, *dst;
        int len16 = 4;

        src = (u64 *)wqe;
        dst = (u64 *)udb_offset;

        while (len16) {
                *dst++ = *src++;
                *dst++ = *src++;
                len16--;
        }
}

extern int ma_wr;
extern int t5_en_wc;

static inline void t4_ring_sq_db(struct t4_wq *wq, u16 inc, u8 t4, u8 len16,
                                 union t4_wr *wqe)
{
        if (!t4) {
                mmio_wc_start();
                if (t5_en_wc && inc == 1 && wq->sq.wc_reg_available) {
                        PDBG("%s: WC wq->sq.pidx = %d; len16=%d\n",
                             __func__, wq->sq.pidx, len16);
                        copy_wqe_to_udb(wq->sq.udb + 14, wqe);
                } else {
                        PDBG("%s: DB wq->sq.pidx = %d; len16=%d\n",
                             __func__, wq->sq.pidx, len16);
                        writel(QID_V(wq->sq.bar2_qid) | PIDX_T5_V(inc),
                               wq->sq.udb);
                }
                /* udb is WC for > t4 devices */
                mmio_flush_writes();
                return;
        }

        udma_to_device_barrier();
        if (ma_wr) {
                if (t4_sq_onchip(wq)) {
                        int i;

                        mmio_wc_start();
                        for (i = 0; i < 16; i++)
                                *(volatile u32 *)&wq->sq.queue[wq->sq.size].flits[2+i] = i;
                        mmio_flush_writes();
                }
        } else {
                if (t4_sq_onchip(wq)) {
                        int i;

                        mmio_wc_start();
                        for (i = 0; i < 16; i++)
                                /* FIXME: What is this supposed to be doing?
                                 * Writing to the same address multiple times
                                 * with WC memory is not guaranteed to
                                 * generate any more than one TLP. Why isn't
                                 * writing to WC memory marked volatile?
                                 */
                                *(u32 *)&wq->sq.queue[wq->sq.size].flits[2] = i;
                        mmio_flush_writes();
                }
        }
        /* udb is UC for t4 devices */
        writel(QID_V(wq->sq.qid & wq->qid_mask) | PIDX_V(inc), wq->sq.udb);
}
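
/*
 * Ordering sketch (illustrative, not part of the original header): a send
 * path builds the WQE in the ring, publishes it, then rings the doorbell,
 * roughly
 *
 *         init_wr_hdr(wqe, wq->sq.pidx, opcode, flags, len16);
 *         ... fill SGL or inline data ...
 *         t4_sq_produce(wq, len16);
 *         t4_ring_sq_db(wq, inc, is_t4, len16, wqe);
 *
 * t4_ring_sq_db() itself issues udma_to_device_barrier() or
 * mmio_wc_start()/mmio_flush_writes() as appropriate, so the WQE contents
 * are visible to the device before the doorbell write lands.
 */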

static inline void t4_ring_rq_db(struct t4_wq *wq, u16 inc, u8 t4, u8 len16,
                                 union t4_recv_wr *wqe)
{
        if (!t4) {
                mmio_wc_start();
                if (t5_en_wc && inc == 1 && wq->sq.wc_reg_available) {
                        PDBG("%s: WC wq->rq.pidx = %d; len16=%d\n",
                             __func__, wq->rq.pidx, len16);
                        copy_wqe_to_udb(wq->rq.udb + 14, wqe);
                } else {
                        PDBG("%s: DB wq->rq.pidx = %d; len16=%d\n",
                             __func__, wq->rq.pidx, len16);
                        writel(QID_V(wq->rq.bar2_qid) | PIDX_T5_V(inc),
                               wq->rq.udb);
                }
                /* udb is WC for > t4 devices */
                mmio_flush_writes();
                return;
        }
        /* udb is UC for t4 devices */
        udma_to_device_barrier();
        writel(QID_V(wq->rq.qid & wq->qid_mask) | PIDX_V(inc), wq->rq.udb);
}

static inline int t4_wq_in_error(struct t4_wq *wq)
{
        return wq->error || wq->rq.queue[wq->rq.size].status.qp_err;
}

static inline void t4_set_wq_in_error(struct t4_wq *wq)
{
        wq->rq.queue[wq->rq.size].status.qp_err = 1;
}

extern int c4iw_abi_version;

static inline int t4_wq_db_enabled(struct t4_wq *wq)
{
        /*
         * If the iw_cxgb4 driver supports doorbell drop recovery, its
         * c4iw_abi_version is greater than or equal to 2. In that case
         * return the status of the db_off flag, which tells the user mode
         * library whether to ring the kernel mode DB instead.
         */
        if (c4iw_abi_version >= 2)
                return !*wq->db_offp;
        else
                return 1;
}

struct t4_cq {
        struct t4_cqe *queue;
        struct t4_cqe *sw_queue;
        struct c4iw_rdev *rdev;
        volatile u32 *ugts;
        size_t memsize;
        u64 bits_type_ts;
        u32 cqid;
        u32 qid_mask;
        u16 size; /* including status page */
        u16 cidx;
        u16 sw_pidx;
        u16 sw_cidx;
        u16 sw_in_use;
        u16 cidx_inc;
        u8 gen;
        u8 error;
};

static inline int t4_arm_cq(struct t4_cq *cq, int se)
{
        u32 val;

        while (cq->cidx_inc > CIDXINC_M) {
                val = SEINTARM_V(0) | CIDXINC_V(CIDXINC_M) | TIMERREG_V(7) |
                      INGRESSQID_V(cq->cqid & cq->qid_mask);
                writel(val, cq->ugts);
                cq->cidx_inc -= CIDXINC_M;
        }
        val = SEINTARM_V(se) | CIDXINC_V(cq->cidx_inc) | TIMERREG_V(6) |
              INGRESSQID_V(cq->cqid & cq->qid_mask);
        writel(val, cq->ugts);
        cq->cidx_inc = 0;
        return 0;
}

static inline void t4_swcq_produce(struct t4_cq *cq)
{
        cq->sw_in_use++;
        if (cq->sw_in_use == cq->size) {
                syslog(LOG_NOTICE, "cxgb4 sw cq overflow cqid %u\n", cq->cqid);
                cq->error = 1;
                assert(0);
        }
        if (++cq->sw_pidx == cq->size)
                cq->sw_pidx = 0;
}

static inline void t4_swcq_consume(struct t4_cq *cq)
{
        assert(cq->sw_in_use >= 1);
        cq->sw_in_use--;
        if (++cq->sw_cidx == cq->size)
                cq->sw_cidx = 0;
}

static inline void t4_hwcq_consume(struct t4_cq *cq)
{
        cq->bits_type_ts = cq->queue[cq->cidx].bits_type_ts;
        if (++cq->cidx_inc == (cq->size >> 4) || cq->cidx_inc == CIDXINC_M) {
                uint32_t val;

                val = SEINTARM_V(0) | CIDXINC_V(cq->cidx_inc) | TIMERREG_V(7) |
                      INGRESSQID_V(cq->cqid & cq->qid_mask);
                writel(val, cq->ugts);
                cq->cidx_inc = 0;
        }
        if (++cq->cidx == cq->size) {
                cq->cidx = 0;
                cq->gen ^= 1;
        }
        ((struct t4_status_page *)&cq->queue[cq->size])->host_cidx = cq->cidx;
}
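
/*
 * Poll-and-rearm sketch (illustrative, not part of the original header):
 * completions are drained with the helpers defined below plus the
 * consume/arm helpers above, roughly
 *
 *         struct t4_cqe *cqe;
 *
 *         while (!t4_next_cqe(cq, &cqe)) {        // defined below
 *                 ... translate cqe to an ibv_wc ...
 *                 if (cq->sw_in_use)
 *                         t4_swcq_consume(cq);    // software-generated CQE
 *                 else
 *                         t4_hwcq_consume(cq);    // hardware CQE
 *         }
 *         t4_arm_cq(cq, solicited_only);          // request the next event
 *
 * The real polling logic (flush handling, SQ/RQ bookkeeping) lives in the
 * provider's CQ code; this only shows the intended calling order.
 */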

static inline int t4_valid_cqe(struct t4_cq *cq, struct t4_cqe *cqe)
{
        return (CQE_GENBIT(cqe) == cq->gen);
}

static inline int t4_next_hw_cqe(struct t4_cq *cq, struct t4_cqe **cqe)
{
        int ret;
        u16 prev_cidx;

        if (cq->cidx == 0)
                prev_cidx = cq->size - 1;
        else
                prev_cidx = cq->cidx - 1;

        if (cq->queue[prev_cidx].bits_type_ts != cq->bits_type_ts) {
                ret = -EOVERFLOW;
                syslog(LOG_NOTICE, "cxgb4 cq overflow cqid %u\n", cq->cqid);
                cq->error = 1;
                assert(0);
        } else if (t4_valid_cqe(cq, &cq->queue[cq->cidx])) {
                udma_from_device_barrier();
                *cqe = &cq->queue[cq->cidx];
                ret = 0;
        } else
                ret = -ENODATA;
        return ret;
}

static inline struct t4_cqe *t4_next_sw_cqe(struct t4_cq *cq)
{
        if (cq->sw_in_use == cq->size) {
                syslog(LOG_NOTICE, "cxgb4 sw cq overflow cqid %u\n", cq->cqid);
                cq->error = 1;
                assert(0);
                return NULL;
        }
        if (cq->sw_in_use)
                return &cq->sw_queue[cq->sw_cidx];
        return NULL;
}

static inline int t4_cq_notempty(struct t4_cq *cq)
{
        return cq->sw_in_use || t4_valid_cqe(cq, &cq->queue[cq->cidx]);
}

static inline int t4_next_cqe(struct t4_cq *cq, struct t4_cqe **cqe)
{
        int ret = 0;

        if (cq->error)
                ret = -ENODATA;
        else if (cq->sw_in_use)
                *cqe = &cq->sw_queue[cq->sw_cidx];
        else
                ret = t4_next_hw_cqe(cq, cqe);
        return ret;
}

static inline int t4_cq_in_error(struct t4_cq *cq)
{
        return ((struct t4_status_page *)&cq->queue[cq->size])->qp_err;
}

static inline void t4_set_cq_in_error(struct t4_cq *cq)
{
        ((struct t4_status_page *)&cq->queue[cq->size])->qp_err = 1;
}

static inline void t4_reset_cq_in_error(struct t4_cq *cq)
{
        ((struct t4_status_page *)&cq->queue[cq->size])->qp_err = 0;
}

struct t4_dev_status_page
{
        u8 db_off;
        u8 wc_supported;
        u16 pad2;
        u32 pad3;
        u64 qp_start;
        u64 qp_size;
        u64 cq_start;
        u64 cq_size;
};

#endif