xref: /linux/drivers/infiniband/hw/mthca/mthca_eq.c (revision 13abf8130139c2ccd4962a7e5a8902be5e6cb5a7)
1 /*
2  * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
3  * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
4  *
5  * This software is available to you under a choice of one of two
6  * licenses.  You may choose to be licensed under the terms of the GNU
7  * General Public License (GPL) Version 2, available from the file
8  * COPYING in the main directory of this source tree, or the
9  * OpenIB.org BSD license below:
10  *
11  *     Redistribution and use in source and binary forms, with or
12  *     without modification, are permitted provided that the following
13  *     conditions are met:
14  *
15  *      - Redistributions of source code must retain the above
16  *        copyright notice, this list of conditions and the following
17  *        disclaimer.
18  *
19  *      - Redistributions in binary form must reproduce the above
20  *        copyright notice, this list of conditions and the following
21  *        disclaimer in the documentation and/or other materials
22  *        provided with the distribution.
23  *
24  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31  * SOFTWARE.
32  *
33  * $Id: mthca_eq.c 1382 2004-12-24 02:21:02Z roland $
34  */
35 
36 #include <linux/init.h>
37 #include <linux/errno.h>
38 #include <linux/interrupt.h>
39 #include <linux/pci.h>
40 
41 #include "mthca_dev.h"
42 #include "mthca_cmd.h"
43 #include "mthca_config_reg.h"
44 
/* Sizing constants for the event queues set up in this file. */
enum {
	MTHCA_NUM_ASYNC_EQE = 128,	/* entries requested for the async EQ */
	MTHCA_NUM_CMD_EQE   = 128,	/* entries requested for the command EQ */
	MTHCA_EQ_ENTRY_SIZE = 32	/* size of one EQ entry in bytes */
};
50 
51 /*
52  * Must be packed because start is 64 bits but only aligned to 32 bits.
53  */
54 struct mthca_eq_context {
55 	__be32 flags;
56 	__be64 start;
57 	__be32 logsize_usrpage;
58 	__be32 tavor_pd;	/* reserved for Arbel */
59 	u8     reserved1[3];
60 	u8     intr;
61 	__be32 arbel_pd;	/* lost_count for Tavor */
62 	__be32 lkey;
63 	u32    reserved2[2];
64 	__be32 consumer_index;
65 	__be32 producer_index;
66 	u32    reserved3[4];
67 } __attribute__((packed));
68 
/*
 * Bit fields of mthca_eq_context.flags.
 *
 * All constants are unsigned.  The status codes occupy the top nibble,
 * so 9 << 28 and 10 << 28 would shift into the sign bit of a plain int
 * (undefined behaviour, and a negative value in practice).
 */
#define MTHCA_EQ_STATUS_OK          ( 0U << 28)
#define MTHCA_EQ_STATUS_OVERFLOW    ( 9U << 28)
#define MTHCA_EQ_STATUS_WRITE_FAIL  (10U << 28)
#define MTHCA_EQ_OWNER_SW           ( 0U << 24)
#define MTHCA_EQ_OWNER_HW           ( 1U << 24)
#define MTHCA_EQ_FLAG_TR            ( 1U << 18)
#define MTHCA_EQ_FLAG_OI            ( 1U << 17)
#define MTHCA_EQ_STATE_ARMED        ( 1U <<  8)
#define MTHCA_EQ_STATE_FIRED        ( 2U <<  8)
#define MTHCA_EQ_STATE_ALWAYS_ARMED ( 3U <<  8)
#define MTHCA_EQ_STATE_ARBEL        ( 8U <<  8)
80 
81 enum {
82 	MTHCA_EVENT_TYPE_COMP       	    = 0x00,
83 	MTHCA_EVENT_TYPE_PATH_MIG   	    = 0x01,
84 	MTHCA_EVENT_TYPE_COMM_EST   	    = 0x02,
85 	MTHCA_EVENT_TYPE_SQ_DRAINED 	    = 0x03,
86 	MTHCA_EVENT_TYPE_SRQ_LAST_WQE       = 0x13,
87 	MTHCA_EVENT_TYPE_CQ_ERROR   	    = 0x04,
88 	MTHCA_EVENT_TYPE_WQ_CATAS_ERROR     = 0x05,
89 	MTHCA_EVENT_TYPE_EEC_CATAS_ERROR    = 0x06,
90 	MTHCA_EVENT_TYPE_PATH_MIG_FAILED    = 0x07,
91 	MTHCA_EVENT_TYPE_WQ_INVAL_REQ_ERROR = 0x10,
92 	MTHCA_EVENT_TYPE_WQ_ACCESS_ERROR    = 0x11,
93 	MTHCA_EVENT_TYPE_SRQ_CATAS_ERROR    = 0x12,
94 	MTHCA_EVENT_TYPE_LOCAL_CATAS_ERROR  = 0x08,
95 	MTHCA_EVENT_TYPE_PORT_CHANGE        = 0x09,
96 	MTHCA_EVENT_TYPE_EQ_OVERFLOW        = 0x0f,
97 	MTHCA_EVENT_TYPE_ECC_DETECT         = 0x0e,
98 	MTHCA_EVENT_TYPE_CMD                = 0x0a
99 };
100 
101 #define MTHCA_ASYNC_EVENT_MASK ((1ULL << MTHCA_EVENT_TYPE_PATH_MIG)           | \
102 				(1ULL << MTHCA_EVENT_TYPE_COMM_EST)           | \
103 				(1ULL << MTHCA_EVENT_TYPE_SQ_DRAINED)         | \
104 				(1ULL << MTHCA_EVENT_TYPE_CQ_ERROR)           | \
105 				(1ULL << MTHCA_EVENT_TYPE_WQ_CATAS_ERROR)     | \
106 				(1ULL << MTHCA_EVENT_TYPE_EEC_CATAS_ERROR)    | \
107 				(1ULL << MTHCA_EVENT_TYPE_PATH_MIG_FAILED)    | \
108 				(1ULL << MTHCA_EVENT_TYPE_WQ_INVAL_REQ_ERROR) | \
109 				(1ULL << MTHCA_EVENT_TYPE_WQ_ACCESS_ERROR)    | \
110 				(1ULL << MTHCA_EVENT_TYPE_LOCAL_CATAS_ERROR)  | \
111 				(1ULL << MTHCA_EVENT_TYPE_PORT_CHANGE)        | \
112 				(1ULL << MTHCA_EVENT_TYPE_ECC_DETECT))
113 #define MTHCA_SRQ_EVENT_MASK    (1ULL << MTHCA_EVENT_TYPE_SRQ_CATAS_ERROR)    | \
114 				(1ULL << MTHCA_EVENT_TYPE_SRQ_LAST_WQE)
115 #define MTHCA_CMD_EVENT_MASK    (1ULL << MTHCA_EVENT_TYPE_CMD)
116 
/*
 * EQ doorbell command codes.  In this file they are OR-ed with the EQ
 * number to form the first word of the doorbell.
 */
#define MTHCA_EQ_DB_INC_CI     0x01000000
#define MTHCA_EQ_DB_REQ_NOT    0x02000000
#define MTHCA_EQ_DB_DISARM_CQ  0x03000000
#define MTHCA_EQ_DB_SET_CI     0x04000000
#define MTHCA_EQ_DB_ALWAYS_ARM 0x05000000
122 
123 struct mthca_eqe {
124 	u8 reserved1;
125 	u8 type;
126 	u8 reserved2;
127 	u8 subtype;
128 	union {
129 		u32 raw[6];
130 		struct {
131 			__be32 cqn;
132 		} __attribute__((packed)) comp;
133 		struct {
134 			u16    reserved1;
135 			__be16 token;
136 			u32    reserved2;
137 			u8     reserved3[3];
138 			u8     status;
139 			__be64 out_param;
140 		} __attribute__((packed)) cmd;
141 		struct {
142 			__be32 qpn;
143 		} __attribute__((packed)) qp;
144 		struct {
145 			__be32 cqn;
146 			u32    reserved1;
147 			u8     reserved2[3];
148 			u8     syndrome;
149 		} __attribute__((packed)) cq_err;
150 		struct {
151 			u32    reserved1[2];
152 			__be32 port;
153 		} __attribute__((packed)) port_change;
154 	} event;
155 	u8 reserved3[3];
156 	u8 owner;
157 } __attribute__((packed));
158 
/* Ownership values for mthca_eqe.owner: bit 7 set means hardware owns it. */
#define MTHCA_EQ_ENTRY_OWNER_SW 0x00
#define MTHCA_EQ_ENTRY_OWNER_HW 0x80
161 
162 static inline u64 async_mask(struct mthca_dev *dev)
163 {
164 	return dev->mthca_flags & MTHCA_FLAG_SRQ ?
165 		MTHCA_ASYNC_EVENT_MASK | MTHCA_SRQ_EVENT_MASK :
166 		MTHCA_ASYNC_EVENT_MASK;
167 }
168 
169 static inline void tavor_set_eq_ci(struct mthca_dev *dev, struct mthca_eq *eq, u32 ci)
170 {
171 	__be32 doorbell[2];
172 
173 	doorbell[0] = cpu_to_be32(MTHCA_EQ_DB_SET_CI | eq->eqn);
174 	doorbell[1] = cpu_to_be32(ci & (eq->nent - 1));
175 
176 	/*
177 	 * This barrier makes sure that all updates to ownership bits
178 	 * done by set_eqe_hw() hit memory before the consumer index
179 	 * is updated.  set_eq_ci() allows the HCA to possibly write
180 	 * more EQ entries, and we want to avoid the exceedingly
181 	 * unlikely possibility of the HCA writing an entry and then
182 	 * having set_eqe_hw() overwrite the owner field.
183 	 */
184 	wmb();
185 	mthca_write64(doorbell,
186 		      dev->kar + MTHCA_EQ_DOORBELL,
187 		      MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
188 }
189 
190 static inline void arbel_set_eq_ci(struct mthca_dev *dev, struct mthca_eq *eq, u32 ci)
191 {
192 	/* See comment in tavor_set_eq_ci() above. */
193 	wmb();
194 	__raw_writel((__force u32) cpu_to_be32(ci),
195 		     dev->eq_regs.arbel.eq_set_ci_base + eq->eqn * 8);
196 	/* We still want ordering, just not swabbing, so add a barrier */
197 	mb();
198 }
199 
200 static inline void set_eq_ci(struct mthca_dev *dev, struct mthca_eq *eq, u32 ci)
201 {
202 	if (mthca_is_memfree(dev))
203 		arbel_set_eq_ci(dev, eq, ci);
204 	else
205 		tavor_set_eq_ci(dev, eq, ci);
206 }
207 
208 static inline void tavor_eq_req_not(struct mthca_dev *dev, int eqn)
209 {
210 	__be32 doorbell[2];
211 
212 	doorbell[0] = cpu_to_be32(MTHCA_EQ_DB_REQ_NOT | eqn);
213 	doorbell[1] = 0;
214 
215 	mthca_write64(doorbell,
216 		      dev->kar + MTHCA_EQ_DOORBELL,
217 		      MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
218 }
219 
220 static inline void arbel_eq_req_not(struct mthca_dev *dev, u32 eqn_mask)
221 {
222 	writel(eqn_mask, dev->eq_regs.arbel.eq_arm);
223 }
224 
225 static inline void disarm_cq(struct mthca_dev *dev, int eqn, int cqn)
226 {
227 	if (!mthca_is_memfree(dev)) {
228 		__be32 doorbell[2];
229 
230 		doorbell[0] = cpu_to_be32(MTHCA_EQ_DB_DISARM_CQ | eqn);
231 		doorbell[1] = cpu_to_be32(cqn);
232 
233 		mthca_write64(doorbell,
234 			      dev->kar + MTHCA_EQ_DOORBELL,
235 			      MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
236 	}
237 }
238 
239 static inline struct mthca_eqe *get_eqe(struct mthca_eq *eq, u32 entry)
240 {
241 	unsigned long off = (entry & (eq->nent - 1)) * MTHCA_EQ_ENTRY_SIZE;
242 	return eq->page_list[off / PAGE_SIZE].buf + off % PAGE_SIZE;
243 }
244 
245 static inline struct mthca_eqe* next_eqe_sw(struct mthca_eq *eq)
246 {
247 	struct mthca_eqe* eqe;
248 	eqe = get_eqe(eq, eq->cons_index);
249 	return (MTHCA_EQ_ENTRY_OWNER_HW & eqe->owner) ? NULL : eqe;
250 }
251 
252 static inline void set_eqe_hw(struct mthca_eqe *eqe)
253 {
254 	eqe->owner =  MTHCA_EQ_ENTRY_OWNER_HW;
255 }
256 
257 static void port_change(struct mthca_dev *dev, int port, int active)
258 {
259 	struct ib_event record;
260 
261 	mthca_dbg(dev, "Port change to %s for port %d\n",
262 		  active ? "active" : "down", port);
263 
264 	record.device = &dev->ib_dev;
265 	record.event  = active ? IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
266 	record.element.port_num = port;
267 
268 	ib_dispatch_event(&record);
269 }
270 
271 static int mthca_eq_int(struct mthca_dev *dev, struct mthca_eq *eq)
272 {
273 	struct mthca_eqe *eqe;
274 	int disarm_cqn;
275 	int  eqes_found = 0;
276 
277 	while ((eqe = next_eqe_sw(eq))) {
278 		int set_ci = 0;
279 
280 		/*
281 		 * Make sure we read EQ entry contents after we've
282 		 * checked the ownership bit.
283 		 */
284 		rmb();
285 
286 		switch (eqe->type) {
287 		case MTHCA_EVENT_TYPE_COMP:
288 			disarm_cqn = be32_to_cpu(eqe->event.comp.cqn) & 0xffffff;
289 			disarm_cq(dev, eq->eqn, disarm_cqn);
290 			mthca_cq_event(dev, disarm_cqn);
291 			break;
292 
293 		case MTHCA_EVENT_TYPE_PATH_MIG:
294 			mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
295 				       IB_EVENT_PATH_MIG);
296 			break;
297 
298 		case MTHCA_EVENT_TYPE_COMM_EST:
299 			mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
300 				       IB_EVENT_COMM_EST);
301 			break;
302 
303 		case MTHCA_EVENT_TYPE_SQ_DRAINED:
304 			mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
305 				       IB_EVENT_SQ_DRAINED);
306 			break;
307 
308 		case MTHCA_EVENT_TYPE_WQ_CATAS_ERROR:
309 			mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
310 				       IB_EVENT_QP_FATAL);
311 			break;
312 
313 		case MTHCA_EVENT_TYPE_PATH_MIG_FAILED:
314 			mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
315 				       IB_EVENT_PATH_MIG_ERR);
316 			break;
317 
318 		case MTHCA_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
319 			mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
320 				       IB_EVENT_QP_REQ_ERR);
321 			break;
322 
323 		case MTHCA_EVENT_TYPE_WQ_ACCESS_ERROR:
324 			mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
325 				       IB_EVENT_QP_ACCESS_ERR);
326 			break;
327 
328 		case MTHCA_EVENT_TYPE_CMD:
329 			mthca_cmd_event(dev,
330 					be16_to_cpu(eqe->event.cmd.token),
331 					eqe->event.cmd.status,
332 					be64_to_cpu(eqe->event.cmd.out_param));
333 			/*
334 			 * cmd_event() may add more commands.
335 			 * The card will think the queue has overflowed if
336 			 * we don't tell it we've been processing events.
337 			 */
338 			set_ci = 1;
339 			break;
340 
341 		case MTHCA_EVENT_TYPE_PORT_CHANGE:
342 			port_change(dev,
343 				    (be32_to_cpu(eqe->event.port_change.port) >> 28) & 3,
344 				    eqe->subtype == 0x4);
345 			break;
346 
347 		case MTHCA_EVENT_TYPE_CQ_ERROR:
348 			mthca_warn(dev, "CQ %s on CQN %06x\n",
349 				   eqe->event.cq_err.syndrome == 1 ?
350 				   "overrun" : "access violation",
351 				   be32_to_cpu(eqe->event.cq_err.cqn) & 0xffffff);
352 			break;
353 
354 		case MTHCA_EVENT_TYPE_EQ_OVERFLOW:
355 			mthca_warn(dev, "EQ overrun on EQN %d\n", eq->eqn);
356 			break;
357 
358 		case MTHCA_EVENT_TYPE_EEC_CATAS_ERROR:
359 		case MTHCA_EVENT_TYPE_SRQ_CATAS_ERROR:
360 		case MTHCA_EVENT_TYPE_LOCAL_CATAS_ERROR:
361 		case MTHCA_EVENT_TYPE_ECC_DETECT:
362 		default:
363 			mthca_warn(dev, "Unhandled event %02x(%02x) on EQ %d\n",
364 				   eqe->type, eqe->subtype, eq->eqn);
365 			break;
366 		};
367 
368 		set_eqe_hw(eqe);
369 		++eq->cons_index;
370 		eqes_found = 1;
371 
372 		if (unlikely(set_ci)) {
373 			/*
374 			 * Conditional on hca_type is OK here because
375 			 * this is a rare case, not the fast path.
376 			 */
377 			set_eq_ci(dev, eq, eq->cons_index);
378 			set_ci = 0;
379 		}
380 	}
381 
382 	/*
383 	 * Rely on caller to set consumer index so that we don't have
384 	 * to test hca_type in our interrupt handling fast path.
385 	 */
386 	return eqes_found;
387 }
388 
389 static irqreturn_t mthca_tavor_interrupt(int irq, void *dev_ptr, struct pt_regs *regs)
390 {
391 	struct mthca_dev *dev = dev_ptr;
392 	u32 ecr;
393 	int i;
394 
395 	if (dev->eq_table.clr_mask)
396 		writel(dev->eq_table.clr_mask, dev->eq_table.clr_int);
397 
398 	ecr = readl(dev->eq_regs.tavor.ecr_base + 4);
399 	if (ecr) {
400 		writel(ecr, dev->eq_regs.tavor.ecr_base +
401 		       MTHCA_ECR_CLR_BASE - MTHCA_ECR_BASE + 4);
402 
403 		for (i = 0; i < MTHCA_NUM_EQ; ++i)
404 			if (ecr & dev->eq_table.eq[i].eqn_mask &&
405 			    mthca_eq_int(dev, &dev->eq_table.eq[i])) {
406 				tavor_set_eq_ci(dev, &dev->eq_table.eq[i],
407 						dev->eq_table.eq[i].cons_index);
408 				tavor_eq_req_not(dev, dev->eq_table.eq[i].eqn);
409 			}
410 	}
411 
412 	return IRQ_RETVAL(ecr);
413 }
414 
415 static irqreturn_t mthca_tavor_msi_x_interrupt(int irq, void *eq_ptr,
416 					 struct pt_regs *regs)
417 {
418 	struct mthca_eq  *eq  = eq_ptr;
419 	struct mthca_dev *dev = eq->dev;
420 
421 	mthca_eq_int(dev, eq);
422 	tavor_set_eq_ci(dev, eq, eq->cons_index);
423 	tavor_eq_req_not(dev, eq->eqn);
424 
425 	/* MSI-X vectors always belong to us */
426 	return IRQ_HANDLED;
427 }
428 
429 static irqreturn_t mthca_arbel_interrupt(int irq, void *dev_ptr, struct pt_regs *regs)
430 {
431 	struct mthca_dev *dev = dev_ptr;
432 	int work = 0;
433 	int i;
434 
435 	if (dev->eq_table.clr_mask)
436 		writel(dev->eq_table.clr_mask, dev->eq_table.clr_int);
437 
438 	for (i = 0; i < MTHCA_NUM_EQ; ++i)
439 		if (mthca_eq_int(dev, &dev->eq_table.eq[i])) {
440 			work = 1;
441 			arbel_set_eq_ci(dev, &dev->eq_table.eq[i],
442 					dev->eq_table.eq[i].cons_index);
443 		}
444 
445 	arbel_eq_req_not(dev, dev->eq_table.arm_mask);
446 
447 	return IRQ_RETVAL(work);
448 }
449 
450 static irqreturn_t mthca_arbel_msi_x_interrupt(int irq, void *eq_ptr,
451 					       struct pt_regs *regs)
452 {
453 	struct mthca_eq  *eq  = eq_ptr;
454 	struct mthca_dev *dev = eq->dev;
455 
456 	mthca_eq_int(dev, eq);
457 	arbel_set_eq_ci(dev, eq, eq->cons_index);
458 	arbel_eq_req_not(dev, eq->eqn_mask);
459 
460 	/* MSI-X vectors always belong to us */
461 	return IRQ_HANDLED;
462 }
463 
464 static int __devinit mthca_create_eq(struct mthca_dev *dev,
465 				     int nent,
466 				     u8 intr,
467 				     struct mthca_eq *eq)
468 {
469 	int npages = (nent * MTHCA_EQ_ENTRY_SIZE + PAGE_SIZE - 1) /
470 		PAGE_SIZE;
471 	u64 *dma_list = NULL;
472 	dma_addr_t t;
473 	struct mthca_mailbox *mailbox;
474 	struct mthca_eq_context *eq_context;
475 	int err = -ENOMEM;
476 	int i;
477 	u8 status;
478 
479 	/* Make sure EQ size is aligned to a power of 2 size. */
480 	for (i = 1; i < nent; i <<= 1)
481 		; /* nothing */
482 	nent = i;
483 
484 	eq->dev = dev;
485 
486 	eq->page_list = kmalloc(npages * sizeof *eq->page_list,
487 				GFP_KERNEL);
488 	if (!eq->page_list)
489 		goto err_out;
490 
491 	for (i = 0; i < npages; ++i)
492 		eq->page_list[i].buf = NULL;
493 
494 	dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL);
495 	if (!dma_list)
496 		goto err_out_free;
497 
498 	mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
499 	if (IS_ERR(mailbox))
500 		goto err_out_free;
501 	eq_context = mailbox->buf;
502 
503 	for (i = 0; i < npages; ++i) {
504 		eq->page_list[i].buf = dma_alloc_coherent(&dev->pdev->dev,
505 							  PAGE_SIZE, &t, GFP_KERNEL);
506 		if (!eq->page_list[i].buf)
507 			goto err_out_free_pages;
508 
509 		dma_list[i] = t;
510 		pci_unmap_addr_set(&eq->page_list[i], mapping, t);
511 
512 		memset(eq->page_list[i].buf, 0, PAGE_SIZE);
513 	}
514 
515 	for (i = 0; i < nent; ++i)
516 		set_eqe_hw(get_eqe(eq, i));
517 
518 	eq->eqn = mthca_alloc(&dev->eq_table.alloc);
519 	if (eq->eqn == -1)
520 		goto err_out_free_pages;
521 
522 	err = mthca_mr_alloc_phys(dev, dev->driver_pd.pd_num,
523 				  dma_list, PAGE_SHIFT, npages,
524 				  0, npages * PAGE_SIZE,
525 				  MTHCA_MPT_FLAG_LOCAL_WRITE |
526 				  MTHCA_MPT_FLAG_LOCAL_READ,
527 				  &eq->mr);
528 	if (err)
529 		goto err_out_free_eq;
530 
531 	eq->nent = nent;
532 
533 	memset(eq_context, 0, sizeof *eq_context);
534 	eq_context->flags           = cpu_to_be32(MTHCA_EQ_STATUS_OK   |
535 						  MTHCA_EQ_OWNER_HW    |
536 						  MTHCA_EQ_STATE_ARMED |
537 						  MTHCA_EQ_FLAG_TR);
538 	if (mthca_is_memfree(dev))
539 		eq_context->flags  |= cpu_to_be32(MTHCA_EQ_STATE_ARBEL);
540 
541 	eq_context->logsize_usrpage = cpu_to_be32((ffs(nent) - 1) << 24);
542 	if (mthca_is_memfree(dev)) {
543 		eq_context->arbel_pd = cpu_to_be32(dev->driver_pd.pd_num);
544 	} else {
545 		eq_context->logsize_usrpage |= cpu_to_be32(dev->driver_uar.index);
546 		eq_context->tavor_pd         = cpu_to_be32(dev->driver_pd.pd_num);
547 	}
548 	eq_context->intr            = intr;
549 	eq_context->lkey            = cpu_to_be32(eq->mr.ibmr.lkey);
550 
551 	err = mthca_SW2HW_EQ(dev, mailbox, eq->eqn, &status);
552 	if (err) {
553 		mthca_warn(dev, "SW2HW_EQ failed (%d)\n", err);
554 		goto err_out_free_mr;
555 	}
556 	if (status) {
557 		mthca_warn(dev, "SW2HW_EQ returned status 0x%02x\n",
558 			   status);
559 		err = -EINVAL;
560 		goto err_out_free_mr;
561 	}
562 
563 	kfree(dma_list);
564 	mthca_free_mailbox(dev, mailbox);
565 
566 	eq->eqn_mask   = swab32(1 << eq->eqn);
567 	eq->cons_index = 0;
568 
569 	dev->eq_table.arm_mask |= eq->eqn_mask;
570 
571 	mthca_dbg(dev, "Allocated EQ %d with %d entries\n",
572 		  eq->eqn, nent);
573 
574 	return err;
575 
576  err_out_free_mr:
577 	mthca_free_mr(dev, &eq->mr);
578 
579  err_out_free_eq:
580 	mthca_free(&dev->eq_table.alloc, eq->eqn);
581 
582  err_out_free_pages:
583 	for (i = 0; i < npages; ++i)
584 		if (eq->page_list[i].buf)
585 			dma_free_coherent(&dev->pdev->dev, PAGE_SIZE,
586 					  eq->page_list[i].buf,
587 					  pci_unmap_addr(&eq->page_list[i],
588 							 mapping));
589 
590 	mthca_free_mailbox(dev, mailbox);
591 
592  err_out_free:
593 	kfree(eq->page_list);
594 	kfree(dma_list);
595 
596  err_out:
597 	return err;
598 }
599 
600 static void mthca_free_eq(struct mthca_dev *dev,
601 			  struct mthca_eq *eq)
602 {
603 	struct mthca_mailbox *mailbox;
604 	int err;
605 	u8 status;
606 	int npages = (eq->nent * MTHCA_EQ_ENTRY_SIZE + PAGE_SIZE - 1) /
607 		PAGE_SIZE;
608 	int i;
609 
610 	mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
611 	if (IS_ERR(mailbox))
612 		return;
613 
614 	err = mthca_HW2SW_EQ(dev, mailbox, eq->eqn, &status);
615 	if (err)
616 		mthca_warn(dev, "HW2SW_EQ failed (%d)\n", err);
617 	if (status)
618 		mthca_warn(dev, "HW2SW_EQ returned status 0x%02x\n", status);
619 
620 	dev->eq_table.arm_mask &= ~eq->eqn_mask;
621 
622 	if (0) {
623 		mthca_dbg(dev, "Dumping EQ context %02x:\n", eq->eqn);
624 		for (i = 0; i < sizeof (struct mthca_eq_context) / 4; ++i) {
625 			if (i % 4 == 0)
626 				printk("[%02x] ", i * 4);
627 			printk(" %08x", be32_to_cpup(mailbox->buf + i * 4));
628 			if ((i + 1) % 4 == 0)
629 				printk("\n");
630 		}
631 	}
632 
633 	mthca_free_mr(dev, &eq->mr);
634 	for (i = 0; i < npages; ++i)
635 		pci_free_consistent(dev->pdev, PAGE_SIZE,
636 				    eq->page_list[i].buf,
637 				    pci_unmap_addr(&eq->page_list[i], mapping));
638 
639 	kfree(eq->page_list);
640 	mthca_free_mailbox(dev, mailbox);
641 }
642 
643 static void mthca_free_irqs(struct mthca_dev *dev)
644 {
645 	int i;
646 
647 	if (dev->eq_table.have_irq)
648 		free_irq(dev->pdev->irq, dev);
649 	for (i = 0; i < MTHCA_NUM_EQ; ++i)
650 		if (dev->eq_table.eq[i].have_irq)
651 			free_irq(dev->eq_table.eq[i].msi_x_vector,
652 				 dev->eq_table.eq + i);
653 }
654 
655 static int __devinit mthca_map_reg(struct mthca_dev *dev,
656 				   unsigned long offset, unsigned long size,
657 				   void __iomem **map)
658 {
659 	unsigned long base = pci_resource_start(dev->pdev, 0);
660 
661 	if (!request_mem_region(base + offset, size, DRV_NAME))
662 		return -EBUSY;
663 
664 	*map = ioremap(base + offset, size);
665 	if (!*map) {
666 		release_mem_region(base + offset, size);
667 		return -ENOMEM;
668 	}
669 
670 	return 0;
671 }
672 
673 static void mthca_unmap_reg(struct mthca_dev *dev, unsigned long offset,
674 			    unsigned long size, void __iomem *map)
675 {
676 	unsigned long base = pci_resource_start(dev->pdev, 0);
677 
678 	release_mem_region(base + offset, size);
679 	iounmap(map);
680 }
681 
682 static int __devinit mthca_map_eq_regs(struct mthca_dev *dev)
683 {
684 	unsigned long mthca_base;
685 
686 	mthca_base = pci_resource_start(dev->pdev, 0);
687 
688 	if (mthca_is_memfree(dev)) {
689 		/*
690 		 * We assume that the EQ arm and EQ set CI registers
691 		 * fall within the first BAR.  We can't trust the
692 		 * values firmware gives us, since those addresses are
693 		 * valid on the HCA's side of the PCI bus but not
694 		 * necessarily the host side.
695 		 */
696 		if (mthca_map_reg(dev, (pci_resource_len(dev->pdev, 0) - 1) &
697 				  dev->fw.arbel.clr_int_base, MTHCA_CLR_INT_SIZE,
698 				  &dev->clr_base)) {
699 			mthca_err(dev, "Couldn't map interrupt clear register, "
700 				  "aborting.\n");
701 			return -ENOMEM;
702 		}
703 
704 		/*
705 		 * Add 4 because we limit ourselves to EQs 0 ... 31,
706 		 * so we only need the low word of the register.
707 		 */
708 		if (mthca_map_reg(dev, ((pci_resource_len(dev->pdev, 0) - 1) &
709 					dev->fw.arbel.eq_arm_base) + 4, 4,
710 				  &dev->eq_regs.arbel.eq_arm)) {
711 			mthca_err(dev, "Couldn't map EQ arm register, aborting.\n");
712 			mthca_unmap_reg(dev, (pci_resource_len(dev->pdev, 0) - 1) &
713 					dev->fw.arbel.clr_int_base, MTHCA_CLR_INT_SIZE,
714 					dev->clr_base);
715 			return -ENOMEM;
716 		}
717 
718 		if (mthca_map_reg(dev, (pci_resource_len(dev->pdev, 0) - 1) &
719 				  dev->fw.arbel.eq_set_ci_base,
720 				  MTHCA_EQ_SET_CI_SIZE,
721 				  &dev->eq_regs.arbel.eq_set_ci_base)) {
722 			mthca_err(dev, "Couldn't map EQ CI register, aborting.\n");
723 			mthca_unmap_reg(dev, ((pci_resource_len(dev->pdev, 0) - 1) &
724 					      dev->fw.arbel.eq_arm_base) + 4, 4,
725 					dev->eq_regs.arbel.eq_arm);
726 			mthca_unmap_reg(dev, (pci_resource_len(dev->pdev, 0) - 1) &
727 					dev->fw.arbel.clr_int_base, MTHCA_CLR_INT_SIZE,
728 					dev->clr_base);
729 			return -ENOMEM;
730 		}
731 	} else {
732 		if (mthca_map_reg(dev, MTHCA_CLR_INT_BASE, MTHCA_CLR_INT_SIZE,
733 				  &dev->clr_base)) {
734 			mthca_err(dev, "Couldn't map interrupt clear register, "
735 				  "aborting.\n");
736 			return -ENOMEM;
737 		}
738 
739 		if (mthca_map_reg(dev, MTHCA_ECR_BASE,
740 				  MTHCA_ECR_SIZE + MTHCA_ECR_CLR_SIZE,
741 				  &dev->eq_regs.tavor.ecr_base)) {
742 			mthca_err(dev, "Couldn't map ecr register, "
743 				  "aborting.\n");
744 			mthca_unmap_reg(dev, MTHCA_CLR_INT_BASE, MTHCA_CLR_INT_SIZE,
745 					dev->clr_base);
746 			return -ENOMEM;
747 		}
748 	}
749 
750 	return 0;
751 
752 }
753 
754 static void __devexit mthca_unmap_eq_regs(struct mthca_dev *dev)
755 {
756 	if (mthca_is_memfree(dev)) {
757 		mthca_unmap_reg(dev, (pci_resource_len(dev->pdev, 0) - 1) &
758 				dev->fw.arbel.eq_set_ci_base,
759 				MTHCA_EQ_SET_CI_SIZE,
760 				dev->eq_regs.arbel.eq_set_ci_base);
761 		mthca_unmap_reg(dev, ((pci_resource_len(dev->pdev, 0) - 1) &
762 				      dev->fw.arbel.eq_arm_base) + 4, 4,
763 				dev->eq_regs.arbel.eq_arm);
764 		mthca_unmap_reg(dev, (pci_resource_len(dev->pdev, 0) - 1) &
765 				dev->fw.arbel.clr_int_base, MTHCA_CLR_INT_SIZE,
766 				dev->clr_base);
767 	} else {
768 		mthca_unmap_reg(dev, MTHCA_ECR_BASE,
769 				MTHCA_ECR_SIZE + MTHCA_ECR_CLR_SIZE,
770 				dev->eq_regs.tavor.ecr_base);
771 		mthca_unmap_reg(dev, MTHCA_CLR_INT_BASE, MTHCA_CLR_INT_SIZE,
772 				dev->clr_base);
773 	}
774 }
775 
776 int __devinit mthca_map_eq_icm(struct mthca_dev *dev, u64 icm_virt)
777 {
778 	int ret;
779 	u8 status;
780 
781 	/*
782 	 * We assume that mapping one page is enough for the whole EQ
783 	 * context table.  This is fine with all current HCAs, because
784 	 * we only use 32 EQs and each EQ uses 32 bytes of context
785 	 * memory, or 1 KB total.
786 	 */
787 	dev->eq_table.icm_virt = icm_virt;
788 	dev->eq_table.icm_page = alloc_page(GFP_HIGHUSER);
789 	if (!dev->eq_table.icm_page)
790 		return -ENOMEM;
791 	dev->eq_table.icm_dma  = pci_map_page(dev->pdev, dev->eq_table.icm_page, 0,
792 					      PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
793 	if (pci_dma_mapping_error(dev->eq_table.icm_dma)) {
794 		__free_page(dev->eq_table.icm_page);
795 		return -ENOMEM;
796 	}
797 
798 	ret = mthca_MAP_ICM_page(dev, dev->eq_table.icm_dma, icm_virt, &status);
799 	if (!ret && status)
800 		ret = -EINVAL;
801 	if (ret) {
802 		pci_unmap_page(dev->pdev, dev->eq_table.icm_dma, PAGE_SIZE,
803 			       PCI_DMA_BIDIRECTIONAL);
804 		__free_page(dev->eq_table.icm_page);
805 	}
806 
807 	return ret;
808 }
809 
810 void __devexit mthca_unmap_eq_icm(struct mthca_dev *dev)
811 {
812 	u8 status;
813 
814 	mthca_UNMAP_ICM(dev, dev->eq_table.icm_virt, PAGE_SIZE / 4096, &status);
815 	pci_unmap_page(dev->pdev, dev->eq_table.icm_dma, PAGE_SIZE,
816 		       PCI_DMA_BIDIRECTIONAL);
817 	__free_page(dev->eq_table.icm_page);
818 }
819 
820 int __devinit mthca_init_eq_table(struct mthca_dev *dev)
821 {
822 	int err;
823 	u8 status;
824 	u8 intr;
825 	int i;
826 
827 	err = mthca_alloc_init(&dev->eq_table.alloc,
828 			       dev->limits.num_eqs,
829 			       dev->limits.num_eqs - 1,
830 			       dev->limits.reserved_eqs);
831 	if (err)
832 		return err;
833 
834 	err = mthca_map_eq_regs(dev);
835 	if (err)
836 		goto err_out_free;
837 
838 	if (dev->mthca_flags & MTHCA_FLAG_MSI ||
839 	    dev->mthca_flags & MTHCA_FLAG_MSI_X) {
840 		dev->eq_table.clr_mask = 0;
841 	} else {
842 		dev->eq_table.clr_mask =
843 			swab32(1 << (dev->eq_table.inta_pin & 31));
844 		dev->eq_table.clr_int  = dev->clr_base +
845 			(dev->eq_table.inta_pin < 31 ? 4 : 0);
846 	}
847 
848 	dev->eq_table.arm_mask = 0;
849 
850 	intr = (dev->mthca_flags & MTHCA_FLAG_MSI) ?
851 		128 : dev->eq_table.inta_pin;
852 
853 	err = mthca_create_eq(dev, dev->limits.num_cqs,
854 			      (dev->mthca_flags & MTHCA_FLAG_MSI_X) ? 128 : intr,
855 			      &dev->eq_table.eq[MTHCA_EQ_COMP]);
856 	if (err)
857 		goto err_out_unmap;
858 
859 	err = mthca_create_eq(dev, MTHCA_NUM_ASYNC_EQE,
860 			      (dev->mthca_flags & MTHCA_FLAG_MSI_X) ? 129 : intr,
861 			      &dev->eq_table.eq[MTHCA_EQ_ASYNC]);
862 	if (err)
863 		goto err_out_comp;
864 
865 	err = mthca_create_eq(dev, MTHCA_NUM_CMD_EQE,
866 			      (dev->mthca_flags & MTHCA_FLAG_MSI_X) ? 130 : intr,
867 			      &dev->eq_table.eq[MTHCA_EQ_CMD]);
868 	if (err)
869 		goto err_out_async;
870 
871 	if (dev->mthca_flags & MTHCA_FLAG_MSI_X) {
872 		static const char *eq_name[] = {
873 			[MTHCA_EQ_COMP]  = DRV_NAME " (comp)",
874 			[MTHCA_EQ_ASYNC] = DRV_NAME " (async)",
875 			[MTHCA_EQ_CMD]   = DRV_NAME " (cmd)"
876 		};
877 
878 		for (i = 0; i < MTHCA_NUM_EQ; ++i) {
879 			err = request_irq(dev->eq_table.eq[i].msi_x_vector,
880 					  mthca_is_memfree(dev) ?
881 					  mthca_arbel_msi_x_interrupt :
882 					  mthca_tavor_msi_x_interrupt,
883 					  0, eq_name[i], dev->eq_table.eq + i);
884 			if (err)
885 				goto err_out_cmd;
886 			dev->eq_table.eq[i].have_irq = 1;
887 		}
888 	} else {
889 		err = request_irq(dev->pdev->irq,
890 				  mthca_is_memfree(dev) ?
891 				  mthca_arbel_interrupt :
892 				  mthca_tavor_interrupt,
893 				  SA_SHIRQ, DRV_NAME, dev);
894 		if (err)
895 			goto err_out_cmd;
896 		dev->eq_table.have_irq = 1;
897 	}
898 
899 	err = mthca_MAP_EQ(dev, async_mask(dev),
900 			   0, dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn, &status);
901 	if (err)
902 		mthca_warn(dev, "MAP_EQ for async EQ %d failed (%d)\n",
903 			   dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn, err);
904 	if (status)
905 		mthca_warn(dev, "MAP_EQ for async EQ %d returned status 0x%02x\n",
906 			   dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn, status);
907 
908 	err = mthca_MAP_EQ(dev, MTHCA_CMD_EVENT_MASK,
909 			   0, dev->eq_table.eq[MTHCA_EQ_CMD].eqn, &status);
910 	if (err)
911 		mthca_warn(dev, "MAP_EQ for cmd EQ %d failed (%d)\n",
912 			   dev->eq_table.eq[MTHCA_EQ_CMD].eqn, err);
913 	if (status)
914 		mthca_warn(dev, "MAP_EQ for cmd EQ %d returned status 0x%02x\n",
915 			   dev->eq_table.eq[MTHCA_EQ_CMD].eqn, status);
916 
917 	for (i = 0; i < MTHCA_EQ_CMD; ++i)
918 		if (mthca_is_memfree(dev))
919 			arbel_eq_req_not(dev, dev->eq_table.eq[i].eqn_mask);
920 		else
921 			tavor_eq_req_not(dev, dev->eq_table.eq[i].eqn);
922 
923 	return 0;
924 
925 err_out_cmd:
926 	mthca_free_irqs(dev);
927 	mthca_free_eq(dev, &dev->eq_table.eq[MTHCA_EQ_CMD]);
928 
929 err_out_async:
930 	mthca_free_eq(dev, &dev->eq_table.eq[MTHCA_EQ_ASYNC]);
931 
932 err_out_comp:
933 	mthca_free_eq(dev, &dev->eq_table.eq[MTHCA_EQ_COMP]);
934 
935 err_out_unmap:
936 	mthca_unmap_eq_regs(dev);
937 
938 err_out_free:
939 	mthca_alloc_cleanup(&dev->eq_table.alloc);
940 	return err;
941 }
942 
943 void __devexit mthca_cleanup_eq_table(struct mthca_dev *dev)
944 {
945 	u8 status;
946 	int i;
947 
948 	mthca_free_irqs(dev);
949 
950 	mthca_MAP_EQ(dev, async_mask(dev),
951 		     1, dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn, &status);
952 	mthca_MAP_EQ(dev, MTHCA_CMD_EVENT_MASK,
953 		     1, dev->eq_table.eq[MTHCA_EQ_CMD].eqn, &status);
954 
955 	for (i = 0; i < MTHCA_NUM_EQ; ++i)
956 		mthca_free_eq(dev, &dev->eq_table.eq[i]);
957 
958 	mthca_unmap_eq_regs(dev);
959 
960 	mthca_alloc_cleanup(&dev->eq_table.alloc);
961 }
962