xref: /linux/drivers/infiniband/hw/mthca/mthca_eq.c (revision 54a8a2220c936a47840c9a3d74910c5a56fae2ed)
1 /*
2  * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
3  * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
4  *
5  * This software is available to you under a choice of one of two
6  * licenses.  You may choose to be licensed under the terms of the GNU
7  * General Public License (GPL) Version 2, available from the file
8  * COPYING in the main directory of this source tree, or the
9  * OpenIB.org BSD license below:
10  *
11  *     Redistribution and use in source and binary forms, with or
12  *     without modification, are permitted provided that the following
13  *     conditions are met:
14  *
15  *      - Redistributions of source code must retain the above
16  *        copyright notice, this list of conditions and the following
17  *        disclaimer.
18  *
19  *      - Redistributions in binary form must reproduce the above
20  *        copyright notice, this list of conditions and the following
21  *        disclaimer in the documentation and/or other materials
22  *        provided with the distribution.
23  *
24  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31  * SOFTWARE.
32  *
33  * $Id: mthca_eq.c 1382 2004-12-24 02:21:02Z roland $
34  */
35 
36 #include <linux/init.h>
37 #include <linux/errno.h>
38 #include <linux/interrupt.h>
39 #include <linux/pci.h>
40 
41 #include "mthca_dev.h"
42 #include "mthca_cmd.h"
43 #include "mthca_config_reg.h"
44 
/* Sizing constants for the event queues created in this file. */
enum {
	MTHCA_EQ_ENTRY_SIZE = 32,	/* bytes occupied by one EQ entry */
	MTHCA_NUM_ASYNC_EQE = 128,	/* entries requested for the async EQ */
	MTHCA_NUM_CMD_EQE   = 128	/* entries requested for the command EQ */
};
50 
51 /*
52  * Must be packed because start is 64 bits but only aligned to 32 bits.
53  */
54 struct mthca_eq_context {
55 	__be32 flags;
56 	__be64 start;
57 	__be32 logsize_usrpage;
58 	__be32 tavor_pd;	/* reserved for Arbel */
59 	u8     reserved1[3];
60 	u8     intr;
61 	__be32 arbel_pd;	/* lost_count for Tavor */
62 	__be32 lkey;
63 	u32    reserved2[2];
64 	__be32 consumer_index;
65 	__be32 producer_index;
66 	u32    reserved3[4];
67 } __attribute__((packed));
68 
/*
 * Bit fields of mthca_eq_context.flags.
 *
 * The constants are unsigned: shifting 9 or 10 left by 28 does not fit
 * in a signed int (undefined behaviour in ISO C, and a negative value
 * in practice), and the whole set is OR-ed together into a 32-bit
 * unsigned field anyway.
 */
#define MTHCA_EQ_STATUS_OK          ( 0U << 28)
#define MTHCA_EQ_STATUS_OVERFLOW    ( 9U << 28)
#define MTHCA_EQ_STATUS_WRITE_FAIL  (10U << 28)
#define MTHCA_EQ_OWNER_SW           ( 0U << 24)
#define MTHCA_EQ_OWNER_HW           ( 1U << 24)
#define MTHCA_EQ_FLAG_TR            ( 1U << 18)
#define MTHCA_EQ_FLAG_OI            ( 1U << 17)
#define MTHCA_EQ_STATE_ARMED        ( 1U <<  8)
#define MTHCA_EQ_STATE_FIRED        ( 2U <<  8)
#define MTHCA_EQ_STATE_ALWAYS_ARMED ( 3U <<  8)
#define MTHCA_EQ_STATE_ARBEL        ( 8U <<  8)
80 
/*
 * Event type codes found in mthca_eqe.type.  Each code is also used as
 * a bit position in the 64-bit event masks defined below.
 */
enum {
	MTHCA_EVENT_TYPE_COMP               = 0x00,
	MTHCA_EVENT_TYPE_PATH_MIG           = 0x01,
	MTHCA_EVENT_TYPE_COMM_EST           = 0x02,
	MTHCA_EVENT_TYPE_SQ_DRAINED         = 0x03,
	MTHCA_EVENT_TYPE_SRQ_LAST_WQE       = 0x13,
	MTHCA_EVENT_TYPE_CQ_ERROR           = 0x04,
	MTHCA_EVENT_TYPE_WQ_CATAS_ERROR     = 0x05,
	MTHCA_EVENT_TYPE_EEC_CATAS_ERROR    = 0x06,
	MTHCA_EVENT_TYPE_PATH_MIG_FAILED    = 0x07,
	MTHCA_EVENT_TYPE_WQ_INVAL_REQ_ERROR = 0x10,
	MTHCA_EVENT_TYPE_WQ_ACCESS_ERROR    = 0x11,
	MTHCA_EVENT_TYPE_SRQ_CATAS_ERROR    = 0x12,
	MTHCA_EVENT_TYPE_LOCAL_CATAS_ERROR  = 0x08,
	MTHCA_EVENT_TYPE_PORT_CHANGE        = 0x09,
	MTHCA_EVENT_TYPE_EQ_OVERFLOW        = 0x0f,
	MTHCA_EVENT_TYPE_ECC_DETECT         = 0x0e,
	MTHCA_EVENT_TYPE_CMD                = 0x0a
};

/*
 * Event masks: one bit per event type.  Every mask is fully
 * parenthesized so that it expands as a single operand whatever
 * operator appears next to it at the point of use.
 */
#define MTHCA_ASYNC_EVENT_MASK ((1ULL << MTHCA_EVENT_TYPE_PATH_MIG)           | \
				(1ULL << MTHCA_EVENT_TYPE_COMM_EST)           | \
				(1ULL << MTHCA_EVENT_TYPE_SQ_DRAINED)         | \
				(1ULL << MTHCA_EVENT_TYPE_CQ_ERROR)           | \
				(1ULL << MTHCA_EVENT_TYPE_WQ_CATAS_ERROR)     | \
				(1ULL << MTHCA_EVENT_TYPE_EEC_CATAS_ERROR)    | \
				(1ULL << MTHCA_EVENT_TYPE_PATH_MIG_FAILED)    | \
				(1ULL << MTHCA_EVENT_TYPE_WQ_INVAL_REQ_ERROR) | \
				(1ULL << MTHCA_EVENT_TYPE_WQ_ACCESS_ERROR)    | \
				(1ULL << MTHCA_EVENT_TYPE_LOCAL_CATAS_ERROR)  | \
				(1ULL << MTHCA_EVENT_TYPE_PORT_CHANGE)        | \
				(1ULL << MTHCA_EVENT_TYPE_ECC_DETECT))
#define MTHCA_SRQ_EVENT_MASK   ((1ULL << MTHCA_EVENT_TYPE_SRQ_CATAS_ERROR)    | \
				(1ULL << MTHCA_EVENT_TYPE_SRQ_LAST_WQE))
#define MTHCA_CMD_EVENT_MASK    (1ULL << MTHCA_EVENT_TYPE_CMD)
116 
/*
 * EQ doorbell command codes.  The code sits in bits 31:24 of the first
 * doorbell word; the users in this file OR the EQ number into the same
 * word.
 */
#define MTHCA_EQ_DB_INC_CI     0x01000000
#define MTHCA_EQ_DB_REQ_NOT    0x02000000
#define MTHCA_EQ_DB_DISARM_CQ  0x03000000
#define MTHCA_EQ_DB_SET_CI     0x04000000
#define MTHCA_EQ_DB_ALWAYS_ARM 0x05000000
122 
123 struct mthca_eqe {
124 	u8 reserved1;
125 	u8 type;
126 	u8 reserved2;
127 	u8 subtype;
128 	union {
129 		u32 raw[6];
130 		struct {
131 			__be32 cqn;
132 		} __attribute__((packed)) comp;
133 		struct {
134 			u16    reserved1;
135 			__be16 token;
136 			u32    reserved2;
137 			u8     reserved3[3];
138 			u8     status;
139 			__be64 out_param;
140 		} __attribute__((packed)) cmd;
141 		struct {
142 			__be32 qpn;
143 		} __attribute__((packed)) qp;
144 		struct {
145 			__be32 cqn;
146 			u32    reserved1;
147 			u8     reserved2[3];
148 			u8     syndrome;
149 		} __attribute__((packed)) cq_err;
150 		struct {
151 			u32    reserved1[2];
152 			__be32 port;
153 		} __attribute__((packed)) port_change;
154 	} event;
155 	u8 reserved3[3];
156 	u8 owner;
157 } __attribute__((packed));
158 
/* Ownership flag kept in bit 7 of mthca_eqe.owner. */
#define MTHCA_EQ_ENTRY_OWNER_SW 0x00
#define MTHCA_EQ_ENTRY_OWNER_HW 0x80
161 
162 static inline u64 async_mask(struct mthca_dev *dev)
163 {
164 	return dev->mthca_flags & MTHCA_FLAG_SRQ ?
165 		MTHCA_ASYNC_EVENT_MASK | MTHCA_SRQ_EVENT_MASK :
166 		MTHCA_ASYNC_EVENT_MASK;
167 }
168 
169 static inline void tavor_set_eq_ci(struct mthca_dev *dev, struct mthca_eq *eq, u32 ci)
170 {
171 	__be32 doorbell[2];
172 
173 	doorbell[0] = cpu_to_be32(MTHCA_EQ_DB_SET_CI | eq->eqn);
174 	doorbell[1] = cpu_to_be32(ci & (eq->nent - 1));
175 
176 	/*
177 	 * This barrier makes sure that all updates to ownership bits
178 	 * done by set_eqe_hw() hit memory before the consumer index
179 	 * is updated.  set_eq_ci() allows the HCA to possibly write
180 	 * more EQ entries, and we want to avoid the exceedingly
181 	 * unlikely possibility of the HCA writing an entry and then
182 	 * having set_eqe_hw() overwrite the owner field.
183 	 */
184 	wmb();
185 	mthca_write64(doorbell,
186 		      dev->kar + MTHCA_EQ_DOORBELL,
187 		      MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
188 }
189 
190 static inline void arbel_set_eq_ci(struct mthca_dev *dev, struct mthca_eq *eq, u32 ci)
191 {
192 	/* See comment in tavor_set_eq_ci() above. */
193 	wmb();
194 	__raw_writel((__force u32) cpu_to_be32(ci),
195 		     dev->eq_regs.arbel.eq_set_ci_base + eq->eqn * 8);
196 	/* We still want ordering, just not swabbing, so add a barrier */
197 	mb();
198 }
199 
200 static inline void set_eq_ci(struct mthca_dev *dev, struct mthca_eq *eq, u32 ci)
201 {
202 	if (mthca_is_memfree(dev))
203 		arbel_set_eq_ci(dev, eq, ci);
204 	else
205 		tavor_set_eq_ci(dev, eq, ci);
206 }
207 
208 static inline void tavor_eq_req_not(struct mthca_dev *dev, int eqn)
209 {
210 	__be32 doorbell[2];
211 
212 	doorbell[0] = cpu_to_be32(MTHCA_EQ_DB_REQ_NOT | eqn);
213 	doorbell[1] = 0;
214 
215 	mthca_write64(doorbell,
216 		      dev->kar + MTHCA_EQ_DOORBELL,
217 		      MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
218 }
219 
220 static inline void arbel_eq_req_not(struct mthca_dev *dev, u32 eqn_mask)
221 {
222 	writel(eqn_mask, dev->eq_regs.arbel.eq_arm);
223 }
224 
225 static inline void disarm_cq(struct mthca_dev *dev, int eqn, int cqn)
226 {
227 	if (!mthca_is_memfree(dev)) {
228 		__be32 doorbell[2];
229 
230 		doorbell[0] = cpu_to_be32(MTHCA_EQ_DB_DISARM_CQ | eqn);
231 		doorbell[1] = cpu_to_be32(cqn);
232 
233 		mthca_write64(doorbell,
234 			      dev->kar + MTHCA_EQ_DOORBELL,
235 			      MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
236 	}
237 }
238 
239 static inline struct mthca_eqe *get_eqe(struct mthca_eq *eq, u32 entry)
240 {
241 	unsigned long off = (entry & (eq->nent - 1)) * MTHCA_EQ_ENTRY_SIZE;
242 	return eq->page_list[off / PAGE_SIZE].buf + off % PAGE_SIZE;
243 }
244 
245 static inline struct mthca_eqe* next_eqe_sw(struct mthca_eq *eq)
246 {
247 	struct mthca_eqe* eqe;
248 	eqe = get_eqe(eq, eq->cons_index);
249 	return (MTHCA_EQ_ENTRY_OWNER_HW & eqe->owner) ? NULL : eqe;
250 }
251 
252 static inline void set_eqe_hw(struct mthca_eqe *eqe)
253 {
254 	eqe->owner =  MTHCA_EQ_ENTRY_OWNER_HW;
255 }
256 
257 static void port_change(struct mthca_dev *dev, int port, int active)
258 {
259 	struct ib_event record;
260 
261 	mthca_dbg(dev, "Port change to %s for port %d\n",
262 		  active ? "active" : "down", port);
263 
264 	record.device = &dev->ib_dev;
265 	record.event  = active ? IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
266 	record.element.port_num = port;
267 
268 	ib_dispatch_event(&record);
269 }
270 
271 static int mthca_eq_int(struct mthca_dev *dev, struct mthca_eq *eq)
272 {
273 	struct mthca_eqe *eqe;
274 	int disarm_cqn;
275 	int  eqes_found = 0;
276 
277 	while ((eqe = next_eqe_sw(eq))) {
278 		int set_ci = 0;
279 
280 		/*
281 		 * Make sure we read EQ entry contents after we've
282 		 * checked the ownership bit.
283 		 */
284 		rmb();
285 
286 		switch (eqe->type) {
287 		case MTHCA_EVENT_TYPE_COMP:
288 			disarm_cqn = be32_to_cpu(eqe->event.comp.cqn) & 0xffffff;
289 			disarm_cq(dev, eq->eqn, disarm_cqn);
290 			mthca_cq_event(dev, disarm_cqn);
291 			break;
292 
293 		case MTHCA_EVENT_TYPE_PATH_MIG:
294 			mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
295 				       IB_EVENT_PATH_MIG);
296 			break;
297 
298 		case MTHCA_EVENT_TYPE_COMM_EST:
299 			mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
300 				       IB_EVENT_COMM_EST);
301 			break;
302 
303 		case MTHCA_EVENT_TYPE_SQ_DRAINED:
304 			mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
305 				       IB_EVENT_SQ_DRAINED);
306 			break;
307 
308 		case MTHCA_EVENT_TYPE_WQ_CATAS_ERROR:
309 			mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
310 				       IB_EVENT_QP_FATAL);
311 			break;
312 
313 		case MTHCA_EVENT_TYPE_PATH_MIG_FAILED:
314 			mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
315 				       IB_EVENT_PATH_MIG_ERR);
316 			break;
317 
318 		case MTHCA_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
319 			mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
320 				       IB_EVENT_QP_REQ_ERR);
321 			break;
322 
323 		case MTHCA_EVENT_TYPE_WQ_ACCESS_ERROR:
324 			mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
325 				       IB_EVENT_QP_ACCESS_ERR);
326 			break;
327 
328 		case MTHCA_EVENT_TYPE_CMD:
329 			mthca_cmd_event(dev,
330 					be16_to_cpu(eqe->event.cmd.token),
331 					eqe->event.cmd.status,
332 					be64_to_cpu(eqe->event.cmd.out_param));
333 			/*
334 			 * cmd_event() may add more commands.
335 			 * The card will think the queue has overflowed if
336 			 * we don't tell it we've been processing events.
337 			 */
338 			set_ci = 1;
339 			break;
340 
341 		case MTHCA_EVENT_TYPE_PORT_CHANGE:
342 			port_change(dev,
343 				    (be32_to_cpu(eqe->event.port_change.port) >> 28) & 3,
344 				    eqe->subtype == 0x4);
345 			break;
346 
347 		case MTHCA_EVENT_TYPE_CQ_ERROR:
348 			mthca_warn(dev, "CQ %s on CQN %06x\n",
349 				   eqe->event.cq_err.syndrome == 1 ?
350 				   "overrun" : "access violation",
351 				   be32_to_cpu(eqe->event.cq_err.cqn) & 0xffffff);
352 			break;
353 
354 		case MTHCA_EVENT_TYPE_EQ_OVERFLOW:
355 			mthca_warn(dev, "EQ overrun on EQN %d\n", eq->eqn);
356 			break;
357 
358 		case MTHCA_EVENT_TYPE_EEC_CATAS_ERROR:
359 		case MTHCA_EVENT_TYPE_SRQ_CATAS_ERROR:
360 		case MTHCA_EVENT_TYPE_LOCAL_CATAS_ERROR:
361 		case MTHCA_EVENT_TYPE_ECC_DETECT:
362 		default:
363 			mthca_warn(dev, "Unhandled event %02x(%02x) on EQ %d\n",
364 				   eqe->type, eqe->subtype, eq->eqn);
365 			break;
366 		};
367 
368 		set_eqe_hw(eqe);
369 		++eq->cons_index;
370 		eqes_found = 1;
371 
372 		if (unlikely(set_ci)) {
373 			/*
374 			 * Conditional on hca_type is OK here because
375 			 * this is a rare case, not the fast path.
376 			 */
377 			set_eq_ci(dev, eq, eq->cons_index);
378 			set_ci = 0;
379 		}
380 	}
381 
382 	/*
383 	 * Rely on caller to set consumer index so that we don't have
384 	 * to test hca_type in our interrupt handling fast path.
385 	 */
386 	return eqes_found;
387 }
388 
389 static irqreturn_t mthca_tavor_interrupt(int irq, void *dev_ptr, struct pt_regs *regs)
390 {
391 	struct mthca_dev *dev = dev_ptr;
392 	u32 ecr;
393 	int i;
394 
395 	if (dev->eq_table.clr_mask)
396 		writel(dev->eq_table.clr_mask, dev->eq_table.clr_int);
397 
398 	ecr = readl(dev->eq_regs.tavor.ecr_base + 4);
399 	if (ecr) {
400 		writel(ecr, dev->eq_regs.tavor.ecr_base +
401 		       MTHCA_ECR_CLR_BASE - MTHCA_ECR_BASE + 4);
402 
403 		for (i = 0; i < MTHCA_NUM_EQ; ++i)
404 			if (ecr & dev->eq_table.eq[i].eqn_mask &&
405 			    mthca_eq_int(dev, &dev->eq_table.eq[i])) {
406 				tavor_set_eq_ci(dev, &dev->eq_table.eq[i],
407 						dev->eq_table.eq[i].cons_index);
408 				tavor_eq_req_not(dev, dev->eq_table.eq[i].eqn);
409 			}
410 	}
411 
412 	return IRQ_RETVAL(ecr);
413 }
414 
415 static irqreturn_t mthca_tavor_msi_x_interrupt(int irq, void *eq_ptr,
416 					 struct pt_regs *regs)
417 {
418 	struct mthca_eq  *eq  = eq_ptr;
419 	struct mthca_dev *dev = eq->dev;
420 
421 	mthca_eq_int(dev, eq);
422 	tavor_set_eq_ci(dev, eq, eq->cons_index);
423 	tavor_eq_req_not(dev, eq->eqn);
424 
425 	/* MSI-X vectors always belong to us */
426 	return IRQ_HANDLED;
427 }
428 
429 static irqreturn_t mthca_arbel_interrupt(int irq, void *dev_ptr, struct pt_regs *regs)
430 {
431 	struct mthca_dev *dev = dev_ptr;
432 	int work = 0;
433 	int i;
434 
435 	if (dev->eq_table.clr_mask)
436 		writel(dev->eq_table.clr_mask, dev->eq_table.clr_int);
437 
438 	for (i = 0; i < MTHCA_NUM_EQ; ++i)
439 		if (mthca_eq_int(dev, &dev->eq_table.eq[i])) {
440 			work = 1;
441 			arbel_set_eq_ci(dev, &dev->eq_table.eq[i],
442 					dev->eq_table.eq[i].cons_index);
443 		}
444 
445 	arbel_eq_req_not(dev, dev->eq_table.arm_mask);
446 
447 	return IRQ_RETVAL(work);
448 }
449 
450 static irqreturn_t mthca_arbel_msi_x_interrupt(int irq, void *eq_ptr,
451 					       struct pt_regs *regs)
452 {
453 	struct mthca_eq  *eq  = eq_ptr;
454 	struct mthca_dev *dev = eq->dev;
455 
456 	mthca_eq_int(dev, eq);
457 	arbel_set_eq_ci(dev, eq, eq->cons_index);
458 	arbel_eq_req_not(dev, eq->eqn_mask);
459 
460 	/* MSI-X vectors always belong to us */
461 	return IRQ_HANDLED;
462 }
463 
464 static int __devinit mthca_create_eq(struct mthca_dev *dev,
465 				     int nent,
466 				     u8 intr,
467 				     struct mthca_eq *eq)
468 {
469 	int npages = (nent * MTHCA_EQ_ENTRY_SIZE + PAGE_SIZE - 1) /
470 		PAGE_SIZE;
471 	u64 *dma_list = NULL;
472 	dma_addr_t t;
473 	struct mthca_mailbox *mailbox;
474 	struct mthca_eq_context *eq_context;
475 	int err = -ENOMEM;
476 	int i;
477 	u8 status;
478 
479 	eq->dev  = dev;
480 	eq->nent = roundup_pow_of_two(max(nent, 2));
481 
482 	eq->page_list = kmalloc(npages * sizeof *eq->page_list,
483 				GFP_KERNEL);
484 	if (!eq->page_list)
485 		goto err_out;
486 
487 	for (i = 0; i < npages; ++i)
488 		eq->page_list[i].buf = NULL;
489 
490 	dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL);
491 	if (!dma_list)
492 		goto err_out_free;
493 
494 	mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
495 	if (IS_ERR(mailbox))
496 		goto err_out_free;
497 	eq_context = mailbox->buf;
498 
499 	for (i = 0; i < npages; ++i) {
500 		eq->page_list[i].buf = dma_alloc_coherent(&dev->pdev->dev,
501 							  PAGE_SIZE, &t, GFP_KERNEL);
502 		if (!eq->page_list[i].buf)
503 			goto err_out_free_pages;
504 
505 		dma_list[i] = t;
506 		pci_unmap_addr_set(&eq->page_list[i], mapping, t);
507 
508 		memset(eq->page_list[i].buf, 0, PAGE_SIZE);
509 	}
510 
511 	for (i = 0; i < eq->nent; ++i)
512 		set_eqe_hw(get_eqe(eq, i));
513 
514 	eq->eqn = mthca_alloc(&dev->eq_table.alloc);
515 	if (eq->eqn == -1)
516 		goto err_out_free_pages;
517 
518 	err = mthca_mr_alloc_phys(dev, dev->driver_pd.pd_num,
519 				  dma_list, PAGE_SHIFT, npages,
520 				  0, npages * PAGE_SIZE,
521 				  MTHCA_MPT_FLAG_LOCAL_WRITE |
522 				  MTHCA_MPT_FLAG_LOCAL_READ,
523 				  &eq->mr);
524 	if (err)
525 		goto err_out_free_eq;
526 
527 	memset(eq_context, 0, sizeof *eq_context);
528 	eq_context->flags           = cpu_to_be32(MTHCA_EQ_STATUS_OK   |
529 						  MTHCA_EQ_OWNER_HW    |
530 						  MTHCA_EQ_STATE_ARMED |
531 						  MTHCA_EQ_FLAG_TR);
532 	if (mthca_is_memfree(dev))
533 		eq_context->flags  |= cpu_to_be32(MTHCA_EQ_STATE_ARBEL);
534 
535 	eq_context->logsize_usrpage = cpu_to_be32((ffs(eq->nent) - 1) << 24);
536 	if (mthca_is_memfree(dev)) {
537 		eq_context->arbel_pd = cpu_to_be32(dev->driver_pd.pd_num);
538 	} else {
539 		eq_context->logsize_usrpage |= cpu_to_be32(dev->driver_uar.index);
540 		eq_context->tavor_pd         = cpu_to_be32(dev->driver_pd.pd_num);
541 	}
542 	eq_context->intr            = intr;
543 	eq_context->lkey            = cpu_to_be32(eq->mr.ibmr.lkey);
544 
545 	err = mthca_SW2HW_EQ(dev, mailbox, eq->eqn, &status);
546 	if (err) {
547 		mthca_warn(dev, "SW2HW_EQ failed (%d)\n", err);
548 		goto err_out_free_mr;
549 	}
550 	if (status) {
551 		mthca_warn(dev, "SW2HW_EQ returned status 0x%02x\n",
552 			   status);
553 		err = -EINVAL;
554 		goto err_out_free_mr;
555 	}
556 
557 	kfree(dma_list);
558 	mthca_free_mailbox(dev, mailbox);
559 
560 	eq->eqn_mask   = swab32(1 << eq->eqn);
561 	eq->cons_index = 0;
562 
563 	dev->eq_table.arm_mask |= eq->eqn_mask;
564 
565 	mthca_dbg(dev, "Allocated EQ %d with %d entries\n",
566 		  eq->eqn, eq->nent);
567 
568 	return err;
569 
570  err_out_free_mr:
571 	mthca_free_mr(dev, &eq->mr);
572 
573  err_out_free_eq:
574 	mthca_free(&dev->eq_table.alloc, eq->eqn);
575 
576  err_out_free_pages:
577 	for (i = 0; i < npages; ++i)
578 		if (eq->page_list[i].buf)
579 			dma_free_coherent(&dev->pdev->dev, PAGE_SIZE,
580 					  eq->page_list[i].buf,
581 					  pci_unmap_addr(&eq->page_list[i],
582 							 mapping));
583 
584 	mthca_free_mailbox(dev, mailbox);
585 
586  err_out_free:
587 	kfree(eq->page_list);
588 	kfree(dma_list);
589 
590  err_out:
591 	return err;
592 }
593 
594 static void mthca_free_eq(struct mthca_dev *dev,
595 			  struct mthca_eq *eq)
596 {
597 	struct mthca_mailbox *mailbox;
598 	int err;
599 	u8 status;
600 	int npages = (eq->nent * MTHCA_EQ_ENTRY_SIZE + PAGE_SIZE - 1) /
601 		PAGE_SIZE;
602 	int i;
603 
604 	mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
605 	if (IS_ERR(mailbox))
606 		return;
607 
608 	err = mthca_HW2SW_EQ(dev, mailbox, eq->eqn, &status);
609 	if (err)
610 		mthca_warn(dev, "HW2SW_EQ failed (%d)\n", err);
611 	if (status)
612 		mthca_warn(dev, "HW2SW_EQ returned status 0x%02x\n", status);
613 
614 	dev->eq_table.arm_mask &= ~eq->eqn_mask;
615 
616 	if (0) {
617 		mthca_dbg(dev, "Dumping EQ context %02x:\n", eq->eqn);
618 		for (i = 0; i < sizeof (struct mthca_eq_context) / 4; ++i) {
619 			if (i % 4 == 0)
620 				printk("[%02x] ", i * 4);
621 			printk(" %08x", be32_to_cpup(mailbox->buf + i * 4));
622 			if ((i + 1) % 4 == 0)
623 				printk("\n");
624 		}
625 	}
626 
627 	mthca_free_mr(dev, &eq->mr);
628 	for (i = 0; i < npages; ++i)
629 		pci_free_consistent(dev->pdev, PAGE_SIZE,
630 				    eq->page_list[i].buf,
631 				    pci_unmap_addr(&eq->page_list[i], mapping));
632 
633 	kfree(eq->page_list);
634 	mthca_free_mailbox(dev, mailbox);
635 }
636 
637 static void mthca_free_irqs(struct mthca_dev *dev)
638 {
639 	int i;
640 
641 	if (dev->eq_table.have_irq)
642 		free_irq(dev->pdev->irq, dev);
643 	for (i = 0; i < MTHCA_NUM_EQ; ++i)
644 		if (dev->eq_table.eq[i].have_irq)
645 			free_irq(dev->eq_table.eq[i].msi_x_vector,
646 				 dev->eq_table.eq + i);
647 }
648 
649 static int __devinit mthca_map_reg(struct mthca_dev *dev,
650 				   unsigned long offset, unsigned long size,
651 				   void __iomem **map)
652 {
653 	unsigned long base = pci_resource_start(dev->pdev, 0);
654 
655 	if (!request_mem_region(base + offset, size, DRV_NAME))
656 		return -EBUSY;
657 
658 	*map = ioremap(base + offset, size);
659 	if (!*map) {
660 		release_mem_region(base + offset, size);
661 		return -ENOMEM;
662 	}
663 
664 	return 0;
665 }
666 
667 static void mthca_unmap_reg(struct mthca_dev *dev, unsigned long offset,
668 			    unsigned long size, void __iomem *map)
669 {
670 	unsigned long base = pci_resource_start(dev->pdev, 0);
671 
672 	release_mem_region(base + offset, size);
673 	iounmap(map);
674 }
675 
676 static int __devinit mthca_map_eq_regs(struct mthca_dev *dev)
677 {
678 	unsigned long mthca_base;
679 
680 	mthca_base = pci_resource_start(dev->pdev, 0);
681 
682 	if (mthca_is_memfree(dev)) {
683 		/*
684 		 * We assume that the EQ arm and EQ set CI registers
685 		 * fall within the first BAR.  We can't trust the
686 		 * values firmware gives us, since those addresses are
687 		 * valid on the HCA's side of the PCI bus but not
688 		 * necessarily the host side.
689 		 */
690 		if (mthca_map_reg(dev, (pci_resource_len(dev->pdev, 0) - 1) &
691 				  dev->fw.arbel.clr_int_base, MTHCA_CLR_INT_SIZE,
692 				  &dev->clr_base)) {
693 			mthca_err(dev, "Couldn't map interrupt clear register, "
694 				  "aborting.\n");
695 			return -ENOMEM;
696 		}
697 
698 		/*
699 		 * Add 4 because we limit ourselves to EQs 0 ... 31,
700 		 * so we only need the low word of the register.
701 		 */
702 		if (mthca_map_reg(dev, ((pci_resource_len(dev->pdev, 0) - 1) &
703 					dev->fw.arbel.eq_arm_base) + 4, 4,
704 				  &dev->eq_regs.arbel.eq_arm)) {
705 			mthca_err(dev, "Couldn't map EQ arm register, aborting.\n");
706 			mthca_unmap_reg(dev, (pci_resource_len(dev->pdev, 0) - 1) &
707 					dev->fw.arbel.clr_int_base, MTHCA_CLR_INT_SIZE,
708 					dev->clr_base);
709 			return -ENOMEM;
710 		}
711 
712 		if (mthca_map_reg(dev, (pci_resource_len(dev->pdev, 0) - 1) &
713 				  dev->fw.arbel.eq_set_ci_base,
714 				  MTHCA_EQ_SET_CI_SIZE,
715 				  &dev->eq_regs.arbel.eq_set_ci_base)) {
716 			mthca_err(dev, "Couldn't map EQ CI register, aborting.\n");
717 			mthca_unmap_reg(dev, ((pci_resource_len(dev->pdev, 0) - 1) &
718 					      dev->fw.arbel.eq_arm_base) + 4, 4,
719 					dev->eq_regs.arbel.eq_arm);
720 			mthca_unmap_reg(dev, (pci_resource_len(dev->pdev, 0) - 1) &
721 					dev->fw.arbel.clr_int_base, MTHCA_CLR_INT_SIZE,
722 					dev->clr_base);
723 			return -ENOMEM;
724 		}
725 	} else {
726 		if (mthca_map_reg(dev, MTHCA_CLR_INT_BASE, MTHCA_CLR_INT_SIZE,
727 				  &dev->clr_base)) {
728 			mthca_err(dev, "Couldn't map interrupt clear register, "
729 				  "aborting.\n");
730 			return -ENOMEM;
731 		}
732 
733 		if (mthca_map_reg(dev, MTHCA_ECR_BASE,
734 				  MTHCA_ECR_SIZE + MTHCA_ECR_CLR_SIZE,
735 				  &dev->eq_regs.tavor.ecr_base)) {
736 			mthca_err(dev, "Couldn't map ecr register, "
737 				  "aborting.\n");
738 			mthca_unmap_reg(dev, MTHCA_CLR_INT_BASE, MTHCA_CLR_INT_SIZE,
739 					dev->clr_base);
740 			return -ENOMEM;
741 		}
742 	}
743 
744 	return 0;
745 
746 }
747 
748 static void __devexit mthca_unmap_eq_regs(struct mthca_dev *dev)
749 {
750 	if (mthca_is_memfree(dev)) {
751 		mthca_unmap_reg(dev, (pci_resource_len(dev->pdev, 0) - 1) &
752 				dev->fw.arbel.eq_set_ci_base,
753 				MTHCA_EQ_SET_CI_SIZE,
754 				dev->eq_regs.arbel.eq_set_ci_base);
755 		mthca_unmap_reg(dev, ((pci_resource_len(dev->pdev, 0) - 1) &
756 				      dev->fw.arbel.eq_arm_base) + 4, 4,
757 				dev->eq_regs.arbel.eq_arm);
758 		mthca_unmap_reg(dev, (pci_resource_len(dev->pdev, 0) - 1) &
759 				dev->fw.arbel.clr_int_base, MTHCA_CLR_INT_SIZE,
760 				dev->clr_base);
761 	} else {
762 		mthca_unmap_reg(dev, MTHCA_ECR_BASE,
763 				MTHCA_ECR_SIZE + MTHCA_ECR_CLR_SIZE,
764 				dev->eq_regs.tavor.ecr_base);
765 		mthca_unmap_reg(dev, MTHCA_CLR_INT_BASE, MTHCA_CLR_INT_SIZE,
766 				dev->clr_base);
767 	}
768 }
769 
770 int __devinit mthca_map_eq_icm(struct mthca_dev *dev, u64 icm_virt)
771 {
772 	int ret;
773 	u8 status;
774 
775 	/*
776 	 * We assume that mapping one page is enough for the whole EQ
777 	 * context table.  This is fine with all current HCAs, because
778 	 * we only use 32 EQs and each EQ uses 32 bytes of context
779 	 * memory, or 1 KB total.
780 	 */
781 	dev->eq_table.icm_virt = icm_virt;
782 	dev->eq_table.icm_page = alloc_page(GFP_HIGHUSER);
783 	if (!dev->eq_table.icm_page)
784 		return -ENOMEM;
785 	dev->eq_table.icm_dma  = pci_map_page(dev->pdev, dev->eq_table.icm_page, 0,
786 					      PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
787 	if (pci_dma_mapping_error(dev->eq_table.icm_dma)) {
788 		__free_page(dev->eq_table.icm_page);
789 		return -ENOMEM;
790 	}
791 
792 	ret = mthca_MAP_ICM_page(dev, dev->eq_table.icm_dma, icm_virt, &status);
793 	if (!ret && status)
794 		ret = -EINVAL;
795 	if (ret) {
796 		pci_unmap_page(dev->pdev, dev->eq_table.icm_dma, PAGE_SIZE,
797 			       PCI_DMA_BIDIRECTIONAL);
798 		__free_page(dev->eq_table.icm_page);
799 	}
800 
801 	return ret;
802 }
803 
804 void __devexit mthca_unmap_eq_icm(struct mthca_dev *dev)
805 {
806 	u8 status;
807 
808 	mthca_UNMAP_ICM(dev, dev->eq_table.icm_virt, PAGE_SIZE / 4096, &status);
809 	pci_unmap_page(dev->pdev, dev->eq_table.icm_dma, PAGE_SIZE,
810 		       PCI_DMA_BIDIRECTIONAL);
811 	__free_page(dev->eq_table.icm_page);
812 }
813 
814 int __devinit mthca_init_eq_table(struct mthca_dev *dev)
815 {
816 	int err;
817 	u8 status;
818 	u8 intr;
819 	int i;
820 
821 	err = mthca_alloc_init(&dev->eq_table.alloc,
822 			       dev->limits.num_eqs,
823 			       dev->limits.num_eqs - 1,
824 			       dev->limits.reserved_eqs);
825 	if (err)
826 		return err;
827 
828 	err = mthca_map_eq_regs(dev);
829 	if (err)
830 		goto err_out_free;
831 
832 	if (dev->mthca_flags & MTHCA_FLAG_MSI ||
833 	    dev->mthca_flags & MTHCA_FLAG_MSI_X) {
834 		dev->eq_table.clr_mask = 0;
835 	} else {
836 		dev->eq_table.clr_mask =
837 			swab32(1 << (dev->eq_table.inta_pin & 31));
838 		dev->eq_table.clr_int  = dev->clr_base +
839 			(dev->eq_table.inta_pin < 32 ? 4 : 0);
840 	}
841 
842 	dev->eq_table.arm_mask = 0;
843 
844 	intr = (dev->mthca_flags & MTHCA_FLAG_MSI) ?
845 		128 : dev->eq_table.inta_pin;
846 
847 	err = mthca_create_eq(dev, dev->limits.num_cqs,
848 			      (dev->mthca_flags & MTHCA_FLAG_MSI_X) ? 128 : intr,
849 			      &dev->eq_table.eq[MTHCA_EQ_COMP]);
850 	if (err)
851 		goto err_out_unmap;
852 
853 	err = mthca_create_eq(dev, MTHCA_NUM_ASYNC_EQE,
854 			      (dev->mthca_flags & MTHCA_FLAG_MSI_X) ? 129 : intr,
855 			      &dev->eq_table.eq[MTHCA_EQ_ASYNC]);
856 	if (err)
857 		goto err_out_comp;
858 
859 	err = mthca_create_eq(dev, MTHCA_NUM_CMD_EQE,
860 			      (dev->mthca_flags & MTHCA_FLAG_MSI_X) ? 130 : intr,
861 			      &dev->eq_table.eq[MTHCA_EQ_CMD]);
862 	if (err)
863 		goto err_out_async;
864 
865 	if (dev->mthca_flags & MTHCA_FLAG_MSI_X) {
866 		static const char *eq_name[] = {
867 			[MTHCA_EQ_COMP]  = DRV_NAME " (comp)",
868 			[MTHCA_EQ_ASYNC] = DRV_NAME " (async)",
869 			[MTHCA_EQ_CMD]   = DRV_NAME " (cmd)"
870 		};
871 
872 		for (i = 0; i < MTHCA_NUM_EQ; ++i) {
873 			err = request_irq(dev->eq_table.eq[i].msi_x_vector,
874 					  mthca_is_memfree(dev) ?
875 					  mthca_arbel_msi_x_interrupt :
876 					  mthca_tavor_msi_x_interrupt,
877 					  0, eq_name[i], dev->eq_table.eq + i);
878 			if (err)
879 				goto err_out_cmd;
880 			dev->eq_table.eq[i].have_irq = 1;
881 		}
882 	} else {
883 		err = request_irq(dev->pdev->irq,
884 				  mthca_is_memfree(dev) ?
885 				  mthca_arbel_interrupt :
886 				  mthca_tavor_interrupt,
887 				  SA_SHIRQ, DRV_NAME, dev);
888 		if (err)
889 			goto err_out_cmd;
890 		dev->eq_table.have_irq = 1;
891 	}
892 
893 	err = mthca_MAP_EQ(dev, async_mask(dev),
894 			   0, dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn, &status);
895 	if (err)
896 		mthca_warn(dev, "MAP_EQ for async EQ %d failed (%d)\n",
897 			   dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn, err);
898 	if (status)
899 		mthca_warn(dev, "MAP_EQ for async EQ %d returned status 0x%02x\n",
900 			   dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn, status);
901 
902 	err = mthca_MAP_EQ(dev, MTHCA_CMD_EVENT_MASK,
903 			   0, dev->eq_table.eq[MTHCA_EQ_CMD].eqn, &status);
904 	if (err)
905 		mthca_warn(dev, "MAP_EQ for cmd EQ %d failed (%d)\n",
906 			   dev->eq_table.eq[MTHCA_EQ_CMD].eqn, err);
907 	if (status)
908 		mthca_warn(dev, "MAP_EQ for cmd EQ %d returned status 0x%02x\n",
909 			   dev->eq_table.eq[MTHCA_EQ_CMD].eqn, status);
910 
911 	for (i = 0; i < MTHCA_EQ_CMD; ++i)
912 		if (mthca_is_memfree(dev))
913 			arbel_eq_req_not(dev, dev->eq_table.eq[i].eqn_mask);
914 		else
915 			tavor_eq_req_not(dev, dev->eq_table.eq[i].eqn);
916 
917 	return 0;
918 
919 err_out_cmd:
920 	mthca_free_irqs(dev);
921 	mthca_free_eq(dev, &dev->eq_table.eq[MTHCA_EQ_CMD]);
922 
923 err_out_async:
924 	mthca_free_eq(dev, &dev->eq_table.eq[MTHCA_EQ_ASYNC]);
925 
926 err_out_comp:
927 	mthca_free_eq(dev, &dev->eq_table.eq[MTHCA_EQ_COMP]);
928 
929 err_out_unmap:
930 	mthca_unmap_eq_regs(dev);
931 
932 err_out_free:
933 	mthca_alloc_cleanup(&dev->eq_table.alloc);
934 	return err;
935 }
936 
937 void __devexit mthca_cleanup_eq_table(struct mthca_dev *dev)
938 {
939 	u8 status;
940 	int i;
941 
942 	mthca_free_irqs(dev);
943 
944 	mthca_MAP_EQ(dev, async_mask(dev),
945 		     1, dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn, &status);
946 	mthca_MAP_EQ(dev, MTHCA_CMD_EVENT_MASK,
947 		     1, dev->eq_table.eq[MTHCA_EQ_CMD].eqn, &status);
948 
949 	for (i = 0; i < MTHCA_NUM_EQ; ++i)
950 		mthca_free_eq(dev, &dev->eq_table.eq[i]);
951 
952 	mthca_unmap_eq_regs(dev);
953 
954 	mthca_alloc_cleanup(&dev->eq_table.alloc);
955 }
956