xref: /linux/drivers/infiniband/hw/efa/efa_com.c (revision 002dff36acfba3476b685a09f78ffb7c452f5951)
1 // SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
2 /*
3  * Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All rights reserved.
4  */
5 
6 #include "efa_com.h"
7 #include "efa_regs_defs.h"
8 
/*
 * Fallback admin command completion timeout, used when the device
 * capabilities register reports a zero admin command timeout.
 */
#define ADMIN_CMD_TIMEOUT_US 30000000 /* usecs */

/* How long a readless register read waits for the device response */
#define EFA_REG_READ_TIMEOUT_US 50000 /* usecs */
/* Value returned by efa_com_reg_read32() when the read failed */
#define EFA_MMIO_READ_INVALID 0xffffffff

/* Sleep between polls of the admin CQ and of the reset status */
#define EFA_POLL_INTERVAL_MS 100 /* msecs */

/* Number of entries in the async event queue and in the admin queues */
#define EFA_ASYNC_QUEUE_DEPTH 16
#define EFA_ADMIN_QUEUE_DEPTH 32

/* Minimal controller version accepted by efa_com_validate_version() */
#define EFA_CTRL_MAJOR          0
#define EFA_CTRL_MINOR          0
#define EFA_CTRL_SUB_MINOR      1

/* Split a DMA address into its lower and upper 32 bits */
#define EFA_DMA_ADDR_TO_UINT32_LOW(x)   ((u32)((u64)(x)))
#define EFA_DMA_ADDR_TO_UINT32_HIGH(x)  ((u32)(((u64)(x)) >> 32))
25 
/* Progress of an admin command as tracked in its completion context */
enum efa_cmd_status {
	/* Set when the command is placed on the submission queue */
	EFA_CMD_SUBMITTED,
	/* Set when a matching completion entry has been processed */
	EFA_CMD_COMPLETED,
};
30 
/* Per-command bookkeeping for an admin command that is in flight */
struct efa_comp_ctx {
	/* Signalled on completion when the queue is not in polling mode */
	struct completion wait_event;
	/* Caller's buffer that receives a copy of the completion entry */
	struct efa_admin_acq_entry *user_cqe;
	/* Number of bytes copied into user_cqe */
	u32 comp_size;
	enum efa_cmd_status status;
	/* status from the device */
	u8 comp_status;
	/* Opcode of the submitted command, kept for log messages */
	u8 cmd_opcode;
	/* Non-zero while the context is captured by a submitted command */
	u8 occupied;
};
41 
42 static const char *efa_com_cmd_str(u8 cmd)
43 {
44 #define EFA_CMD_STR_CASE(_cmd) case EFA_ADMIN_##_cmd: return #_cmd
45 
46 	switch (cmd) {
47 	EFA_CMD_STR_CASE(CREATE_QP);
48 	EFA_CMD_STR_CASE(MODIFY_QP);
49 	EFA_CMD_STR_CASE(QUERY_QP);
50 	EFA_CMD_STR_CASE(DESTROY_QP);
51 	EFA_CMD_STR_CASE(CREATE_AH);
52 	EFA_CMD_STR_CASE(DESTROY_AH);
53 	EFA_CMD_STR_CASE(REG_MR);
54 	EFA_CMD_STR_CASE(DEREG_MR);
55 	EFA_CMD_STR_CASE(CREATE_CQ);
56 	EFA_CMD_STR_CASE(DESTROY_CQ);
57 	EFA_CMD_STR_CASE(GET_FEATURE);
58 	EFA_CMD_STR_CASE(SET_FEATURE);
59 	EFA_CMD_STR_CASE(GET_STATS);
60 	EFA_CMD_STR_CASE(ALLOC_PD);
61 	EFA_CMD_STR_CASE(DEALLOC_PD);
62 	EFA_CMD_STR_CASE(ALLOC_UAR);
63 	EFA_CMD_STR_CASE(DEALLOC_UAR);
64 	default: return "unknown command opcode";
65 	}
66 #undef EFA_CMD_STR_CASE
67 }
68 
69 static u32 efa_com_reg_read32(struct efa_com_dev *edev, u16 offset)
70 {
71 	struct efa_com_mmio_read *mmio_read = &edev->mmio_read;
72 	struct efa_admin_mmio_req_read_less_resp *read_resp;
73 	unsigned long exp_time;
74 	u32 mmio_read_reg = 0;
75 	u32 err;
76 
77 	read_resp = mmio_read->read_resp;
78 
79 	spin_lock(&mmio_read->lock);
80 	mmio_read->seq_num++;
81 
82 	/* trash DMA req_id to identify when hardware is done */
83 	read_resp->req_id = mmio_read->seq_num + 0x9aL;
84 	EFA_SET(&mmio_read_reg, EFA_REGS_MMIO_REG_READ_REG_OFF, offset);
85 	EFA_SET(&mmio_read_reg, EFA_REGS_MMIO_REG_READ_REQ_ID,
86 		mmio_read->seq_num);
87 
88 	writel(mmio_read_reg, edev->reg_bar + EFA_REGS_MMIO_REG_READ_OFF);
89 
90 	exp_time = jiffies + usecs_to_jiffies(mmio_read->mmio_read_timeout);
91 	do {
92 		if (READ_ONCE(read_resp->req_id) == mmio_read->seq_num)
93 			break;
94 		udelay(1);
95 	} while (time_is_after_jiffies(exp_time));
96 
97 	if (read_resp->req_id != mmio_read->seq_num) {
98 		ibdev_err_ratelimited(
99 			edev->efa_dev,
100 			"Reading register timed out. expected: req id[%u] offset[%#x] actual: req id[%u] offset[%#x]\n",
101 			mmio_read->seq_num, offset, read_resp->req_id,
102 			read_resp->reg_off);
103 		err = EFA_MMIO_READ_INVALID;
104 		goto out;
105 	}
106 
107 	if (read_resp->reg_off != offset) {
108 		ibdev_err_ratelimited(
109 			edev->efa_dev,
110 			"Reading register failed: wrong offset provided\n");
111 		err = EFA_MMIO_READ_INVALID;
112 		goto out;
113 	}
114 
115 	err = read_resp->reg_val;
116 out:
117 	spin_unlock(&mmio_read->lock);
118 	return err;
119 }
120 
121 static int efa_com_admin_init_sq(struct efa_com_dev *edev)
122 {
123 	struct efa_com_admin_queue *aq = &edev->aq;
124 	struct efa_com_admin_sq *sq = &aq->sq;
125 	u16 size = aq->depth * sizeof(*sq->entries);
126 	u32 aq_caps = 0;
127 	u32 addr_high;
128 	u32 addr_low;
129 
130 	sq->entries =
131 		dma_alloc_coherent(aq->dmadev, size, &sq->dma_addr, GFP_KERNEL);
132 	if (!sq->entries)
133 		return -ENOMEM;
134 
135 	spin_lock_init(&sq->lock);
136 
137 	sq->cc = 0;
138 	sq->pc = 0;
139 	sq->phase = 1;
140 
141 	sq->db_addr = (u32 __iomem *)(edev->reg_bar + EFA_REGS_AQ_PROD_DB_OFF);
142 
143 	addr_high = EFA_DMA_ADDR_TO_UINT32_HIGH(sq->dma_addr);
144 	addr_low = EFA_DMA_ADDR_TO_UINT32_LOW(sq->dma_addr);
145 
146 	writel(addr_low, edev->reg_bar + EFA_REGS_AQ_BASE_LO_OFF);
147 	writel(addr_high, edev->reg_bar + EFA_REGS_AQ_BASE_HI_OFF);
148 
149 	EFA_SET(&aq_caps, EFA_REGS_AQ_CAPS_AQ_DEPTH, aq->depth);
150 	EFA_SET(&aq_caps, EFA_REGS_AQ_CAPS_AQ_ENTRY_SIZE,
151 		sizeof(struct efa_admin_aq_entry));
152 
153 	writel(aq_caps, edev->reg_bar + EFA_REGS_AQ_CAPS_OFF);
154 
155 	return 0;
156 }
157 
158 static int efa_com_admin_init_cq(struct efa_com_dev *edev)
159 {
160 	struct efa_com_admin_queue *aq = &edev->aq;
161 	struct efa_com_admin_cq *cq = &aq->cq;
162 	u16 size = aq->depth * sizeof(*cq->entries);
163 	u32 acq_caps = 0;
164 	u32 addr_high;
165 	u32 addr_low;
166 
167 	cq->entries =
168 		dma_alloc_coherent(aq->dmadev, size, &cq->dma_addr, GFP_KERNEL);
169 	if (!cq->entries)
170 		return -ENOMEM;
171 
172 	spin_lock_init(&cq->lock);
173 
174 	cq->cc = 0;
175 	cq->phase = 1;
176 
177 	addr_high = EFA_DMA_ADDR_TO_UINT32_HIGH(cq->dma_addr);
178 	addr_low = EFA_DMA_ADDR_TO_UINT32_LOW(cq->dma_addr);
179 
180 	writel(addr_low, edev->reg_bar + EFA_REGS_ACQ_BASE_LO_OFF);
181 	writel(addr_high, edev->reg_bar + EFA_REGS_ACQ_BASE_HI_OFF);
182 
183 	EFA_SET(&acq_caps, EFA_REGS_ACQ_CAPS_ACQ_DEPTH, aq->depth);
184 	EFA_SET(&acq_caps, EFA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE,
185 		sizeof(struct efa_admin_acq_entry));
186 	EFA_SET(&acq_caps, EFA_REGS_ACQ_CAPS_ACQ_MSIX_VECTOR,
187 		aq->msix_vector_idx);
188 
189 	writel(acq_caps, edev->reg_bar + EFA_REGS_ACQ_CAPS_OFF);
190 
191 	return 0;
192 }
193 
194 static int efa_com_admin_init_aenq(struct efa_com_dev *edev,
195 				   struct efa_aenq_handlers *aenq_handlers)
196 {
197 	struct efa_com_aenq *aenq = &edev->aenq;
198 	u32 addr_low, addr_high;
199 	u32 aenq_caps = 0;
200 	u16 size;
201 
202 	if (!aenq_handlers) {
203 		ibdev_err(edev->efa_dev, "aenq handlers pointer is NULL\n");
204 		return -EINVAL;
205 	}
206 
207 	size = EFA_ASYNC_QUEUE_DEPTH * sizeof(*aenq->entries);
208 	aenq->entries = dma_alloc_coherent(edev->dmadev, size, &aenq->dma_addr,
209 					   GFP_KERNEL);
210 	if (!aenq->entries)
211 		return -ENOMEM;
212 
213 	aenq->aenq_handlers = aenq_handlers;
214 	aenq->depth = EFA_ASYNC_QUEUE_DEPTH;
215 	aenq->cc = 0;
216 	aenq->phase = 1;
217 
218 	addr_low = EFA_DMA_ADDR_TO_UINT32_LOW(aenq->dma_addr);
219 	addr_high = EFA_DMA_ADDR_TO_UINT32_HIGH(aenq->dma_addr);
220 
221 	writel(addr_low, edev->reg_bar + EFA_REGS_AENQ_BASE_LO_OFF);
222 	writel(addr_high, edev->reg_bar + EFA_REGS_AENQ_BASE_HI_OFF);
223 
224 	EFA_SET(&aenq_caps, EFA_REGS_AENQ_CAPS_AENQ_DEPTH, aenq->depth);
225 	EFA_SET(&aenq_caps, EFA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE,
226 		sizeof(struct efa_admin_aenq_entry));
227 	EFA_SET(&aenq_caps, EFA_REGS_AENQ_CAPS_AENQ_MSIX_VECTOR,
228 		aenq->msix_vector_idx);
229 	writel(aenq_caps, edev->reg_bar + EFA_REGS_AENQ_CAPS_OFF);
230 
231 	/*
232 	 * Init cons_db to mark that all entries in the queue
233 	 * are initially available
234 	 */
235 	writel(edev->aenq.cc, edev->reg_bar + EFA_REGS_AENQ_CONS_DB_OFF);
236 
237 	return 0;
238 }
239 
240 /* ID to be used with efa_com_get_comp_ctx */
241 static u16 efa_com_alloc_ctx_id(struct efa_com_admin_queue *aq)
242 {
243 	u16 ctx_id;
244 
245 	spin_lock(&aq->comp_ctx_lock);
246 	ctx_id = aq->comp_ctx_pool[aq->comp_ctx_pool_next];
247 	aq->comp_ctx_pool_next++;
248 	spin_unlock(&aq->comp_ctx_lock);
249 
250 	return ctx_id;
251 }
252 
253 static void efa_com_dealloc_ctx_id(struct efa_com_admin_queue *aq,
254 				   u16 ctx_id)
255 {
256 	spin_lock(&aq->comp_ctx_lock);
257 	aq->comp_ctx_pool_next--;
258 	aq->comp_ctx_pool[aq->comp_ctx_pool_next] = ctx_id;
259 	spin_unlock(&aq->comp_ctx_lock);
260 }
261 
262 static inline void efa_com_put_comp_ctx(struct efa_com_admin_queue *aq,
263 					struct efa_comp_ctx *comp_ctx)
264 {
265 	u16 cmd_id = EFA_GET(&comp_ctx->user_cqe->acq_common_descriptor.command,
266 			     EFA_ADMIN_ACQ_COMMON_DESC_COMMAND_ID);
267 	u16 ctx_id = cmd_id & (aq->depth - 1);
268 
269 	ibdev_dbg(aq->efa_dev, "Put completion command_id %#x\n", cmd_id);
270 	comp_ctx->occupied = 0;
271 	efa_com_dealloc_ctx_id(aq, ctx_id);
272 }
273 
274 static struct efa_comp_ctx *efa_com_get_comp_ctx(struct efa_com_admin_queue *aq,
275 						 u16 cmd_id, bool capture)
276 {
277 	u16 ctx_id = cmd_id & (aq->depth - 1);
278 
279 	if (aq->comp_ctx[ctx_id].occupied && capture) {
280 		ibdev_err_ratelimited(
281 			aq->efa_dev,
282 			"Completion context for command_id %#x is occupied\n",
283 			cmd_id);
284 		return NULL;
285 	}
286 
287 	if (capture) {
288 		aq->comp_ctx[ctx_id].occupied = 1;
289 		ibdev_dbg(aq->efa_dev,
290 			  "Take completion ctxt for command_id %#x\n", cmd_id);
291 	}
292 
293 	return &aq->comp_ctx[ctx_id];
294 }
295 
296 static struct efa_comp_ctx *__efa_com_submit_admin_cmd(struct efa_com_admin_queue *aq,
297 						       struct efa_admin_aq_entry *cmd,
298 						       size_t cmd_size_in_bytes,
299 						       struct efa_admin_acq_entry *comp,
300 						       size_t comp_size_in_bytes)
301 {
302 	struct efa_admin_aq_entry *aqe;
303 	struct efa_comp_ctx *comp_ctx;
304 	u16 queue_size_mask;
305 	u16 cmd_id;
306 	u16 ctx_id;
307 	u16 pi;
308 
309 	queue_size_mask = aq->depth - 1;
310 	pi = aq->sq.pc & queue_size_mask;
311 
312 	ctx_id = efa_com_alloc_ctx_id(aq);
313 
314 	/* cmd_id LSBs are the ctx_id and MSBs are entropy bits from pc */
315 	cmd_id = ctx_id & queue_size_mask;
316 	cmd_id |= aq->sq.pc & ~queue_size_mask;
317 	cmd_id &= EFA_ADMIN_AQ_COMMON_DESC_COMMAND_ID_MASK;
318 
319 	cmd->aq_common_descriptor.command_id = cmd_id;
320 	EFA_SET(&cmd->aq_common_descriptor.flags,
321 		EFA_ADMIN_AQ_COMMON_DESC_PHASE, aq->sq.phase);
322 
323 	comp_ctx = efa_com_get_comp_ctx(aq, cmd_id, true);
324 	if (!comp_ctx) {
325 		efa_com_dealloc_ctx_id(aq, ctx_id);
326 		return ERR_PTR(-EINVAL);
327 	}
328 
329 	comp_ctx->status = EFA_CMD_SUBMITTED;
330 	comp_ctx->comp_size = comp_size_in_bytes;
331 	comp_ctx->user_cqe = comp;
332 	comp_ctx->cmd_opcode = cmd->aq_common_descriptor.opcode;
333 
334 	reinit_completion(&comp_ctx->wait_event);
335 
336 	aqe = &aq->sq.entries[pi];
337 	memset(aqe, 0, sizeof(*aqe));
338 	memcpy(aqe, cmd, cmd_size_in_bytes);
339 
340 	aq->sq.pc++;
341 	atomic64_inc(&aq->stats.submitted_cmd);
342 
343 	if ((aq->sq.pc & queue_size_mask) == 0)
344 		aq->sq.phase = !aq->sq.phase;
345 
346 	/* barrier not needed in case of writel */
347 	writel(aq->sq.pc, aq->sq.db_addr);
348 
349 	return comp_ctx;
350 }
351 
352 static inline int efa_com_init_comp_ctxt(struct efa_com_admin_queue *aq)
353 {
354 	size_t pool_size = aq->depth * sizeof(*aq->comp_ctx_pool);
355 	size_t size = aq->depth * sizeof(struct efa_comp_ctx);
356 	struct efa_comp_ctx *comp_ctx;
357 	u16 i;
358 
359 	aq->comp_ctx = devm_kzalloc(aq->dmadev, size, GFP_KERNEL);
360 	aq->comp_ctx_pool = devm_kzalloc(aq->dmadev, pool_size, GFP_KERNEL);
361 	if (!aq->comp_ctx || !aq->comp_ctx_pool) {
362 		devm_kfree(aq->dmadev, aq->comp_ctx_pool);
363 		devm_kfree(aq->dmadev, aq->comp_ctx);
364 		return -ENOMEM;
365 	}
366 
367 	for (i = 0; i < aq->depth; i++) {
368 		comp_ctx = efa_com_get_comp_ctx(aq, i, false);
369 		if (comp_ctx)
370 			init_completion(&comp_ctx->wait_event);
371 
372 		aq->comp_ctx_pool[i] = i;
373 	}
374 
375 	spin_lock_init(&aq->comp_ctx_lock);
376 
377 	aq->comp_ctx_pool_next = 0;
378 
379 	return 0;
380 }
381 
382 static struct efa_comp_ctx *efa_com_submit_admin_cmd(struct efa_com_admin_queue *aq,
383 						     struct efa_admin_aq_entry *cmd,
384 						     size_t cmd_size_in_bytes,
385 						     struct efa_admin_acq_entry *comp,
386 						     size_t comp_size_in_bytes)
387 {
388 	struct efa_comp_ctx *comp_ctx;
389 
390 	spin_lock(&aq->sq.lock);
391 	if (!test_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state)) {
392 		ibdev_err_ratelimited(aq->efa_dev, "Admin queue is closed\n");
393 		spin_unlock(&aq->sq.lock);
394 		return ERR_PTR(-ENODEV);
395 	}
396 
397 	comp_ctx = __efa_com_submit_admin_cmd(aq, cmd, cmd_size_in_bytes, comp,
398 					      comp_size_in_bytes);
399 	spin_unlock(&aq->sq.lock);
400 	if (IS_ERR(comp_ctx))
401 		clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state);
402 
403 	return comp_ctx;
404 }
405 
406 static void efa_com_handle_single_admin_completion(struct efa_com_admin_queue *aq,
407 						   struct efa_admin_acq_entry *cqe)
408 {
409 	struct efa_comp_ctx *comp_ctx;
410 	u16 cmd_id;
411 
412 	cmd_id = EFA_GET(&cqe->acq_common_descriptor.command,
413 			 EFA_ADMIN_ACQ_COMMON_DESC_COMMAND_ID);
414 
415 	comp_ctx = efa_com_get_comp_ctx(aq, cmd_id, false);
416 	if (!comp_ctx) {
417 		ibdev_err(aq->efa_dev,
418 			  "comp_ctx is NULL. Changing the admin queue running state\n");
419 		clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state);
420 		return;
421 	}
422 
423 	comp_ctx->status = EFA_CMD_COMPLETED;
424 	comp_ctx->comp_status = cqe->acq_common_descriptor.status;
425 	if (comp_ctx->user_cqe)
426 		memcpy(comp_ctx->user_cqe, cqe, comp_ctx->comp_size);
427 
428 	if (!test_bit(EFA_AQ_STATE_POLLING_BIT, &aq->state))
429 		complete(&comp_ctx->wait_event);
430 }
431 
432 static void efa_com_handle_admin_completion(struct efa_com_admin_queue *aq)
433 {
434 	struct efa_admin_acq_entry *cqe;
435 	u16 queue_size_mask;
436 	u16 comp_num = 0;
437 	u8 phase;
438 	u16 ci;
439 
440 	queue_size_mask = aq->depth - 1;
441 
442 	ci = aq->cq.cc & queue_size_mask;
443 	phase = aq->cq.phase;
444 
445 	cqe = &aq->cq.entries[ci];
446 
447 	/* Go over all the completions */
448 	while ((READ_ONCE(cqe->acq_common_descriptor.flags) &
449 		EFA_ADMIN_ACQ_COMMON_DESC_PHASE_MASK) == phase) {
450 		/*
451 		 * Do not read the rest of the completion entry before the
452 		 * phase bit was validated
453 		 */
454 		dma_rmb();
455 		efa_com_handle_single_admin_completion(aq, cqe);
456 
457 		ci++;
458 		comp_num++;
459 		if (ci == aq->depth) {
460 			ci = 0;
461 			phase = !phase;
462 		}
463 
464 		cqe = &aq->cq.entries[ci];
465 	}
466 
467 	aq->cq.cc += comp_num;
468 	aq->cq.phase = phase;
469 	aq->sq.cc += comp_num;
470 	atomic64_add(comp_num, &aq->stats.completed_cmd);
471 }
472 
473 static int efa_com_comp_status_to_errno(u8 comp_status)
474 {
475 	switch (comp_status) {
476 	case EFA_ADMIN_SUCCESS:
477 		return 0;
478 	case EFA_ADMIN_RESOURCE_ALLOCATION_FAILURE:
479 		return -ENOMEM;
480 	case EFA_ADMIN_UNSUPPORTED_OPCODE:
481 		return -EOPNOTSUPP;
482 	case EFA_ADMIN_BAD_OPCODE:
483 	case EFA_ADMIN_MALFORMED_REQUEST:
484 	case EFA_ADMIN_ILLEGAL_PARAMETER:
485 	case EFA_ADMIN_UNKNOWN_ERROR:
486 		return -EINVAL;
487 	default:
488 		return -EINVAL;
489 	}
490 }
491 
492 static int efa_com_wait_and_process_admin_cq_polling(struct efa_comp_ctx *comp_ctx,
493 						     struct efa_com_admin_queue *aq)
494 {
495 	unsigned long timeout;
496 	unsigned long flags;
497 	int err;
498 
499 	timeout = jiffies + usecs_to_jiffies(aq->completion_timeout);
500 
501 	while (1) {
502 		spin_lock_irqsave(&aq->cq.lock, flags);
503 		efa_com_handle_admin_completion(aq);
504 		spin_unlock_irqrestore(&aq->cq.lock, flags);
505 
506 		if (comp_ctx->status != EFA_CMD_SUBMITTED)
507 			break;
508 
509 		if (time_is_before_jiffies(timeout)) {
510 			ibdev_err_ratelimited(
511 				aq->efa_dev,
512 				"Wait for completion (polling) timeout\n");
513 			/* EFA didn't have any completion */
514 			atomic64_inc(&aq->stats.no_completion);
515 
516 			clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state);
517 			err = -ETIME;
518 			goto out;
519 		}
520 
521 		msleep(aq->poll_interval);
522 	}
523 
524 	err = efa_com_comp_status_to_errno(comp_ctx->comp_status);
525 out:
526 	efa_com_put_comp_ctx(aq, comp_ctx);
527 	return err;
528 }
529 
530 static int efa_com_wait_and_process_admin_cq_interrupts(struct efa_comp_ctx *comp_ctx,
531 							struct efa_com_admin_queue *aq)
532 {
533 	unsigned long flags;
534 	int err;
535 
536 	wait_for_completion_timeout(&comp_ctx->wait_event,
537 				    usecs_to_jiffies(aq->completion_timeout));
538 
539 	/*
540 	 * In case the command wasn't completed find out the root cause.
541 	 * There might be 2 kinds of errors
542 	 * 1) No completion (timeout reached)
543 	 * 2) There is completion but the device didn't get any msi-x interrupt.
544 	 */
545 	if (comp_ctx->status == EFA_CMD_SUBMITTED) {
546 		spin_lock_irqsave(&aq->cq.lock, flags);
547 		efa_com_handle_admin_completion(aq);
548 		spin_unlock_irqrestore(&aq->cq.lock, flags);
549 
550 		atomic64_inc(&aq->stats.no_completion);
551 
552 		if (comp_ctx->status == EFA_CMD_COMPLETED)
553 			ibdev_err_ratelimited(
554 				aq->efa_dev,
555 				"The device sent a completion but the driver didn't receive any MSI-X interrupt for admin cmd %s(%d) status %d (ctx: 0x%p, sq producer: %d, sq consumer: %d, cq consumer: %d)\n",
556 				efa_com_cmd_str(comp_ctx->cmd_opcode),
557 				comp_ctx->cmd_opcode, comp_ctx->status,
558 				comp_ctx, aq->sq.pc, aq->sq.cc, aq->cq.cc);
559 		else
560 			ibdev_err_ratelimited(
561 				aq->efa_dev,
562 				"The device didn't send any completion for admin cmd %s(%d) status %d (ctx 0x%p, sq producer: %d, sq consumer: %d, cq consumer: %d)\n",
563 				efa_com_cmd_str(comp_ctx->cmd_opcode),
564 				comp_ctx->cmd_opcode, comp_ctx->status,
565 				comp_ctx, aq->sq.pc, aq->sq.cc, aq->cq.cc);
566 
567 		clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state);
568 		err = -ETIME;
569 		goto out;
570 	}
571 
572 	err = efa_com_comp_status_to_errno(comp_ctx->comp_status);
573 out:
574 	efa_com_put_comp_ctx(aq, comp_ctx);
575 	return err;
576 }
577 
578 /*
579  * There are two types to wait for completion.
580  * Polling mode - wait until the completion is available.
581  * Async mode - wait on wait queue until the completion is ready
582  * (or the timeout expired).
583  * It is expected that the IRQ called efa_com_handle_admin_completion
584  * to mark the completions.
585  */
586 static int efa_com_wait_and_process_admin_cq(struct efa_comp_ctx *comp_ctx,
587 					     struct efa_com_admin_queue *aq)
588 {
589 	if (test_bit(EFA_AQ_STATE_POLLING_BIT, &aq->state))
590 		return efa_com_wait_and_process_admin_cq_polling(comp_ctx, aq);
591 
592 	return efa_com_wait_and_process_admin_cq_interrupts(comp_ctx, aq);
593 }
594 
595 /**
596  * efa_com_cmd_exec - Execute admin command
597  * @aq: admin queue.
598  * @cmd: the admin command to execute.
599  * @cmd_size: the command size.
600  * @comp: command completion return entry.
601  * @comp_size: command completion size.
602  * Submit an admin command and then wait until the device will return a
603  * completion.
604  * The completion will be copied into comp.
605  *
606  * @return - 0 on success, negative value on failure.
607  */
608 int efa_com_cmd_exec(struct efa_com_admin_queue *aq,
609 		     struct efa_admin_aq_entry *cmd,
610 		     size_t cmd_size,
611 		     struct efa_admin_acq_entry *comp,
612 		     size_t comp_size)
613 {
614 	struct efa_comp_ctx *comp_ctx;
615 	int err;
616 
617 	might_sleep();
618 
619 	/* In case of queue FULL */
620 	down(&aq->avail_cmds);
621 
622 	ibdev_dbg(aq->efa_dev, "%s (opcode %d)\n",
623 		  efa_com_cmd_str(cmd->aq_common_descriptor.opcode),
624 		  cmd->aq_common_descriptor.opcode);
625 	comp_ctx = efa_com_submit_admin_cmd(aq, cmd, cmd_size, comp, comp_size);
626 	if (IS_ERR(comp_ctx)) {
627 		ibdev_err_ratelimited(
628 			aq->efa_dev,
629 			"Failed to submit command %s (opcode %u) err %ld\n",
630 			efa_com_cmd_str(cmd->aq_common_descriptor.opcode),
631 			cmd->aq_common_descriptor.opcode, PTR_ERR(comp_ctx));
632 
633 		up(&aq->avail_cmds);
634 		atomic64_inc(&aq->stats.cmd_err);
635 		return PTR_ERR(comp_ctx);
636 	}
637 
638 	err = efa_com_wait_and_process_admin_cq(comp_ctx, aq);
639 	if (err) {
640 		ibdev_err_ratelimited(
641 			aq->efa_dev,
642 			"Failed to process command %s (opcode %u) comp_status %d err %d\n",
643 			efa_com_cmd_str(cmd->aq_common_descriptor.opcode),
644 			cmd->aq_common_descriptor.opcode, comp_ctx->comp_status,
645 			err);
646 		atomic64_inc(&aq->stats.cmd_err);
647 	}
648 
649 	up(&aq->avail_cmds);
650 
651 	return err;
652 }
653 
654 /**
655  * efa_com_admin_destroy - Destroy the admin and the async events queues.
656  * @edev: EFA communication layer struct
657  */
658 void efa_com_admin_destroy(struct efa_com_dev *edev)
659 {
660 	struct efa_com_admin_queue *aq = &edev->aq;
661 	struct efa_com_aenq *aenq = &edev->aenq;
662 	struct efa_com_admin_cq *cq = &aq->cq;
663 	struct efa_com_admin_sq *sq = &aq->sq;
664 	u16 size;
665 
666 	clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state);
667 
668 	devm_kfree(edev->dmadev, aq->comp_ctx_pool);
669 	devm_kfree(edev->dmadev, aq->comp_ctx);
670 
671 	size = aq->depth * sizeof(*sq->entries);
672 	dma_free_coherent(edev->dmadev, size, sq->entries, sq->dma_addr);
673 
674 	size = aq->depth * sizeof(*cq->entries);
675 	dma_free_coherent(edev->dmadev, size, cq->entries, cq->dma_addr);
676 
677 	size = aenq->depth * sizeof(*aenq->entries);
678 	dma_free_coherent(edev->dmadev, size, aenq->entries, aenq->dma_addr);
679 }
680 
681 /**
682  * efa_com_set_admin_polling_mode - Set the admin completion queue polling mode
683  * @edev: EFA communication layer struct
684  * @polling: Enable/Disable polling mode
685  *
686  * Set the admin completion mode.
687  */
688 void efa_com_set_admin_polling_mode(struct efa_com_dev *edev, bool polling)
689 {
690 	u32 mask_value = 0;
691 
692 	if (polling)
693 		EFA_SET(&mask_value, EFA_REGS_INTR_MASK_EN, 1);
694 
695 	writel(mask_value, edev->reg_bar + EFA_REGS_INTR_MASK_OFF);
696 	if (polling)
697 		set_bit(EFA_AQ_STATE_POLLING_BIT, &edev->aq.state);
698 	else
699 		clear_bit(EFA_AQ_STATE_POLLING_BIT, &edev->aq.state);
700 }
701 
702 static void efa_com_stats_init(struct efa_com_dev *edev)
703 {
704 	atomic64_t *s = (atomic64_t *)&edev->aq.stats;
705 	int i;
706 
707 	for (i = 0; i < sizeof(edev->aq.stats) / sizeof(*s); i++, s++)
708 		atomic64_set(s, 0);
709 }
710 
711 /**
712  * efa_com_admin_init - Init the admin and the async queues
713  * @edev: EFA communication layer struct
714  * @aenq_handlers: Those handlers to be called upon event.
715  *
716  * Initialize the admin submission and completion queues.
717  * Initialize the asynchronous events notification queues.
718  *
719  * @return - 0 on success, negative value on failure.
720  */
721 int efa_com_admin_init(struct efa_com_dev *edev,
722 		       struct efa_aenq_handlers *aenq_handlers)
723 {
724 	struct efa_com_admin_queue *aq = &edev->aq;
725 	u32 timeout;
726 	u32 dev_sts;
727 	u32 cap;
728 	int err;
729 
730 	dev_sts = efa_com_reg_read32(edev, EFA_REGS_DEV_STS_OFF);
731 	if (!EFA_GET(&dev_sts, EFA_REGS_DEV_STS_READY)) {
732 		ibdev_err(edev->efa_dev,
733 			  "Device isn't ready, abort com init %#x\n", dev_sts);
734 		return -ENODEV;
735 	}
736 
737 	aq->depth = EFA_ADMIN_QUEUE_DEPTH;
738 
739 	aq->dmadev = edev->dmadev;
740 	aq->efa_dev = edev->efa_dev;
741 	set_bit(EFA_AQ_STATE_POLLING_BIT, &aq->state);
742 
743 	sema_init(&aq->avail_cmds, aq->depth);
744 
745 	efa_com_stats_init(edev);
746 
747 	err = efa_com_init_comp_ctxt(aq);
748 	if (err)
749 		return err;
750 
751 	err = efa_com_admin_init_sq(edev);
752 	if (err)
753 		goto err_destroy_comp_ctxt;
754 
755 	err = efa_com_admin_init_cq(edev);
756 	if (err)
757 		goto err_destroy_sq;
758 
759 	efa_com_set_admin_polling_mode(edev, false);
760 
761 	err = efa_com_admin_init_aenq(edev, aenq_handlers);
762 	if (err)
763 		goto err_destroy_cq;
764 
765 	cap = efa_com_reg_read32(edev, EFA_REGS_CAPS_OFF);
766 	timeout = EFA_GET(&cap, EFA_REGS_CAPS_ADMIN_CMD_TO);
767 	if (timeout)
768 		/* the resolution of timeout reg is 100ms */
769 		aq->completion_timeout = timeout * 100000;
770 	else
771 		aq->completion_timeout = ADMIN_CMD_TIMEOUT_US;
772 
773 	aq->poll_interval = EFA_POLL_INTERVAL_MS;
774 
775 	set_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state);
776 
777 	return 0;
778 
779 err_destroy_cq:
780 	dma_free_coherent(edev->dmadev, aq->depth * sizeof(*aq->cq.entries),
781 			  aq->cq.entries, aq->cq.dma_addr);
782 err_destroy_sq:
783 	dma_free_coherent(edev->dmadev, aq->depth * sizeof(*aq->sq.entries),
784 			  aq->sq.entries, aq->sq.dma_addr);
785 err_destroy_comp_ctxt:
786 	devm_kfree(edev->dmadev, aq->comp_ctx);
787 
788 	return err;
789 }
790 
791 /**
792  * efa_com_admin_q_comp_intr_handler - admin queue interrupt handler
793  * @edev: EFA communication layer struct
794  *
795  * This method goes over the admin completion queue and wakes up
796  * all the pending threads that wait on the commands wait event.
797  *
798  * @note: Should be called after MSI-X interrupt.
799  */
800 void efa_com_admin_q_comp_intr_handler(struct efa_com_dev *edev)
801 {
802 	unsigned long flags;
803 
804 	spin_lock_irqsave(&edev->aq.cq.lock, flags);
805 	efa_com_handle_admin_completion(&edev->aq);
806 	spin_unlock_irqrestore(&edev->aq.cq.lock, flags);
807 }
808 
809 /*
810  * efa_handle_specific_aenq_event:
811  * return the handler that is relevant to the specific event group
812  */
813 static efa_aenq_handler efa_com_get_specific_aenq_cb(struct efa_com_dev *edev,
814 						     u16 group)
815 {
816 	struct efa_aenq_handlers *aenq_handlers = edev->aenq.aenq_handlers;
817 
818 	if (group < EFA_MAX_HANDLERS && aenq_handlers->handlers[group])
819 		return aenq_handlers->handlers[group];
820 
821 	return aenq_handlers->unimplemented_handler;
822 }
823 
824 /**
825  * efa_com_aenq_intr_handler - AENQ interrupt handler
826  * @edev: EFA communication layer struct
827  * @data: Data of interrupt handler.
828  *
829  * Go over the async event notification queue and call the proper aenq handler.
830  */
831 void efa_com_aenq_intr_handler(struct efa_com_dev *edev, void *data)
832 {
833 	struct efa_admin_aenq_common_desc *aenq_common;
834 	struct efa_com_aenq *aenq = &edev->aenq;
835 	struct efa_admin_aenq_entry *aenq_e;
836 	efa_aenq_handler handler_cb;
837 	u32 processed = 0;
838 	u8 phase;
839 	u32 ci;
840 
841 	ci = aenq->cc & (aenq->depth - 1);
842 	phase = aenq->phase;
843 	aenq_e = &aenq->entries[ci]; /* Get first entry */
844 	aenq_common = &aenq_e->aenq_common_desc;
845 
846 	/* Go over all the events */
847 	while ((READ_ONCE(aenq_common->flags) &
848 		EFA_ADMIN_AENQ_COMMON_DESC_PHASE_MASK) == phase) {
849 		/*
850 		 * Do not read the rest of the completion entry before the
851 		 * phase bit was validated
852 		 */
853 		dma_rmb();
854 
855 		/* Handle specific event*/
856 		handler_cb = efa_com_get_specific_aenq_cb(edev,
857 							  aenq_common->group);
858 		handler_cb(data, aenq_e); /* call the actual event handler*/
859 
860 		/* Get next event entry */
861 		ci++;
862 		processed++;
863 
864 		if (ci == aenq->depth) {
865 			ci = 0;
866 			phase = !phase;
867 		}
868 		aenq_e = &aenq->entries[ci];
869 		aenq_common = &aenq_e->aenq_common_desc;
870 	}
871 
872 	aenq->cc += processed;
873 	aenq->phase = phase;
874 
875 	/* Don't update aenq doorbell if there weren't any processed events */
876 	if (!processed)
877 		return;
878 
879 	/* barrier not needed in case of writel */
880 	writel(aenq->cc, edev->reg_bar + EFA_REGS_AENQ_CONS_DB_OFF);
881 }
882 
883 static void efa_com_mmio_reg_read_resp_addr_init(struct efa_com_dev *edev)
884 {
885 	struct efa_com_mmio_read *mmio_read = &edev->mmio_read;
886 	u32 addr_high;
887 	u32 addr_low;
888 
889 	/* dma_addr_bits is unknown at this point */
890 	addr_high = (mmio_read->read_resp_dma_addr >> 32) & GENMASK(31, 0);
891 	addr_low = mmio_read->read_resp_dma_addr & GENMASK(31, 0);
892 
893 	writel(addr_high, edev->reg_bar + EFA_REGS_MMIO_RESP_HI_OFF);
894 	writel(addr_low, edev->reg_bar + EFA_REGS_MMIO_RESP_LO_OFF);
895 }
896 
897 int efa_com_mmio_reg_read_init(struct efa_com_dev *edev)
898 {
899 	struct efa_com_mmio_read *mmio_read = &edev->mmio_read;
900 
901 	spin_lock_init(&mmio_read->lock);
902 	mmio_read->read_resp =
903 		dma_alloc_coherent(edev->dmadev, sizeof(*mmio_read->read_resp),
904 				   &mmio_read->read_resp_dma_addr, GFP_KERNEL);
905 	if (!mmio_read->read_resp)
906 		return -ENOMEM;
907 
908 	efa_com_mmio_reg_read_resp_addr_init(edev);
909 
910 	mmio_read->read_resp->req_id = 0;
911 	mmio_read->seq_num = 0;
912 	mmio_read->mmio_read_timeout = EFA_REG_READ_TIMEOUT_US;
913 
914 	return 0;
915 }
916 
917 void efa_com_mmio_reg_read_destroy(struct efa_com_dev *edev)
918 {
919 	struct efa_com_mmio_read *mmio_read = &edev->mmio_read;
920 
921 	dma_free_coherent(edev->dmadev, sizeof(*mmio_read->read_resp),
922 			  mmio_read->read_resp, mmio_read->read_resp_dma_addr);
923 }
924 
925 int efa_com_validate_version(struct efa_com_dev *edev)
926 {
927 	u32 min_ctrl_ver = 0;
928 	u32 ctrl_ver_masked;
929 	u32 min_ver = 0;
930 	u32 ctrl_ver;
931 	u32 ver;
932 
933 	/*
934 	 * Make sure the EFA version and the controller version are at least
935 	 * as the driver expects
936 	 */
937 	ver = efa_com_reg_read32(edev, EFA_REGS_VERSION_OFF);
938 	ctrl_ver = efa_com_reg_read32(edev,
939 				      EFA_REGS_CONTROLLER_VERSION_OFF);
940 
941 	ibdev_dbg(edev->efa_dev, "efa device version: %d.%d\n",
942 		  EFA_GET(&ver, EFA_REGS_VERSION_MAJOR_VERSION),
943 		  EFA_GET(&ver, EFA_REGS_VERSION_MINOR_VERSION));
944 
945 	EFA_SET(&min_ver, EFA_REGS_VERSION_MAJOR_VERSION,
946 		EFA_ADMIN_API_VERSION_MAJOR);
947 	EFA_SET(&min_ver, EFA_REGS_VERSION_MINOR_VERSION,
948 		EFA_ADMIN_API_VERSION_MINOR);
949 	if (ver < min_ver) {
950 		ibdev_err(edev->efa_dev,
951 			  "EFA version is lower than the minimal version the driver supports\n");
952 		return -EOPNOTSUPP;
953 	}
954 
955 	ibdev_dbg(
956 		edev->efa_dev,
957 		"efa controller version: %d.%d.%d implementation version %d\n",
958 		EFA_GET(&ctrl_ver, EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION),
959 		EFA_GET(&ctrl_ver, EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION),
960 		EFA_GET(&ctrl_ver,
961 			EFA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION),
962 		EFA_GET(&ctrl_ver, EFA_REGS_CONTROLLER_VERSION_IMPL_ID));
963 
964 	ctrl_ver_masked =
965 		EFA_GET(&ctrl_ver, EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION) |
966 		EFA_GET(&ctrl_ver, EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION) |
967 		EFA_GET(&ctrl_ver,
968 			EFA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION);
969 
970 	EFA_SET(&min_ctrl_ver, EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION,
971 		EFA_CTRL_MAJOR);
972 	EFA_SET(&min_ctrl_ver, EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION,
973 		EFA_CTRL_MINOR);
974 	EFA_SET(&min_ctrl_ver, EFA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION,
975 		EFA_CTRL_SUB_MINOR);
976 	/* Validate the ctrl version without the implementation ID */
977 	if (ctrl_ver_masked < min_ctrl_ver) {
978 		ibdev_err(edev->efa_dev,
979 			  "EFA ctrl version is lower than the minimal ctrl version the driver supports\n");
980 		return -EOPNOTSUPP;
981 	}
982 
983 	return 0;
984 }
985 
986 /**
987  * efa_com_get_dma_width - Retrieve physical dma address width the device
988  * supports.
989  * @edev: EFA communication layer struct
990  *
991  * Retrieve the maximum physical address bits the device can handle.
992  *
993  * @return: > 0 on Success and negative value otherwise.
994  */
995 int efa_com_get_dma_width(struct efa_com_dev *edev)
996 {
997 	u32 caps = efa_com_reg_read32(edev, EFA_REGS_CAPS_OFF);
998 	int width;
999 
1000 	width = EFA_GET(&caps, EFA_REGS_CAPS_DMA_ADDR_WIDTH);
1001 
1002 	ibdev_dbg(edev->efa_dev, "DMA width: %d\n", width);
1003 
1004 	if (width < 32 || width > 64) {
1005 		ibdev_err(edev->efa_dev, "DMA width illegal value: %d\n", width);
1006 		return -EINVAL;
1007 	}
1008 
1009 	edev->dma_addr_bits = width;
1010 
1011 	return width;
1012 }
1013 
1014 static int wait_for_reset_state(struct efa_com_dev *edev, u32 timeout, int on)
1015 {
1016 	u32 val, i;
1017 
1018 	for (i = 0; i < timeout; i++) {
1019 		val = efa_com_reg_read32(edev, EFA_REGS_DEV_STS_OFF);
1020 
1021 		if (EFA_GET(&val, EFA_REGS_DEV_STS_RESET_IN_PROGRESS) == on)
1022 			return 0;
1023 
1024 		ibdev_dbg(edev->efa_dev, "Reset indication val %d\n", val);
1025 		msleep(EFA_POLL_INTERVAL_MS);
1026 	}
1027 
1028 	return -ETIME;
1029 }
1030 
1031 /**
1032  * efa_com_dev_reset - Perform device FLR to the device.
1033  * @edev: EFA communication layer struct
1034  * @reset_reason: Specify what is the trigger for the reset in case of an error.
1035  *
1036  * @return - 0 on success, negative value on failure.
1037  */
1038 int efa_com_dev_reset(struct efa_com_dev *edev,
1039 		      enum efa_regs_reset_reason_types reset_reason)
1040 {
1041 	u32 stat, timeout, cap;
1042 	u32 reset_val = 0;
1043 	int err;
1044 
1045 	stat = efa_com_reg_read32(edev, EFA_REGS_DEV_STS_OFF);
1046 	cap = efa_com_reg_read32(edev, EFA_REGS_CAPS_OFF);
1047 
1048 	if (!EFA_GET(&stat, EFA_REGS_DEV_STS_READY)) {
1049 		ibdev_err(edev->efa_dev,
1050 			  "Device isn't ready, can't reset device\n");
1051 		return -EINVAL;
1052 	}
1053 
1054 	timeout = EFA_GET(&cap, EFA_REGS_CAPS_RESET_TIMEOUT);
1055 	if (!timeout) {
1056 		ibdev_err(edev->efa_dev, "Invalid timeout value\n");
1057 		return -EINVAL;
1058 	}
1059 
1060 	/* start reset */
1061 	EFA_SET(&reset_val, EFA_REGS_DEV_CTL_DEV_RESET, 1);
1062 	EFA_SET(&reset_val, EFA_REGS_DEV_CTL_RESET_REASON, reset_reason);
1063 	writel(reset_val, edev->reg_bar + EFA_REGS_DEV_CTL_OFF);
1064 
1065 	/* reset clears the mmio readless address, restore it */
1066 	efa_com_mmio_reg_read_resp_addr_init(edev);
1067 
1068 	err = wait_for_reset_state(edev, timeout, 1);
1069 	if (err) {
1070 		ibdev_err(edev->efa_dev, "Reset indication didn't turn on\n");
1071 		return err;
1072 	}
1073 
1074 	/* reset done */
1075 	writel(0, edev->reg_bar + EFA_REGS_DEV_CTL_OFF);
1076 	err = wait_for_reset_state(edev, timeout, 0);
1077 	if (err) {
1078 		ibdev_err(edev->efa_dev, "Reset indication didn't turn off\n");
1079 		return err;
1080 	}
1081 
1082 	timeout = EFA_GET(&cap, EFA_REGS_CAPS_ADMIN_CMD_TO);
1083 	if (timeout)
1084 		/* the resolution of timeout reg is 100ms */
1085 		edev->aq.completion_timeout = timeout * 100000;
1086 	else
1087 		edev->aq.completion_timeout = ADMIN_CMD_TIMEOUT_US;
1088 
1089 	return 0;
1090 }
1091