xref: /linux/drivers/infiniband/hw/efa/efa_com.c (revision a1c3be890440a1769ed6f822376a3e3ab0d42994)
1 // SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
2 /*
3  * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved.
4  */
5 
6 #include "efa_com.h"
7 #include "efa_regs_defs.h"
8 
9 #define ADMIN_CMD_TIMEOUT_US 30000000 /* usecs */
10 
11 #define EFA_REG_READ_TIMEOUT_US 50000 /* usecs */
12 #define EFA_MMIO_READ_INVALID 0xffffffff
13 
14 #define EFA_POLL_INTERVAL_MS 100 /* msecs */
15 
16 #define EFA_ASYNC_QUEUE_DEPTH 16
17 #define EFA_ADMIN_QUEUE_DEPTH 32
18 
19 #define EFA_CTRL_MAJOR          0
20 #define EFA_CTRL_MINOR          0
21 #define EFA_CTRL_SUB_MINOR      1
22 
23 enum efa_cmd_status {
24 	EFA_CMD_SUBMITTED,
25 	EFA_CMD_COMPLETED,
26 };
27 
28 struct efa_comp_ctx {
29 	struct completion wait_event;
30 	struct efa_admin_acq_entry *user_cqe;
31 	u32 comp_size;
32 	enum efa_cmd_status status;
33 	u8 cmd_opcode;
34 	u8 occupied;
35 };
36 
37 static const char *efa_com_cmd_str(u8 cmd)
38 {
39 #define EFA_CMD_STR_CASE(_cmd) case EFA_ADMIN_##_cmd: return #_cmd
40 
41 	switch (cmd) {
42 	EFA_CMD_STR_CASE(CREATE_QP);
43 	EFA_CMD_STR_CASE(MODIFY_QP);
44 	EFA_CMD_STR_CASE(QUERY_QP);
45 	EFA_CMD_STR_CASE(DESTROY_QP);
46 	EFA_CMD_STR_CASE(CREATE_AH);
47 	EFA_CMD_STR_CASE(DESTROY_AH);
48 	EFA_CMD_STR_CASE(REG_MR);
49 	EFA_CMD_STR_CASE(DEREG_MR);
50 	EFA_CMD_STR_CASE(CREATE_CQ);
51 	EFA_CMD_STR_CASE(DESTROY_CQ);
52 	EFA_CMD_STR_CASE(GET_FEATURE);
53 	EFA_CMD_STR_CASE(SET_FEATURE);
54 	EFA_CMD_STR_CASE(GET_STATS);
55 	EFA_CMD_STR_CASE(ALLOC_PD);
56 	EFA_CMD_STR_CASE(DEALLOC_PD);
57 	EFA_CMD_STR_CASE(ALLOC_UAR);
58 	EFA_CMD_STR_CASE(DEALLOC_UAR);
59 	default: return "unknown command opcode";
60 	}
61 #undef EFA_CMD_STR_CASE
62 }
63 
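/*
 * Read a device register using the MMIO "readless" mechanism: a read request
 * is posted to EFA_REGS_MMIO_REG_READ_OFF and the device DMAs the register
 * value into the preallocated read_resp buffer, which is then polled until
 * req_id matches the expected sequence number.
 * Returns the register value, or EFA_MMIO_READ_INVALID on timeout/mismatch.
 */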
64 static u32 efa_com_reg_read32(struct efa_com_dev *edev, u16 offset)
65 {
66 	struct efa_com_mmio_read *mmio_read = &edev->mmio_read;
67 	struct efa_admin_mmio_req_read_less_resp *read_resp;
68 	unsigned long exp_time;
69 	u32 mmio_read_reg = 0;
70 	u32 err;
71 
72 	read_resp = mmio_read->read_resp;
73 
74 	spin_lock(&mmio_read->lock);
75 	mmio_read->seq_num++;
76 
77 	/* Poison req_id so we can detect when hardware has written the response */
78 	read_resp->req_id = mmio_read->seq_num + 0x9aL;
79 	EFA_SET(&mmio_read_reg, EFA_REGS_MMIO_REG_READ_REG_OFF, offset);
80 	EFA_SET(&mmio_read_reg, EFA_REGS_MMIO_REG_READ_REQ_ID,
81 		mmio_read->seq_num);
82 
83 	writel(mmio_read_reg, edev->reg_bar + EFA_REGS_MMIO_REG_READ_OFF);
84 
85 	exp_time = jiffies + usecs_to_jiffies(mmio_read->mmio_read_timeout);
86 	do {
87 		if (READ_ONCE(read_resp->req_id) == mmio_read->seq_num)
88 			break;
89 		udelay(1);
90 	} while (time_is_after_jiffies(exp_time));
91 
92 	if (read_resp->req_id != mmio_read->seq_num) {
93 		ibdev_err_ratelimited(
94 			edev->efa_dev,
95 			"Reading register timed out. expected: req id[%u] offset[%#x] actual: req id[%u] offset[%#x]\n",
96 			mmio_read->seq_num, offset, read_resp->req_id,
97 			read_resp->reg_off);
98 		err = EFA_MMIO_READ_INVALID;
99 		goto out;
100 	}
101 
102 	if (read_resp->reg_off != offset) {
103 		ibdev_err_ratelimited(
104 			edev->efa_dev,
105 			"Reading register failed: wrong offset provided\n");
106 		err = EFA_MMIO_READ_INVALID;
107 		goto out;
108 	}
109 
110 	err = read_resp->reg_val;
111 out:
112 	spin_unlock(&mmio_read->lock);
113 	return err;
114 }
115 
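/*
 * Allocate the admin submission queue and program its base address, depth
 * and entry size into the device AQ registers.
 */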
116 static int efa_com_admin_init_sq(struct efa_com_dev *edev)
117 {
118 	struct efa_com_admin_queue *aq = &edev->aq;
119 	struct efa_com_admin_sq *sq = &aq->sq;
120 	u16 size = aq->depth * sizeof(*sq->entries);
121 	u32 aq_caps = 0;
122 	u32 addr_high;
123 	u32 addr_low;
124 
125 	sq->entries =
126 		dma_alloc_coherent(aq->dmadev, size, &sq->dma_addr, GFP_KERNEL);
127 	if (!sq->entries)
128 		return -ENOMEM;
129 
130 	spin_lock_init(&sq->lock);
131 
132 	sq->cc = 0;
133 	sq->pc = 0;
134 	sq->phase = 1;
135 
136 	sq->db_addr = (u32 __iomem *)(edev->reg_bar + EFA_REGS_AQ_PROD_DB_OFF);
137 
138 	addr_high = upper_32_bits(sq->dma_addr);
139 	addr_low = lower_32_bits(sq->dma_addr);
140 
141 	writel(addr_low, edev->reg_bar + EFA_REGS_AQ_BASE_LO_OFF);
142 	writel(addr_high, edev->reg_bar + EFA_REGS_AQ_BASE_HI_OFF);
143 
144 	EFA_SET(&aq_caps, EFA_REGS_AQ_CAPS_AQ_DEPTH, aq->depth);
145 	EFA_SET(&aq_caps, EFA_REGS_AQ_CAPS_AQ_ENTRY_SIZE,
146 		sizeof(struct efa_admin_aq_entry));
147 
148 	writel(aq_caps, edev->reg_bar + EFA_REGS_AQ_CAPS_OFF);
149 
150 	return 0;
151 }
152 
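/*
 * Allocate the admin completion queue and program its base address, depth,
 * entry size and MSI-X vector into the device ACQ registers.
 */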
153 static int efa_com_admin_init_cq(struct efa_com_dev *edev)
154 {
155 	struct efa_com_admin_queue *aq = &edev->aq;
156 	struct efa_com_admin_cq *cq = &aq->cq;
157 	u16 size = aq->depth * sizeof(*cq->entries);
158 	u32 acq_caps = 0;
159 	u32 addr_high;
160 	u32 addr_low;
161 
162 	cq->entries =
163 		dma_alloc_coherent(aq->dmadev, size, &cq->dma_addr, GFP_KERNEL);
164 	if (!cq->entries)
165 		return -ENOMEM;
166 
167 	spin_lock_init(&cq->lock);
168 
169 	cq->cc = 0;
170 	cq->phase = 1;
171 
172 	addr_high = upper_32_bits(cq->dma_addr);
173 	addr_low = lower_32_bits(cq->dma_addr);
174 
175 	writel(addr_low, edev->reg_bar + EFA_REGS_ACQ_BASE_LO_OFF);
176 	writel(addr_high, edev->reg_bar + EFA_REGS_ACQ_BASE_HI_OFF);
177 
178 	EFA_SET(&acq_caps, EFA_REGS_ACQ_CAPS_ACQ_DEPTH, aq->depth);
179 	EFA_SET(&acq_caps, EFA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE,
180 		sizeof(struct efa_admin_acq_entry));
181 	EFA_SET(&acq_caps, EFA_REGS_ACQ_CAPS_ACQ_MSIX_VECTOR,
182 		aq->msix_vector_idx);
183 
184 	writel(acq_caps, edev->reg_bar + EFA_REGS_ACQ_CAPS_OFF);
185 
186 	return 0;
187 }
188 
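/*
 * Allocate the asynchronous event notification queue, register the supplied
 * event handlers and program the queue parameters into the device AENQ
 * registers.
 */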
189 static int efa_com_admin_init_aenq(struct efa_com_dev *edev,
190 				   struct efa_aenq_handlers *aenq_handlers)
191 {
192 	struct efa_com_aenq *aenq = &edev->aenq;
193 	u32 addr_low, addr_high;
194 	u32 aenq_caps = 0;
195 	u16 size;
196 
197 	if (!aenq_handlers) {
198 		ibdev_err(edev->efa_dev, "aenq handlers pointer is NULL\n");
199 		return -EINVAL;
200 	}
201 
202 	size = EFA_ASYNC_QUEUE_DEPTH * sizeof(*aenq->entries);
203 	aenq->entries = dma_alloc_coherent(edev->dmadev, size, &aenq->dma_addr,
204 					   GFP_KERNEL);
205 	if (!aenq->entries)
206 		return -ENOMEM;
207 
208 	aenq->aenq_handlers = aenq_handlers;
209 	aenq->depth = EFA_ASYNC_QUEUE_DEPTH;
210 	aenq->cc = 0;
211 	aenq->phase = 1;
212 
213 	addr_low = lower_32_bits(aenq->dma_addr);
214 	addr_high = upper_32_bits(aenq->dma_addr);
215 
216 	writel(addr_low, edev->reg_bar + EFA_REGS_AENQ_BASE_LO_OFF);
217 	writel(addr_high, edev->reg_bar + EFA_REGS_AENQ_BASE_HI_OFF);
218 
219 	EFA_SET(&aenq_caps, EFA_REGS_AENQ_CAPS_AENQ_DEPTH, aenq->depth);
220 	EFA_SET(&aenq_caps, EFA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE,
221 		sizeof(struct efa_admin_aenq_entry));
222 	EFA_SET(&aenq_caps, EFA_REGS_AENQ_CAPS_AENQ_MSIX_VECTOR,
223 		aenq->msix_vector_idx);
224 	writel(aenq_caps, edev->reg_bar + EFA_REGS_AENQ_CAPS_OFF);
225 
226 	/*
227 	 * Init cons_db to mark that all entries in the queue
228 	 * are initially available
229 	 */
230 	writel(edev->aenq.cc, edev->reg_bar + EFA_REGS_AENQ_CONS_DB_OFF);
231 
232 	return 0;
233 }
234 
235 /* ID to be used with efa_com_get_comp_ctx */
236 static u16 efa_com_alloc_ctx_id(struct efa_com_admin_queue *aq)
237 {
238 	u16 ctx_id;
239 
240 	spin_lock(&aq->comp_ctx_lock);
241 	ctx_id = aq->comp_ctx_pool[aq->comp_ctx_pool_next];
242 	aq->comp_ctx_pool_next++;
243 	spin_unlock(&aq->comp_ctx_lock);
244 
245 	return ctx_id;
246 }
247 
248 static void efa_com_dealloc_ctx_id(struct efa_com_admin_queue *aq,
249 				   u16 ctx_id)
250 {
251 	spin_lock(&aq->comp_ctx_lock);
252 	aq->comp_ctx_pool_next--;
253 	aq->comp_ctx_pool[aq->comp_ctx_pool_next] = ctx_id;
254 	spin_unlock(&aq->comp_ctx_lock);
255 }
256 
257 static inline void efa_com_put_comp_ctx(struct efa_com_admin_queue *aq,
258 					struct efa_comp_ctx *comp_ctx)
259 {
260 	u16 cmd_id = EFA_GET(&comp_ctx->user_cqe->acq_common_descriptor.command,
261 			     EFA_ADMIN_ACQ_COMMON_DESC_COMMAND_ID);
262 	u16 ctx_id = cmd_id & (aq->depth - 1);
263 
264 	ibdev_dbg(aq->efa_dev, "Put completion command_id %#x\n", cmd_id);
265 	comp_ctx->occupied = 0;
266 	efa_com_dealloc_ctx_id(aq, ctx_id);
267 }
268 
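/*
 * Look up the completion context for a command id; the low bits of the
 * command id select the context slot. When @capture is true the context is
 * marked as occupied.
 */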
269 static struct efa_comp_ctx *efa_com_get_comp_ctx(struct efa_com_admin_queue *aq,
270 						 u16 cmd_id, bool capture)
271 {
272 	u16 ctx_id = cmd_id & (aq->depth - 1);
273 
274 	if (aq->comp_ctx[ctx_id].occupied && capture) {
275 		ibdev_err_ratelimited(
276 			aq->efa_dev,
277 			"Completion context for command_id %#x is occupied\n",
278 			cmd_id);
279 		return NULL;
280 	}
281 
282 	if (capture) {
283 		aq->comp_ctx[ctx_id].occupied = 1;
284 		ibdev_dbg(aq->efa_dev,
285 			  "Take completion ctxt for command_id %#x\n", cmd_id);
286 	}
287 
288 	return &aq->comp_ctx[ctx_id];
289 }
290 
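/*
 * Build a command id from the allocated context id and the SQ producer
 * counter, copy the command into the next SQ entry and ring the doorbell.
 * Called with sq.lock held.
 */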
291 static struct efa_comp_ctx *__efa_com_submit_admin_cmd(struct efa_com_admin_queue *aq,
292 						       struct efa_admin_aq_entry *cmd,
293 						       size_t cmd_size_in_bytes,
294 						       struct efa_admin_acq_entry *comp,
295 						       size_t comp_size_in_bytes)
296 {
297 	struct efa_admin_aq_entry *aqe;
298 	struct efa_comp_ctx *comp_ctx;
299 	u16 queue_size_mask;
300 	u16 cmd_id;
301 	u16 ctx_id;
302 	u16 pi;
303 
304 	queue_size_mask = aq->depth - 1;
305 	pi = aq->sq.pc & queue_size_mask;
306 
307 	ctx_id = efa_com_alloc_ctx_id(aq);
308 
309 	/* cmd_id LSBs are the ctx_id and MSBs are entropy bits from pc */
310 	cmd_id = ctx_id & queue_size_mask;
311 	cmd_id |= aq->sq.pc & ~queue_size_mask;
312 	cmd_id &= EFA_ADMIN_AQ_COMMON_DESC_COMMAND_ID_MASK;
313 
314 	cmd->aq_common_descriptor.command_id = cmd_id;
315 	EFA_SET(&cmd->aq_common_descriptor.flags,
316 		EFA_ADMIN_AQ_COMMON_DESC_PHASE, aq->sq.phase);
317 
318 	comp_ctx = efa_com_get_comp_ctx(aq, cmd_id, true);
319 	if (!comp_ctx) {
320 		efa_com_dealloc_ctx_id(aq, ctx_id);
321 		return ERR_PTR(-EINVAL);
322 	}
323 
324 	comp_ctx->status = EFA_CMD_SUBMITTED;
325 	comp_ctx->comp_size = comp_size_in_bytes;
326 	comp_ctx->user_cqe = comp;
327 	comp_ctx->cmd_opcode = cmd->aq_common_descriptor.opcode;
328 
329 	reinit_completion(&comp_ctx->wait_event);
330 
331 	aqe = &aq->sq.entries[pi];
332 	memset(aqe, 0, sizeof(*aqe));
333 	memcpy(aqe, cmd, cmd_size_in_bytes);
334 
335 	aq->sq.pc++;
336 	atomic64_inc(&aq->stats.submitted_cmd);
337 
338 	if ((aq->sq.pc & queue_size_mask) == 0)
339 		aq->sq.phase = !aq->sq.phase;
340 
341 	/* barrier not needed in case of writel */
342 	writel(aq->sq.pc, aq->sq.db_addr);
343 
344 	return comp_ctx;
345 }
346 
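/*
 * Allocate the completion context array and the pool of free context ids
 * used to track outstanding admin commands.
 */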
347 static inline int efa_com_init_comp_ctxt(struct efa_com_admin_queue *aq)
348 {
349 	size_t pool_size = aq->depth * sizeof(*aq->comp_ctx_pool);
350 	size_t size = aq->depth * sizeof(struct efa_comp_ctx);
351 	struct efa_comp_ctx *comp_ctx;
352 	u16 i;
353 
354 	aq->comp_ctx = devm_kzalloc(aq->dmadev, size, GFP_KERNEL);
355 	aq->comp_ctx_pool = devm_kzalloc(aq->dmadev, pool_size, GFP_KERNEL);
356 	if (!aq->comp_ctx || !aq->comp_ctx_pool) {
357 		devm_kfree(aq->dmadev, aq->comp_ctx_pool);
358 		devm_kfree(aq->dmadev, aq->comp_ctx);
359 		return -ENOMEM;
360 	}
361 
362 	for (i = 0; i < aq->depth; i++) {
363 		comp_ctx = efa_com_get_comp_ctx(aq, i, false);
364 		if (comp_ctx)
365 			init_completion(&comp_ctx->wait_event);
366 
367 		aq->comp_ctx_pool[i] = i;
368 	}
369 
370 	spin_lock_init(&aq->comp_ctx_lock);
371 
372 	aq->comp_ctx_pool_next = 0;
373 
374 	return 0;
375 }
376 
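/*
 * Submit an admin command under sq.lock, failing if the admin queue is no
 * longer in the running state.
 */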
377 static struct efa_comp_ctx *efa_com_submit_admin_cmd(struct efa_com_admin_queue *aq,
378 						     struct efa_admin_aq_entry *cmd,
379 						     size_t cmd_size_in_bytes,
380 						     struct efa_admin_acq_entry *comp,
381 						     size_t comp_size_in_bytes)
382 {
383 	struct efa_comp_ctx *comp_ctx;
384 
385 	spin_lock(&aq->sq.lock);
386 	if (!test_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state)) {
387 		ibdev_err_ratelimited(aq->efa_dev, "Admin queue is closed\n");
388 		spin_unlock(&aq->sq.lock);
389 		return ERR_PTR(-ENODEV);
390 	}
391 
392 	comp_ctx = __efa_com_submit_admin_cmd(aq, cmd, cmd_size_in_bytes, comp,
393 					      comp_size_in_bytes);
394 	spin_unlock(&aq->sq.lock);
395 	if (IS_ERR(comp_ctx))
396 		clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state);
397 
398 	return comp_ctx;
399 }
400 
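/*
 * Match a completion entry to its context by command id, copy it into the
 * caller's buffer and, in interrupt mode, wake up the waiting thread.
 */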
401 static void efa_com_handle_single_admin_completion(struct efa_com_admin_queue *aq,
402 						   struct efa_admin_acq_entry *cqe)
403 {
404 	struct efa_comp_ctx *comp_ctx;
405 	u16 cmd_id;
406 
407 	cmd_id = EFA_GET(&cqe->acq_common_descriptor.command,
408 			 EFA_ADMIN_ACQ_COMMON_DESC_COMMAND_ID);
409 
410 	comp_ctx = efa_com_get_comp_ctx(aq, cmd_id, false);
411 	if (!comp_ctx) {
412 		ibdev_err(aq->efa_dev,
413 			  "comp_ctx is NULL. Changing the admin queue running state\n");
414 		clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state);
415 		return;
416 	}
417 
418 	comp_ctx->status = EFA_CMD_COMPLETED;
419 	memcpy(comp_ctx->user_cqe, cqe, comp_ctx->comp_size);
420 
421 	if (!test_bit(EFA_AQ_STATE_POLLING_BIT, &aq->state))
422 		complete(&comp_ctx->wait_event);
423 }
424 
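/*
 * Walk the admin completion queue and handle every entry whose phase bit
 * matches the current expected phase. Called with cq.lock held.
 */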
425 static void efa_com_handle_admin_completion(struct efa_com_admin_queue *aq)
426 {
427 	struct efa_admin_acq_entry *cqe;
428 	u16 queue_size_mask;
429 	u16 comp_num = 0;
430 	u8 phase;
431 	u16 ci;
432 
433 	queue_size_mask = aq->depth - 1;
434 
435 	ci = aq->cq.cc & queue_size_mask;
436 	phase = aq->cq.phase;
437 
438 	cqe = &aq->cq.entries[ci];
439 
440 	/* Go over all the completions */
441 	while ((READ_ONCE(cqe->acq_common_descriptor.flags) &
442 		EFA_ADMIN_ACQ_COMMON_DESC_PHASE_MASK) == phase) {
443 		/*
444 		 * Do not read the rest of the completion entry before the
445 		 * phase bit has been validated
446 		 */
447 		dma_rmb();
448 		efa_com_handle_single_admin_completion(aq, cqe);
449 
450 		ci++;
451 		comp_num++;
452 		if (ci == aq->depth) {
453 			ci = 0;
454 			phase = !phase;
455 		}
456 
457 		cqe = &aq->cq.entries[ci];
458 	}
459 
460 	aq->cq.cc += comp_num;
461 	aq->cq.phase = phase;
462 	aq->sq.cc += comp_num;
463 	atomic64_add(comp_num, &aq->stats.completed_cmd);
464 }
465 
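/* Translate an admin completion status reported by the device into an errno */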
466 static int efa_com_comp_status_to_errno(u8 comp_status)
467 {
468 	switch (comp_status) {
469 	case EFA_ADMIN_SUCCESS:
470 		return 0;
471 	case EFA_ADMIN_RESOURCE_ALLOCATION_FAILURE:
472 		return -ENOMEM;
473 	case EFA_ADMIN_UNSUPPORTED_OPCODE:
474 		return -EOPNOTSUPP;
475 	case EFA_ADMIN_BAD_OPCODE:
476 	case EFA_ADMIN_MALFORMED_REQUEST:
477 	case EFA_ADMIN_ILLEGAL_PARAMETER:
478 	case EFA_ADMIN_UNKNOWN_ERROR:
479 		return -EINVAL;
480 	default:
481 		return -EINVAL;
482 	}
483 }
484 
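/*
 * Poll the admin completion queue until the command completes or the admin
 * completion timeout expires.
 */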
485 static int efa_com_wait_and_process_admin_cq_polling(struct efa_comp_ctx *comp_ctx,
486 						     struct efa_com_admin_queue *aq)
487 {
488 	unsigned long timeout;
489 	unsigned long flags;
490 	int err;
491 
492 	timeout = jiffies + usecs_to_jiffies(aq->completion_timeout);
493 
494 	while (1) {
495 		spin_lock_irqsave(&aq->cq.lock, flags);
496 		efa_com_handle_admin_completion(aq);
497 		spin_unlock_irqrestore(&aq->cq.lock, flags);
498 
499 		if (comp_ctx->status != EFA_CMD_SUBMITTED)
500 			break;
501 
502 		if (time_is_before_jiffies(timeout)) {
503 			ibdev_err_ratelimited(
504 				aq->efa_dev,
505 				"Wait for completion (polling) timeout\n");
506 			/* EFA didn't have any completion */
507 			atomic64_inc(&aq->stats.no_completion);
508 
509 			clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state);
510 			err = -ETIME;
511 			goto out;
512 		}
513 
514 		msleep(aq->poll_interval);
515 	}
516 
517 	err = efa_com_comp_status_to_errno(comp_ctx->user_cqe->acq_common_descriptor.status);
518 out:
519 	efa_com_put_comp_ctx(aq, comp_ctx);
520 	return err;
521 }
522 
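/*
 * Sleep until the interrupt handler signals completion, then poll the
 * completion queue once more to distinguish a missed interrupt from a
 * completion that never arrived.
 */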
523 static int efa_com_wait_and_process_admin_cq_interrupts(struct efa_comp_ctx *comp_ctx,
524 							struct efa_com_admin_queue *aq)
525 {
526 	unsigned long flags;
527 	int err;
528 
529 	wait_for_completion_timeout(&comp_ctx->wait_event,
530 				    usecs_to_jiffies(aq->completion_timeout));
531 
532 	/*
533 	 * In case the command wasn't completed, find out the root cause.
534 	 * There might be two kinds of errors:
535 	 * 1) No completion (timeout reached).
536 	 * 2) There is a completion, but the driver didn't receive the MSI-X interrupt for it.
537 	 */
538 	if (comp_ctx->status == EFA_CMD_SUBMITTED) {
539 		spin_lock_irqsave(&aq->cq.lock, flags);
540 		efa_com_handle_admin_completion(aq);
541 		spin_unlock_irqrestore(&aq->cq.lock, flags);
542 
543 		atomic64_inc(&aq->stats.no_completion);
544 
545 		if (comp_ctx->status == EFA_CMD_COMPLETED)
546 			ibdev_err_ratelimited(
547 				aq->efa_dev,
548 				"The device sent a completion but the driver didn't receive any MSI-X interrupt for admin cmd %s(%d) status %d (ctx: 0x%p, sq producer: %d, sq consumer: %d, cq consumer: %d)\n",
549 				efa_com_cmd_str(comp_ctx->cmd_opcode),
550 				comp_ctx->cmd_opcode, comp_ctx->status,
551 				comp_ctx, aq->sq.pc, aq->sq.cc, aq->cq.cc);
552 		else
553 			ibdev_err_ratelimited(
554 				aq->efa_dev,
555 				"The device didn't send any completion for admin cmd %s(%d) status %d (ctx 0x%p, sq producer: %d, sq consumer: %d, cq consumer: %d)\n",
556 				efa_com_cmd_str(comp_ctx->cmd_opcode),
557 				comp_ctx->cmd_opcode, comp_ctx->status,
558 				comp_ctx, aq->sq.pc, aq->sq.cc, aq->cq.cc);
559 
560 		clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state);
561 		err = -ETIME;
562 		goto out;
563 	}
564 
565 	err = efa_com_comp_status_to_errno(comp_ctx->user_cqe->acq_common_descriptor.status);
566 out:
567 	efa_com_put_comp_ctx(aq, comp_ctx);
568 	return err;
569 }
570 
571 /*
572  * There are two ways to wait for a completion.
573  * Polling mode - poll the completion queue until the completion is available.
574  * Interrupt mode - sleep on the command's completion object until it is ready
575  * (or the timeout expires).
576  * In interrupt mode, the IRQ handler is expected to call
577  * efa_com_handle_admin_completion() to mark the completions.
578  */
579 static int efa_com_wait_and_process_admin_cq(struct efa_comp_ctx *comp_ctx,
580 					     struct efa_com_admin_queue *aq)
581 {
582 	if (test_bit(EFA_AQ_STATE_POLLING_BIT, &aq->state))
583 		return efa_com_wait_and_process_admin_cq_polling(comp_ctx, aq);
584 
585 	return efa_com_wait_and_process_admin_cq_interrupts(comp_ctx, aq);
586 }
587 
588 /**
589  * efa_com_cmd_exec - Execute admin command
590  * @aq: admin queue.
591  * @cmd: the admin command to execute.
592  * @cmd_size: the command size.
593  * @comp: command completion return entry.
594  * @comp_size: command completion size.
595  * Submit an admin command and then wait until the device returns a
596  * completion.
597  * The completion is copied into @comp.
598  *
599  * @return - 0 on success, negative value on failure.
600  */
601 int efa_com_cmd_exec(struct efa_com_admin_queue *aq,
602 		     struct efa_admin_aq_entry *cmd,
603 		     size_t cmd_size,
604 		     struct efa_admin_acq_entry *comp,
605 		     size_t comp_size)
606 {
607 	struct efa_comp_ctx *comp_ctx;
608 	int err;
609 
610 	might_sleep();
611 
612 	/* Blocks if the admin queue is full */
613 	down(&aq->avail_cmds);
614 
615 	ibdev_dbg(aq->efa_dev, "%s (opcode %d)\n",
616 		  efa_com_cmd_str(cmd->aq_common_descriptor.opcode),
617 		  cmd->aq_common_descriptor.opcode);
618 	comp_ctx = efa_com_submit_admin_cmd(aq, cmd, cmd_size, comp, comp_size);
619 	if (IS_ERR(comp_ctx)) {
620 		ibdev_err_ratelimited(
621 			aq->efa_dev,
622 			"Failed to submit command %s (opcode %u) err %ld\n",
623 			efa_com_cmd_str(cmd->aq_common_descriptor.opcode),
624 			cmd->aq_common_descriptor.opcode, PTR_ERR(comp_ctx));
625 
626 		up(&aq->avail_cmds);
627 		atomic64_inc(&aq->stats.cmd_err);
628 		return PTR_ERR(comp_ctx);
629 	}
630 
631 	err = efa_com_wait_and_process_admin_cq(comp_ctx, aq);
632 	if (err) {
633 		ibdev_err_ratelimited(
634 			aq->efa_dev,
635 			"Failed to process command %s (opcode %u) comp_status %d err %d\n",
636 			efa_com_cmd_str(cmd->aq_common_descriptor.opcode),
637 			cmd->aq_common_descriptor.opcode,
638 			comp_ctx->user_cqe->acq_common_descriptor.status, err);
639 		atomic64_inc(&aq->stats.cmd_err);
640 	}
641 
642 	up(&aq->avail_cmds);
643 
644 	return err;
645 }
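
/*
 * Typical usage (a sketch of the pattern used by the command wrappers in
 * efa_com_cmd.c): the caller fills an opcode-specific command structure,
 * sets aq_common_descriptor.opcode, and passes it cast to the generic
 * admin queue entry types, e.g.:
 *
 *	cmd.aq_common_descriptor.opcode = EFA_ADMIN_GET_FEATURE;
 *	err = efa_com_cmd_exec(aq,
 *			       (struct efa_admin_aq_entry *)&cmd, sizeof(cmd),
 *			       (struct efa_admin_acq_entry *)&resp,
 *			       sizeof(resp));
 */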
646 
647 /**
648  * efa_com_admin_destroy - Destroy the admin and the async events queues.
649  * @edev: EFA communication layer struct
650  */
651 void efa_com_admin_destroy(struct efa_com_dev *edev)
652 {
653 	struct efa_com_admin_queue *aq = &edev->aq;
654 	struct efa_com_aenq *aenq = &edev->aenq;
655 	struct efa_com_admin_cq *cq = &aq->cq;
656 	struct efa_com_admin_sq *sq = &aq->sq;
657 	u16 size;
658 
659 	clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state);
660 
661 	devm_kfree(edev->dmadev, aq->comp_ctx_pool);
662 	devm_kfree(edev->dmadev, aq->comp_ctx);
663 
664 	size = aq->depth * sizeof(*sq->entries);
665 	dma_free_coherent(edev->dmadev, size, sq->entries, sq->dma_addr);
666 
667 	size = aq->depth * sizeof(*cq->entries);
668 	dma_free_coherent(edev->dmadev, size, cq->entries, cq->dma_addr);
669 
670 	size = aenq->depth * sizeof(*aenq->entries);
671 	dma_free_coherent(edev->dmadev, size, aenq->entries, aenq->dma_addr);
672 }
673 
674 /**
675  * efa_com_set_admin_polling_mode - Set the admin completion queue polling mode
676  * @edev: EFA communication layer struct
677  * @polling: Enable/Disable polling mode
678  *
679  * Set the admin completion mode.
680  */
681 void efa_com_set_admin_polling_mode(struct efa_com_dev *edev, bool polling)
682 {
683 	u32 mask_value = 0;
684 
685 	if (polling)
686 		EFA_SET(&mask_value, EFA_REGS_INTR_MASK_EN, 1);
687 
688 	writel(mask_value, edev->reg_bar + EFA_REGS_INTR_MASK_OFF);
689 	if (polling)
690 		set_bit(EFA_AQ_STATE_POLLING_BIT, &edev->aq.state);
691 	else
692 		clear_bit(EFA_AQ_STATE_POLLING_BIT, &edev->aq.state);
693 }
694 
695 static void efa_com_stats_init(struct efa_com_dev *edev)
696 {
697 	atomic64_t *s = (atomic64_t *)&edev->aq.stats;
698 	int i;
699 
700 	for (i = 0; i < sizeof(edev->aq.stats) / sizeof(*s); i++, s++)
701 		atomic64_set(s, 0);
702 }
703 
704 /**
705  * efa_com_admin_init - Init the admin and the async queues
706  * @edev: EFA communication layer struct
707  * @aenq_handlers: The handlers to be called upon AENQ events.
708  *
709  * Initialize the admin submission and completion queues.
710  * Initialize the asynchronous event notification queue.
711  *
712  * @return - 0 on success, negative value on failure.
713  */
714 int efa_com_admin_init(struct efa_com_dev *edev,
715 		       struct efa_aenq_handlers *aenq_handlers)
716 {
717 	struct efa_com_admin_queue *aq = &edev->aq;
718 	u32 timeout;
719 	u32 dev_sts;
720 	u32 cap;
721 	int err;
722 
723 	dev_sts = efa_com_reg_read32(edev, EFA_REGS_DEV_STS_OFF);
724 	if (!EFA_GET(&dev_sts, EFA_REGS_DEV_STS_READY)) {
725 		ibdev_err(edev->efa_dev,
726 			  "Device isn't ready, abort com init %#x\n", dev_sts);
727 		return -ENODEV;
728 	}
729 
730 	aq->depth = EFA_ADMIN_QUEUE_DEPTH;
731 
732 	aq->dmadev = edev->dmadev;
733 	aq->efa_dev = edev->efa_dev;
734 	set_bit(EFA_AQ_STATE_POLLING_BIT, &aq->state);
735 
736 	sema_init(&aq->avail_cmds, aq->depth);
737 
738 	efa_com_stats_init(edev);
739 
740 	err = efa_com_init_comp_ctxt(aq);
741 	if (err)
742 		return err;
743 
744 	err = efa_com_admin_init_sq(edev);
745 	if (err)
746 		goto err_destroy_comp_ctxt;
747 
748 	err = efa_com_admin_init_cq(edev);
749 	if (err)
750 		goto err_destroy_sq;
751 
752 	efa_com_set_admin_polling_mode(edev, false);
753 
754 	err = efa_com_admin_init_aenq(edev, aenq_handlers);
755 	if (err)
756 		goto err_destroy_cq;
757 
758 	cap = efa_com_reg_read32(edev, EFA_REGS_CAPS_OFF);
759 	timeout = EFA_GET(&cap, EFA_REGS_CAPS_ADMIN_CMD_TO);
760 	if (timeout)
761 		/* the resolution of timeout reg is 100ms */
762 		aq->completion_timeout = timeout * 100000;
763 	else
764 		aq->completion_timeout = ADMIN_CMD_TIMEOUT_US;
765 
766 	aq->poll_interval = EFA_POLL_INTERVAL_MS;
767 
768 	set_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state);
769 
770 	return 0;
771 
772 err_destroy_cq:
773 	dma_free_coherent(edev->dmadev, aq->depth * sizeof(*aq->cq.entries),
774 			  aq->cq.entries, aq->cq.dma_addr);
775 err_destroy_sq:
776 	dma_free_coherent(edev->dmadev, aq->depth * sizeof(*aq->sq.entries),
777 			  aq->sq.entries, aq->sq.dma_addr);
778 err_destroy_comp_ctxt:
779 	devm_kfree(edev->dmadev, aq->comp_ctx);
780 
781 	return err;
782 }
783 
784 /**
785  * efa_com_admin_q_comp_intr_handler - admin queue interrupt handler
786  * @edev: EFA communication layer struct
787  *
788  * This method goes over the admin completion queue and wakes up
789  * all the pending threads that are waiting on the commands' wait events.
790  *
791  * Note: Should be called after receiving an MSI-X interrupt.
792  */
793 void efa_com_admin_q_comp_intr_handler(struct efa_com_dev *edev)
794 {
795 	unsigned long flags;
796 
797 	spin_lock_irqsave(&edev->aq.cq.lock, flags);
798 	efa_com_handle_admin_completion(&edev->aq);
799 	spin_unlock_irqrestore(&edev->aq.cq.lock, flags);
800 }
801 
802 /*
803  * efa_com_get_specific_aenq_cb:
804  * return the handler that is relevant to the specific event group
805  */
806 static efa_aenq_handler efa_com_get_specific_aenq_cb(struct efa_com_dev *edev,
807 						     u16 group)
808 {
809 	struct efa_aenq_handlers *aenq_handlers = edev->aenq.aenq_handlers;
810 
811 	if (group < EFA_MAX_HANDLERS && aenq_handlers->handlers[group])
812 		return aenq_handlers->handlers[group];
813 
814 	return aenq_handlers->unimplemented_handler;
815 }
816 
817 /**
818  * efa_com_aenq_intr_handler - AENQ interrupt handler
819  * @edev: EFA communication layer struct
820  * @data: Opaque data that is passed to the event handlers.
821  *
822  * Go over the async event notification queue and call the proper aenq handler.
823  */
824 void efa_com_aenq_intr_handler(struct efa_com_dev *edev, void *data)
825 {
826 	struct efa_admin_aenq_common_desc *aenq_common;
827 	struct efa_com_aenq *aenq = &edev->aenq;
828 	struct efa_admin_aenq_entry *aenq_e;
829 	efa_aenq_handler handler_cb;
830 	u32 processed = 0;
831 	u8 phase;
832 	u32 ci;
833 
834 	ci = aenq->cc & (aenq->depth - 1);
835 	phase = aenq->phase;
836 	aenq_e = &aenq->entries[ci]; /* Get first entry */
837 	aenq_common = &aenq_e->aenq_common_desc;
838 
839 	/* Go over all the events */
840 	while ((READ_ONCE(aenq_common->flags) &
841 		EFA_ADMIN_AENQ_COMMON_DESC_PHASE_MASK) == phase) {
842 		/*
843 		 * Do not read the rest of the completion entry before the
844 		 * phase bit was validated
845 		 * phase bit has been validated
846 		dma_rmb();
847 
848 		/* Handle specific event */
849 		handler_cb = efa_com_get_specific_aenq_cb(edev,
850 							  aenq_common->group);
851 		handler_cb(data, aenq_e); /* call the actual event handler */
852 
853 		/* Get next event entry */
854 		ci++;
855 		processed++;
856 
857 		if (ci == aenq->depth) {
858 			ci = 0;
859 			phase = !phase;
860 		}
861 		aenq_e = &aenq->entries[ci];
862 		aenq_common = &aenq_e->aenq_common_desc;
863 	}
864 
865 	aenq->cc += processed;
866 	aenq->phase = phase;
867 
868 	/* Don't update aenq doorbell if there weren't any processed events */
869 	if (!processed)
870 		return;
871 
872 	/* barrier not needed in case of writel */
873 	writel(aenq->cc, edev->reg_bar + EFA_REGS_AENQ_CONS_DB_OFF);
874 }
875 
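/*
 * Program the DMA address of the readless register read response buffer
 * into the device MMIO response address registers.
 */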
876 static void efa_com_mmio_reg_read_resp_addr_init(struct efa_com_dev *edev)
877 {
878 	struct efa_com_mmio_read *mmio_read = &edev->mmio_read;
879 	u32 addr_high;
880 	u32 addr_low;
881 
882 	/* dma_addr_bits is unknown at this point */
883 	addr_high = (mmio_read->read_resp_dma_addr >> 32) & GENMASK(31, 0);
884 	addr_low = mmio_read->read_resp_dma_addr & GENMASK(31, 0);
885 
886 	writel(addr_high, edev->reg_bar + EFA_REGS_MMIO_RESP_HI_OFF);
887 	writel(addr_low, edev->reg_bar + EFA_REGS_MMIO_RESP_LO_OFF);
888 }
889 
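/*
 * Allocate the DMA-coherent buffer used for readless register reads and
 * register its address with the device.
 */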
890 int efa_com_mmio_reg_read_init(struct efa_com_dev *edev)
891 {
892 	struct efa_com_mmio_read *mmio_read = &edev->mmio_read;
893 
894 	spin_lock_init(&mmio_read->lock);
895 	mmio_read->read_resp =
896 		dma_alloc_coherent(edev->dmadev, sizeof(*mmio_read->read_resp),
897 				   &mmio_read->read_resp_dma_addr, GFP_KERNEL);
898 	if (!mmio_read->read_resp)
899 		return -ENOMEM;
900 
901 	efa_com_mmio_reg_read_resp_addr_init(edev);
902 
903 	mmio_read->read_resp->req_id = 0;
904 	mmio_read->seq_num = 0;
905 	mmio_read->mmio_read_timeout = EFA_REG_READ_TIMEOUT_US;
906 
907 	return 0;
908 }
909 
910 void efa_com_mmio_reg_read_destroy(struct efa_com_dev *edev)
911 {
912 	struct efa_com_mmio_read *mmio_read = &edev->mmio_read;
913 
914 	dma_free_coherent(edev->dmadev, sizeof(*mmio_read->read_resp),
915 			  mmio_read->read_resp, mmio_read->read_resp_dma_addr);
916 }
917 
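/*
 * Verify that the device API version and the controller version are at
 * least the minimum versions the driver supports.
 */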
918 int efa_com_validate_version(struct efa_com_dev *edev)
919 {
920 	u32 min_ctrl_ver = 0;
921 	u32 ctrl_ver_masked;
922 	u32 min_ver = 0;
923 	u32 ctrl_ver;
924 	u32 ver;
925 
926 	/*
927 	 * Make sure the EFA version and the controller version are at least
928 	 * the minimum versions the driver expects
929 	 */
930 	ver = efa_com_reg_read32(edev, EFA_REGS_VERSION_OFF);
931 	ctrl_ver = efa_com_reg_read32(edev,
932 				      EFA_REGS_CONTROLLER_VERSION_OFF);
933 
934 	ibdev_dbg(edev->efa_dev, "efa device version: %d.%d\n",
935 		  EFA_GET(&ver, EFA_REGS_VERSION_MAJOR_VERSION),
936 		  EFA_GET(&ver, EFA_REGS_VERSION_MINOR_VERSION));
937 
938 	EFA_SET(&min_ver, EFA_REGS_VERSION_MAJOR_VERSION,
939 		EFA_ADMIN_API_VERSION_MAJOR);
940 	EFA_SET(&min_ver, EFA_REGS_VERSION_MINOR_VERSION,
941 		EFA_ADMIN_API_VERSION_MINOR);
942 	if (ver < min_ver) {
943 		ibdev_err(edev->efa_dev,
944 			  "EFA version is lower than the minimal version the driver supports\n");
945 		return -EOPNOTSUPP;
946 	}
947 
948 	ibdev_dbg(
949 		edev->efa_dev,
950 		"efa controller version: %d.%d.%d implementation version %d\n",
951 		EFA_GET(&ctrl_ver, EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION),
952 		EFA_GET(&ctrl_ver, EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION),
953 		EFA_GET(&ctrl_ver,
954 			EFA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION),
955 		EFA_GET(&ctrl_ver, EFA_REGS_CONTROLLER_VERSION_IMPL_ID));
956 
957 	ctrl_ver_masked =
958 		EFA_GET(&ctrl_ver, EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION) |
959 		EFA_GET(&ctrl_ver, EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION) |
960 		EFA_GET(&ctrl_ver,
961 			EFA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION);
962 
963 	EFA_SET(&min_ctrl_ver, EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION,
964 		EFA_CTRL_MAJOR);
965 	EFA_SET(&min_ctrl_ver, EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION,
966 		EFA_CTRL_MINOR);
967 	EFA_SET(&min_ctrl_ver, EFA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION,
968 		EFA_CTRL_SUB_MINOR);
969 	/* Validate the ctrl version without the implementation ID */
970 	if (ctrl_ver_masked < min_ctrl_ver) {
971 		ibdev_err(edev->efa_dev,
972 			  "EFA ctrl version is lower than the minimal ctrl version the driver supports\n");
973 		return -EOPNOTSUPP;
974 	}
975 
976 	return 0;
977 }
978 
979 /**
980  * efa_com_get_dma_width - Retrieve the physical DMA address width the
981  * device supports.
982  * @edev: EFA communication layer struct
983  *
984  * Retrieve the maximum physical address bits the device can handle.
985  *
986  * @return: > 0 on success, negative value otherwise.
987  */
988 int efa_com_get_dma_width(struct efa_com_dev *edev)
989 {
990 	u32 caps = efa_com_reg_read32(edev, EFA_REGS_CAPS_OFF);
991 	int width;
992 
993 	width = EFA_GET(&caps, EFA_REGS_CAPS_DMA_ADDR_WIDTH);
994 
995 	ibdev_dbg(edev->efa_dev, "DMA width: %d\n", width);
996 
997 	if (width < 32 || width > 64) {
998 		ibdev_err(edev->efa_dev, "DMA width illegal value: %d\n", width);
999 		return -EINVAL;
1000 	}
1001 
1002 	edev->dma_addr_bits = width;
1003 
1004 	return width;
1005 }
1006 
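/*
 * Poll the device status register until the reset-in-progress indication
 * matches @on or the timeout (in units of EFA_POLL_INTERVAL_MS) expires.
 */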
1007 static int wait_for_reset_state(struct efa_com_dev *edev, u32 timeout, int on)
1008 {
1009 	u32 val, i;
1010 
1011 	for (i = 0; i < timeout; i++) {
1012 		val = efa_com_reg_read32(edev, EFA_REGS_DEV_STS_OFF);
1013 
1014 		if (EFA_GET(&val, EFA_REGS_DEV_STS_RESET_IN_PROGRESS) == on)
1015 			return 0;
1016 
1017 		ibdev_dbg(edev->efa_dev, "Reset indication val %d\n", val);
1018 		msleep(EFA_POLL_INTERVAL_MS);
1019 	}
1020 
1021 	return -ETIME;
1022 }
1023 
1024 /**
1025  * efa_com_dev_reset - Perform an FLR on the device.
1026  * @edev: EFA communication layer struct
1027  * @reset_reason: The trigger for the reset in case of an error.
1028  *
1029  * @return - 0 on success, negative value on failure.
1030  */
1031 int efa_com_dev_reset(struct efa_com_dev *edev,
1032 		      enum efa_regs_reset_reason_types reset_reason)
1033 {
1034 	u32 stat, timeout, cap;
1035 	u32 reset_val = 0;
1036 	int err;
1037 
1038 	stat = efa_com_reg_read32(edev, EFA_REGS_DEV_STS_OFF);
1039 	cap = efa_com_reg_read32(edev, EFA_REGS_CAPS_OFF);
1040 
1041 	if (!EFA_GET(&stat, EFA_REGS_DEV_STS_READY)) {
1042 		ibdev_err(edev->efa_dev,
1043 			  "Device isn't ready, can't reset device\n");
1044 		return -EINVAL;
1045 	}
1046 
1047 	timeout = EFA_GET(&cap, EFA_REGS_CAPS_RESET_TIMEOUT);
1048 	if (!timeout) {
1049 		ibdev_err(edev->efa_dev, "Invalid timeout value\n");
1050 		return -EINVAL;
1051 	}
1052 
1053 	/* start reset */
1054 	EFA_SET(&reset_val, EFA_REGS_DEV_CTL_DEV_RESET, 1);
1055 	EFA_SET(&reset_val, EFA_REGS_DEV_CTL_RESET_REASON, reset_reason);
1056 	writel(reset_val, edev->reg_bar + EFA_REGS_DEV_CTL_OFF);
1057 
1058 	/* reset clears the mmio readless address, restore it */
1059 	efa_com_mmio_reg_read_resp_addr_init(edev);
1060 
1061 	err = wait_for_reset_state(edev, timeout, 1);
1062 	if (err) {
1063 		ibdev_err(edev->efa_dev, "Reset indication didn't turn on\n");
1064 		return err;
1065 	}
1066 
1067 	/* reset done */
1068 	writel(0, edev->reg_bar + EFA_REGS_DEV_CTL_OFF);
1069 	err = wait_for_reset_state(edev, timeout, 0);
1070 	if (err) {
1071 		ibdev_err(edev->efa_dev, "Reset indication didn't turn off\n");
1072 		return err;
1073 	}
1074 
1075 	timeout = EFA_GET(&cap, EFA_REGS_CAPS_ADMIN_CMD_TO);
1076 	if (timeout)
1077 		/* the resolution of timeout reg is 100ms */
1078 		edev->aq.completion_timeout = timeout * 100000;
1079 	else
1080 		edev->aq.completion_timeout = ADMIN_CMD_TIMEOUT_US;
1081 
1082 	return 0;
1083 }
1084