xref: /linux/drivers/infiniband/hw/efa/efa_com.c (revision 6fdcba32711044c35c0e1b094cbd8f3f0b4472c9)
1 // SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
2 /*
3  * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
4  */
5 
6 #include "efa_com.h"
7 #include "efa_regs_defs.h"
8 
/* Admin command completion timeout used when the device reports none */
#define ADMIN_CMD_TIMEOUT_US		30000000 /* usecs */

/* How long a register read waits for the device response */
#define EFA_REG_READ_TIMEOUT_US		50000 /* usecs */
/* Value returned by efa_com_reg_read32() when the read failed */
#define EFA_MMIO_READ_INVALID		0xffffffff

/* Sleep between two polls (admin CQ polling and reset state polling) */
#define EFA_POLL_INTERVAL_MS		100 /* msecs */

/* Number of entries in the async event queue and in the admin queues */
#define EFA_ASYNC_QUEUE_DEPTH		16
#define EFA_ADMIN_QUEUE_DEPTH		32

/* Lowest device version accepted by efa_com_validate_version() */
#define MIN_EFA_VER \
	((EFA_ADMIN_API_VERSION_MAJOR << EFA_REGS_VERSION_MAJOR_VERSION_SHIFT) | \
	 (EFA_ADMIN_API_VERSION_MINOR & EFA_REGS_VERSION_MINOR_VERSION_MASK))

#define EFA_CTRL_MAJOR			0
#define EFA_CTRL_MINOR			0
#define EFA_CTRL_SUB_MINOR		1

/* Lowest controller version accepted by efa_com_validate_version() */
#define MIN_EFA_CTRL_VER \
	(((EFA_CTRL_MAJOR) << (EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_SHIFT)) | \
	 ((EFA_CTRL_MINOR) << (EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION_SHIFT)) | \
	 (EFA_CTRL_SUB_MINOR))

/* Split a DMA address into the two 32-bit halves written to the device */
#define EFA_DMA_ADDR_TO_UINT32_LOW(x)	((u32)((u64)(x)))
#define EFA_DMA_ADDR_TO_UINT32_HIGH(x)	((u32)(((u64)(x)) >> 32))

/* Written to the interrupt mask register when polling mode is enabled */
#define EFA_REGS_ADMIN_INTR_MASK	1
38 
/* Progress of an admin command as recorded in its completion context */
enum efa_cmd_status {
	/* The command was placed on the submission queue */
	EFA_CMD_SUBMITTED = 0,
	/* A completion entry for the command was processed */
	EFA_CMD_COMPLETED = 1,
};
43 
44 struct efa_comp_ctx {
45 	struct completion wait_event;
46 	struct efa_admin_acq_entry *user_cqe;
47 	u32 comp_size;
48 	enum efa_cmd_status status;
49 	/* status from the device */
50 	u8 comp_status;
51 	u8 cmd_opcode;
52 	u8 occupied;
53 };
54 
55 static const char *efa_com_cmd_str(u8 cmd)
56 {
57 #define EFA_CMD_STR_CASE(_cmd) case EFA_ADMIN_##_cmd: return #_cmd
58 
59 	switch (cmd) {
60 	EFA_CMD_STR_CASE(CREATE_QP);
61 	EFA_CMD_STR_CASE(MODIFY_QP);
62 	EFA_CMD_STR_CASE(QUERY_QP);
63 	EFA_CMD_STR_CASE(DESTROY_QP);
64 	EFA_CMD_STR_CASE(CREATE_AH);
65 	EFA_CMD_STR_CASE(DESTROY_AH);
66 	EFA_CMD_STR_CASE(REG_MR);
67 	EFA_CMD_STR_CASE(DEREG_MR);
68 	EFA_CMD_STR_CASE(CREATE_CQ);
69 	EFA_CMD_STR_CASE(DESTROY_CQ);
70 	EFA_CMD_STR_CASE(GET_FEATURE);
71 	EFA_CMD_STR_CASE(SET_FEATURE);
72 	EFA_CMD_STR_CASE(GET_STATS);
73 	EFA_CMD_STR_CASE(ALLOC_PD);
74 	EFA_CMD_STR_CASE(DEALLOC_PD);
75 	EFA_CMD_STR_CASE(ALLOC_UAR);
76 	EFA_CMD_STR_CASE(DEALLOC_UAR);
77 	default: return "unknown command opcode";
78 	}
79 #undef EFA_CMD_STR_CASE
80 }
81 
82 static u32 efa_com_reg_read32(struct efa_com_dev *edev, u16 offset)
83 {
84 	struct efa_com_mmio_read *mmio_read = &edev->mmio_read;
85 	struct efa_admin_mmio_req_read_less_resp *read_resp;
86 	unsigned long exp_time;
87 	u32 mmio_read_reg;
88 	u32 err;
89 
90 	read_resp = mmio_read->read_resp;
91 
92 	spin_lock(&mmio_read->lock);
93 	mmio_read->seq_num++;
94 
95 	/* trash DMA req_id to identify when hardware is done */
96 	read_resp->req_id = mmio_read->seq_num + 0x9aL;
97 	mmio_read_reg = (offset << EFA_REGS_MMIO_REG_READ_REG_OFF_SHIFT) &
98 			EFA_REGS_MMIO_REG_READ_REG_OFF_MASK;
99 	mmio_read_reg |= mmio_read->seq_num &
100 			 EFA_REGS_MMIO_REG_READ_REQ_ID_MASK;
101 
102 	writel(mmio_read_reg, edev->reg_bar + EFA_REGS_MMIO_REG_READ_OFF);
103 
104 	exp_time = jiffies + usecs_to_jiffies(mmio_read->mmio_read_timeout);
105 	do {
106 		if (READ_ONCE(read_resp->req_id) == mmio_read->seq_num)
107 			break;
108 		udelay(1);
109 	} while (time_is_after_jiffies(exp_time));
110 
111 	if (read_resp->req_id != mmio_read->seq_num) {
112 		ibdev_err_ratelimited(
113 			edev->efa_dev,
114 			"Reading register timed out. expected: req id[%u] offset[%#x] actual: req id[%u] offset[%#x]\n",
115 			mmio_read->seq_num, offset, read_resp->req_id,
116 			read_resp->reg_off);
117 		err = EFA_MMIO_READ_INVALID;
118 		goto out;
119 	}
120 
121 	if (read_resp->reg_off != offset) {
122 		ibdev_err_ratelimited(
123 			edev->efa_dev,
124 			"Reading register failed: wrong offset provided\n");
125 		err = EFA_MMIO_READ_INVALID;
126 		goto out;
127 	}
128 
129 	err = read_resp->reg_val;
130 out:
131 	spin_unlock(&mmio_read->lock);
132 	return err;
133 }
134 
135 static int efa_com_admin_init_sq(struct efa_com_dev *edev)
136 {
137 	struct efa_com_admin_queue *aq = &edev->aq;
138 	struct efa_com_admin_sq *sq = &aq->sq;
139 	u16 size = aq->depth * sizeof(*sq->entries);
140 	u32 addr_high;
141 	u32 addr_low;
142 	u32 aq_caps;
143 
144 	sq->entries =
145 		dma_alloc_coherent(aq->dmadev, size, &sq->dma_addr, GFP_KERNEL);
146 	if (!sq->entries)
147 		return -ENOMEM;
148 
149 	spin_lock_init(&sq->lock);
150 
151 	sq->cc = 0;
152 	sq->pc = 0;
153 	sq->phase = 1;
154 
155 	sq->db_addr = (u32 __iomem *)(edev->reg_bar + EFA_REGS_AQ_PROD_DB_OFF);
156 
157 	addr_high = EFA_DMA_ADDR_TO_UINT32_HIGH(sq->dma_addr);
158 	addr_low = EFA_DMA_ADDR_TO_UINT32_LOW(sq->dma_addr);
159 
160 	writel(addr_low, edev->reg_bar + EFA_REGS_AQ_BASE_LO_OFF);
161 	writel(addr_high, edev->reg_bar + EFA_REGS_AQ_BASE_HI_OFF);
162 
163 	aq_caps = aq->depth & EFA_REGS_AQ_CAPS_AQ_DEPTH_MASK;
164 	aq_caps |= (sizeof(struct efa_admin_aq_entry) <<
165 			EFA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_SHIFT) &
166 			EFA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_MASK;
167 
168 	writel(aq_caps, edev->reg_bar + EFA_REGS_AQ_CAPS_OFF);
169 
170 	return 0;
171 }
172 
173 static int efa_com_admin_init_cq(struct efa_com_dev *edev)
174 {
175 	struct efa_com_admin_queue *aq = &edev->aq;
176 	struct efa_com_admin_cq *cq = &aq->cq;
177 	u16 size = aq->depth * sizeof(*cq->entries);
178 	u32 addr_high;
179 	u32 addr_low;
180 	u32 acq_caps;
181 
182 	cq->entries =
183 		dma_alloc_coherent(aq->dmadev, size, &cq->dma_addr, GFP_KERNEL);
184 	if (!cq->entries)
185 		return -ENOMEM;
186 
187 	spin_lock_init(&cq->lock);
188 
189 	cq->cc = 0;
190 	cq->phase = 1;
191 
192 	addr_high = EFA_DMA_ADDR_TO_UINT32_HIGH(cq->dma_addr);
193 	addr_low = EFA_DMA_ADDR_TO_UINT32_LOW(cq->dma_addr);
194 
195 	writel(addr_low, edev->reg_bar + EFA_REGS_ACQ_BASE_LO_OFF);
196 	writel(addr_high, edev->reg_bar + EFA_REGS_ACQ_BASE_HI_OFF);
197 
198 	acq_caps = aq->depth & EFA_REGS_ACQ_CAPS_ACQ_DEPTH_MASK;
199 	acq_caps |= (sizeof(struct efa_admin_acq_entry) <<
200 			EFA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_SHIFT) &
201 			EFA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_MASK;
202 	acq_caps |= (aq->msix_vector_idx <<
203 			EFA_REGS_ACQ_CAPS_ACQ_MSIX_VECTOR_SHIFT) &
204 			EFA_REGS_ACQ_CAPS_ACQ_MSIX_VECTOR_MASK;
205 
206 	writel(acq_caps, edev->reg_bar + EFA_REGS_ACQ_CAPS_OFF);
207 
208 	return 0;
209 }
210 
211 static int efa_com_admin_init_aenq(struct efa_com_dev *edev,
212 				   struct efa_aenq_handlers *aenq_handlers)
213 {
214 	struct efa_com_aenq *aenq = &edev->aenq;
215 	u32 addr_low, addr_high, aenq_caps;
216 	u16 size;
217 
218 	if (!aenq_handlers) {
219 		ibdev_err(edev->efa_dev, "aenq handlers pointer is NULL\n");
220 		return -EINVAL;
221 	}
222 
223 	size = EFA_ASYNC_QUEUE_DEPTH * sizeof(*aenq->entries);
224 	aenq->entries = dma_alloc_coherent(edev->dmadev, size, &aenq->dma_addr,
225 					   GFP_KERNEL);
226 	if (!aenq->entries)
227 		return -ENOMEM;
228 
229 	aenq->aenq_handlers = aenq_handlers;
230 	aenq->depth = EFA_ASYNC_QUEUE_DEPTH;
231 	aenq->cc = 0;
232 	aenq->phase = 1;
233 
234 	addr_low = EFA_DMA_ADDR_TO_UINT32_LOW(aenq->dma_addr);
235 	addr_high = EFA_DMA_ADDR_TO_UINT32_HIGH(aenq->dma_addr);
236 
237 	writel(addr_low, edev->reg_bar + EFA_REGS_AENQ_BASE_LO_OFF);
238 	writel(addr_high, edev->reg_bar + EFA_REGS_AENQ_BASE_HI_OFF);
239 
240 	aenq_caps = aenq->depth & EFA_REGS_AENQ_CAPS_AENQ_DEPTH_MASK;
241 	aenq_caps |= (sizeof(struct efa_admin_aenq_entry) <<
242 		EFA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_SHIFT) &
243 		EFA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_MASK;
244 	aenq_caps |= (aenq->msix_vector_idx
245 		      << EFA_REGS_AENQ_CAPS_AENQ_MSIX_VECTOR_SHIFT) &
246 		     EFA_REGS_AENQ_CAPS_AENQ_MSIX_VECTOR_MASK;
247 	writel(aenq_caps, edev->reg_bar + EFA_REGS_AENQ_CAPS_OFF);
248 
249 	/*
250 	 * Init cons_db to mark that all entries in the queue
251 	 * are initially available
252 	 */
253 	writel(edev->aenq.cc, edev->reg_bar + EFA_REGS_AENQ_CONS_DB_OFF);
254 
255 	return 0;
256 }
257 
258 /* ID to be used with efa_com_get_comp_ctx */
259 static u16 efa_com_alloc_ctx_id(struct efa_com_admin_queue *aq)
260 {
261 	u16 ctx_id;
262 
263 	spin_lock(&aq->comp_ctx_lock);
264 	ctx_id = aq->comp_ctx_pool[aq->comp_ctx_pool_next];
265 	aq->comp_ctx_pool_next++;
266 	spin_unlock(&aq->comp_ctx_lock);
267 
268 	return ctx_id;
269 }
270 
271 static void efa_com_dealloc_ctx_id(struct efa_com_admin_queue *aq,
272 				   u16 ctx_id)
273 {
274 	spin_lock(&aq->comp_ctx_lock);
275 	aq->comp_ctx_pool_next--;
276 	aq->comp_ctx_pool[aq->comp_ctx_pool_next] = ctx_id;
277 	spin_unlock(&aq->comp_ctx_lock);
278 }
279 
280 static inline void efa_com_put_comp_ctx(struct efa_com_admin_queue *aq,
281 					struct efa_comp_ctx *comp_ctx)
282 {
283 	u16 cmd_id = comp_ctx->user_cqe->acq_common_descriptor.command &
284 		     EFA_ADMIN_ACQ_COMMON_DESC_COMMAND_ID_MASK;
285 	u16 ctx_id = cmd_id & (aq->depth - 1);
286 
287 	ibdev_dbg(aq->efa_dev, "Put completion command_id %#x\n", cmd_id);
288 	comp_ctx->occupied = 0;
289 	efa_com_dealloc_ctx_id(aq, ctx_id);
290 }
291 
292 static struct efa_comp_ctx *efa_com_get_comp_ctx(struct efa_com_admin_queue *aq,
293 						 u16 cmd_id, bool capture)
294 {
295 	u16 ctx_id = cmd_id & (aq->depth - 1);
296 
297 	if (aq->comp_ctx[ctx_id].occupied && capture) {
298 		ibdev_err_ratelimited(
299 			aq->efa_dev,
300 			"Completion context for command_id %#x is occupied\n",
301 			cmd_id);
302 		return NULL;
303 	}
304 
305 	if (capture) {
306 		aq->comp_ctx[ctx_id].occupied = 1;
307 		ibdev_dbg(aq->efa_dev,
308 			  "Take completion ctxt for command_id %#x\n", cmd_id);
309 	}
310 
311 	return &aq->comp_ctx[ctx_id];
312 }
313 
314 static struct efa_comp_ctx *__efa_com_submit_admin_cmd(struct efa_com_admin_queue *aq,
315 						       struct efa_admin_aq_entry *cmd,
316 						       size_t cmd_size_in_bytes,
317 						       struct efa_admin_acq_entry *comp,
318 						       size_t comp_size_in_bytes)
319 {
320 	struct efa_admin_aq_entry *aqe;
321 	struct efa_comp_ctx *comp_ctx;
322 	u16 queue_size_mask;
323 	u16 cmd_id;
324 	u16 ctx_id;
325 	u16 pi;
326 
327 	queue_size_mask = aq->depth - 1;
328 	pi = aq->sq.pc & queue_size_mask;
329 
330 	ctx_id = efa_com_alloc_ctx_id(aq);
331 
332 	/* cmd_id LSBs are the ctx_id and MSBs are entropy bits from pc */
333 	cmd_id = ctx_id & queue_size_mask;
334 	cmd_id |= aq->sq.pc & ~queue_size_mask;
335 	cmd_id &= EFA_ADMIN_AQ_COMMON_DESC_COMMAND_ID_MASK;
336 
337 	cmd->aq_common_descriptor.command_id = cmd_id;
338 	cmd->aq_common_descriptor.flags |= aq->sq.phase &
339 		EFA_ADMIN_AQ_COMMON_DESC_PHASE_MASK;
340 
341 	comp_ctx = efa_com_get_comp_ctx(aq, cmd_id, true);
342 	if (!comp_ctx) {
343 		efa_com_dealloc_ctx_id(aq, ctx_id);
344 		return ERR_PTR(-EINVAL);
345 	}
346 
347 	comp_ctx->status = EFA_CMD_SUBMITTED;
348 	comp_ctx->comp_size = comp_size_in_bytes;
349 	comp_ctx->user_cqe = comp;
350 	comp_ctx->cmd_opcode = cmd->aq_common_descriptor.opcode;
351 
352 	reinit_completion(&comp_ctx->wait_event);
353 
354 	aqe = &aq->sq.entries[pi];
355 	memset(aqe, 0, sizeof(*aqe));
356 	memcpy(aqe, cmd, cmd_size_in_bytes);
357 
358 	aq->sq.pc++;
359 	atomic64_inc(&aq->stats.submitted_cmd);
360 
361 	if ((aq->sq.pc & queue_size_mask) == 0)
362 		aq->sq.phase = !aq->sq.phase;
363 
364 	/* barrier not needed in case of writel */
365 	writel(aq->sq.pc, aq->sq.db_addr);
366 
367 	return comp_ctx;
368 }
369 
370 static inline int efa_com_init_comp_ctxt(struct efa_com_admin_queue *aq)
371 {
372 	size_t pool_size = aq->depth * sizeof(*aq->comp_ctx_pool);
373 	size_t size = aq->depth * sizeof(struct efa_comp_ctx);
374 	struct efa_comp_ctx *comp_ctx;
375 	u16 i;
376 
377 	aq->comp_ctx = devm_kzalloc(aq->dmadev, size, GFP_KERNEL);
378 	aq->comp_ctx_pool = devm_kzalloc(aq->dmadev, pool_size, GFP_KERNEL);
379 	if (!aq->comp_ctx || !aq->comp_ctx_pool) {
380 		devm_kfree(aq->dmadev, aq->comp_ctx_pool);
381 		devm_kfree(aq->dmadev, aq->comp_ctx);
382 		return -ENOMEM;
383 	}
384 
385 	for (i = 0; i < aq->depth; i++) {
386 		comp_ctx = efa_com_get_comp_ctx(aq, i, false);
387 		if (comp_ctx)
388 			init_completion(&comp_ctx->wait_event);
389 
390 		aq->comp_ctx_pool[i] = i;
391 	}
392 
393 	spin_lock_init(&aq->comp_ctx_lock);
394 
395 	aq->comp_ctx_pool_next = 0;
396 
397 	return 0;
398 }
399 
400 static struct efa_comp_ctx *efa_com_submit_admin_cmd(struct efa_com_admin_queue *aq,
401 						     struct efa_admin_aq_entry *cmd,
402 						     size_t cmd_size_in_bytes,
403 						     struct efa_admin_acq_entry *comp,
404 						     size_t comp_size_in_bytes)
405 {
406 	struct efa_comp_ctx *comp_ctx;
407 
408 	spin_lock(&aq->sq.lock);
409 	if (!test_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state)) {
410 		ibdev_err_ratelimited(aq->efa_dev, "Admin queue is closed\n");
411 		spin_unlock(&aq->sq.lock);
412 		return ERR_PTR(-ENODEV);
413 	}
414 
415 	comp_ctx = __efa_com_submit_admin_cmd(aq, cmd, cmd_size_in_bytes, comp,
416 					      comp_size_in_bytes);
417 	spin_unlock(&aq->sq.lock);
418 	if (IS_ERR(comp_ctx))
419 		clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state);
420 
421 	return comp_ctx;
422 }
423 
424 static void efa_com_handle_single_admin_completion(struct efa_com_admin_queue *aq,
425 						   struct efa_admin_acq_entry *cqe)
426 {
427 	struct efa_comp_ctx *comp_ctx;
428 	u16 cmd_id;
429 
430 	cmd_id = cqe->acq_common_descriptor.command &
431 		 EFA_ADMIN_ACQ_COMMON_DESC_COMMAND_ID_MASK;
432 
433 	comp_ctx = efa_com_get_comp_ctx(aq, cmd_id, false);
434 	if (!comp_ctx) {
435 		ibdev_err(aq->efa_dev,
436 			  "comp_ctx is NULL. Changing the admin queue running state\n");
437 		clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state);
438 		return;
439 	}
440 
441 	comp_ctx->status = EFA_CMD_COMPLETED;
442 	comp_ctx->comp_status = cqe->acq_common_descriptor.status;
443 	if (comp_ctx->user_cqe)
444 		memcpy(comp_ctx->user_cqe, cqe, comp_ctx->comp_size);
445 
446 	if (!test_bit(EFA_AQ_STATE_POLLING_BIT, &aq->state))
447 		complete(&comp_ctx->wait_event);
448 }
449 
450 static void efa_com_handle_admin_completion(struct efa_com_admin_queue *aq)
451 {
452 	struct efa_admin_acq_entry *cqe;
453 	u16 queue_size_mask;
454 	u16 comp_num = 0;
455 	u8 phase;
456 	u16 ci;
457 
458 	queue_size_mask = aq->depth - 1;
459 
460 	ci = aq->cq.cc & queue_size_mask;
461 	phase = aq->cq.phase;
462 
463 	cqe = &aq->cq.entries[ci];
464 
465 	/* Go over all the completions */
466 	while ((READ_ONCE(cqe->acq_common_descriptor.flags) &
467 		EFA_ADMIN_ACQ_COMMON_DESC_PHASE_MASK) == phase) {
468 		/*
469 		 * Do not read the rest of the completion entry before the
470 		 * phase bit was validated
471 		 */
472 		dma_rmb();
473 		efa_com_handle_single_admin_completion(aq, cqe);
474 
475 		ci++;
476 		comp_num++;
477 		if (ci == aq->depth) {
478 			ci = 0;
479 			phase = !phase;
480 		}
481 
482 		cqe = &aq->cq.entries[ci];
483 	}
484 
485 	aq->cq.cc += comp_num;
486 	aq->cq.phase = phase;
487 	aq->sq.cc += comp_num;
488 	atomic64_add(comp_num, &aq->stats.completed_cmd);
489 }
490 
491 static int efa_com_comp_status_to_errno(u8 comp_status)
492 {
493 	switch (comp_status) {
494 	case EFA_ADMIN_SUCCESS:
495 		return 0;
496 	case EFA_ADMIN_RESOURCE_ALLOCATION_FAILURE:
497 		return -ENOMEM;
498 	case EFA_ADMIN_UNSUPPORTED_OPCODE:
499 		return -EOPNOTSUPP;
500 	case EFA_ADMIN_BAD_OPCODE:
501 	case EFA_ADMIN_MALFORMED_REQUEST:
502 	case EFA_ADMIN_ILLEGAL_PARAMETER:
503 	case EFA_ADMIN_UNKNOWN_ERROR:
504 		return -EINVAL;
505 	default:
506 		return -EINVAL;
507 	}
508 }
509 
510 static int efa_com_wait_and_process_admin_cq_polling(struct efa_comp_ctx *comp_ctx,
511 						     struct efa_com_admin_queue *aq)
512 {
513 	unsigned long timeout;
514 	unsigned long flags;
515 	int err;
516 
517 	timeout = jiffies + usecs_to_jiffies(aq->completion_timeout);
518 
519 	while (1) {
520 		spin_lock_irqsave(&aq->cq.lock, flags);
521 		efa_com_handle_admin_completion(aq);
522 		spin_unlock_irqrestore(&aq->cq.lock, flags);
523 
524 		if (comp_ctx->status != EFA_CMD_SUBMITTED)
525 			break;
526 
527 		if (time_is_before_jiffies(timeout)) {
528 			ibdev_err_ratelimited(
529 				aq->efa_dev,
530 				"Wait for completion (polling) timeout\n");
531 			/* EFA didn't have any completion */
532 			atomic64_inc(&aq->stats.no_completion);
533 
534 			clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state);
535 			err = -ETIME;
536 			goto out;
537 		}
538 
539 		msleep(aq->poll_interval);
540 	}
541 
542 	err = efa_com_comp_status_to_errno(comp_ctx->comp_status);
543 out:
544 	efa_com_put_comp_ctx(aq, comp_ctx);
545 	return err;
546 }
547 
548 static int efa_com_wait_and_process_admin_cq_interrupts(struct efa_comp_ctx *comp_ctx,
549 							struct efa_com_admin_queue *aq)
550 {
551 	unsigned long flags;
552 	int err;
553 
554 	wait_for_completion_timeout(&comp_ctx->wait_event,
555 				    usecs_to_jiffies(aq->completion_timeout));
556 
557 	/*
558 	 * In case the command wasn't completed find out the root cause.
559 	 * There might be 2 kinds of errors
560 	 * 1) No completion (timeout reached)
561 	 * 2) There is completion but the device didn't get any msi-x interrupt.
562 	 */
563 	if (comp_ctx->status == EFA_CMD_SUBMITTED) {
564 		spin_lock_irqsave(&aq->cq.lock, flags);
565 		efa_com_handle_admin_completion(aq);
566 		spin_unlock_irqrestore(&aq->cq.lock, flags);
567 
568 		atomic64_inc(&aq->stats.no_completion);
569 
570 		if (comp_ctx->status == EFA_CMD_COMPLETED)
571 			ibdev_err_ratelimited(
572 				aq->efa_dev,
573 				"The device sent a completion but the driver didn't receive any MSI-X interrupt for admin cmd %s(%d) status %d (ctx: 0x%p, sq producer: %d, sq consumer: %d, cq consumer: %d)\n",
574 				efa_com_cmd_str(comp_ctx->cmd_opcode),
575 				comp_ctx->cmd_opcode, comp_ctx->status,
576 				comp_ctx, aq->sq.pc, aq->sq.cc, aq->cq.cc);
577 		else
578 			ibdev_err_ratelimited(
579 				aq->efa_dev,
580 				"The device didn't send any completion for admin cmd %s(%d) status %d (ctx 0x%p, sq producer: %d, sq consumer: %d, cq consumer: %d)\n",
581 				efa_com_cmd_str(comp_ctx->cmd_opcode),
582 				comp_ctx->cmd_opcode, comp_ctx->status,
583 				comp_ctx, aq->sq.pc, aq->sq.cc, aq->cq.cc);
584 
585 		clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state);
586 		err = -ETIME;
587 		goto out;
588 	}
589 
590 	err = efa_com_comp_status_to_errno(comp_ctx->comp_status);
591 out:
592 	efa_com_put_comp_ctx(aq, comp_ctx);
593 	return err;
594 }
595 
596 /*
597  * There are two types to wait for completion.
598  * Polling mode - wait until the completion is available.
599  * Async mode - wait on wait queue until the completion is ready
600  * (or the timeout expired).
601  * It is expected that the IRQ called efa_com_handle_admin_completion
602  * to mark the completions.
603  */
604 static int efa_com_wait_and_process_admin_cq(struct efa_comp_ctx *comp_ctx,
605 					     struct efa_com_admin_queue *aq)
606 {
607 	if (test_bit(EFA_AQ_STATE_POLLING_BIT, &aq->state))
608 		return efa_com_wait_and_process_admin_cq_polling(comp_ctx, aq);
609 
610 	return efa_com_wait_and_process_admin_cq_interrupts(comp_ctx, aq);
611 }
612 
613 /**
614  * efa_com_cmd_exec - Execute admin command
615  * @aq: admin queue.
616  * @cmd: the admin command to execute.
617  * @cmd_size: the command size.
618  * @comp: command completion return entry.
619  * @comp_size: command completion size.
620  * Submit an admin command and then wait until the device will return a
621  * completion.
622  * The completion will be copied into comp.
623  *
624  * @return - 0 on success, negative value on failure.
625  */
626 int efa_com_cmd_exec(struct efa_com_admin_queue *aq,
627 		     struct efa_admin_aq_entry *cmd,
628 		     size_t cmd_size,
629 		     struct efa_admin_acq_entry *comp,
630 		     size_t comp_size)
631 {
632 	struct efa_comp_ctx *comp_ctx;
633 	int err;
634 
635 	might_sleep();
636 
637 	/* In case of queue FULL */
638 	down(&aq->avail_cmds);
639 
640 	ibdev_dbg(aq->efa_dev, "%s (opcode %d)\n",
641 		  efa_com_cmd_str(cmd->aq_common_descriptor.opcode),
642 		  cmd->aq_common_descriptor.opcode);
643 	comp_ctx = efa_com_submit_admin_cmd(aq, cmd, cmd_size, comp, comp_size);
644 	if (IS_ERR(comp_ctx)) {
645 		ibdev_err_ratelimited(
646 			aq->efa_dev,
647 			"Failed to submit command %s (opcode %u) err %ld\n",
648 			efa_com_cmd_str(cmd->aq_common_descriptor.opcode),
649 			cmd->aq_common_descriptor.opcode, PTR_ERR(comp_ctx));
650 
651 		up(&aq->avail_cmds);
652 		return PTR_ERR(comp_ctx);
653 	}
654 
655 	err = efa_com_wait_and_process_admin_cq(comp_ctx, aq);
656 	if (err)
657 		ibdev_err_ratelimited(
658 			aq->efa_dev,
659 			"Failed to process command %s (opcode %u) comp_status %d err %d\n",
660 			efa_com_cmd_str(cmd->aq_common_descriptor.opcode),
661 			cmd->aq_common_descriptor.opcode, comp_ctx->comp_status,
662 			err);
663 
664 	up(&aq->avail_cmds);
665 
666 	return err;
667 }
668 
669 /**
670  * efa_com_admin_destroy - Destroy the admin and the async events queues.
671  * @edev: EFA communication layer struct
672  */
673 void efa_com_admin_destroy(struct efa_com_dev *edev)
674 {
675 	struct efa_com_admin_queue *aq = &edev->aq;
676 	struct efa_com_aenq *aenq = &edev->aenq;
677 	struct efa_com_admin_cq *cq = &aq->cq;
678 	struct efa_com_admin_sq *sq = &aq->sq;
679 	u16 size;
680 
681 	clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state);
682 
683 	devm_kfree(edev->dmadev, aq->comp_ctx_pool);
684 	devm_kfree(edev->dmadev, aq->comp_ctx);
685 
686 	size = aq->depth * sizeof(*sq->entries);
687 	dma_free_coherent(edev->dmadev, size, sq->entries, sq->dma_addr);
688 
689 	size = aq->depth * sizeof(*cq->entries);
690 	dma_free_coherent(edev->dmadev, size, cq->entries, cq->dma_addr);
691 
692 	size = aenq->depth * sizeof(*aenq->entries);
693 	dma_free_coherent(edev->dmadev, size, aenq->entries, aenq->dma_addr);
694 }
695 
696 /**
697  * efa_com_set_admin_polling_mode - Set the admin completion queue polling mode
698  * @edev: EFA communication layer struct
699  * @polling: Enable/Disable polling mode
700  *
701  * Set the admin completion mode.
702  */
703 void efa_com_set_admin_polling_mode(struct efa_com_dev *edev, bool polling)
704 {
705 	u32 mask_value = 0;
706 
707 	if (polling)
708 		mask_value = EFA_REGS_ADMIN_INTR_MASK;
709 
710 	writel(mask_value, edev->reg_bar + EFA_REGS_INTR_MASK_OFF);
711 	if (polling)
712 		set_bit(EFA_AQ_STATE_POLLING_BIT, &edev->aq.state);
713 	else
714 		clear_bit(EFA_AQ_STATE_POLLING_BIT, &edev->aq.state);
715 }
716 
717 static void efa_com_stats_init(struct efa_com_dev *edev)
718 {
719 	atomic64_t *s = (atomic64_t *)&edev->aq.stats;
720 	int i;
721 
722 	for (i = 0; i < sizeof(edev->aq.stats) / sizeof(*s); i++, s++)
723 		atomic64_set(s, 0);
724 }
725 
726 /**
727  * efa_com_admin_init - Init the admin and the async queues
728  * @edev: EFA communication layer struct
729  * @aenq_handlers: Those handlers to be called upon event.
730  *
731  * Initialize the admin submission and completion queues.
732  * Initialize the asynchronous events notification queues.
733  *
734  * @return - 0 on success, negative value on failure.
735  */
736 int efa_com_admin_init(struct efa_com_dev *edev,
737 		       struct efa_aenq_handlers *aenq_handlers)
738 {
739 	struct efa_com_admin_queue *aq = &edev->aq;
740 	u32 timeout;
741 	u32 dev_sts;
742 	u32 cap;
743 	int err;
744 
745 	dev_sts = efa_com_reg_read32(edev, EFA_REGS_DEV_STS_OFF);
746 	if (!(dev_sts & EFA_REGS_DEV_STS_READY_MASK)) {
747 		ibdev_err(edev->efa_dev,
748 			  "Device isn't ready, abort com init %#x\n", dev_sts);
749 		return -ENODEV;
750 	}
751 
752 	aq->depth = EFA_ADMIN_QUEUE_DEPTH;
753 
754 	aq->dmadev = edev->dmadev;
755 	aq->efa_dev = edev->efa_dev;
756 	set_bit(EFA_AQ_STATE_POLLING_BIT, &aq->state);
757 
758 	sema_init(&aq->avail_cmds, aq->depth);
759 
760 	efa_com_stats_init(edev);
761 
762 	err = efa_com_init_comp_ctxt(aq);
763 	if (err)
764 		return err;
765 
766 	err = efa_com_admin_init_sq(edev);
767 	if (err)
768 		goto err_destroy_comp_ctxt;
769 
770 	err = efa_com_admin_init_cq(edev);
771 	if (err)
772 		goto err_destroy_sq;
773 
774 	efa_com_set_admin_polling_mode(edev, false);
775 
776 	err = efa_com_admin_init_aenq(edev, aenq_handlers);
777 	if (err)
778 		goto err_destroy_cq;
779 
780 	cap = efa_com_reg_read32(edev, EFA_REGS_CAPS_OFF);
781 	timeout = (cap & EFA_REGS_CAPS_ADMIN_CMD_TO_MASK) >>
782 		  EFA_REGS_CAPS_ADMIN_CMD_TO_SHIFT;
783 	if (timeout)
784 		/* the resolution of timeout reg is 100ms */
785 		aq->completion_timeout = timeout * 100000;
786 	else
787 		aq->completion_timeout = ADMIN_CMD_TIMEOUT_US;
788 
789 	aq->poll_interval = EFA_POLL_INTERVAL_MS;
790 
791 	set_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state);
792 
793 	return 0;
794 
795 err_destroy_cq:
796 	dma_free_coherent(edev->dmadev, aq->depth * sizeof(*aq->cq.entries),
797 			  aq->cq.entries, aq->cq.dma_addr);
798 err_destroy_sq:
799 	dma_free_coherent(edev->dmadev, aq->depth * sizeof(*aq->sq.entries),
800 			  aq->sq.entries, aq->sq.dma_addr);
801 err_destroy_comp_ctxt:
802 	devm_kfree(edev->dmadev, aq->comp_ctx);
803 
804 	return err;
805 }
806 
807 /**
808  * efa_com_admin_q_comp_intr_handler - admin queue interrupt handler
809  * @edev: EFA communication layer struct
810  *
811  * This method goes over the admin completion queue and wakes up
812  * all the pending threads that wait on the commands wait event.
813  *
814  * @note: Should be called after MSI-X interrupt.
815  */
816 void efa_com_admin_q_comp_intr_handler(struct efa_com_dev *edev)
817 {
818 	unsigned long flags;
819 
820 	spin_lock_irqsave(&edev->aq.cq.lock, flags);
821 	efa_com_handle_admin_completion(&edev->aq);
822 	spin_unlock_irqrestore(&edev->aq.cq.lock, flags);
823 }
824 
825 /*
826  * efa_handle_specific_aenq_event:
827  * return the handler that is relevant to the specific event group
828  */
829 static efa_aenq_handler efa_com_get_specific_aenq_cb(struct efa_com_dev *edev,
830 						     u16 group)
831 {
832 	struct efa_aenq_handlers *aenq_handlers = edev->aenq.aenq_handlers;
833 
834 	if (group < EFA_MAX_HANDLERS && aenq_handlers->handlers[group])
835 		return aenq_handlers->handlers[group];
836 
837 	return aenq_handlers->unimplemented_handler;
838 }
839 
840 /**
841  * efa_com_aenq_intr_handler - AENQ interrupt handler
842  * @edev: EFA communication layer struct
843  * @data: Data of interrupt handler.
844  *
845  * Go over the async event notification queue and call the proper aenq handler.
846  */
847 void efa_com_aenq_intr_handler(struct efa_com_dev *edev, void *data)
848 {
849 	struct efa_admin_aenq_common_desc *aenq_common;
850 	struct efa_com_aenq *aenq = &edev->aenq;
851 	struct efa_admin_aenq_entry *aenq_e;
852 	efa_aenq_handler handler_cb;
853 	u32 processed = 0;
854 	u8 phase;
855 	u32 ci;
856 
857 	ci = aenq->cc & (aenq->depth - 1);
858 	phase = aenq->phase;
859 	aenq_e = &aenq->entries[ci]; /* Get first entry */
860 	aenq_common = &aenq_e->aenq_common_desc;
861 
862 	/* Go over all the events */
863 	while ((READ_ONCE(aenq_common->flags) &
864 		EFA_ADMIN_AENQ_COMMON_DESC_PHASE_MASK) == phase) {
865 		/*
866 		 * Do not read the rest of the completion entry before the
867 		 * phase bit was validated
868 		 */
869 		dma_rmb();
870 
871 		/* Handle specific event*/
872 		handler_cb = efa_com_get_specific_aenq_cb(edev,
873 							  aenq_common->group);
874 		handler_cb(data, aenq_e); /* call the actual event handler*/
875 
876 		/* Get next event entry */
877 		ci++;
878 		processed++;
879 
880 		if (ci == aenq->depth) {
881 			ci = 0;
882 			phase = !phase;
883 		}
884 		aenq_e = &aenq->entries[ci];
885 		aenq_common = &aenq_e->aenq_common_desc;
886 	}
887 
888 	aenq->cc += processed;
889 	aenq->phase = phase;
890 
891 	/* Don't update aenq doorbell if there weren't any processed events */
892 	if (!processed)
893 		return;
894 
895 	/* barrier not needed in case of writel */
896 	writel(aenq->cc, edev->reg_bar + EFA_REGS_AENQ_CONS_DB_OFF);
897 }
898 
899 static void efa_com_mmio_reg_read_resp_addr_init(struct efa_com_dev *edev)
900 {
901 	struct efa_com_mmio_read *mmio_read = &edev->mmio_read;
902 	u32 addr_high;
903 	u32 addr_low;
904 
905 	/* dma_addr_bits is unknown at this point */
906 	addr_high = (mmio_read->read_resp_dma_addr >> 32) & GENMASK(31, 0);
907 	addr_low = mmio_read->read_resp_dma_addr & GENMASK(31, 0);
908 
909 	writel(addr_high, edev->reg_bar + EFA_REGS_MMIO_RESP_HI_OFF);
910 	writel(addr_low, edev->reg_bar + EFA_REGS_MMIO_RESP_LO_OFF);
911 }
912 
913 int efa_com_mmio_reg_read_init(struct efa_com_dev *edev)
914 {
915 	struct efa_com_mmio_read *mmio_read = &edev->mmio_read;
916 
917 	spin_lock_init(&mmio_read->lock);
918 	mmio_read->read_resp =
919 		dma_alloc_coherent(edev->dmadev, sizeof(*mmio_read->read_resp),
920 				   &mmio_read->read_resp_dma_addr, GFP_KERNEL);
921 	if (!mmio_read->read_resp)
922 		return -ENOMEM;
923 
924 	efa_com_mmio_reg_read_resp_addr_init(edev);
925 
926 	mmio_read->read_resp->req_id = 0;
927 	mmio_read->seq_num = 0;
928 	mmio_read->mmio_read_timeout = EFA_REG_READ_TIMEOUT_US;
929 
930 	return 0;
931 }
932 
933 void efa_com_mmio_reg_read_destroy(struct efa_com_dev *edev)
934 {
935 	struct efa_com_mmio_read *mmio_read = &edev->mmio_read;
936 
937 	dma_free_coherent(edev->dmadev, sizeof(*mmio_read->read_resp),
938 			  mmio_read->read_resp, mmio_read->read_resp_dma_addr);
939 }
940 
941 int efa_com_validate_version(struct efa_com_dev *edev)
942 {
943 	u32 ctrl_ver_masked;
944 	u32 ctrl_ver;
945 	u32 ver;
946 
947 	/*
948 	 * Make sure the EFA version and the controller version are at least
949 	 * as the driver expects
950 	 */
951 	ver = efa_com_reg_read32(edev, EFA_REGS_VERSION_OFF);
952 	ctrl_ver = efa_com_reg_read32(edev,
953 				      EFA_REGS_CONTROLLER_VERSION_OFF);
954 
955 	ibdev_dbg(edev->efa_dev, "efa device version: %d.%d\n",
956 		  (ver & EFA_REGS_VERSION_MAJOR_VERSION_MASK) >>
957 			  EFA_REGS_VERSION_MAJOR_VERSION_SHIFT,
958 		  ver & EFA_REGS_VERSION_MINOR_VERSION_MASK);
959 
960 	if (ver < MIN_EFA_VER) {
961 		ibdev_err(edev->efa_dev,
962 			  "EFA version is lower than the minimal version the driver supports\n");
963 		return -EOPNOTSUPP;
964 	}
965 
966 	ibdev_dbg(edev->efa_dev,
967 		  "efa controller version: %d.%d.%d implementation version %d\n",
968 		  (ctrl_ver & EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_MASK) >>
969 			  EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_SHIFT,
970 		  (ctrl_ver & EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION_MASK) >>
971 			  EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION_SHIFT,
972 		  (ctrl_ver & EFA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION_MASK),
973 		  (ctrl_ver & EFA_REGS_CONTROLLER_VERSION_IMPL_ID_MASK) >>
974 			  EFA_REGS_CONTROLLER_VERSION_IMPL_ID_SHIFT);
975 
976 	ctrl_ver_masked =
977 		(ctrl_ver & EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_MASK) |
978 		(ctrl_ver & EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION_MASK) |
979 		(ctrl_ver & EFA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION_MASK);
980 
981 	/* Validate the ctrl version without the implementation ID */
982 	if (ctrl_ver_masked < MIN_EFA_CTRL_VER) {
983 		ibdev_err(edev->efa_dev,
984 			  "EFA ctrl version is lower than the minimal ctrl version the driver supports\n");
985 		return -EOPNOTSUPP;
986 	}
987 
988 	return 0;
989 }
990 
991 /**
992  * efa_com_get_dma_width - Retrieve physical dma address width the device
993  * supports.
994  * @edev: EFA communication layer struct
995  *
996  * Retrieve the maximum physical address bits the device can handle.
997  *
998  * @return: > 0 on Success and negative value otherwise.
999  */
1000 int efa_com_get_dma_width(struct efa_com_dev *edev)
1001 {
1002 	u32 caps = efa_com_reg_read32(edev, EFA_REGS_CAPS_OFF);
1003 	int width;
1004 
1005 	width = (caps & EFA_REGS_CAPS_DMA_ADDR_WIDTH_MASK) >>
1006 		EFA_REGS_CAPS_DMA_ADDR_WIDTH_SHIFT;
1007 
1008 	ibdev_dbg(edev->efa_dev, "DMA width: %d\n", width);
1009 
1010 	if (width < 32 || width > 64) {
1011 		ibdev_err(edev->efa_dev, "DMA width illegal value: %d\n", width);
1012 		return -EINVAL;
1013 	}
1014 
1015 	edev->dma_addr_bits = width;
1016 
1017 	return width;
1018 }
1019 
1020 static int wait_for_reset_state(struct efa_com_dev *edev, u32 timeout,
1021 				u16 exp_state)
1022 {
1023 	u32 val, i;
1024 
1025 	for (i = 0; i < timeout; i++) {
1026 		val = efa_com_reg_read32(edev, EFA_REGS_DEV_STS_OFF);
1027 
1028 		if ((val & EFA_REGS_DEV_STS_RESET_IN_PROGRESS_MASK) ==
1029 		    exp_state)
1030 			return 0;
1031 
1032 		ibdev_dbg(edev->efa_dev, "Reset indication val %d\n", val);
1033 		msleep(EFA_POLL_INTERVAL_MS);
1034 	}
1035 
1036 	return -ETIME;
1037 }
1038 
1039 /**
1040  * efa_com_dev_reset - Perform device FLR to the device.
1041  * @edev: EFA communication layer struct
1042  * @reset_reason: Specify what is the trigger for the reset in case of an error.
1043  *
1044  * @return - 0 on success, negative value on failure.
1045  */
1046 int efa_com_dev_reset(struct efa_com_dev *edev,
1047 		      enum efa_regs_reset_reason_types reset_reason)
1048 {
1049 	u32 stat, timeout, cap, reset_val;
1050 	int err;
1051 
1052 	stat = efa_com_reg_read32(edev, EFA_REGS_DEV_STS_OFF);
1053 	cap = efa_com_reg_read32(edev, EFA_REGS_CAPS_OFF);
1054 
1055 	if (!(stat & EFA_REGS_DEV_STS_READY_MASK)) {
1056 		ibdev_err(edev->efa_dev,
1057 			  "Device isn't ready, can't reset device\n");
1058 		return -EINVAL;
1059 	}
1060 
1061 	timeout = (cap & EFA_REGS_CAPS_RESET_TIMEOUT_MASK) >>
1062 		  EFA_REGS_CAPS_RESET_TIMEOUT_SHIFT;
1063 	if (!timeout) {
1064 		ibdev_err(edev->efa_dev, "Invalid timeout value\n");
1065 		return -EINVAL;
1066 	}
1067 
1068 	/* start reset */
1069 	reset_val = EFA_REGS_DEV_CTL_DEV_RESET_MASK;
1070 	reset_val |= (reset_reason << EFA_REGS_DEV_CTL_RESET_REASON_SHIFT) &
1071 		     EFA_REGS_DEV_CTL_RESET_REASON_MASK;
1072 	writel(reset_val, edev->reg_bar + EFA_REGS_DEV_CTL_OFF);
1073 
1074 	/* reset clears the mmio readless address, restore it */
1075 	efa_com_mmio_reg_read_resp_addr_init(edev);
1076 
1077 	err = wait_for_reset_state(edev, timeout,
1078 				   EFA_REGS_DEV_STS_RESET_IN_PROGRESS_MASK);
1079 	if (err) {
1080 		ibdev_err(edev->efa_dev, "Reset indication didn't turn on\n");
1081 		return err;
1082 	}
1083 
1084 	/* reset done */
1085 	writel(0, edev->reg_bar + EFA_REGS_DEV_CTL_OFF);
1086 	err = wait_for_reset_state(edev, timeout, 0);
1087 	if (err) {
1088 		ibdev_err(edev->efa_dev, "Reset indication didn't turn off\n");
1089 		return err;
1090 	}
1091 
1092 	timeout = (cap & EFA_REGS_CAPS_ADMIN_CMD_TO_MASK) >>
1093 		  EFA_REGS_CAPS_ADMIN_CMD_TO_SHIFT;
1094 	if (timeout)
1095 		/* the resolution of timeout reg is 100ms */
1096 		edev->aq.completion_timeout = timeout * 100000;
1097 	else
1098 		edev->aq.completion_timeout = ADMIN_CMD_TIMEOUT_US;
1099 
1100 	return 0;
1101 }
1102