xref: /linux/drivers/infiniband/hw/hfi1/mad.c (revision 1b50f42049d8270986a952e621415278e0945ce4)
1 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
2 /*
3  * Copyright(c) 2015-2018 Intel Corporation.
4  */
5 
6 #include <linux/net.h>
7 #include <rdma/opa_addr.h>
8 #define OPA_NUM_PKEY_BLOCKS_PER_SMP (OPA_SMP_DR_DATA_SIZE \
9 			/ (OPA_PARTITION_TABLE_BLK_SIZE * sizeof(u16)))
10 
11 #include "hfi.h"
12 #include "mad.h"
13 #include "trace.h"
14 #include "qp.h"
15 
/*
 * Link-width "reset" values.  The FM is supposed to send 0xffff as the
 * reset value; the older 0x0fff encoding is defined as well so that both
 * can be handled.
 */
#define OPA_LINK_WIDTH_RESET_OLD 0x0fff
#define OPA_LINK_WIDTH_RESET 0xffff
19 
/*
 * One trap (notice) queued on a port until it can be sent, and kept on
 * its list for retries until it is repressed or the lists are cleaned up.
 */
struct trap_node {
	/* linkage into one of the ibp->rvp.trap_lists[] queues */
	struct list_head list;
	/* notice payload; copied into the outgoing SMP by send_trap() */
	struct opa_mad_notice_attr data;
	/* transaction ID; stays 0 until send_trap() assigns one on first send */
	__be64 tid;
	/* number of bytes of @data copied into the SMP */
	int len;
	/* incremented whenever the node is resubmitted while already queued */
	u32 retry;
	/* set when the node is picked for sending, cleared just before posting */
	u8 in_use;
	/* set when a TrapRepress for this tid arrives while @in_use is set */
	u8 repress;
};
29 
30 static int smp_length_check(u32 data_size, u32 request_len)
31 {
32 	if (unlikely(request_len < data_size))
33 		return -EINVAL;
34 
35 	return 0;
36 }
37 
38 static int reply(struct ib_mad_hdr *smp)
39 {
40 	/*
41 	 * The verbs framework will handle the directed/LID route
42 	 * packet changes.
43 	 */
44 	smp->method = IB_MGMT_METHOD_GET_RESP;
45 	if (smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
46 		smp->status |= IB_SMP_DIRECTION;
47 	return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
48 }
49 
50 static inline void clear_opa_smp_data(struct opa_smp *smp)
51 {
52 	void *data = opa_get_smp_data(smp);
53 	size_t size = opa_get_smp_data_size(smp);
54 
55 	memset(data, 0, size);
56 }
57 
58 static u16 hfi1_lookup_pkey_value(struct hfi1_ibport *ibp, int pkey_idx)
59 {
60 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
61 
62 	if (pkey_idx < ARRAY_SIZE(ppd->pkeys))
63 		return ppd->pkeys[pkey_idx];
64 
65 	return 0;
66 }
67 
68 void hfi1_event_pkey_change(struct hfi1_devdata *dd, u32 port)
69 {
70 	struct ib_event event;
71 
72 	event.event = IB_EVENT_PKEY_CHANGE;
73 	event.device = &dd->verbs_dev.rdi.ibdev;
74 	event.element.port_num = port;
75 	ib_dispatch_event(&event);
76 }
77 
78 /*
79  * If the port is down, clean up all pending traps.  We need to be careful
80  * with the given trap, because it may be queued.
81  */
82 static void cleanup_traps(struct hfi1_ibport *ibp, struct trap_node *trap)
83 {
84 	struct trap_node *node, *q;
85 	unsigned long flags;
86 	struct list_head trap_list;
87 	int i;
88 
89 	for (i = 0; i < RVT_MAX_TRAP_LISTS; i++) {
90 		spin_lock_irqsave(&ibp->rvp.lock, flags);
91 		list_replace_init(&ibp->rvp.trap_lists[i].list, &trap_list);
92 		ibp->rvp.trap_lists[i].list_len = 0;
93 		spin_unlock_irqrestore(&ibp->rvp.lock, flags);
94 
95 		/*
96 		 * Remove all items from the list, freeing all the non-given
97 		 * traps.
98 		 */
99 		list_for_each_entry_safe(node, q, &trap_list, list) {
100 			list_del(&node->list);
101 			if (node != trap)
102 				kfree(node);
103 		}
104 	}
105 
106 	/*
107 	 * If this wasn't on one of the lists it would not be freed.  If it
108 	 * was on the list, it is now safe to free.
109 	 */
110 	kfree(trap);
111 }
112 
113 static struct trap_node *check_and_add_trap(struct hfi1_ibport *ibp,
114 					    struct trap_node *trap)
115 {
116 	struct trap_node *node;
117 	struct trap_list *trap_list;
118 	unsigned long flags;
119 	unsigned long timeout;
120 	int found = 0;
121 	unsigned int queue_id;
122 	static int trap_count;
123 
124 	queue_id = trap->data.generic_type & 0x0F;
125 	if (queue_id >= RVT_MAX_TRAP_LISTS) {
126 		trap_count++;
127 		pr_err_ratelimited("hfi1: Invalid trap 0x%0x dropped. Total dropped: %d\n",
128 				   trap->data.generic_type, trap_count);
129 		kfree(trap);
130 		return NULL;
131 	}
132 
133 	/*
134 	 * Since the retry (handle timeout) does not remove a trap request
135 	 * from the list, all we have to do is compare the node.
136 	 */
137 	spin_lock_irqsave(&ibp->rvp.lock, flags);
138 	trap_list = &ibp->rvp.trap_lists[queue_id];
139 
140 	list_for_each_entry(node, &trap_list->list, list) {
141 		if (node == trap) {
142 			node->retry++;
143 			found = 1;
144 			break;
145 		}
146 	}
147 
148 	/* If it is not on the list, add it, limited to RVT-MAX_TRAP_LEN. */
149 	if (!found) {
150 		if (trap_list->list_len < RVT_MAX_TRAP_LEN) {
151 			trap_list->list_len++;
152 			list_add_tail(&trap->list, &trap_list->list);
153 		} else {
154 			pr_warn_ratelimited("hfi1: Maximum trap limit reached for 0x%0x traps\n",
155 					    trap->data.generic_type);
156 			kfree(trap);
157 		}
158 	}
159 
160 	/*
161 	 * Next check to see if there is a timer pending.  If not, set it up
162 	 * and get the first trap from the list.
163 	 */
164 	node = NULL;
165 	if (!timer_pending(&ibp->rvp.trap_timer)) {
166 		/*
167 		 * o14-2
168 		 * If the time out is set we have to wait until it expires
169 		 * before the trap can be sent.
170 		 * This should be > RVT_TRAP_TIMEOUT
171 		 */
172 		timeout = (RVT_TRAP_TIMEOUT *
173 			   (1UL << ibp->rvp.subnet_timeout)) / 1000;
174 		mod_timer(&ibp->rvp.trap_timer,
175 			  jiffies + usecs_to_jiffies(timeout));
176 		node = list_first_entry(&trap_list->list, struct trap_node,
177 					list);
178 		node->in_use = 1;
179 	}
180 	spin_unlock_irqrestore(&ibp->rvp.lock, flags);
181 
182 	return node;
183 }
184 
185 static void subn_handle_opa_trap_repress(struct hfi1_ibport *ibp,
186 					 struct opa_smp *smp)
187 {
188 	struct trap_list *trap_list;
189 	struct trap_node *trap;
190 	unsigned long flags;
191 	int i;
192 
193 	if (smp->attr_id != IB_SMP_ATTR_NOTICE)
194 		return;
195 
196 	spin_lock_irqsave(&ibp->rvp.lock, flags);
197 	for (i = 0; i < RVT_MAX_TRAP_LISTS; i++) {
198 		trap_list = &ibp->rvp.trap_lists[i];
199 		trap = list_first_entry_or_null(&trap_list->list,
200 						struct trap_node, list);
201 		if (trap && trap->tid == smp->tid) {
202 			if (trap->in_use) {
203 				trap->repress = 1;
204 			} else {
205 				trap_list->list_len--;
206 				list_del(&trap->list);
207 				kfree(trap);
208 			}
209 			break;
210 		}
211 	}
212 	spin_unlock_irqrestore(&ibp->rvp.lock, flags);
213 }
214 
215 static void hfi1_update_sm_ah_attr(struct hfi1_ibport *ibp,
216 				   struct rdma_ah_attr *attr, u32 dlid)
217 {
218 	rdma_ah_set_dlid(attr, dlid);
219 	rdma_ah_set_port_num(attr, ppd_from_ibp(ibp)->port);
220 	if (dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) {
221 		struct ib_global_route *grh = rdma_ah_retrieve_grh(attr);
222 
223 		rdma_ah_set_ah_flags(attr, IB_AH_GRH);
224 		grh->sgid_index = 0;
225 		grh->hop_limit = 1;
226 		grh->dgid.global.subnet_prefix =
227 			ibp->rvp.gid_prefix;
228 		grh->dgid.global.interface_id = OPA_MAKE_ID(dlid);
229 	}
230 }
231 
232 static int hfi1_modify_qp0_ah(struct hfi1_ibport *ibp,
233 			      struct rvt_ah *ah, u32 dlid)
234 {
235 	struct rdma_ah_attr attr;
236 	struct rvt_qp *qp0;
237 	int ret = -EINVAL;
238 
239 	memset(&attr, 0, sizeof(attr));
240 	attr.type = ah->ibah.type;
241 	hfi1_update_sm_ah_attr(ibp, &attr, dlid);
242 	rcu_read_lock();
243 	qp0 = rcu_dereference(ibp->rvp.qp[0]);
244 	if (qp0)
245 		ret = rdma_modify_ah(&ah->ibah, &attr);
246 	rcu_read_unlock();
247 	return ret;
248 }
249 
250 static struct ib_ah *hfi1_create_qp0_ah(struct hfi1_ibport *ibp, u32 dlid)
251 {
252 	struct rdma_ah_attr attr;
253 	struct ib_ah *ah = ERR_PTR(-EINVAL);
254 	struct rvt_qp *qp0;
255 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
256 	struct hfi1_devdata *dd = dd_from_ppd(ppd);
257 	u32 port_num = ppd->port;
258 
259 	memset(&attr, 0, sizeof(attr));
260 	attr.type = rdma_ah_find_type(&dd->verbs_dev.rdi.ibdev, port_num);
261 	hfi1_update_sm_ah_attr(ibp, &attr, dlid);
262 	rcu_read_lock();
263 	qp0 = rcu_dereference(ibp->rvp.qp[0]);
264 	if (qp0)
265 		ah = rdma_create_ah(qp0->ibqp.pd, &attr, 0);
266 	rcu_read_unlock();
267 	return ah;
268 }
269 
270 static void send_trap(struct hfi1_ibport *ibp, struct trap_node *trap)
271 {
272 	struct ib_mad_send_buf *send_buf;
273 	struct ib_mad_agent *agent;
274 	struct opa_smp *smp;
275 	unsigned long flags;
276 	int pkey_idx;
277 	u32 qpn = ppd_from_ibp(ibp)->sm_trap_qp;
278 
279 	agent = ibp->rvp.send_agent;
280 	if (!agent) {
281 		cleanup_traps(ibp, trap);
282 		return;
283 	}
284 
285 	/* o14-3.2.1 */
286 	if (driver_lstate(ppd_from_ibp(ibp)) != IB_PORT_ACTIVE) {
287 		cleanup_traps(ibp, trap);
288 		return;
289 	}
290 
291 	/* Add the trap to the list if necessary and see if we can send it */
292 	trap = check_and_add_trap(ibp, trap);
293 	if (!trap)
294 		return;
295 
296 	pkey_idx = hfi1_lookup_pkey_idx(ibp, LIM_MGMT_P_KEY);
297 	if (pkey_idx < 0) {
298 		pr_warn("%s: failed to find limited mgmt pkey, defaulting 0x%x\n",
299 			__func__, hfi1_get_pkey(ibp, 1));
300 		pkey_idx = 1;
301 	}
302 
303 	send_buf = ib_create_send_mad(agent, qpn, pkey_idx, 0,
304 				      IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
305 				      GFP_ATOMIC, IB_MGMT_BASE_VERSION);
306 	if (IS_ERR(send_buf))
307 		return;
308 
309 	smp = send_buf->mad;
310 	smp->base_version = OPA_MGMT_BASE_VERSION;
311 	smp->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
312 	smp->class_version = OPA_SM_CLASS_VERSION;
313 	smp->method = IB_MGMT_METHOD_TRAP;
314 
315 	/* Only update the transaction ID for new traps (o13-5). */
316 	if (trap->tid == 0) {
317 		ibp->rvp.tid++;
318 		/* make sure that tid != 0 */
319 		if (ibp->rvp.tid == 0)
320 			ibp->rvp.tid++;
321 		trap->tid = cpu_to_be64(ibp->rvp.tid);
322 	}
323 	smp->tid = trap->tid;
324 
325 	smp->attr_id = IB_SMP_ATTR_NOTICE;
326 	/* o14-1: smp->mkey = 0; */
327 
328 	memcpy(smp->route.lid.data, &trap->data, trap->len);
329 
330 	spin_lock_irqsave(&ibp->rvp.lock, flags);
331 	if (!ibp->rvp.sm_ah) {
332 		if (ibp->rvp.sm_lid != be16_to_cpu(IB_LID_PERMISSIVE)) {
333 			struct ib_ah *ah;
334 
335 			ah = hfi1_create_qp0_ah(ibp, ibp->rvp.sm_lid);
336 			if (IS_ERR(ah)) {
337 				spin_unlock_irqrestore(&ibp->rvp.lock, flags);
338 				return;
339 			}
340 			send_buf->ah = ah;
341 			ibp->rvp.sm_ah = ibah_to_rvtah(ah);
342 		} else {
343 			spin_unlock_irqrestore(&ibp->rvp.lock, flags);
344 			return;
345 		}
346 	} else {
347 		send_buf->ah = &ibp->rvp.sm_ah->ibah;
348 	}
349 
350 	/*
351 	 * If the trap was repressed while things were getting set up, don't
352 	 * bother sending it. This could happen for a retry.
353 	 */
354 	if (trap->repress) {
355 		list_del(&trap->list);
356 		spin_unlock_irqrestore(&ibp->rvp.lock, flags);
357 		kfree(trap);
358 		ib_free_send_mad(send_buf);
359 		return;
360 	}
361 
362 	trap->in_use = 0;
363 	spin_unlock_irqrestore(&ibp->rvp.lock, flags);
364 
365 	if (ib_post_send_mad(send_buf, NULL))
366 		ib_free_send_mad(send_buf);
367 }
368 
369 void hfi1_handle_trap_timer(struct timer_list *t)
370 {
371 	struct hfi1_ibport *ibp = timer_container_of(ibp, t, rvp.trap_timer);
372 	struct trap_node *trap = NULL;
373 	unsigned long flags;
374 	int i;
375 
376 	/* Find the trap with the highest priority */
377 	spin_lock_irqsave(&ibp->rvp.lock, flags);
378 	for (i = 0; !trap && i < RVT_MAX_TRAP_LISTS; i++) {
379 		trap = list_first_entry_or_null(&ibp->rvp.trap_lists[i].list,
380 						struct trap_node, list);
381 	}
382 	spin_unlock_irqrestore(&ibp->rvp.lock, flags);
383 
384 	if (trap)
385 		send_trap(ibp, trap);
386 }
387 
388 static struct trap_node *create_trap_node(u8 type, __be16 trap_num, u32 lid)
389 {
390 	struct trap_node *trap;
391 
392 	trap = kzalloc_obj(*trap, GFP_ATOMIC);
393 	if (!trap)
394 		return NULL;
395 
396 	INIT_LIST_HEAD(&trap->list);
397 	trap->data.generic_type = type;
398 	trap->data.prod_type_lsb = IB_NOTICE_PROD_CA;
399 	trap->data.trap_num = trap_num;
400 	trap->data.issuer_lid = cpu_to_be32(lid);
401 
402 	return trap;
403 }
404 
405 /*
406  * Send a bad P_Key trap (ch. 14.3.8).
407  */
408 void hfi1_bad_pkey(struct hfi1_ibport *ibp, u32 key, u32 sl,
409 		   u32 qp1, u32 qp2, u32 lid1, u32 lid2)
410 {
411 	struct trap_node *trap;
412 	u32 lid = ppd_from_ibp(ibp)->lid;
413 
414 	ibp->rvp.n_pkt_drops++;
415 	ibp->rvp.pkey_violations++;
416 
417 	trap = create_trap_node(IB_NOTICE_TYPE_SECURITY, OPA_TRAP_BAD_P_KEY,
418 				lid);
419 	if (!trap)
420 		return;
421 
422 	/* Send violation trap */
423 	trap->data.ntc_257_258.lid1 = cpu_to_be32(lid1);
424 	trap->data.ntc_257_258.lid2 = cpu_to_be32(lid2);
425 	trap->data.ntc_257_258.key = cpu_to_be32(key);
426 	trap->data.ntc_257_258.sl = sl << 3;
427 	trap->data.ntc_257_258.qp1 = cpu_to_be32(qp1);
428 	trap->data.ntc_257_258.qp2 = cpu_to_be32(qp2);
429 
430 	trap->len = sizeof(trap->data);
431 	send_trap(ibp, trap);
432 }
433 
434 /*
435  * Send a bad M_Key trap (ch. 14.3.9).
436  */
437 static void bad_mkey(struct hfi1_ibport *ibp, struct ib_mad_hdr *mad,
438 		     __be64 mkey, __be32 dr_slid, u8 return_path[], u8 hop_cnt)
439 {
440 	struct trap_node *trap;
441 	u32 lid = ppd_from_ibp(ibp)->lid;
442 
443 	trap = create_trap_node(IB_NOTICE_TYPE_SECURITY, OPA_TRAP_BAD_M_KEY,
444 				lid);
445 	if (!trap)
446 		return;
447 
448 	/* Send violation trap */
449 	trap->data.ntc_256.lid = trap->data.issuer_lid;
450 	trap->data.ntc_256.method = mad->method;
451 	trap->data.ntc_256.attr_id = mad->attr_id;
452 	trap->data.ntc_256.attr_mod = mad->attr_mod;
453 	trap->data.ntc_256.mkey = mkey;
454 	if (mad->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
455 		trap->data.ntc_256.dr_slid = dr_slid;
456 		trap->data.ntc_256.dr_trunc_hop = IB_NOTICE_TRAP_DR_NOTICE;
457 		if (hop_cnt > ARRAY_SIZE(trap->data.ntc_256.dr_rtn_path)) {
458 			trap->data.ntc_256.dr_trunc_hop |=
459 				IB_NOTICE_TRAP_DR_TRUNC;
460 			hop_cnt = ARRAY_SIZE(trap->data.ntc_256.dr_rtn_path);
461 		}
462 		trap->data.ntc_256.dr_trunc_hop |= hop_cnt;
463 		memcpy(trap->data.ntc_256.dr_rtn_path, return_path,
464 		       hop_cnt);
465 	}
466 
467 	trap->len = sizeof(trap->data);
468 
469 	send_trap(ibp, trap);
470 }
471 
472 /*
473  * Send a Port Capability Mask Changed trap (ch. 14.3.11).
474  */
475 void hfi1_cap_mask_chg(struct rvt_dev_info *rdi, u32 port_num)
476 {
477 	struct trap_node *trap;
478 	struct hfi1_ibdev *verbs_dev = dev_from_rdi(rdi);
479 	struct hfi1_devdata *dd = dd_from_dev(verbs_dev);
480 	struct hfi1_ibport *ibp = &dd->pport[port_num - 1].ibport_data;
481 	u32 lid = ppd_from_ibp(ibp)->lid;
482 
483 	trap = create_trap_node(IB_NOTICE_TYPE_INFO,
484 				OPA_TRAP_CHANGE_CAPABILITY,
485 				lid);
486 	if (!trap)
487 		return;
488 
489 	trap->data.ntc_144.lid = trap->data.issuer_lid;
490 	trap->data.ntc_144.new_cap_mask = cpu_to_be32(ibp->rvp.port_cap_flags);
491 	trap->data.ntc_144.cap_mask3 = cpu_to_be16(ibp->rvp.port_cap3_flags);
492 
493 	trap->len = sizeof(trap->data);
494 	send_trap(ibp, trap);
495 }
496 
497 /*
498  * Send a System Image GUID Changed trap (ch. 14.3.12).
499  */
500 void hfi1_sys_guid_chg(struct hfi1_ibport *ibp)
501 {
502 	struct trap_node *trap;
503 	u32 lid = ppd_from_ibp(ibp)->lid;
504 
505 	trap = create_trap_node(IB_NOTICE_TYPE_INFO, OPA_TRAP_CHANGE_SYSGUID,
506 				lid);
507 	if (!trap)
508 		return;
509 
510 	trap->data.ntc_145.new_sys_guid = ib_hfi1_sys_image_guid;
511 	trap->data.ntc_145.lid = trap->data.issuer_lid;
512 
513 	trap->len = sizeof(trap->data);
514 	send_trap(ibp, trap);
515 }
516 
517 /*
518  * Send a Node Description Changed trap (ch. 14.3.13).
519  */
520 void hfi1_node_desc_chg(struct hfi1_ibport *ibp)
521 {
522 	struct trap_node *trap;
523 	u32 lid = ppd_from_ibp(ibp)->lid;
524 
525 	trap = create_trap_node(IB_NOTICE_TYPE_INFO,
526 				OPA_TRAP_CHANGE_CAPABILITY,
527 				lid);
528 	if (!trap)
529 		return;
530 
531 	trap->data.ntc_144.lid = trap->data.issuer_lid;
532 	trap->data.ntc_144.change_flags =
533 		cpu_to_be16(OPA_NOTICE_TRAP_NODE_DESC_CHG);
534 
535 	trap->len = sizeof(trap->data);
536 	send_trap(ibp, trap);
537 }
538 
539 static int __subn_get_opa_nodedesc(struct opa_smp *smp, u32 am,
540 				   u8 *data, struct ib_device *ibdev,
541 				   u32 port, u32 *resp_len, u32 max_len)
542 {
543 	struct opa_node_description *nd;
544 
545 	if (am || smp_length_check(sizeof(*nd), max_len)) {
546 		smp->status |= IB_SMP_INVALID_FIELD;
547 		return reply((struct ib_mad_hdr *)smp);
548 	}
549 
550 	nd = (struct opa_node_description *)data;
551 
552 	memcpy(nd->data, ibdev->node_desc, sizeof(nd->data));
553 
554 	if (resp_len)
555 		*resp_len += sizeof(*nd);
556 
557 	return reply((struct ib_mad_hdr *)smp);
558 }
559 
560 static int __subn_get_opa_nodeinfo(struct opa_smp *smp, u32 am, u8 *data,
561 				   struct ib_device *ibdev, u32 port,
562 				   u32 *resp_len, u32 max_len)
563 {
564 	struct opa_node_info *ni;
565 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
566 	u32 pidx = port - 1; /* IB number port from 1, hw from 0 */
567 
568 	ni = (struct opa_node_info *)data;
569 
570 	/* GUID 0 is illegal */
571 	if (am || pidx >= dd->num_pports || ibdev->node_guid == 0 ||
572 	    smp_length_check(sizeof(*ni), max_len) ||
573 	    get_sguid(to_iport(ibdev, port), HFI1_PORT_GUID_INDEX) == 0) {
574 		smp->status |= IB_SMP_INVALID_FIELD;
575 		return reply((struct ib_mad_hdr *)smp);
576 	}
577 
578 	ni->port_guid = get_sguid(to_iport(ibdev, port), HFI1_PORT_GUID_INDEX);
579 	ni->base_version = OPA_MGMT_BASE_VERSION;
580 	ni->class_version = OPA_SM_CLASS_VERSION;
581 	ni->node_type = 1;     /* channel adapter */
582 	ni->num_ports = ibdev->phys_port_cnt;
583 	/* This is already in network order */
584 	ni->system_image_guid = ib_hfi1_sys_image_guid;
585 	ni->node_guid = ibdev->node_guid;
586 	ni->partition_cap = cpu_to_be16(hfi1_get_npkeys(dd));
587 	ni->device_id = cpu_to_be16(dd->pcidev->device);
588 	ni->revision = cpu_to_be32(dd->minrev);
589 	ni->local_port_num = port;
590 	ni->vendor_id[0] = dd->oui1;
591 	ni->vendor_id[1] = dd->oui2;
592 	ni->vendor_id[2] = dd->oui3;
593 
594 	if (resp_len)
595 		*resp_len += sizeof(*ni);
596 
597 	return reply((struct ib_mad_hdr *)smp);
598 }
599 
600 static int subn_get_nodeinfo(struct ib_smp *smp, struct ib_device *ibdev,
601 			     u32 port)
602 {
603 	struct ib_node_info *nip = (struct ib_node_info *)&smp->data;
604 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
605 	u32 pidx = port - 1; /* IB number port from 1, hw from 0 */
606 
607 	/* GUID 0 is illegal */
608 	if (smp->attr_mod || pidx >= dd->num_pports ||
609 	    ibdev->node_guid == 0 ||
610 	    get_sguid(to_iport(ibdev, port), HFI1_PORT_GUID_INDEX) == 0) {
611 		smp->status |= IB_SMP_INVALID_FIELD;
612 		return reply((struct ib_mad_hdr *)smp);
613 	}
614 
615 	nip->port_guid = get_sguid(to_iport(ibdev, port), HFI1_PORT_GUID_INDEX);
616 	nip->base_version = OPA_MGMT_BASE_VERSION;
617 	nip->class_version = OPA_SM_CLASS_VERSION;
618 	nip->node_type = 1;     /* channel adapter */
619 	nip->num_ports = ibdev->phys_port_cnt;
620 	/* This is already in network order */
621 	nip->sys_guid = ib_hfi1_sys_image_guid;
622 	nip->node_guid = ibdev->node_guid;
623 	nip->partition_cap = cpu_to_be16(hfi1_get_npkeys(dd));
624 	nip->device_id = cpu_to_be16(dd->pcidev->device);
625 	nip->revision = cpu_to_be32(dd->minrev);
626 	nip->local_port_num = port;
627 	nip->vendor_id[0] = dd->oui1;
628 	nip->vendor_id[1] = dd->oui2;
629 	nip->vendor_id[2] = dd->oui3;
630 
631 	return reply((struct ib_mad_hdr *)smp);
632 }
633 
/*
 * Apply @w as the HFI1_IB_CFG_LWID_ENB setting of @ppd.  The return value
 * of hfi1_set_ib_cfg() is deliberately discarded.
 */
static void set_link_width_enabled(struct hfi1_pportdata *ppd, u32 w)
{
	(void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_LWID_ENB, w);
}
638 
/*
 * Apply @w as the HFI1_IB_CFG_LWID_DG_ENB setting of @ppd.  The return
 * value of hfi1_set_ib_cfg() is deliberately discarded.
 */
static void set_link_width_downgrade_enabled(struct hfi1_pportdata *ppd, u32 w)
{
	(void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_LWID_DG_ENB, w);
}
643 
/*
 * Apply @s as the HFI1_IB_CFG_SPD_ENB setting of @ppd.  The return value
 * of hfi1_set_ib_cfg() is deliberately discarded.
 */
static void set_link_speed_enabled(struct hfi1_pportdata *ppd, u32 s)
{
	(void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_SPD_ENB, s);
}
648 
649 static int check_mkey(struct hfi1_ibport *ibp, struct ib_mad_hdr *mad,
650 		      int mad_flags, __be64 mkey, __be32 dr_slid,
651 		      u8 return_path[], u8 hop_cnt)
652 {
653 	int valid_mkey = 0;
654 	int ret = 0;
655 
656 	/* Is the mkey in the process of expiring? */
657 	if (ibp->rvp.mkey_lease_timeout &&
658 	    time_after_eq(jiffies, ibp->rvp.mkey_lease_timeout)) {
659 		/* Clear timeout and mkey protection field. */
660 		ibp->rvp.mkey_lease_timeout = 0;
661 		ibp->rvp.mkeyprot = 0;
662 	}
663 
664 	if ((mad_flags & IB_MAD_IGNORE_MKEY) ||  ibp->rvp.mkey == 0 ||
665 	    ibp->rvp.mkey == mkey)
666 		valid_mkey = 1;
667 
668 	/* Unset lease timeout on any valid Get/Set/TrapRepress */
669 	if (valid_mkey && ibp->rvp.mkey_lease_timeout &&
670 	    (mad->method == IB_MGMT_METHOD_GET ||
671 	     mad->method == IB_MGMT_METHOD_SET ||
672 	     mad->method == IB_MGMT_METHOD_TRAP_REPRESS))
673 		ibp->rvp.mkey_lease_timeout = 0;
674 
675 	if (!valid_mkey) {
676 		switch (mad->method) {
677 		case IB_MGMT_METHOD_GET:
678 			/* Bad mkey not a violation below level 2 */
679 			if (ibp->rvp.mkeyprot < 2)
680 				break;
681 			fallthrough;
682 		case IB_MGMT_METHOD_SET:
683 		case IB_MGMT_METHOD_TRAP_REPRESS:
684 			if (ibp->rvp.mkey_violations != 0xFFFF)
685 				++ibp->rvp.mkey_violations;
686 			if (!ibp->rvp.mkey_lease_timeout &&
687 			    ibp->rvp.mkey_lease_period)
688 				ibp->rvp.mkey_lease_timeout = jiffies +
689 					ibp->rvp.mkey_lease_period * HZ;
690 			/* Generate a trap notice. */
691 			bad_mkey(ibp, mad, mkey, dr_slid, return_path,
692 				 hop_cnt);
693 			ret = 1;
694 		}
695 	}
696 
697 	return ret;
698 }
699 
/*
 * The SMA caches reads from LCB registers in case the LCB is unavailable.
 * (The LCB is unavailable in certain link states, for example.)
 */
struct lcb_datum {
	u32 off;	/* register offset; the lookup key for the cache */
	u64 val;	/* most recently cached value for that register */
};

/*
 * The cache itself: one entry per cached register.  Only offsets listed
 * here are accepted by write_lcb_cache() and read_lcb_cache().
 */
static struct lcb_datum lcb_cache[] = {
	{ DC_LCB_STS_ROUND_TRIP_LTP_CNT, 0 },
};
712 
713 static int write_lcb_cache(u32 off, u64 val)
714 {
715 	int i;
716 
717 	for (i = 0; i < ARRAY_SIZE(lcb_cache); i++) {
718 		if (lcb_cache[i].off == off) {
719 			lcb_cache[i].val = val;
720 			return 0;
721 		}
722 	}
723 
724 	pr_warn("%s bad offset 0x%x\n", __func__, off);
725 	return -1;
726 }
727 
728 static int read_lcb_cache(u32 off, u64 *val)
729 {
730 	int i;
731 
732 	for (i = 0; i < ARRAY_SIZE(lcb_cache); i++) {
733 		if (lcb_cache[i].off == off) {
734 			*val = lcb_cache[i].val;
735 			return 0;
736 		}
737 	}
738 
739 	pr_warn("%s bad offset 0x%x\n", __func__, off);
740 	return -1;
741 }
742 
743 void read_ltp_rtt(struct hfi1_devdata *dd)
744 {
745 	u64 reg;
746 
747 	if (read_lcb_csr(dd, DC_LCB_STS_ROUND_TRIP_LTP_CNT, &reg))
748 		dd_dev_err(dd, "%s: unable to read LTP RTT\n", __func__);
749 	else
750 		write_lcb_cache(DC_LCB_STS_ROUND_TRIP_LTP_CNT, reg);
751 }
752 
/*
 * Get(PortInfo), OPA format: fill @data with a struct opa_port_info that
 * describes port @port.
 *
 * @smp: SMP being answered; IB_SMP_INVALID_FIELD is set in its status on
 *       bad input
 * @am: attribute modifier; supplies the port count (must be 1) and the
 *      "start of SM config" flag
 * @data: buffer that receives the attribute
 * @ibdev: the IB device
 * @port: IB port number (numbered from 1)
 * @resp_len: if non-NULL, incremented by sizeof(struct opa_port_info) on
 *            success
 * @max_len: number of bytes available in @data
 *
 * Side effect: when the "start of SM config" flag is set and the port is
 * in the INIT state, ppd->is_sm_config_started is set.
 *
 * NOTE(review): partenforce_filterraw is only OR-ed into, so @data is
 * presumably zeroed by the caller - confirm.
 *
 * Always returns the value of reply().
 */
static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
				   struct ib_device *ibdev, u32 port,
				   u32 *resp_len, u32 max_len)
{
	int i;
	struct hfi1_devdata *dd;
	struct hfi1_pportdata *ppd;
	struct hfi1_ibport *ibp;
	struct opa_port_info *pi = (struct opa_port_info *)data;
	u8 mtu;
	u8 credit_rate;
	u8 is_beaconing_active;
	u32 state;
	u32 num_ports = OPA_AM_NPORT(am);
	u32 start_of_sm_config = OPA_AM_START_SM_CFG(am);
	u32 buffer_units;
	u64 tmp = 0;

	/* Exactly one port per request, and the buffer must fit the reply. */
	if (num_ports != 1 || smp_length_check(sizeof(*pi), max_len)) {
		smp->status |= IB_SMP_INVALID_FIELD;
		return reply((struct ib_mad_hdr *)smp);
	}

	dd = dd_from_ibdev(ibdev);
	/* IB numbers ports from 1, hw from 0 */
	ppd = dd->pport + (port - 1);
	ibp = &ppd->ibport_data;

	/*
	 * The per-VL MTU loop below indexes both pvlx_to_mtu[] (two VLs per
	 * byte) and dd->vld[]; refuse a VL count that would overrun either.
	 */
	if (ppd->vls_supported / 2 > ARRAY_SIZE(pi->neigh_mtu.pvlx_to_mtu) ||
	    ppd->vls_supported > ARRAY_SIZE(dd->vld)) {
		smp->status |= IB_SMP_INVALID_FIELD;
		return reply((struct ib_mad_hdr *)smp);
	}

	pi->lid = cpu_to_be32(ppd->lid);

	/* Only return the mkey if the protection field allows it. */
	if (!(smp->method == IB_MGMT_METHOD_GET &&
	      ibp->rvp.mkey != smp->mkey &&
	      ibp->rvp.mkeyprot == 1))
		pi->mkey = ibp->rvp.mkey;

	pi->subnet_prefix = ibp->rvp.gid_prefix;
	pi->sm_lid = cpu_to_be32(ibp->rvp.sm_lid);
	pi->ib_cap_mask = cpu_to_be32(ibp->rvp.port_cap_flags);
	pi->mkey_lease_period = cpu_to_be16(ibp->rvp.mkey_lease_period);
	pi->sm_trap_qp = cpu_to_be32(ppd->sm_trap_qp);
	pi->sa_qp = cpu_to_be32(ppd->sa_qp);

	pi->link_width.enabled = cpu_to_be16(ppd->link_width_enabled);
	pi->link_width.supported = cpu_to_be16(ppd->link_width_supported);
	pi->link_width.active = cpu_to_be16(ppd->link_width_active);

	pi->link_width_downgrade.supported =
			cpu_to_be16(ppd->link_width_downgrade_supported);
	pi->link_width_downgrade.enabled =
			cpu_to_be16(ppd->link_width_downgrade_enabled);
	pi->link_width_downgrade.tx_active =
			cpu_to_be16(ppd->link_width_downgrade_tx_active);
	pi->link_width_downgrade.rx_active =
			cpu_to_be16(ppd->link_width_downgrade_rx_active);

	pi->link_speed.supported = cpu_to_be16(ppd->link_speed_supported);
	pi->link_speed.active = cpu_to_be16(ppd->link_speed_active);
	pi->link_speed.enabled = cpu_to_be16(ppd->link_speed_enabled);

	state = driver_lstate(ppd);

	/* Remember that the SM began configuring a port that is in INIT. */
	if (start_of_sm_config && (state == IB_PORT_INIT))
		ppd->is_sm_config_started = 1;

	pi->port_phys_conf = (ppd->port_type & 0xf);

	/*
	 * ledenable_offlinereason: bit 4 neighbor_normal, bit 5
	 * is_sm_config_started, bit 6 LED beaconing active, OR-ed with the
	 * offline/disabled reason.
	 */
	pi->port_states.ledenable_offlinereason = ppd->neighbor_normal << 4;
	pi->port_states.ledenable_offlinereason |=
		ppd->is_sm_config_started << 5;
	/*
	 * This pairs with the memory barrier in hfi1_start_led_override to
	 * ensure that we read the correct state of LED beaconing represented
	 * by led_override_timer_active
	 */
	smp_rmb();
	is_beaconing_active = !!atomic_read(&ppd->led_override_timer_active);
	pi->port_states.ledenable_offlinereason |= is_beaconing_active << 6;
	pi->port_states.ledenable_offlinereason |=
		ppd->offline_disabled_reason;

	/* physical state in the high nibble, logical state in the low one */
	pi->port_states.portphysstate_portstate =
		(driver_pstate(ppd) << 4) | state;

	pi->mkeyprotect_lmc = (ibp->rvp.mkeyprot << 6) | ppd->lmc;

	/* Pack one MTU enum per VL, two VLs per byte (even VL in the high nibble). */
	memset(pi->neigh_mtu.pvlx_to_mtu, 0, sizeof(pi->neigh_mtu.pvlx_to_mtu));
	for (i = 0; i < ppd->vls_supported; i++) {
		mtu = mtu_to_enum(dd->vld[i].mtu, HFI1_DEFAULT_ACTIVE_MTU);
		if ((i % 2) == 0)
			pi->neigh_mtu.pvlx_to_mtu[i / 2] |= (mtu << 4);
		else
			pi->neigh_mtu.pvlx_to_mtu[i / 2] |= mtu;
	}
	/* don't forget VL 15 */
	mtu = mtu_to_enum(dd->vld[15].mtu, 2048);
	pi->neigh_mtu.pvlx_to_mtu[15 / 2] |= mtu;
	pi->smsl = ibp->rvp.sm_sl & OPA_PI_MASK_SMSL;
	pi->operational_vls = hfi1_get_ib_cfg(ppd, HFI1_IB_CFG_OP_VLS);
	pi->partenforce_filterraw |=
		(ppd->linkinit_reason & OPA_PI_MASK_LINKINIT_REASON);
	if (ppd->part_enforce & HFI1_PART_ENFORCE_IN)
		pi->partenforce_filterraw |= OPA_PI_MASK_PARTITION_ENFORCE_IN;
	if (ppd->part_enforce & HFI1_PART_ENFORCE_OUT)
		pi->partenforce_filterraw |= OPA_PI_MASK_PARTITION_ENFORCE_OUT;
	pi->mkey_violations = cpu_to_be16(ibp->rvp.mkey_violations);
	/*
	 * P_KeyViolations are counted by hardware.
	 * NOTE(review): the value reported here is ibp->rvp.pkey_violations,
	 * which hfi1_bad_pkey() increments in software - confirm the remark
	 * above still applies.
	 */
	pi->pkey_violations = cpu_to_be16(ibp->rvp.pkey_violations);
	pi->qkey_violations = cpu_to_be16(ibp->rvp.qkey_violations);

	pi->vl.cap = ppd->vls_supported;
	pi->vl.high_limit = cpu_to_be16(ibp->rvp.vl_high_limit);
	pi->vl.arb_high_cap = (u8)hfi1_get_ib_cfg(ppd, HFI1_IB_CFG_VL_HIGH_CAP);
	pi->vl.arb_low_cap = (u8)hfi1_get_ib_cfg(ppd, HFI1_IB_CFG_VL_LOW_CAP);

	pi->clientrereg_subnettimeout = ibp->rvp.subnet_timeout;

	/* The same link mode (OPA) is reported in all three 5-bit fields. */
	pi->port_link_mode  = cpu_to_be16(OPA_PORT_LINK_MODE_OPA << 10 |
					  OPA_PORT_LINK_MODE_OPA << 5 |
					  OPA_PORT_LINK_MODE_OPA);

	pi->port_ltp_crc_mode = cpu_to_be16(ppd->port_ltp_crc_mode);

	pi->port_mode = cpu_to_be16(
				ppd->is_active_optimize_enabled ?
					OPA_PI_MASK_PORT_ACTIVE_OPTOMIZE : 0);

	pi->port_packet_format.supported =
		cpu_to_be16(OPA_PORT_PACKET_FORMAT_9B |
			    OPA_PORT_PACKET_FORMAT_16B);
	pi->port_packet_format.enabled =
		cpu_to_be16(OPA_PORT_PACKET_FORMAT_9B |
			    OPA_PORT_PACKET_FORMAT_16B);

	/* flit_control.interleave is (OPA V1, version .76):
	 * bits		use
	 * ----		---
	 * 2		res
	 * 2		DistanceSupported
	 * 2		DistanceEnabled
	 * 5		MaxNextLevelTxEnabled
	 * 5		MaxNestLevelRxSupported
	 *
	 * HFI supports only "distance mode 1" (see OPA V1, version .76,
	 * section 9.6.2), so set DistanceSupported, DistanceEnabled
	 * to 0x1.
	 */
	pi->flit_control.interleave = cpu_to_be16(0x1400);

	pi->link_down_reason = ppd->local_link_down_reason.sma;
	pi->neigh_link_down_reason = ppd->neigh_link_down_reason.sma;
	pi->port_error_action = cpu_to_be32(ppd->port_error_action);
	pi->mtucap = mtu_to_enum(hfi1_max_mtu, IB_MTU_4096);

	/* 32.768 usec. response time (guessing) */
	pi->resptimevalue = 3;

	pi->local_port_num = port;

	/* buffer info for FM */
	pi->overall_buffer_space = cpu_to_be16(dd->link_credits);

	pi->neigh_node_guid = cpu_to_be64(ppd->neighbor_guid);
	pi->neigh_port_num = ppd->neighbor_port_number;
	pi->port_neigh_mode =
		(ppd->neighbor_type & OPA_PI_MASK_NEIGH_NODE_TYPE) |
		(ppd->mgmt_allowed ? OPA_PI_MASK_NEIGH_MGMT_ALLOWED : 0) |
		(ppd->neighbor_fm_security ?
			OPA_PI_MASK_NEIGH_FW_AUTH_BYPASS : 0);

	/* HFIs shall always return VL15 credits to their
	 * neighbor in a timely manner, without any credit return pacing.
	 */
	credit_rate = 0;
	/* Assemble the buffer_units word from its masked sub-fields. */
	buffer_units  = (dd->vau) & OPA_PI_MASK_BUF_UNIT_BUF_ALLOC;
	buffer_units |= (dd->vcu << 3) & OPA_PI_MASK_BUF_UNIT_CREDIT_ACK;
	buffer_units |= (credit_rate << 6) &
				OPA_PI_MASK_BUF_UNIT_VL15_CREDIT_RATE;
	buffer_units |= (dd->vl15_init << 11) & OPA_PI_MASK_BUF_UNIT_VL15_INIT;
	pi->buffer_units = cpu_to_be32(buffer_units);

	pi->opa_cap_mask = cpu_to_be16(ibp->rvp.port_cap3_flags);
	pi->collectivemask_multicastmask = ((OPA_COLLECTIVE_NR & 0x7)
					    << 3 | (OPA_MCAST_NR & 0x7));

	/* HFI supports a replay buffer 128 LTPs in size */
	pi->replay_depth.buffer = 0x80;
	/*
	 * read the cached value of DC_LCB_STS_ROUND_TRIP_LTP_CNT; tmp keeps
	 * its initial 0 if the lookup fails
	 */
	read_lcb_cache(DC_LCB_STS_ROUND_TRIP_LTP_CNT, &tmp);

	/*
	 * this counter is 16 bits wide, but the replay_depth.wire
	 * variable is only 8 bits
	 */
	if (tmp > 0xff)
		tmp = 0xff;
	pi->replay_depth.wire = tmp;

	if (resp_len)
		*resp_len += sizeof(struct opa_port_info);

	return reply((struct ib_mad_hdr *)smp);
}
962 
963 /**
964  * get_pkeys - return the PKEY table
965  * @dd: the hfi1_ib device
966  * @port: the IB port number
967  * @pkeys: the pkey table is placed here
968  */
969 static int get_pkeys(struct hfi1_devdata *dd, u32 port, u16 *pkeys)
970 {
971 	struct hfi1_pportdata *ppd = dd->pport + port - 1;
972 
973 	memcpy(pkeys, ppd->pkeys, sizeof(ppd->pkeys));
974 
975 	return 0;
976 }
977 
978 static int __subn_get_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data,
979 				    struct ib_device *ibdev, u32 port,
980 				    u32 *resp_len, u32 max_len)
981 {
982 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
983 	u32 n_blocks_req = OPA_AM_NBLK(am);
984 	u32 start_block = am & 0x7ff;
985 	__be16 *p;
986 	u16 *q;
987 	int i;
988 	u16 n_blocks_avail;
989 	unsigned npkeys = hfi1_get_npkeys(dd);
990 	size_t size;
991 
992 	if (n_blocks_req == 0) {
993 		pr_warn("OPA Get PKey AM Invalid : P = %d; B = 0x%x; N = 0x%x\n",
994 			port, start_block, n_blocks_req);
995 		smp->status |= IB_SMP_INVALID_FIELD;
996 		return reply((struct ib_mad_hdr *)smp);
997 	}
998 
999 	n_blocks_avail = (u16)(npkeys / OPA_PARTITION_TABLE_BLK_SIZE) + 1;
1000 
1001 	size = (n_blocks_req * OPA_PARTITION_TABLE_BLK_SIZE) * sizeof(u16);
1002 
1003 	if (smp_length_check(size, max_len)) {
1004 		smp->status |= IB_SMP_INVALID_FIELD;
1005 		return reply((struct ib_mad_hdr *)smp);
1006 	}
1007 
1008 	if (start_block + n_blocks_req > n_blocks_avail ||
1009 	    n_blocks_req > OPA_NUM_PKEY_BLOCKS_PER_SMP) {
1010 		pr_warn("OPA Get PKey AM Invalid : s 0x%x; req 0x%x; "
1011 			"avail 0x%x; blk/smp 0x%lx\n",
1012 			start_block, n_blocks_req, n_blocks_avail,
1013 			OPA_NUM_PKEY_BLOCKS_PER_SMP);
1014 		smp->status |= IB_SMP_INVALID_FIELD;
1015 		return reply((struct ib_mad_hdr *)smp);
1016 	}
1017 
1018 	p = (__be16 *)data;
1019 	q = (u16 *)data;
1020 	/* get the real pkeys if we are requesting the first block */
1021 	if (start_block == 0) {
1022 		get_pkeys(dd, port, q);
1023 		for (i = 0; i < npkeys; i++)
1024 			p[i] = cpu_to_be16(q[i]);
1025 		if (resp_len)
1026 			*resp_len += size;
1027 	} else {
1028 		smp->status |= IB_SMP_INVALID_FIELD;
1029 	}
1030 	return reply((struct ib_mad_hdr *)smp);
1031 }
1032 
/*
 * Verdicts produced by logical_transition_allowed(),
 * physical_transition_allowed() and port_states_transition_allowed().
 */
enum {
	/* set_port_states() rejects the request with IB_SMP_INVALID_FIELD */
	HFI_TRANSITION_DISALLOWED,
	/* set_port_states() returns without touching the link state */
	HFI_TRANSITION_IGNORED,
	/* set_port_states() goes on to apply the requested state */
	HFI_TRANSITION_ALLOWED,
	/* treated by set_port_states() exactly like DISALLOWED */
	HFI_TRANSITION_UNDEFINED,
};

/*
 * Use shortened names to improve readability of
 * {logical,physical}_state_transitions
 */
enum {
	__D = HFI_TRANSITION_DISALLOWED,
	__I = HFI_TRANSITION_IGNORED,
	__A = HFI_TRANSITION_ALLOWED,
	__U = HFI_TRANSITION_UNDEFINED,
};
1050 
/*
 * IB_PORTPHYSSTATE_POLLING (2) through OPA_PORTPHYSSTATE_MAX (11) are
 * represented in physical_state_transitions.
 */
#define __N_PHYSTATES (OPA_PORTPHYSSTATE_MAX - IB_PORTPHYSSTATE_POLLING + 1)

/*
 * Within physical_state_transitions, rows represent "old" states,
 * columns "new" states, and physical_state_transitions.allowed[old][new]
 * indicates if the transition from old state to new state is legal (see
 * OPAg1v1, Table 6-4).
 *
 * Both indices are the state value minus IB_PORTPHYSSTATE_POLLING, which is
 * the adjustment physical_transition_allowed() applies before the lookup.
 * The numbers in the row and column labels below are the unadjusted state
 * values.
 */
static const struct {
	u8 allowed[__N_PHYSTATES][__N_PHYSTATES];
} physical_state_transitions = {
	{
		/* 2    3    4    5    6    7    8    9   10   11 */
	/* 2 */	{ __A, __A, __D, __D, __D, __D, __D, __D, __D, __D },
	/* 3 */	{ __A, __I, __D, __D, __D, __D, __D, __D, __D, __A },
	/* 4 */	{ __U, __U, __U, __U, __U, __U, __U, __U, __U, __U },
	/* 5 */	{ __A, __A, __D, __I, __D, __D, __D, __D, __D, __D },
	/* 6 */	{ __U, __U, __U, __U, __U, __U, __U, __U, __U, __U },
	/* 7 */	{ __D, __A, __D, __D, __D, __I, __D, __D, __D, __D },
	/* 8 */	{ __U, __U, __U, __U, __U, __U, __U, __U, __U, __U },
	/* 9 */	{ __I, __A, __D, __D, __D, __D, __D, __I, __D, __D },
	/*10 */	{ __U, __U, __U, __U, __U, __U, __U, __U, __U, __U },
	/*11 */	{ __D, __A, __D, __D, __D, __D, __D, __D, __D, __I },
	}
};
1080 
/*
 * IB_PORT_DOWN (1) through IB_PORT_ACTIVE_DEFER (5) are represented in
 * logical_state_transitions.
 */

#define __N_LOGICAL_STATES (IB_PORT_ACTIVE_DEFER - IB_PORT_DOWN + 1)

/*
 * Within logical_state_transitions rows represent "old" states,
 * columns "new" states, and logical_state_transitions.allowed[old][new]
 * indicates if the transition from old state to new state is legal (see
 * OPAg1v1, Table 9-12).
 *
 * Both indices are the state value minus IB_PORT_DOWN, which is the
 * adjustment logical_transition_allowed() applies before the lookup.
 * The numbers in the row and column labels below are the unadjusted state
 * values.
 */
static const struct {
	u8 allowed[__N_LOGICAL_STATES][__N_LOGICAL_STATES];
} logical_state_transitions = {
	{
		/* 1    2    3    4    5 */
	/* 1 */	{ __I, __D, __D, __D, __U},
	/* 2 */	{ __D, __I, __A, __D, __U},
	/* 3 */	{ __D, __D, __I, __A, __U},
	/* 4 */	{ __D, __D, __I, __I, __U},
	/* 5 */	{ __U, __U, __U, __U, __U},
	}
};
1106 
1107 static int logical_transition_allowed(int old, int new)
1108 {
1109 	if (old < IB_PORT_NOP || old > IB_PORT_ACTIVE_DEFER ||
1110 	    new < IB_PORT_NOP || new > IB_PORT_ACTIVE_DEFER) {
1111 		pr_warn("invalid logical state(s) (old %d new %d)\n",
1112 			old, new);
1113 		return HFI_TRANSITION_UNDEFINED;
1114 	}
1115 
1116 	if (new == IB_PORT_NOP)
1117 		return HFI_TRANSITION_ALLOWED; /* always allowed */
1118 
1119 	/* adjust states for indexing into logical_state_transitions */
1120 	old -= IB_PORT_DOWN;
1121 	new -= IB_PORT_DOWN;
1122 
1123 	if (old < 0 || new < 0)
1124 		return HFI_TRANSITION_UNDEFINED;
1125 	return logical_state_transitions.allowed[old][new];
1126 }
1127 
1128 static int physical_transition_allowed(int old, int new)
1129 {
1130 	if (old < IB_PORTPHYSSTATE_NOP || old > OPA_PORTPHYSSTATE_MAX ||
1131 	    new < IB_PORTPHYSSTATE_NOP || new > OPA_PORTPHYSSTATE_MAX) {
1132 		pr_warn("invalid physical state(s) (old %d new %d)\n",
1133 			old, new);
1134 		return HFI_TRANSITION_UNDEFINED;
1135 	}
1136 
1137 	if (new == IB_PORTPHYSSTATE_NOP)
1138 		return HFI_TRANSITION_ALLOWED; /* always allowed */
1139 
1140 	/* adjust states for indexing into physical_state_transitions */
1141 	old -= IB_PORTPHYSSTATE_POLLING;
1142 	new -= IB_PORTPHYSSTATE_POLLING;
1143 
1144 	if (old < 0 || new < 0)
1145 		return HFI_TRANSITION_UNDEFINED;
1146 	return physical_state_transitions.allowed[old][new];
1147 }
1148 
1149 static int port_states_transition_allowed(struct hfi1_pportdata *ppd,
1150 					  u32 logical_new, u32 physical_new)
1151 {
1152 	u32 physical_old = driver_pstate(ppd);
1153 	u32 logical_old = driver_lstate(ppd);
1154 	int ret, logical_allowed, physical_allowed;
1155 
1156 	ret = logical_transition_allowed(logical_old, logical_new);
1157 	logical_allowed = ret;
1158 
1159 	if (ret == HFI_TRANSITION_DISALLOWED ||
1160 	    ret == HFI_TRANSITION_UNDEFINED) {
1161 		pr_warn("invalid logical state transition %s -> %s\n",
1162 			ib_port_state_to_str(logical_old),
1163 			ib_port_state_to_str(logical_new));
1164 		return ret;
1165 	}
1166 
1167 	ret = physical_transition_allowed(physical_old, physical_new);
1168 	physical_allowed = ret;
1169 
1170 	if (ret == HFI_TRANSITION_DISALLOWED ||
1171 	    ret == HFI_TRANSITION_UNDEFINED) {
1172 		pr_warn("invalid physical state transition %s -> %s\n",
1173 			opa_pstate_name(physical_old),
1174 			opa_pstate_name(physical_new));
1175 		return ret;
1176 	}
1177 
1178 	if (logical_allowed == HFI_TRANSITION_IGNORED &&
1179 	    physical_allowed == HFI_TRANSITION_IGNORED)
1180 		return HFI_TRANSITION_IGNORED;
1181 
1182 	/*
1183 	 * A change request of Physical Port State from
1184 	 * 'Offline' to 'Polling' should be ignored.
1185 	 */
1186 	if ((physical_old == OPA_PORTPHYSSTATE_OFFLINE) &&
1187 	    (physical_new == IB_PORTPHYSSTATE_POLLING))
1188 		return HFI_TRANSITION_IGNORED;
1189 
1190 	/*
1191 	 * Either physical_allowed or logical_allowed is
1192 	 * HFI_TRANSITION_ALLOWED.
1193 	 */
1194 	return HFI_TRANSITION_ALLOWED;
1195 }
1196 
1197 static int set_port_states(struct hfi1_pportdata *ppd, struct opa_smp *smp,
1198 			   u32 logical_state, u32 phys_state, int local_mad)
1199 {
1200 	struct hfi1_devdata *dd = ppd->dd;
1201 	u32 link_state;
1202 	int ret;
1203 
1204 	ret = port_states_transition_allowed(ppd, logical_state, phys_state);
1205 	if (ret == HFI_TRANSITION_DISALLOWED ||
1206 	    ret == HFI_TRANSITION_UNDEFINED) {
1207 		/* error message emitted above */
1208 		smp->status |= IB_SMP_INVALID_FIELD;
1209 		return 0;
1210 	}
1211 
1212 	if (ret == HFI_TRANSITION_IGNORED)
1213 		return 0;
1214 
1215 	if ((phys_state != IB_PORTPHYSSTATE_NOP) &&
1216 	    !(logical_state == IB_PORT_DOWN ||
1217 	      logical_state == IB_PORT_NOP)){
1218 		pr_warn("SubnSet(OPA_PortInfo) port state invalid: logical_state 0x%x physical_state 0x%x\n",
1219 			logical_state, phys_state);
1220 		smp->status |= IB_SMP_INVALID_FIELD;
1221 	}
1222 
1223 	/*
1224 	 * Logical state changes are summarized in OPAv1g1 spec.,
1225 	 * Table 9-12; physical state changes are summarized in
1226 	 * OPAv1g1 spec., Table 6.4.
1227 	 */
1228 	switch (logical_state) {
1229 	case IB_PORT_NOP:
1230 		if (phys_state == IB_PORTPHYSSTATE_NOP)
1231 			break;
1232 		fallthrough;
1233 	case IB_PORT_DOWN:
1234 		if (phys_state == IB_PORTPHYSSTATE_NOP) {
1235 			link_state = HLS_DN_DOWNDEF;
1236 		} else if (phys_state == IB_PORTPHYSSTATE_POLLING) {
1237 			link_state = HLS_DN_POLL;
1238 			set_link_down_reason(ppd, OPA_LINKDOWN_REASON_FM_BOUNCE,
1239 					     0, OPA_LINKDOWN_REASON_FM_BOUNCE);
1240 		} else if (phys_state == IB_PORTPHYSSTATE_DISABLED) {
1241 			link_state = HLS_DN_DISABLE;
1242 		} else {
1243 			pr_warn("SubnSet(OPA_PortInfo) invalid physical state 0x%x\n",
1244 				phys_state);
1245 			smp->status |= IB_SMP_INVALID_FIELD;
1246 			break;
1247 		}
1248 
1249 		if ((link_state == HLS_DN_POLL ||
1250 		     link_state == HLS_DN_DOWNDEF)) {
1251 			/*
1252 			 * Going to poll.  No matter what the current state,
1253 			 * always move offline first, then tune and start the
1254 			 * link.  This correctly handles a FM link bounce and
1255 			 * a link enable.  Going offline is a no-op if already
1256 			 * offline.
1257 			 */
1258 			set_link_state(ppd, HLS_DN_OFFLINE);
1259 			start_link(ppd);
1260 		} else {
1261 			set_link_state(ppd, link_state);
1262 		}
1263 		if (link_state == HLS_DN_DISABLE &&
1264 		    (ppd->offline_disabled_reason >
1265 		     HFI1_ODR_MASK(OPA_LINKDOWN_REASON_SMA_DISABLED) ||
1266 		     ppd->offline_disabled_reason ==
1267 		     HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE)))
1268 			ppd->offline_disabled_reason =
1269 			HFI1_ODR_MASK(OPA_LINKDOWN_REASON_SMA_DISABLED);
1270 		/*
1271 		 * Don't send a reply if the response would be sent
1272 		 * through the disabled port.
1273 		 */
1274 		if (link_state == HLS_DN_DISABLE && !local_mad)
1275 			return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
1276 		break;
1277 	case IB_PORT_ARMED:
1278 		ret = set_link_state(ppd, HLS_UP_ARMED);
1279 		if (!ret)
1280 			send_idle_sma(dd, SMA_IDLE_ARM);
1281 		break;
1282 	case IB_PORT_ACTIVE:
1283 		if (ppd->neighbor_normal) {
1284 			ret = set_link_state(ppd, HLS_UP_ACTIVE);
1285 			if (ret == 0)
1286 				send_idle_sma(dd, SMA_IDLE_ACTIVE);
1287 		} else {
1288 			pr_warn("SubnSet(OPA_PortInfo) Cannot move to Active with NeighborNormal 0\n");
1289 			smp->status |= IB_SMP_INVALID_FIELD;
1290 		}
1291 		break;
1292 	default:
1293 		pr_warn("SubnSet(OPA_PortInfo) invalid logical state 0x%x\n",
1294 			logical_state);
1295 		smp->status |= IB_SMP_INVALID_FIELD;
1296 	}
1297 
1298 	return 0;
1299 }
1300 
1301 /*
1302  * subn_set_opa_portinfo - set port information
1303  * @smp: the incoming SM packet
1304  * @ibdev: the infiniband device
1305  * @port: the port on the device
1306  *
1307  */
1308 static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
1309 				   struct ib_device *ibdev, u32 port,
1310 				   u32 *resp_len, u32 max_len, int local_mad)
1311 {
1312 	struct opa_port_info *pi = (struct opa_port_info *)data;
1313 	struct ib_event event;
1314 	struct hfi1_devdata *dd;
1315 	struct hfi1_pportdata *ppd;
1316 	struct hfi1_ibport *ibp;
1317 	u8 clientrereg;
1318 	unsigned long flags;
1319 	u32 smlid;
1320 	u32 lid;
1321 	u8 ls_old, ls_new, ps_new;
1322 	u8 vls;
1323 	u8 msl;
1324 	u8 crc_enabled;
1325 	u16 lse, lwe, mtu;
1326 	u32 num_ports = OPA_AM_NPORT(am);
1327 	u32 start_of_sm_config = OPA_AM_START_SM_CFG(am);
1328 	int ret, i, invalid = 0, call_set_mtu = 0;
1329 	int call_link_downgrade_policy = 0;
1330 
1331 	if (num_ports != 1 ||
1332 	    smp_length_check(sizeof(*pi), max_len)) {
1333 		smp->status |= IB_SMP_INVALID_FIELD;
1334 		return reply((struct ib_mad_hdr *)smp);
1335 	}
1336 
1337 	lid = be32_to_cpu(pi->lid);
1338 	if (lid & 0xFF000000) {
1339 		pr_warn("OPA_PortInfo lid out of range: %X\n", lid);
1340 		smp->status |= IB_SMP_INVALID_FIELD;
1341 		goto get_only;
1342 	}
1343 
1344 
1345 	smlid = be32_to_cpu(pi->sm_lid);
1346 	if (smlid & 0xFF000000) {
1347 		pr_warn("OPA_PortInfo SM lid out of range: %X\n", smlid);
1348 		smp->status |= IB_SMP_INVALID_FIELD;
1349 		goto get_only;
1350 	}
1351 
1352 	clientrereg = (pi->clientrereg_subnettimeout &
1353 			OPA_PI_MASK_CLIENT_REREGISTER);
1354 
1355 	dd = dd_from_ibdev(ibdev);
1356 	/* IB numbers ports from 1, hw from 0 */
1357 	ppd = dd->pport + (port - 1);
1358 	ibp = &ppd->ibport_data;
1359 	event.device = ibdev;
1360 	event.element.port_num = port;
1361 
1362 	ls_old = driver_lstate(ppd);
1363 
1364 	ibp->rvp.mkey = pi->mkey;
1365 	if (ibp->rvp.gid_prefix != pi->subnet_prefix) {
1366 		ibp->rvp.gid_prefix = pi->subnet_prefix;
1367 		event.event = IB_EVENT_GID_CHANGE;
1368 		ib_dispatch_event(&event);
1369 	}
1370 	ibp->rvp.mkey_lease_period = be16_to_cpu(pi->mkey_lease_period);
1371 
1372 	/* Must be a valid unicast LID address. */
1373 	if ((lid == 0 && ls_old > IB_PORT_INIT) ||
1374 	     (hfi1_is_16B_mcast(lid))) {
1375 		smp->status |= IB_SMP_INVALID_FIELD;
1376 		pr_warn("SubnSet(OPA_PortInfo) lid invalid 0x%x\n",
1377 			lid);
1378 	} else if (ppd->lid != lid ||
1379 		 ppd->lmc != (pi->mkeyprotect_lmc & OPA_PI_MASK_LMC)) {
1380 		if (ppd->lid != lid)
1381 			hfi1_set_uevent_bits(ppd, _HFI1_EVENT_LID_CHANGE_BIT);
1382 		if (ppd->lmc != (pi->mkeyprotect_lmc & OPA_PI_MASK_LMC))
1383 			hfi1_set_uevent_bits(ppd, _HFI1_EVENT_LMC_CHANGE_BIT);
1384 		hfi1_set_lid(ppd, lid, pi->mkeyprotect_lmc & OPA_PI_MASK_LMC);
1385 		event.event = IB_EVENT_LID_CHANGE;
1386 		ib_dispatch_event(&event);
1387 
1388 		if (HFI1_PORT_GUID_INDEX + 1 < HFI1_GUIDS_PER_PORT) {
1389 			/* Manufacture GID from LID to support extended
1390 			 * addresses
1391 			 */
1392 			ppd->guids[HFI1_PORT_GUID_INDEX + 1] =
1393 				be64_to_cpu(OPA_MAKE_ID(lid));
1394 			event.event = IB_EVENT_GID_CHANGE;
1395 			ib_dispatch_event(&event);
1396 		}
1397 	}
1398 
1399 	msl = pi->smsl & OPA_PI_MASK_SMSL;
1400 	if (pi->partenforce_filterraw & OPA_PI_MASK_LINKINIT_REASON)
1401 		ppd->linkinit_reason =
1402 			(pi->partenforce_filterraw &
1403 			 OPA_PI_MASK_LINKINIT_REASON);
1404 
1405 	/* Must be a valid unicast LID address. */
1406 	if ((smlid == 0 && ls_old > IB_PORT_INIT) ||
1407 	     (hfi1_is_16B_mcast(smlid))) {
1408 		smp->status |= IB_SMP_INVALID_FIELD;
1409 		pr_warn("SubnSet(OPA_PortInfo) smlid invalid 0x%x\n", smlid);
1410 	} else if (smlid != ibp->rvp.sm_lid || msl != ibp->rvp.sm_sl) {
1411 		pr_warn("SubnSet(OPA_PortInfo) smlid 0x%x\n", smlid);
1412 		spin_lock_irqsave(&ibp->rvp.lock, flags);
1413 		if (ibp->rvp.sm_ah) {
1414 			if (smlid != ibp->rvp.sm_lid)
1415 				hfi1_modify_qp0_ah(ibp, ibp->rvp.sm_ah, smlid);
1416 			if (msl != ibp->rvp.sm_sl)
1417 				rdma_ah_set_sl(&ibp->rvp.sm_ah->attr, msl);
1418 		}
1419 		spin_unlock_irqrestore(&ibp->rvp.lock, flags);
1420 		if (smlid != ibp->rvp.sm_lid)
1421 			ibp->rvp.sm_lid = smlid;
1422 		if (msl != ibp->rvp.sm_sl)
1423 			ibp->rvp.sm_sl = msl;
1424 		event.event = IB_EVENT_SM_CHANGE;
1425 		ib_dispatch_event(&event);
1426 	}
1427 
1428 	if (pi->link_down_reason == 0) {
1429 		ppd->local_link_down_reason.sma = 0;
1430 		ppd->local_link_down_reason.latest = 0;
1431 	}
1432 
1433 	if (pi->neigh_link_down_reason == 0) {
1434 		ppd->neigh_link_down_reason.sma = 0;
1435 		ppd->neigh_link_down_reason.latest = 0;
1436 	}
1437 
1438 	ppd->sm_trap_qp = be32_to_cpu(pi->sm_trap_qp);
1439 	ppd->sa_qp = be32_to_cpu(pi->sa_qp);
1440 
1441 	ppd->port_error_action = be32_to_cpu(pi->port_error_action);
1442 	lwe = be16_to_cpu(pi->link_width.enabled);
1443 	if (lwe) {
1444 		if (lwe == OPA_LINK_WIDTH_RESET ||
1445 		    lwe == OPA_LINK_WIDTH_RESET_OLD)
1446 			set_link_width_enabled(ppd, ppd->link_width_supported);
1447 		else if ((lwe & ~ppd->link_width_supported) == 0)
1448 			set_link_width_enabled(ppd, lwe);
1449 		else
1450 			smp->status |= IB_SMP_INVALID_FIELD;
1451 	}
1452 	lwe = be16_to_cpu(pi->link_width_downgrade.enabled);
1453 	/* LWD.E is always applied - 0 means "disabled" */
1454 	if (lwe == OPA_LINK_WIDTH_RESET ||
1455 	    lwe == OPA_LINK_WIDTH_RESET_OLD) {
1456 		set_link_width_downgrade_enabled(ppd,
1457 						 ppd->
1458 						 link_width_downgrade_supported
1459 						 );
1460 	} else if ((lwe & ~ppd->link_width_downgrade_supported) == 0) {
1461 		/* only set and apply if something changed */
1462 		if (lwe != ppd->link_width_downgrade_enabled) {
1463 			set_link_width_downgrade_enabled(ppd, lwe);
1464 			call_link_downgrade_policy = 1;
1465 		}
1466 	} else {
1467 		smp->status |= IB_SMP_INVALID_FIELD;
1468 	}
1469 	lse = be16_to_cpu(pi->link_speed.enabled);
1470 	if (lse) {
1471 		if (lse & be16_to_cpu(pi->link_speed.supported))
1472 			set_link_speed_enabled(ppd, lse);
1473 		else
1474 			smp->status |= IB_SMP_INVALID_FIELD;
1475 	}
1476 
1477 	ibp->rvp.mkeyprot =
1478 		(pi->mkeyprotect_lmc & OPA_PI_MASK_MKEY_PROT_BIT) >> 6;
1479 	ibp->rvp.vl_high_limit = be16_to_cpu(pi->vl.high_limit) & 0xFF;
1480 	(void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_VL_HIGH_LIMIT,
1481 				    ibp->rvp.vl_high_limit);
1482 
1483 	if (ppd->vls_supported / 2 > ARRAY_SIZE(pi->neigh_mtu.pvlx_to_mtu) ||
1484 	    ppd->vls_supported > ARRAY_SIZE(dd->vld)) {
1485 		smp->status |= IB_SMP_INVALID_FIELD;
1486 		return reply((struct ib_mad_hdr *)smp);
1487 	}
1488 	for (i = 0; i < ppd->vls_supported; i++) {
1489 		if ((i % 2) == 0)
1490 			mtu = enum_to_mtu((pi->neigh_mtu.pvlx_to_mtu[i / 2] >>
1491 					   4) & 0xF);
1492 		else
1493 			mtu = enum_to_mtu(pi->neigh_mtu.pvlx_to_mtu[i / 2] &
1494 					  0xF);
1495 		if (mtu == 0xffff) {
1496 			pr_warn("SubnSet(OPA_PortInfo) mtu invalid %d (0x%x)\n",
1497 				mtu,
1498 				(pi->neigh_mtu.pvlx_to_mtu[0] >> 4) & 0xF);
1499 			smp->status |= IB_SMP_INVALID_FIELD;
1500 			mtu = hfi1_max_mtu; /* use a valid MTU */
1501 		}
1502 		if (dd->vld[i].mtu != mtu) {
1503 			dd_dev_info(dd,
1504 				    "MTU change on vl %d from %d to %d\n",
1505 				    i, dd->vld[i].mtu, mtu);
1506 			dd->vld[i].mtu = mtu;
1507 			call_set_mtu++;
1508 		}
1509 	}
1510 	/* As per OPAV1 spec: VL15 must support and be configured
1511 	 * for operation with a 2048 or larger MTU.
1512 	 */
1513 	mtu = enum_to_mtu(pi->neigh_mtu.pvlx_to_mtu[15 / 2] & 0xF);
1514 	if (mtu < 2048 || mtu == 0xffff)
1515 		mtu = 2048;
1516 	if (dd->vld[15].mtu != mtu) {
1517 		dd_dev_info(dd,
1518 			    "MTU change on vl 15 from %d to %d\n",
1519 			    dd->vld[15].mtu, mtu);
1520 		dd->vld[15].mtu = mtu;
1521 		call_set_mtu++;
1522 	}
1523 	if (call_set_mtu)
1524 		set_mtu(ppd);
1525 
1526 	/* Set operational VLs */
1527 	vls = pi->operational_vls & OPA_PI_MASK_OPERATIONAL_VL;
1528 	if (vls) {
1529 		if (vls > ppd->vls_supported) {
1530 			pr_warn("SubnSet(OPA_PortInfo) VL's supported invalid %d\n",
1531 				pi->operational_vls);
1532 			smp->status |= IB_SMP_INVALID_FIELD;
1533 		} else {
1534 			if (hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_OP_VLS,
1535 					    vls) == -EINVAL)
1536 				smp->status |= IB_SMP_INVALID_FIELD;
1537 		}
1538 	}
1539 
1540 	if (pi->mkey_violations == 0)
1541 		ibp->rvp.mkey_violations = 0;
1542 
1543 	if (pi->pkey_violations == 0)
1544 		ibp->rvp.pkey_violations = 0;
1545 
1546 	if (pi->qkey_violations == 0)
1547 		ibp->rvp.qkey_violations = 0;
1548 
1549 	ibp->rvp.subnet_timeout =
1550 		pi->clientrereg_subnettimeout & OPA_PI_MASK_SUBNET_TIMEOUT;
1551 
1552 	crc_enabled = be16_to_cpu(pi->port_ltp_crc_mode);
1553 	crc_enabled >>= 4;
1554 	crc_enabled &= 0xf;
1555 
1556 	if (crc_enabled != 0)
1557 		ppd->port_crc_mode_enabled = port_ltp_to_cap(crc_enabled);
1558 
1559 	ppd->is_active_optimize_enabled =
1560 			!!(be16_to_cpu(pi->port_mode)
1561 					& OPA_PI_MASK_PORT_ACTIVE_OPTOMIZE);
1562 
1563 	ls_new = pi->port_states.portphysstate_portstate &
1564 			OPA_PI_MASK_PORT_STATE;
1565 	ps_new = (pi->port_states.portphysstate_portstate &
1566 			OPA_PI_MASK_PORT_PHYSICAL_STATE) >> 4;
1567 
1568 	if (ls_old == IB_PORT_INIT) {
1569 		if (start_of_sm_config) {
1570 			if (ls_new == ls_old || (ls_new == IB_PORT_ARMED))
1571 				ppd->is_sm_config_started = 1;
1572 		} else if (ls_new == IB_PORT_ARMED) {
1573 			if (ppd->is_sm_config_started == 0) {
1574 				invalid = 1;
1575 				smp->status |= IB_SMP_INVALID_FIELD;
1576 			}
1577 		}
1578 	}
1579 
1580 	/* Handle CLIENT_REREGISTER event b/c SM asked us for it */
1581 	if (clientrereg) {
1582 		event.event = IB_EVENT_CLIENT_REREGISTER;
1583 		ib_dispatch_event(&event);
1584 	}
1585 
1586 	/*
1587 	 * Do the port state change now that the other link parameters
1588 	 * have been set.
1589 	 * Changing the port physical state only makes sense if the link
1590 	 * is down or is being set to down.
1591 	 */
1592 
1593 	if (!invalid) {
1594 		ret = set_port_states(ppd, smp, ls_new, ps_new, local_mad);
1595 		if (ret)
1596 			return ret;
1597 	}
1598 
1599 	ret = __subn_get_opa_portinfo(smp, am, data, ibdev, port, resp_len,
1600 				      max_len);
1601 
1602 	/* restore re-reg bit per o14-12.2.1 */
1603 	pi->clientrereg_subnettimeout |= clientrereg;
1604 
1605 	/*
1606 	 * Apply the new link downgrade policy.  This may result in a link
1607 	 * bounce.  Do this after everything else so things are settled.
1608 	 * Possible problem: if setting the port state above fails, then
1609 	 * the policy change is not applied.
1610 	 */
1611 	if (call_link_downgrade_policy)
1612 		apply_link_downgrade_policy(ppd, 0);
1613 
1614 	return ret;
1615 
1616 get_only:
1617 	return __subn_get_opa_portinfo(smp, am, data, ibdev, port, resp_len,
1618 				       max_len);
1619 }
1620 
1621 /**
1622  * set_pkeys - set the PKEY table for ctxt 0
1623  * @dd: the hfi1_ib device
1624  * @port: the IB port number
1625  * @pkeys: the PKEY table
1626  */
1627 static int set_pkeys(struct hfi1_devdata *dd, u32 port, u16 *pkeys)
1628 {
1629 	struct hfi1_pportdata *ppd;
1630 	int i;
1631 	int changed = 0;
1632 	int update_includes_mgmt_partition = 0;
1633 
1634 	/*
1635 	 * IB port one/two always maps to context zero/one,
1636 	 * always a kernel context, no locking needed
1637 	 * If we get here with ppd setup, no need to check
1638 	 * that rcd is valid.
1639 	 */
1640 	ppd = dd->pport + (port - 1);
1641 	/*
1642 	 * If the update does not include the management pkey, don't do it.
1643 	 */
1644 	for (i = 0; i < ARRAY_SIZE(ppd->pkeys); i++) {
1645 		if (pkeys[i] == LIM_MGMT_P_KEY) {
1646 			update_includes_mgmt_partition = 1;
1647 			break;
1648 		}
1649 	}
1650 
1651 	if (!update_includes_mgmt_partition)
1652 		return 1;
1653 
1654 	for (i = 0; i < ARRAY_SIZE(ppd->pkeys); i++) {
1655 		u16 key = pkeys[i];
1656 		u16 okey = ppd->pkeys[i];
1657 
1658 		if (key == okey)
1659 			continue;
1660 		/*
1661 		 * The SM gives us the complete PKey table. We have
1662 		 * to ensure that we put the PKeys in the matching
1663 		 * slots.
1664 		 */
1665 		ppd->pkeys[i] = key;
1666 		changed = 1;
1667 	}
1668 
1669 	if (changed) {
1670 		(void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0);
1671 		hfi1_event_pkey_change(dd, port);
1672 	}
1673 
1674 	return 0;
1675 }
1676 
1677 static int __subn_set_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data,
1678 				    struct ib_device *ibdev, u32 port,
1679 				    u32 *resp_len, u32 max_len)
1680 {
1681 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
1682 	u32 n_blocks_sent = OPA_AM_NBLK(am);
1683 	u32 start_block = am & 0x7ff;
1684 	u16 *p = (u16 *)data;
1685 	__be16 *q = (__be16 *)data;
1686 	int i;
1687 	u16 n_blocks_avail;
1688 	unsigned npkeys = hfi1_get_npkeys(dd);
1689 	u32 size = 0;
1690 
1691 	if (n_blocks_sent == 0) {
1692 		pr_warn("OPA Get PKey AM Invalid : P = %u; B = 0x%x; N = 0x%x\n",
1693 			port, start_block, n_blocks_sent);
1694 		smp->status |= IB_SMP_INVALID_FIELD;
1695 		return reply((struct ib_mad_hdr *)smp);
1696 	}
1697 
1698 	n_blocks_avail = (u16)(npkeys / OPA_PARTITION_TABLE_BLK_SIZE) + 1;
1699 
1700 	size = sizeof(u16) * (n_blocks_sent * OPA_PARTITION_TABLE_BLK_SIZE);
1701 
1702 	if (smp_length_check(size, max_len)) {
1703 		smp->status |= IB_SMP_INVALID_FIELD;
1704 		return reply((struct ib_mad_hdr *)smp);
1705 	}
1706 
1707 	if (start_block + n_blocks_sent > n_blocks_avail ||
1708 	    n_blocks_sent > OPA_NUM_PKEY_BLOCKS_PER_SMP) {
1709 		pr_warn("OPA Set PKey AM Invalid : s 0x%x; req 0x%x; avail 0x%x; blk/smp 0x%lx\n",
1710 			start_block, n_blocks_sent, n_blocks_avail,
1711 			OPA_NUM_PKEY_BLOCKS_PER_SMP);
1712 		smp->status |= IB_SMP_INVALID_FIELD;
1713 		return reply((struct ib_mad_hdr *)smp);
1714 	}
1715 
1716 	for (i = 0; i < n_blocks_sent * OPA_PARTITION_TABLE_BLK_SIZE; i++)
1717 		p[i] = be16_to_cpu(q[i]);
1718 
1719 	if (start_block == 0 && set_pkeys(dd, port, p) != 0) {
1720 		smp->status |= IB_SMP_INVALID_FIELD;
1721 		return reply((struct ib_mad_hdr *)smp);
1722 	}
1723 
1724 	return __subn_get_opa_pkeytable(smp, am, data, ibdev, port, resp_len,
1725 					max_len);
1726 }
1727 
1728 #define ILLEGAL_VL 12
1729 /*
1730  * filter_sc2vlt changes mappings to VL15 to ILLEGAL_VL (except
1731  * for SC15, which must map to VL15). If we don't remap things this
1732  * way it is possible for VL15 counters to increment when we try to
1733  * send on a SC which is mapped to an invalid VL.
1734  * When getting the table convert ILLEGAL_VL back to VL15.
1735  */
1736 static void filter_sc2vlt(void *data, bool set)
1737 {
1738 	int i;
1739 	u8 *pd = data;
1740 
1741 	for (i = 0; i < OPA_MAX_SCS; i++) {
1742 		if (i == 15)
1743 			continue;
1744 
1745 		if (set) {
1746 			if ((pd[i] & 0x1f) == 0xf)
1747 				pd[i] = ILLEGAL_VL;
1748 		} else {
1749 			if ((pd[i] & 0x1f) == ILLEGAL_VL)
1750 				pd[i] = 0xf;
1751 		}
1752 	}
1753 }
1754 
1755 static int set_sc2vlt_tables(struct hfi1_devdata *dd, void *data)
1756 {
1757 	u64 *val = data;
1758 
1759 	filter_sc2vlt(data, true);
1760 
1761 	write_csr(dd, SEND_SC2VLT0, *val++);
1762 	write_csr(dd, SEND_SC2VLT1, *val++);
1763 	write_csr(dd, SEND_SC2VLT2, *val++);
1764 	write_csr(dd, SEND_SC2VLT3, *val++);
1765 	write_seqlock_irq(&dd->sc2vl_lock);
1766 	memcpy(dd->sc2vl, data, sizeof(dd->sc2vl));
1767 	write_sequnlock_irq(&dd->sc2vl_lock);
1768 	return 0;
1769 }
1770 
1771 static int get_sc2vlt_tables(struct hfi1_devdata *dd, void *data)
1772 {
1773 	u64 *val = (u64 *)data;
1774 
1775 	*val++ = read_csr(dd, SEND_SC2VLT0);
1776 	*val++ = read_csr(dd, SEND_SC2VLT1);
1777 	*val++ = read_csr(dd, SEND_SC2VLT2);
1778 	*val++ = read_csr(dd, SEND_SC2VLT3);
1779 
1780 	filter_sc2vlt((u64 *)data, false);
1781 	return 0;
1782 }
1783 
1784 static int __subn_get_opa_sl_to_sc(struct opa_smp *smp, u32 am, u8 *data,
1785 				   struct ib_device *ibdev, u32 port,
1786 				   u32 *resp_len, u32 max_len)
1787 {
1788 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
1789 	u8 *p = data;
1790 	size_t size = ARRAY_SIZE(ibp->sl_to_sc); /* == 32 */
1791 	unsigned i;
1792 
1793 	if (am || smp_length_check(size, max_len)) {
1794 		smp->status |= IB_SMP_INVALID_FIELD;
1795 		return reply((struct ib_mad_hdr *)smp);
1796 	}
1797 
1798 	for (i = 0; i < ARRAY_SIZE(ibp->sl_to_sc); i++)
1799 		*p++ = ibp->sl_to_sc[i];
1800 
1801 	if (resp_len)
1802 		*resp_len += size;
1803 
1804 	return reply((struct ib_mad_hdr *)smp);
1805 }
1806 
1807 static int __subn_set_opa_sl_to_sc(struct opa_smp *smp, u32 am, u8 *data,
1808 				   struct ib_device *ibdev, u32 port,
1809 				   u32 *resp_len, u32 max_len)
1810 {
1811 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
1812 	u8 *p = data;
1813 	size_t size = ARRAY_SIZE(ibp->sl_to_sc);
1814 	int i;
1815 	u8 sc;
1816 
1817 	if (am || smp_length_check(size, max_len)) {
1818 		smp->status |= IB_SMP_INVALID_FIELD;
1819 		return reply((struct ib_mad_hdr *)smp);
1820 	}
1821 
1822 	for (i = 0; i <  ARRAY_SIZE(ibp->sl_to_sc); i++) {
1823 		sc = *p++;
1824 		if (ibp->sl_to_sc[i] != sc) {
1825 			ibp->sl_to_sc[i] = sc;
1826 
1827 			/* Put all stale qps into error state */
1828 			hfi1_error_port_qps(ibp, i);
1829 		}
1830 	}
1831 
1832 	return __subn_get_opa_sl_to_sc(smp, am, data, ibdev, port, resp_len,
1833 				       max_len);
1834 }
1835 
1836 static int __subn_get_opa_sc_to_sl(struct opa_smp *smp, u32 am, u8 *data,
1837 				   struct ib_device *ibdev, u32 port,
1838 				   u32 *resp_len, u32 max_len)
1839 {
1840 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
1841 	u8 *p = data;
1842 	size_t size = ARRAY_SIZE(ibp->sc_to_sl); /* == 32 */
1843 	unsigned i;
1844 
1845 	if (am || smp_length_check(size, max_len)) {
1846 		smp->status |= IB_SMP_INVALID_FIELD;
1847 		return reply((struct ib_mad_hdr *)smp);
1848 	}
1849 
1850 	for (i = 0; i < ARRAY_SIZE(ibp->sc_to_sl); i++)
1851 		*p++ = ibp->sc_to_sl[i];
1852 
1853 	if (resp_len)
1854 		*resp_len += size;
1855 
1856 	return reply((struct ib_mad_hdr *)smp);
1857 }
1858 
1859 static int __subn_set_opa_sc_to_sl(struct opa_smp *smp, u32 am, u8 *data,
1860 				   struct ib_device *ibdev, u32 port,
1861 				   u32 *resp_len, u32 max_len)
1862 {
1863 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
1864 	size_t size = ARRAY_SIZE(ibp->sc_to_sl);
1865 	u8 *p = data;
1866 	int i;
1867 
1868 	if (am || smp_length_check(size, max_len)) {
1869 		smp->status |= IB_SMP_INVALID_FIELD;
1870 		return reply((struct ib_mad_hdr *)smp);
1871 	}
1872 
1873 	for (i = 0; i < ARRAY_SIZE(ibp->sc_to_sl); i++)
1874 		ibp->sc_to_sl[i] = *p++;
1875 
1876 	return __subn_get_opa_sc_to_sl(smp, am, data, ibdev, port, resp_len,
1877 				       max_len);
1878 }
1879 
1880 static int __subn_get_opa_sc_to_vlt(struct opa_smp *smp, u32 am, u8 *data,
1881 				    struct ib_device *ibdev, u32 port,
1882 				    u32 *resp_len, u32 max_len)
1883 {
1884 	u32 n_blocks = OPA_AM_NBLK(am);
1885 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
1886 	void *vp = (void *)data;
1887 	size_t size = 4 * sizeof(u64);
1888 
1889 	if (n_blocks != 1 || smp_length_check(size, max_len)) {
1890 		smp->status |= IB_SMP_INVALID_FIELD;
1891 		return reply((struct ib_mad_hdr *)smp);
1892 	}
1893 
1894 	get_sc2vlt_tables(dd, vp);
1895 
1896 	if (resp_len)
1897 		*resp_len += size;
1898 
1899 	return reply((struct ib_mad_hdr *)smp);
1900 }
1901 
1902 static int __subn_set_opa_sc_to_vlt(struct opa_smp *smp, u32 am, u8 *data,
1903 				    struct ib_device *ibdev, u32 port,
1904 				    u32 *resp_len, u32 max_len)
1905 {
1906 	u32 n_blocks = OPA_AM_NBLK(am);
1907 	int async_update = OPA_AM_ASYNC(am);
1908 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
1909 	void *vp = (void *)data;
1910 	struct hfi1_pportdata *ppd;
1911 	int lstate;
1912 	/*
1913 	 * set_sc2vlt_tables writes the information contained in *data
1914 	 * to four 64-bit registers SendSC2VLt[0-3]. We need to make
1915 	 * sure *max_len is not greater than the total size of the four
1916 	 * SendSC2VLt[0-3] registers.
1917 	 */
1918 	size_t size = 4 * sizeof(u64);
1919 
1920 	if (n_blocks != 1 || async_update || smp_length_check(size, max_len)) {
1921 		smp->status |= IB_SMP_INVALID_FIELD;
1922 		return reply((struct ib_mad_hdr *)smp);
1923 	}
1924 
1925 	/* IB numbers ports from 1, hw from 0 */
1926 	ppd = dd->pport + (port - 1);
1927 	lstate = driver_lstate(ppd);
1928 	/*
1929 	 * it's known that async_update is 0 by this point, but include
1930 	 * the explicit check for clarity
1931 	 */
1932 	if (!async_update &&
1933 	    (lstate == IB_PORT_ARMED || lstate == IB_PORT_ACTIVE)) {
1934 		smp->status |= IB_SMP_INVALID_FIELD;
1935 		return reply((struct ib_mad_hdr *)smp);
1936 	}
1937 
1938 	set_sc2vlt_tables(dd, vp);
1939 
1940 	return __subn_get_opa_sc_to_vlt(smp, am, data, ibdev, port, resp_len,
1941 					max_len);
1942 }
1943 
1944 static int __subn_get_opa_sc_to_vlnt(struct opa_smp *smp, u32 am, u8 *data,
1945 				     struct ib_device *ibdev, u32 port,
1946 				     u32 *resp_len, u32 max_len)
1947 {
1948 	u32 n_blocks = OPA_AM_NPORT(am);
1949 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
1950 	struct hfi1_pportdata *ppd;
1951 	void *vp = (void *)data;
1952 	int size = sizeof(struct sc2vlnt);
1953 
1954 	if (n_blocks != 1 || smp_length_check(size, max_len)) {
1955 		smp->status |= IB_SMP_INVALID_FIELD;
1956 		return reply((struct ib_mad_hdr *)smp);
1957 	}
1958 
1959 	ppd = dd->pport + (port - 1);
1960 
1961 	fm_get_table(ppd, FM_TBL_SC2VLNT, vp);
1962 
1963 	if (resp_len)
1964 		*resp_len += size;
1965 
1966 	return reply((struct ib_mad_hdr *)smp);
1967 }
1968 
1969 static int __subn_set_opa_sc_to_vlnt(struct opa_smp *smp, u32 am, u8 *data,
1970 				     struct ib_device *ibdev, u32 port,
1971 				     u32 *resp_len, u32 max_len)
1972 {
1973 	u32 n_blocks = OPA_AM_NPORT(am);
1974 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
1975 	struct hfi1_pportdata *ppd;
1976 	void *vp = (void *)data;
1977 	int lstate;
1978 	int size = sizeof(struct sc2vlnt);
1979 
1980 	if (n_blocks != 1 || smp_length_check(size, max_len)) {
1981 		smp->status |= IB_SMP_INVALID_FIELD;
1982 		return reply((struct ib_mad_hdr *)smp);
1983 	}
1984 
1985 	/* IB numbers ports from 1, hw from 0 */
1986 	ppd = dd->pport + (port - 1);
1987 	lstate = driver_lstate(ppd);
1988 	if (lstate == IB_PORT_ARMED || lstate == IB_PORT_ACTIVE) {
1989 		smp->status |= IB_SMP_INVALID_FIELD;
1990 		return reply((struct ib_mad_hdr *)smp);
1991 	}
1992 
1993 	ppd = dd->pport + (port - 1);
1994 
1995 	fm_set_table(ppd, FM_TBL_SC2VLNT, vp);
1996 
1997 	return __subn_get_opa_sc_to_vlnt(smp, am, data, ibdev, port,
1998 					 resp_len, max_len);
1999 }
2000 
2001 static int __subn_get_opa_psi(struct opa_smp *smp, u32 am, u8 *data,
2002 			      struct ib_device *ibdev, u32 port,
2003 			      u32 *resp_len, u32 max_len)
2004 {
2005 	u32 nports = OPA_AM_NPORT(am);
2006 	u32 start_of_sm_config = OPA_AM_START_SM_CFG(am);
2007 	u32 lstate;
2008 	struct hfi1_ibport *ibp;
2009 	struct hfi1_pportdata *ppd;
2010 	struct opa_port_state_info *psi = (struct opa_port_state_info *)data;
2011 
2012 	if (nports != 1 || smp_length_check(sizeof(*psi), max_len)) {
2013 		smp->status |= IB_SMP_INVALID_FIELD;
2014 		return reply((struct ib_mad_hdr *)smp);
2015 	}
2016 
2017 	ibp = to_iport(ibdev, port);
2018 	ppd = ppd_from_ibp(ibp);
2019 
2020 	lstate = driver_lstate(ppd);
2021 
2022 	if (start_of_sm_config && (lstate == IB_PORT_INIT))
2023 		ppd->is_sm_config_started = 1;
2024 
2025 	psi->port_states.ledenable_offlinereason = ppd->neighbor_normal << 4;
2026 	psi->port_states.ledenable_offlinereason |=
2027 		ppd->is_sm_config_started << 5;
2028 	psi->port_states.ledenable_offlinereason |=
2029 		ppd->offline_disabled_reason;
2030 
2031 	psi->port_states.portphysstate_portstate =
2032 		(driver_pstate(ppd) << 4) | (lstate & 0xf);
2033 	psi->link_width_downgrade_tx_active =
2034 		cpu_to_be16(ppd->link_width_downgrade_tx_active);
2035 	psi->link_width_downgrade_rx_active =
2036 		cpu_to_be16(ppd->link_width_downgrade_rx_active);
2037 	if (resp_len)
2038 		*resp_len += sizeof(struct opa_port_state_info);
2039 
2040 	return reply((struct ib_mad_hdr *)smp);
2041 }
2042 
2043 static int __subn_set_opa_psi(struct opa_smp *smp, u32 am, u8 *data,
2044 			      struct ib_device *ibdev, u32 port,
2045 			      u32 *resp_len, u32 max_len, int local_mad)
2046 {
2047 	u32 nports = OPA_AM_NPORT(am);
2048 	u32 start_of_sm_config = OPA_AM_START_SM_CFG(am);
2049 	u32 ls_old;
2050 	u8 ls_new, ps_new;
2051 	struct hfi1_ibport *ibp;
2052 	struct hfi1_pportdata *ppd;
2053 	struct opa_port_state_info *psi = (struct opa_port_state_info *)data;
2054 	int ret, invalid = 0;
2055 
2056 	if (nports != 1 || smp_length_check(sizeof(*psi), max_len)) {
2057 		smp->status |= IB_SMP_INVALID_FIELD;
2058 		return reply((struct ib_mad_hdr *)smp);
2059 	}
2060 
2061 	ibp = to_iport(ibdev, port);
2062 	ppd = ppd_from_ibp(ibp);
2063 
2064 	ls_old = driver_lstate(ppd);
2065 
2066 	ls_new = port_states_to_logical_state(&psi->port_states);
2067 	ps_new = port_states_to_phys_state(&psi->port_states);
2068 
2069 	if (ls_old == IB_PORT_INIT) {
2070 		if (start_of_sm_config) {
2071 			if (ls_new == ls_old || (ls_new == IB_PORT_ARMED))
2072 				ppd->is_sm_config_started = 1;
2073 		} else if (ls_new == IB_PORT_ARMED) {
2074 			if (ppd->is_sm_config_started == 0) {
2075 				invalid = 1;
2076 				smp->status |= IB_SMP_INVALID_FIELD;
2077 			}
2078 		}
2079 	}
2080 
2081 	if (!invalid) {
2082 		ret = set_port_states(ppd, smp, ls_new, ps_new, local_mad);
2083 		if (ret)
2084 			return ret;
2085 	}
2086 
2087 	return __subn_get_opa_psi(smp, am, data, ibdev, port, resp_len,
2088 				  max_len);
2089 }
2090 
2091 static int __subn_get_opa_cable_info(struct opa_smp *smp, u32 am, u8 *data,
2092 				     struct ib_device *ibdev, u32 port,
2093 				     u32 *resp_len, u32 max_len)
2094 {
2095 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
2096 	u32 addr = OPA_AM_CI_ADDR(am);
2097 	u32 len = OPA_AM_CI_LEN(am) + 1;
2098 	int ret;
2099 
2100 	if (dd->pport->port_type != PORT_TYPE_QSFP ||
2101 	    smp_length_check(len, max_len)) {
2102 		smp->status |= IB_SMP_INVALID_FIELD;
2103 		return reply((struct ib_mad_hdr *)smp);
2104 	}
2105 
2106 #define __CI_PAGE_SIZE BIT(7) /* 128 bytes */
2107 #define __CI_PAGE_MASK ~(__CI_PAGE_SIZE - 1)
2108 #define __CI_PAGE_NUM(a) ((a) & __CI_PAGE_MASK)
2109 
2110 	/*
2111 	 * check that addr is within spec, and
2112 	 * addr and (addr + len - 1) are on the same "page"
2113 	 */
2114 	if (addr >= 4096 ||
2115 	    (__CI_PAGE_NUM(addr) != __CI_PAGE_NUM(addr + len - 1))) {
2116 		smp->status |= IB_SMP_INVALID_FIELD;
2117 		return reply((struct ib_mad_hdr *)smp);
2118 	}
2119 
2120 	ret = get_cable_info(dd, port, addr, len, data);
2121 
2122 	if (ret == -ENODEV) {
2123 		smp->status |= IB_SMP_UNSUP_METH_ATTR;
2124 		return reply((struct ib_mad_hdr *)smp);
2125 	}
2126 
2127 	/* The address range for the CableInfo SMA query is wider than the
2128 	 * memory available on the QSFP cable. We want to return a valid
2129 	 * response, albeit zeroed out, for address ranges beyond available
2130 	 * memory but that are within the CableInfo query spec
2131 	 */
2132 	if (ret < 0 && ret != -ERANGE) {
2133 		smp->status |= IB_SMP_INVALID_FIELD;
2134 		return reply((struct ib_mad_hdr *)smp);
2135 	}
2136 
2137 	if (resp_len)
2138 		*resp_len += len;
2139 
2140 	return reply((struct ib_mad_hdr *)smp);
2141 }
2142 
2143 static int __subn_get_opa_bct(struct opa_smp *smp, u32 am, u8 *data,
2144 			      struct ib_device *ibdev, u32 port, u32 *resp_len,
2145 			      u32 max_len)
2146 {
2147 	u32 num_ports = OPA_AM_NPORT(am);
2148 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
2149 	struct hfi1_pportdata *ppd;
2150 	struct buffer_control *p = (struct buffer_control *)data;
2151 	int size = sizeof(struct buffer_control);
2152 
2153 	if (num_ports != 1 || smp_length_check(size, max_len)) {
2154 		smp->status |= IB_SMP_INVALID_FIELD;
2155 		return reply((struct ib_mad_hdr *)smp);
2156 	}
2157 
2158 	ppd = dd->pport + (port - 1);
2159 	fm_get_table(ppd, FM_TBL_BUFFER_CONTROL, p);
2160 	trace_bct_get(dd, p);
2161 	if (resp_len)
2162 		*resp_len += size;
2163 
2164 	return reply((struct ib_mad_hdr *)smp);
2165 }
2166 
2167 static int __subn_set_opa_bct(struct opa_smp *smp, u32 am, u8 *data,
2168 			      struct ib_device *ibdev, u32 port, u32 *resp_len,
2169 			      u32 max_len)
2170 {
2171 	u32 num_ports = OPA_AM_NPORT(am);
2172 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
2173 	struct hfi1_pportdata *ppd;
2174 	struct buffer_control *p = (struct buffer_control *)data;
2175 
2176 	if (num_ports != 1 || smp_length_check(sizeof(*p), max_len)) {
2177 		smp->status |= IB_SMP_INVALID_FIELD;
2178 		return reply((struct ib_mad_hdr *)smp);
2179 	}
2180 	ppd = dd->pport + (port - 1);
2181 	trace_bct_set(dd, p);
2182 	if (fm_set_table(ppd, FM_TBL_BUFFER_CONTROL, p) < 0) {
2183 		smp->status |= IB_SMP_INVALID_FIELD;
2184 		return reply((struct ib_mad_hdr *)smp);
2185 	}
2186 
2187 	return __subn_get_opa_bct(smp, am, data, ibdev, port, resp_len,
2188 				  max_len);
2189 }
2190 
2191 static int __subn_get_opa_vl_arb(struct opa_smp *smp, u32 am, u8 *data,
2192 				 struct ib_device *ibdev, u32 port,
2193 				 u32 *resp_len, u32 max_len)
2194 {
2195 	struct hfi1_pportdata *ppd = ppd_from_ibp(to_iport(ibdev, port));
2196 	u32 num_ports = OPA_AM_NPORT(am);
2197 	u8 section = (am & 0x00ff0000) >> 16;
2198 	u8 *p = data;
2199 	int size = 256;
2200 
2201 	if (num_ports != 1 || smp_length_check(size, max_len)) {
2202 		smp->status |= IB_SMP_INVALID_FIELD;
2203 		return reply((struct ib_mad_hdr *)smp);
2204 	}
2205 
2206 	switch (section) {
2207 	case OPA_VLARB_LOW_ELEMENTS:
2208 		fm_get_table(ppd, FM_TBL_VL_LOW_ARB, p);
2209 		break;
2210 	case OPA_VLARB_HIGH_ELEMENTS:
2211 		fm_get_table(ppd, FM_TBL_VL_HIGH_ARB, p);
2212 		break;
2213 	case OPA_VLARB_PREEMPT_ELEMENTS:
2214 		fm_get_table(ppd, FM_TBL_VL_PREEMPT_ELEMS, p);
2215 		break;
2216 	case OPA_VLARB_PREEMPT_MATRIX:
2217 		fm_get_table(ppd, FM_TBL_VL_PREEMPT_MATRIX, p);
2218 		break;
2219 	default:
2220 		pr_warn("OPA SubnGet(VL Arb) AM Invalid : 0x%x\n",
2221 			be32_to_cpu(smp->attr_mod));
2222 		smp->status |= IB_SMP_INVALID_FIELD;
2223 		size = 0;
2224 		break;
2225 	}
2226 
2227 	if (size > 0 && resp_len)
2228 		*resp_len += size;
2229 
2230 	return reply((struct ib_mad_hdr *)smp);
2231 }
2232 
2233 static int __subn_set_opa_vl_arb(struct opa_smp *smp, u32 am, u8 *data,
2234 				 struct ib_device *ibdev, u32 port,
2235 				 u32 *resp_len, u32 max_len)
2236 {
2237 	struct hfi1_pportdata *ppd = ppd_from_ibp(to_iport(ibdev, port));
2238 	u32 num_ports = OPA_AM_NPORT(am);
2239 	u8 section = (am & 0x00ff0000) >> 16;
2240 	u8 *p = data;
2241 	int size = 256;
2242 
2243 	if (num_ports != 1 || smp_length_check(size, max_len)) {
2244 		smp->status |= IB_SMP_INVALID_FIELD;
2245 		return reply((struct ib_mad_hdr *)smp);
2246 	}
2247 
2248 	switch (section) {
2249 	case OPA_VLARB_LOW_ELEMENTS:
2250 		(void)fm_set_table(ppd, FM_TBL_VL_LOW_ARB, p);
2251 		break;
2252 	case OPA_VLARB_HIGH_ELEMENTS:
2253 		(void)fm_set_table(ppd, FM_TBL_VL_HIGH_ARB, p);
2254 		break;
2255 	/*
2256 	 * neither OPA_VLARB_PREEMPT_ELEMENTS, or OPA_VLARB_PREEMPT_MATRIX
2257 	 * can be changed from the default values
2258 	 */
2259 	case OPA_VLARB_PREEMPT_ELEMENTS:
2260 	case OPA_VLARB_PREEMPT_MATRIX:
2261 		smp->status |= IB_SMP_UNSUP_METH_ATTR;
2262 		break;
2263 	default:
2264 		pr_warn("OPA SubnSet(VL Arb) AM Invalid : 0x%x\n",
2265 			be32_to_cpu(smp->attr_mod));
2266 		smp->status |= IB_SMP_INVALID_FIELD;
2267 		break;
2268 	}
2269 
2270 	return __subn_get_opa_vl_arb(smp, am, data, ibdev, port, resp_len,
2271 				     max_len);
2272 }
2273 
/*
 * Performance-management MAD as seen by the pma_* handlers in this file:
 * the common MAD header followed by a 2024-byte attribute payload.
 */
struct opa_pma_mad {
	struct ib_mad_hdr mad_hdr;
	/* handlers read the request from, and build the response in, data */
	u8 data[2024];
} __packed;
2278 
/* PortStatus request payload, overlaid on opa_pma_mad.data. */
struct opa_port_status_req {
	/* 0 is accepted; otherwise must equal the port the MAD arrived on */
	__u8 port_num;
	__u8 reserved[3];
	/* big-endian VL bitmask; only bits in VL_MASK_ALL are accepted */
	__be32 vl_select_mask;
};
2284 
/*
 * Bits 0-7 and bit 15 set. A request whose vl_select_mask has any bit
 * outside this mask is rejected with IB_SMP_INVALID_FIELD.
 */
#define VL_MASK_ALL		0x00000000000080ffUL
2286 
/*
 * PortStatus response payload, built in opa_pma_mad.data by
 * pma_get_opa_portstatus(). Multi-byte counters are big-endian. The
 * fixed part is followed by one _vls_pctrs entry per VL selected in
 * vl_select_mask.
 */
struct opa_port_status_rsp {
	__u8 port_num;
	__u8 reserved[3];
	__be32  vl_select_mask;

	/* Data counters */
	__be64 port_xmit_data;
	__be64 port_rcv_data;
	__be64 port_xmit_pkts;
	__be64 port_rcv_pkts;
	__be64 port_multicast_xmit_pkts;
	__be64 port_multicast_rcv_pkts;
	__be64 port_xmit_wait;
	__be64 sw_port_congestion;
	__be64 port_rcv_fecn;
	__be64 port_rcv_becn;
	__be64 port_xmit_time_cong;
	__be64 port_xmit_wasted_bw;
	__be64 port_xmit_wait_data;
	__be64 port_rcv_bubble;
	__be64 port_mark_fecn;
	/* Error counters */
	__be64 port_rcv_constraint_errors;
	__be64 port_rcv_switch_relay_errors;
	__be64 port_xmit_discards;
	__be64 port_xmit_constraint_errors;
	__be64 port_rcv_remote_physical_errors;
	__be64 local_link_integrity_errors;
	__be64 port_rcv_errors;
	__be64 excessive_buffer_overruns;
	__be64 fm_config_errors;
	__be32 link_error_recovery;
	__be32 link_downed;
	u8 uncorrectable_errors;

	u8 link_quality_indicator; /* 5res, 3bit */
	u8 res2[6];
	struct _vls_pctrs {
		/* per-VL Data counters */
		__be64 port_vl_xmit_data;
		__be64 port_vl_rcv_data;
		__be64 port_vl_xmit_pkts;
		__be64 port_vl_rcv_pkts;
		__be64 port_vl_xmit_wait;
		__be64 sw_port_vl_congestion;
		__be64 port_vl_rcv_fecn;
		__be64 port_vl_rcv_becn;
		__be64 port_xmit_time_cong;
		__be64 port_vl_xmit_wasted_bw;
		__be64 port_vl_xmit_wait_data;
		__be64 port_vl_rcv_bubble;
		__be64 port_vl_mark_fecn;
		__be64 port_vl_xmit_discards;
	} vls[]; /* real array size defined by # bits set in vl_select_mask */
};
2342 
/*
 * One selector bit per port counter, assigned from bit 31 downward in the
 * same order as the counter fields of struct opa_port_status_rsp.
 * NOTE(review): presumably matched against
 * opa_clear_port_status.counter_select_mask - confirm against the users.
 */
enum counter_selects {
	CS_PORT_XMIT_DATA			= (1 << 31),
	CS_PORT_RCV_DATA			= (1 << 30),
	CS_PORT_XMIT_PKTS			= (1 << 29),
	CS_PORT_RCV_PKTS			= (1 << 28),
	CS_PORT_MCAST_XMIT_PKTS			= (1 << 27),
	CS_PORT_MCAST_RCV_PKTS			= (1 << 26),
	CS_PORT_XMIT_WAIT			= (1 << 25),
	CS_SW_PORT_CONGESTION			= (1 << 24),
	CS_PORT_RCV_FECN			= (1 << 23),
	CS_PORT_RCV_BECN			= (1 << 22),
	CS_PORT_XMIT_TIME_CONG			= (1 << 21),
	CS_PORT_XMIT_WASTED_BW			= (1 << 20),
	CS_PORT_XMIT_WAIT_DATA			= (1 << 19),
	CS_PORT_RCV_BUBBLE			= (1 << 18),
	CS_PORT_MARK_FECN			= (1 << 17),
	CS_PORT_RCV_CONSTRAINT_ERRORS		= (1 << 16),
	CS_PORT_RCV_SWITCH_RELAY_ERRORS		= (1 << 15),
	CS_PORT_XMIT_DISCARDS			= (1 << 14),
	CS_PORT_XMIT_CONSTRAINT_ERRORS		= (1 << 13),
	CS_PORT_RCV_REMOTE_PHYSICAL_ERRORS	= (1 << 12),
	CS_LOCAL_LINK_INTEGRITY_ERRORS		= (1 << 11),
	CS_PORT_RCV_ERRORS			= (1 << 10),
	CS_EXCESSIVE_BUFFER_OVERRUNS		= (1 << 9),
	CS_FM_CONFIG_ERRORS			= (1 << 8),
	CS_LINK_ERROR_RECOVERY			= (1 << 7),
	CS_LINK_DOWNED				= (1 << 6),
	CS_UNCORRECTABLE_ERRORS			= (1 << 5),
};
2372 
/*
 * Payload carrying a 256-bit port selection mask (four big-endian 64-bit
 * words) and a big-endian counter selection mask.
 * NOTE(review): presumably the ClearPortStatus request, with
 * counter_select_mask built from enum counter_selects - confirm against
 * the handler.
 */
struct opa_clear_port_status {
	__be64 port_select_mask[4];
	__be32 counter_select_mask;
};
2377 
/*
 * Header of one attribute segment followed by its variable-length data.
 * NOTE(review): presumably one segment of an aggregate MAD - confirm
 * against the aggregate handlers.
 */
struct opa_aggregate {
	__be16 attr_id;
	__be16 err_reqlength;	/* 1 bit, 8 res, 7 bit */
	__be32 attr_mod;
	u8 data[];
};
2384 
/*
 * Layout of the 'resolution' word of struct opa_port_data_counters_msg as
 * decoded by pma_get_opa_datacounters(): bits 7:4 hold the "lli" field
 * and bits 3:0 the "ler" field. A non-zero field value has ADD_LLI
 * (respectively ADD_LER) added to it; a zero field stays zero.
 */
#define MSK_LLI 0x000000f0
#define MSK_LLI_SFT 4
#define MSK_LER 0x0000000f
#define MSK_LER_SFT 0
#define ADD_LLI 8
#define ADD_LER 2
2391 
/*
 * DataPortCounters message, overlaid on opa_pma_mad.data.
 * Request contains first three fields, response contains those plus the rest.
 * Multi-byte fields are big-endian.
 */
struct opa_port_data_counters_msg {
	__be64 port_select_mask[4];
	__be32 vl_select_mask;
	/* see MSK_LLI / MSK_LER for the sub-fields of this word */
	__be32 resolution;

	/* Response fields follow */
	struct _port_dctrs {
		u8 port_number;
		u8 reserved2[3];
		__be32 link_quality_indicator; /* 29res, 3bit */

		/* Data counters */
		__be64 port_xmit_data;
		__be64 port_rcv_data;
		__be64 port_xmit_pkts;
		__be64 port_rcv_pkts;
		__be64 port_multicast_xmit_pkts;
		__be64 port_multicast_rcv_pkts;
		__be64 port_xmit_wait;
		__be64 sw_port_congestion;
		__be64 port_rcv_fecn;
		__be64 port_rcv_becn;
		__be64 port_xmit_time_cong;
		__be64 port_xmit_wasted_bw;
		__be64 port_xmit_wait_data;
		__be64 port_rcv_bubble;
		__be64 port_mark_fecn;

		__be64 port_error_counter_summary;
		/* Sum of error counts/port */

		struct _vls_dctrs {
			/* per-VL Data counters */
			__be64 port_vl_xmit_data;
			__be64 port_vl_rcv_data;
			__be64 port_vl_xmit_pkts;
			__be64 port_vl_rcv_pkts;
			__be64 port_vl_xmit_wait;
			__be64 sw_port_vl_congestion;
			__be64 port_vl_rcv_fecn;
			__be64 port_vl_rcv_becn;
			__be64 port_xmit_time_cong;
			__be64 port_vl_xmit_wasted_bw;
			__be64 port_vl_xmit_wait_data;
			__be64 port_vl_rcv_bubble;
			__be64 port_vl_mark_fecn;
		} vls[];
		/* array size defined by #bits set in vl_select_mask*/
	} port;
};
2443 
/*
 * Port error counters message (64-bit counters). Multi-byte fields are
 * big-endian; the per-port record ends with one _vls_ectrs entry per VL
 * selected in vl_select_mask.
 */
struct opa_port_error_counters64_msg {
	/*
	 * Request contains first two fields, response contains the
	 * whole magilla
	 */
	__be64 port_select_mask[4];
	__be32 vl_select_mask;

	/* Response-only fields follow */
	__be32 reserved1;
	struct _port_ectrs {
		u8 port_number;
		u8 reserved2[7];
		__be64 port_rcv_constraint_errors;
		__be64 port_rcv_switch_relay_errors;
		__be64 port_xmit_discards;
		__be64 port_xmit_constraint_errors;
		__be64 port_rcv_remote_physical_errors;
		__be64 local_link_integrity_errors;
		__be64 port_rcv_errors;
		__be64 excessive_buffer_overruns;
		__be64 fm_config_errors;
		__be32 link_error_recovery;
		__be32 link_downed;
		u8 uncorrectable_errors;
		u8 reserved3[7];
		struct _vls_ectrs {
			__be64 port_vl_xmit_discards;
		} vls[];
		/* array size defined by #bits set in vl_select_mask */
	} port;
};
2476 
/*
 * Port error-info message: a port selection mask, an error-info selection
 * mask (bits defined by enum error_info_selects) and one per-port record
 * holding a packed sub-structure for each error-info category.
 */
struct opa_port_error_info_msg {
	__be64 port_select_mask[4];
	__be32 error_info_select_mask;
	__be32 reserved1;
	struct _port_ei {
		u8 port_number;
		u8 reserved2[7];

		/* PortRcvErrorInfo */
		struct {
			u8 status_and_code;
			/* three overlapping views of the same 17 bytes */
			union {
				u8 raw[17];
				struct {
					/* EI1to12 format */
					u8 packet_flit1[8];
					u8 packet_flit2[8];
					u8 remaining_flit_bits12;
				} ei1to12;
				struct {
					u8 packet_bytes[8];
					u8 remaining_flit_bits;
				} ei13;
			} ei;
			u8 reserved3[6];
		} __packed port_rcv_ei;

		/* ExcessiveBufferOverrunInfo */
		struct {
			u8 status_and_sc;
			u8 reserved4[7];
		} __packed excessive_buffer_overrun_ei;

		/* PortXmitConstraintErrorInfo */
		struct {
			u8 status;
			u8 reserved5;
			__be16 pkey;
			__be32 slid;
		} __packed port_xmit_constraint_ei;

		/* PortRcvConstraintErrorInfo */
		struct {
			u8 status;
			u8 reserved6;
			__be16 pkey;
			__be32 slid;
		} __packed port_rcv_constraint_ei;

		/* PortRcvSwitchRelayErrorInfo */
		struct {
			u8 status_and_code;
			u8 reserved7[3];
			__u32 error_info;
		} __packed port_rcv_switch_relay_ei;

		/* UncorrectableErrorInfo */
		struct {
			u8 status_and_code;
			u8 reserved8;
		} __packed uncorrectable_ei;

		/* FMConfigErrorInfo */
		struct {
			u8 status_and_code;
			u8 error_info;
		} __packed fm_config_ei;
		__u32 reserved9;
	} port;
};
2547 
/*
 * opa_port_error_info_msg error_info_select_mask bit definitions.
 * One bit per error-info category, assigned from bit 31 downward in the
 * same order as the sub-structures of struct _port_ei.
 */
enum error_info_selects {
	ES_PORT_RCV_ERROR_INFO			= (1 << 31),
	ES_EXCESSIVE_BUFFER_OVERRUN_INFO	= (1 << 30),
	ES_PORT_XMIT_CONSTRAINT_ERROR_INFO	= (1 << 29),
	ES_PORT_RCV_CONSTRAINT_ERROR_INFO	= (1 << 28),
	ES_PORT_RCV_SWITCH_RELAY_ERROR_INFO	= (1 << 27),
	ES_UNCORRECTABLE_ERROR_INFO		= (1 << 26),
	ES_FM_CONFIG_ERROR_INFO			= (1 << 25)
};
2558 
2559 static int pma_get_opa_classportinfo(struct opa_pma_mad *pmp,
2560 				     struct ib_device *ibdev, u32 *resp_len)
2561 {
2562 	struct opa_class_port_info *p =
2563 		(struct opa_class_port_info *)pmp->data;
2564 
2565 	memset(pmp->data, 0, sizeof(pmp->data));
2566 
2567 	if (pmp->mad_hdr.attr_mod != 0)
2568 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2569 
2570 	p->base_version = OPA_MGMT_BASE_VERSION;
2571 	p->class_version = OPA_SM_CLASS_VERSION;
2572 	/*
2573 	 * Expected response time is 4.096 usec. * 2^18 == 1.073741824 sec.
2574 	 */
2575 	p->cap_mask2_resp_time = cpu_to_be32(18);
2576 
2577 	if (resp_len)
2578 		*resp_len += sizeof(*p);
2579 
2580 	return reply((struct ib_mad_hdr *)pmp);
2581 }
2582 
2583 static void a0_portstatus(struct hfi1_pportdata *ppd,
2584 			  struct opa_port_status_rsp *rsp)
2585 {
2586 	if (!is_bx(ppd->dd)) {
2587 		unsigned long vl;
2588 		u64 sum_vl_xmit_wait = 0;
2589 		unsigned long vl_all_mask = VL_MASK_ALL;
2590 
2591 		for_each_set_bit(vl, &vl_all_mask, BITS_PER_LONG) {
2592 			u64 tmp = sum_vl_xmit_wait +
2593 				  read_port_cntr(ppd, C_TX_WAIT_VL,
2594 						 idx_from_vl(vl));
2595 			if (tmp < sum_vl_xmit_wait) {
2596 				/* we wrapped */
2597 				sum_vl_xmit_wait = (u64)~0;
2598 				break;
2599 			}
2600 			sum_vl_xmit_wait = tmp;
2601 		}
2602 		if (be64_to_cpu(rsp->port_xmit_wait) > sum_vl_xmit_wait)
2603 			rsp->port_xmit_wait = cpu_to_be64(sum_vl_xmit_wait);
2604 	}
2605 }
2606 
2607 /**
2608  * tx_link_width - convert link width bitmask to integer
2609  * value representing actual link width.
2610  * @link_width: width of active link
2611  * @return: return index of the bit set in link_width var
2612  *
2613  * The function convert and return the index of bit set
2614  * that indicate the current link width.
2615  */
2616 u16 tx_link_width(u16 link_width)
2617 {
2618 	int n = LINK_WIDTH_DEFAULT;
2619 	u16 tx_width = n;
2620 
2621 	while (link_width && n) {
2622 		if (link_width & (1 << (n - 1))) {
2623 			tx_width = n;
2624 			break;
2625 		}
2626 		n--;
2627 	}
2628 
2629 	return tx_width;
2630 }
2631 
2632 /**
2633  * get_xmit_wait_counters - Convert HFI 's SendWaitCnt/SendWaitVlCnt
2634  * counter in unit of TXE cycle times to flit times.
2635  * @ppd: info of physical Hfi port
2636  * @link_width: width of active link
2637  * @link_speed: speed of active link
2638  * @vl: represent VL0-VL7, VL15 for PortVLXmitWait counters request
2639  * and if vl value is C_VL_COUNT, it represent SendWaitCnt
2640  * counter request
2641  * @return: return SendWaitCnt/SendWaitVlCnt counter value per vl.
2642  *
2643  * Convert SendWaitCnt/SendWaitVlCnt counter from TXE cycle times to
2644  * flit times. Call this function to samples these counters. This
2645  * function will calculate for previous state transition and update
2646  * current state at end of function using ppd->prev_link_width and
2647  * ppd->port_vl_xmit_wait_last to port_vl_xmit_wait_curr and link_width.
2648  */
2649 u64 get_xmit_wait_counters(struct hfi1_pportdata *ppd,
2650 			   u16 link_width, u16 link_speed, int vl)
2651 {
2652 	u64 port_vl_xmit_wait_curr;
2653 	u64 delta_vl_xmit_wait;
2654 	u64 xmit_wait_val;
2655 
2656 	if (vl > C_VL_COUNT)
2657 		return  0;
2658 	if (vl < C_VL_COUNT)
2659 		port_vl_xmit_wait_curr =
2660 			read_port_cntr(ppd, C_TX_WAIT_VL, vl);
2661 	else
2662 		port_vl_xmit_wait_curr =
2663 			read_port_cntr(ppd, C_TX_WAIT, CNTR_INVALID_VL);
2664 
2665 	xmit_wait_val =
2666 		port_vl_xmit_wait_curr -
2667 		ppd->port_vl_xmit_wait_last[vl];
2668 	delta_vl_xmit_wait =
2669 		convert_xmit_counter(xmit_wait_val,
2670 				     ppd->prev_link_width,
2671 				     link_speed);
2672 
2673 	ppd->vl_xmit_flit_cnt[vl] += delta_vl_xmit_wait;
2674 	ppd->port_vl_xmit_wait_last[vl] = port_vl_xmit_wait_curr;
2675 	ppd->prev_link_width = link_width;
2676 
2677 	return ppd->vl_xmit_flit_cnt[vl];
2678 }
2679 
/*
 * PMA Get(PortStatus): build a struct opa_port_status_rsp in the MAD
 * payload from the device and port counters.
 *
 * @pmp: MAD holding the request; the response overwrites its payload
 * @ibdev: device the MAD arrived on
 * @port: port the MAD arrived on
 * @resp_len: when non-NULL, increased by the size of the response
 *
 * The MAD is flagged OPA_PM_STATUS_REQUEST_TOO_LARGE when the response
 * for the selected VLs would not fit in the payload, and
 * IB_SMP_INVALID_FIELD when the attribute modifier does not ask for
 * exactly one port, when a non-zero request port number differs from
 * @port, or when the VL select mask names VLs outside VL_MASK_ALL or
 * more than OPA_MAX_VLS of them. A reply is generated in every case.
 */
static int pma_get_opa_portstatus(struct opa_pma_mad *pmp,
				  struct ib_device *ibdev,
				  u32 port, u32 *resp_len)
{
	struct opa_port_status_req *req =
		(struct opa_port_status_req *)pmp->data;
	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
	struct opa_port_status_rsp *rsp;
	/*
	 * The request fields are copied into locals here because the
	 * response is built in the same buffer, which is cleared below.
	 */
	unsigned long vl_select_mask = be32_to_cpu(req->vl_select_mask);
	unsigned long vl;
	size_t response_data_size;
	u32 nports = be32_to_cpu(pmp->mad_hdr.attr_mod) >> 24;
	u32 port_num = req->port_num;
	u8 num_vls = hweight64(vl_select_mask);
	struct _vls_pctrs *vlinfo;
	struct hfi1_ibport *ibp = to_iport(ibdev, port);
	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
	int vfi;
	u64 tmp, tmp2;
	u16 link_width;
	u16 link_speed;

	/* fixed part plus one per-VL record for every selected VL */
	response_data_size = struct_size(rsp, vls, num_vls);
	if (response_data_size > sizeof(pmp->data)) {
		pmp->mad_hdr.status |= OPA_PM_STATUS_REQUEST_TOO_LARGE;
		return reply((struct ib_mad_hdr *)pmp);
	}

	if (nports != 1 || (port_num && port_num != port) ||
	    num_vls > OPA_MAX_VLS || (vl_select_mask & ~VL_MASK_ALL)) {
		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
		return reply((struct ib_mad_hdr *)pmp);
	}

	memset(pmp->data, 0, sizeof(pmp->data));

	rsp = (struct opa_port_status_rsp *)pmp->data;
	/* a request port number of 0 is answered with the receiving port */
	if (port_num)
		rsp->port_num = port_num;
	else
		rsp->port_num = port;

	rsp->port_rcv_constraint_errors =
		cpu_to_be64(read_port_cntr(ppd, C_SW_RCV_CSTR_ERR,
					   CNTR_INVALID_VL));

	hfi1_read_link_quality(dd, &rsp->link_quality_indicator);

	rsp->vl_select_mask = cpu_to_be32((u32)vl_select_mask);
	rsp->port_xmit_data = cpu_to_be64(read_dev_cntr(dd, C_DC_XMIT_FLITS,
					  CNTR_INVALID_VL));
	rsp->port_rcv_data = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FLITS,
					 CNTR_INVALID_VL));
	rsp->port_xmit_pkts = cpu_to_be64(read_dev_cntr(dd, C_DC_XMIT_PKTS,
					  CNTR_INVALID_VL));
	rsp->port_rcv_pkts = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_PKTS,
					 CNTR_INVALID_VL));
	rsp->port_multicast_xmit_pkts =
		cpu_to_be64(read_dev_cntr(dd, C_DC_MC_XMIT_PKTS,
					  CNTR_INVALID_VL));
	rsp->port_multicast_rcv_pkts =
		cpu_to_be64(read_dev_cntr(dd, C_DC_MC_RCV_PKTS,
					  CNTR_INVALID_VL));
	/*
	 * Convert PortXmitWait counter from TXE cycle times
	 * to flit times.
	 */
	link_width =
		tx_link_width(ppd->link_width_downgrade_tx_active);
	link_speed = get_link_speed(ppd->link_speed_active);
	rsp->port_xmit_wait =
		cpu_to_be64(get_xmit_wait_counters(ppd, link_width,
						   link_speed, C_VL_COUNT));
	rsp->port_rcv_fecn =
		cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FCN, CNTR_INVALID_VL));
	rsp->port_rcv_becn =
		cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_BCN, CNTR_INVALID_VL));
	rsp->port_xmit_discards =
		cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_DSCD,
					   CNTR_INVALID_VL));
	rsp->port_xmit_constraint_errors =
		cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_CSTR_ERR,
					   CNTR_INVALID_VL));
	rsp->port_rcv_remote_physical_errors =
		cpu_to_be64(read_dev_cntr(dd, C_DC_RMT_PHY_ERR,
					  CNTR_INVALID_VL));
	rsp->local_link_integrity_errors =
		cpu_to_be64(read_dev_cntr(dd, C_DC_RX_REPLAY,
					  CNTR_INVALID_VL));
	/*
	 * link_error_recovery is the sum of two 64-bit counters reported
	 * in a 32-bit field; it pegs at all ones when the sum does not fit.
	 */
	tmp = read_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL);
	tmp2 = tmp + read_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT,
				   CNTR_INVALID_VL);
	if (tmp2 > (u32)UINT_MAX || tmp2 < tmp) {
		/* overflow/wrapped */
		rsp->link_error_recovery = cpu_to_be32(~0);
	} else {
		rsp->link_error_recovery = cpu_to_be32(tmp2);
	}
	rsp->port_rcv_errors =
		cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_ERR, CNTR_INVALID_VL));
	rsp->excessive_buffer_overruns =
		cpu_to_be64(read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL));
	rsp->fm_config_errors =
		cpu_to_be64(read_dev_cntr(dd, C_DC_FM_CFG_ERR,
					  CNTR_INVALID_VL));
	rsp->link_downed = cpu_to_be32(read_port_cntr(ppd, C_SW_LINK_DOWN,
						      CNTR_INVALID_VL));

	/* rsp->uncorrectable_errors is 8 bits wide, and it pegs at 0xff */
	tmp = read_dev_cntr(dd, C_DC_UNC_ERR, CNTR_INVALID_VL);
	rsp->uncorrectable_errors = tmp < 0x100 ? (tmp & 0xff) : 0xff;

	/* vlinfo and vfi advance together, one response slot per set bit */
	vlinfo = &rsp->vls[0];
	vfi = 0;
	/* The vl_select_mask has been checked above, and we know
	 * that it contains only entries which represent valid VLs.
	 * So in the for_each_set_bit() loop below, we don't need
	 * any additional checks for vl.
	 */
	for_each_set_bit(vl, &vl_select_mask, BITS_PER_LONG) {
		memset(vlinfo, 0, sizeof(*vlinfo));

		tmp = read_dev_cntr(dd, C_DC_RX_FLIT_VL, idx_from_vl(vl));
		rsp->vls[vfi].port_vl_rcv_data = cpu_to_be64(tmp);

		rsp->vls[vfi].port_vl_rcv_pkts =
			cpu_to_be64(read_dev_cntr(dd, C_DC_RX_PKT_VL,
						  idx_from_vl(vl)));

		rsp->vls[vfi].port_vl_xmit_data =
			cpu_to_be64(read_port_cntr(ppd, C_TX_FLIT_VL,
						   idx_from_vl(vl)));

		rsp->vls[vfi].port_vl_xmit_pkts =
			cpu_to_be64(read_port_cntr(ppd, C_TX_PKT_VL,
						   idx_from_vl(vl)));
		/*
		 * Convert PortVlXmitWait counter from TXE cycle
		 * times to flit times.
		 */
		rsp->vls[vfi].port_vl_xmit_wait =
			cpu_to_be64(get_xmit_wait_counters(ppd, link_width,
							   link_speed,
							   idx_from_vl(vl)));

		rsp->vls[vfi].port_vl_rcv_fecn =
			cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FCN_VL,
						  idx_from_vl(vl)));

		rsp->vls[vfi].port_vl_rcv_becn =
			cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_BCN_VL,
						  idx_from_vl(vl)));

		rsp->vls[vfi].port_vl_xmit_discards =
			cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_DSCD_VL,
						   idx_from_vl(vl)));
		vlinfo++;
		vfi++;
	}

	/* may lower port_xmit_wait on devices where is_bx() is false */
	a0_portstatus(ppd, rsp);

	if (resp_len)
		*resp_len += response_data_size;

	return reply((struct ib_mad_hdr *)pmp);
}
2847 
2848 static u64 get_error_counter_summary(struct ib_device *ibdev, u32 port,
2849 				     u8 res_lli, u8 res_ler)
2850 {
2851 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
2852 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
2853 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
2854 	u64 error_counter_summary = 0, tmp;
2855 
2856 	error_counter_summary += read_port_cntr(ppd, C_SW_RCV_CSTR_ERR,
2857 						CNTR_INVALID_VL);
2858 	/* port_rcv_switch_relay_errors is 0 for HFIs */
2859 	error_counter_summary += read_port_cntr(ppd, C_SW_XMIT_DSCD,
2860 						CNTR_INVALID_VL);
2861 	error_counter_summary += read_port_cntr(ppd, C_SW_XMIT_CSTR_ERR,
2862 						CNTR_INVALID_VL);
2863 	error_counter_summary += read_dev_cntr(dd, C_DC_RMT_PHY_ERR,
2864 					       CNTR_INVALID_VL);
2865 	/* local link integrity must be right-shifted by the lli resolution */
2866 	error_counter_summary += (read_dev_cntr(dd, C_DC_RX_REPLAY,
2867 						CNTR_INVALID_VL) >> res_lli);
2868 	/* link error recovery must b right-shifted by the ler resolution */
2869 	tmp = read_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL);
2870 	tmp += read_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT, CNTR_INVALID_VL);
2871 	error_counter_summary += (tmp >> res_ler);
2872 	error_counter_summary += read_dev_cntr(dd, C_DC_RCV_ERR,
2873 					       CNTR_INVALID_VL);
2874 	error_counter_summary += read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL);
2875 	error_counter_summary += read_dev_cntr(dd, C_DC_FM_CFG_ERR,
2876 					       CNTR_INVALID_VL);
2877 	/* ppd->link_downed is a 32-bit value */
2878 	error_counter_summary += read_port_cntr(ppd, C_SW_LINK_DOWN,
2879 						CNTR_INVALID_VL);
2880 	tmp = read_dev_cntr(dd, C_DC_UNC_ERR, CNTR_INVALID_VL);
2881 	/* this is an 8-bit quantity */
2882 	error_counter_summary += tmp < 0x100 ? (tmp & 0xff) : 0xff;
2883 
2884 	return error_counter_summary;
2885 }
2886 
2887 static void a0_datacounters(struct hfi1_pportdata *ppd, struct _port_dctrs *rsp)
2888 {
2889 	if (!is_bx(ppd->dd)) {
2890 		unsigned long vl;
2891 		u64 sum_vl_xmit_wait = 0;
2892 		unsigned long vl_all_mask = VL_MASK_ALL;
2893 
2894 		for_each_set_bit(vl, &vl_all_mask, BITS_PER_LONG) {
2895 			u64 tmp = sum_vl_xmit_wait +
2896 				  read_port_cntr(ppd, C_TX_WAIT_VL,
2897 						 idx_from_vl(vl));
2898 			if (tmp < sum_vl_xmit_wait) {
2899 				/* we wrapped */
2900 				sum_vl_xmit_wait = (u64)~0;
2901 				break;
2902 			}
2903 			sum_vl_xmit_wait = tmp;
2904 		}
2905 		if (be64_to_cpu(rsp->port_xmit_wait) > sum_vl_xmit_wait)
2906 			rsp->port_xmit_wait = cpu_to_be64(sum_vl_xmit_wait);
2907 	}
2908 }
2909 
2910 static void pma_get_opa_port_dctrs(struct ib_device *ibdev,
2911 				   struct _port_dctrs *rsp)
2912 {
2913 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
2914 
2915 	rsp->port_xmit_data = cpu_to_be64(read_dev_cntr(dd, C_DC_XMIT_FLITS,
2916 						CNTR_INVALID_VL));
2917 	rsp->port_rcv_data = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FLITS,
2918 						CNTR_INVALID_VL));
2919 	rsp->port_xmit_pkts = cpu_to_be64(read_dev_cntr(dd, C_DC_XMIT_PKTS,
2920 						CNTR_INVALID_VL));
2921 	rsp->port_rcv_pkts = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_PKTS,
2922 						CNTR_INVALID_VL));
2923 	rsp->port_multicast_xmit_pkts =
2924 		cpu_to_be64(read_dev_cntr(dd, C_DC_MC_XMIT_PKTS,
2925 					  CNTR_INVALID_VL));
2926 	rsp->port_multicast_rcv_pkts =
2927 		cpu_to_be64(read_dev_cntr(dd, C_DC_MC_RCV_PKTS,
2928 					  CNTR_INVALID_VL));
2929 }
2930 
2931 static int pma_get_opa_datacounters(struct opa_pma_mad *pmp,
2932 				    struct ib_device *ibdev,
2933 				    u32 port, u32 *resp_len)
2934 {
2935 	struct opa_port_data_counters_msg *req =
2936 		(struct opa_port_data_counters_msg *)pmp->data;
2937 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
2938 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
2939 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
2940 	struct _port_dctrs *rsp;
2941 	struct _vls_dctrs *vlinfo;
2942 	size_t response_data_size;
2943 	u32 num_ports;
2944 	u8 lq, num_vls;
2945 	u8 res_lli, res_ler;
2946 	u64 port_mask;
2947 	u32 port_num;
2948 	unsigned long vl;
2949 	unsigned long vl_select_mask;
2950 	int vfi;
2951 	u16 link_width;
2952 	u16 link_speed;
2953 
2954 	num_ports = be32_to_cpu(pmp->mad_hdr.attr_mod) >> 24;
2955 	num_vls = hweight32(be32_to_cpu(req->vl_select_mask));
2956 	vl_select_mask = be32_to_cpu(req->vl_select_mask);
2957 	res_lli = (u8)(be32_to_cpu(req->resolution) & MSK_LLI) >> MSK_LLI_SFT;
2958 	res_lli = res_lli ? res_lli + ADD_LLI : 0;
2959 	res_ler = (u8)(be32_to_cpu(req->resolution) & MSK_LER) >> MSK_LER_SFT;
2960 	res_ler = res_ler ? res_ler + ADD_LER : 0;
2961 
2962 	if (num_ports != 1 || (vl_select_mask & ~VL_MASK_ALL)) {
2963 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2964 		return reply((struct ib_mad_hdr *)pmp);
2965 	}
2966 
2967 	/* Sanity check */
2968 	response_data_size = struct_size(req, port.vls, num_vls);
2969 
2970 	if (response_data_size > sizeof(pmp->data)) {
2971 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2972 		return reply((struct ib_mad_hdr *)pmp);
2973 	}
2974 
2975 	/*
2976 	 * The bit set in the mask needs to be consistent with the
2977 	 * port the request came in on.
2978 	 */
2979 	port_mask = be64_to_cpu(req->port_select_mask[3]);
2980 	port_num = find_first_bit((unsigned long *)&port_mask,
2981 				  sizeof(port_mask) * 8);
2982 
2983 	if (port_num != port) {
2984 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2985 		return reply((struct ib_mad_hdr *)pmp);
2986 	}
2987 
2988 	rsp = &req->port;
2989 	memset(rsp, 0, sizeof(*rsp));
2990 
2991 	rsp->port_number = port;
2992 	/*
2993 	 * Note that link_quality_indicator is a 32 bit quantity in
2994 	 * 'datacounters' queries (as opposed to 'portinfo' queries,
2995 	 * where it's a byte).
2996 	 */
2997 	hfi1_read_link_quality(dd, &lq);
2998 	rsp->link_quality_indicator = cpu_to_be32((u32)lq);
2999 	pma_get_opa_port_dctrs(ibdev, rsp);
3000 
3001 	/*
3002 	 * Convert PortXmitWait counter from TXE
3003 	 * cycle times to flit times.
3004 	 */
3005 	link_width =
3006 		tx_link_width(ppd->link_width_downgrade_tx_active);
3007 	link_speed = get_link_speed(ppd->link_speed_active);
3008 	rsp->port_xmit_wait =
3009 		cpu_to_be64(get_xmit_wait_counters(ppd, link_width,
3010 						   link_speed, C_VL_COUNT));
3011 	rsp->port_rcv_fecn =
3012 		cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FCN, CNTR_INVALID_VL));
3013 	rsp->port_rcv_becn =
3014 		cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_BCN, CNTR_INVALID_VL));
3015 	rsp->port_error_counter_summary =
3016 		cpu_to_be64(get_error_counter_summary(ibdev, port,
3017 						      res_lli, res_ler));
3018 
3019 	vlinfo = &rsp->vls[0];
3020 	vfi = 0;
3021 	/* The vl_select_mask has been checked above, and we know
3022 	 * that it contains only entries which represent valid VLs.
3023 	 * So in the for_each_set_bit() loop below, we don't need
3024 	 * any additional checks for vl.
3025 	 */
3026 	for_each_set_bit(vl, &vl_select_mask, BITS_PER_LONG) {
3027 		memset(vlinfo, 0, sizeof(*vlinfo));
3028 
3029 		rsp->vls[vfi].port_vl_xmit_data =
3030 			cpu_to_be64(read_port_cntr(ppd, C_TX_FLIT_VL,
3031 						   idx_from_vl(vl)));
3032 
3033 		rsp->vls[vfi].port_vl_rcv_data =
3034 			cpu_to_be64(read_dev_cntr(dd, C_DC_RX_FLIT_VL,
3035 						  idx_from_vl(vl)));
3036 
3037 		rsp->vls[vfi].port_vl_xmit_pkts =
3038 			cpu_to_be64(read_port_cntr(ppd, C_TX_PKT_VL,
3039 						   idx_from_vl(vl)));
3040 
3041 		rsp->vls[vfi].port_vl_rcv_pkts =
3042 			cpu_to_be64(read_dev_cntr(dd, C_DC_RX_PKT_VL,
3043 						  idx_from_vl(vl)));
3044 
3045 		/*
3046 		 * Convert PortVlXmitWait counter from TXE
3047 		 * cycle times to flit times.
3048 		 */
3049 		rsp->vls[vfi].port_vl_xmit_wait =
3050 			cpu_to_be64(get_xmit_wait_counters(ppd, link_width,
3051 							   link_speed,
3052 							   idx_from_vl(vl)));
3053 
3054 		rsp->vls[vfi].port_vl_rcv_fecn =
3055 			cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FCN_VL,
3056 						  idx_from_vl(vl)));
3057 		rsp->vls[vfi].port_vl_rcv_becn =
3058 			cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_BCN_VL,
3059 						  idx_from_vl(vl)));
3060 
3061 		/* rsp->port_vl_xmit_time_cong is 0 for HFIs */
3062 		/* rsp->port_vl_xmit_wasted_bw ??? */
3063 		/* port_vl_xmit_wait_data - TXE (table 13-9 HFI spec) ???
3064 		 * does this differ from rsp->vls[vfi].port_vl_xmit_wait
3065 		 */
3066 		/*rsp->vls[vfi].port_vl_mark_fecn =
3067 		 *	cpu_to_be64(read_csr(dd, DCC_PRF_PORT_VL_MARK_FECN_CNT
3068 		 *		+ offset));
3069 		 */
3070 		vlinfo++;
3071 		vfi++;
3072 	}
3073 
3074 	a0_datacounters(ppd, rsp);
3075 
3076 	if (resp_len)
3077 		*resp_len += response_data_size;
3078 
3079 	return reply((struct ib_mad_hdr *)pmp);
3080 }
3081 
3082 static int pma_get_ib_portcounters_ext(struct ib_pma_mad *pmp,
3083 				       struct ib_device *ibdev, u32 port)
3084 {
3085 	struct ib_pma_portcounters_ext *p = (struct ib_pma_portcounters_ext *)
3086 						pmp->data;
3087 	struct _port_dctrs rsp;
3088 
3089 	if (pmp->mad_hdr.attr_mod != 0 || p->port_select != port) {
3090 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3091 		goto bail;
3092 	}
3093 
3094 	memset(&rsp, 0, sizeof(rsp));
3095 	pma_get_opa_port_dctrs(ibdev, &rsp);
3096 
3097 	p->port_xmit_data = rsp.port_xmit_data;
3098 	p->port_rcv_data = rsp.port_rcv_data;
3099 	p->port_xmit_packets = rsp.port_xmit_pkts;
3100 	p->port_rcv_packets = rsp.port_rcv_pkts;
3101 	p->port_unicast_xmit_packets = 0;
3102 	p->port_unicast_rcv_packets =  0;
3103 	p->port_multicast_xmit_packets = rsp.port_multicast_xmit_pkts;
3104 	p->port_multicast_rcv_packets = rsp.port_multicast_rcv_pkts;
3105 
3106 bail:
3107 	return reply((struct ib_mad_hdr *)pmp);
3108 }
3109 
3110 static void pma_get_opa_port_ectrs(struct ib_device *ibdev,
3111 				   struct _port_ectrs *rsp, u32 port)
3112 {
3113 	u64 tmp, tmp2;
3114 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
3115 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
3116 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3117 
3118 	tmp = read_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL);
3119 	tmp2 = tmp + read_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT,
3120 					CNTR_INVALID_VL);
3121 	if (tmp2 > (u32)UINT_MAX || tmp2 < tmp) {
3122 		/* overflow/wrapped */
3123 		rsp->link_error_recovery = cpu_to_be32(~0);
3124 	} else {
3125 		rsp->link_error_recovery = cpu_to_be32(tmp2);
3126 	}
3127 
3128 	rsp->link_downed = cpu_to_be32(read_port_cntr(ppd, C_SW_LINK_DOWN,
3129 						CNTR_INVALID_VL));
3130 	rsp->port_rcv_errors =
3131 		cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_ERR, CNTR_INVALID_VL));
3132 	rsp->port_rcv_remote_physical_errors =
3133 		cpu_to_be64(read_dev_cntr(dd, C_DC_RMT_PHY_ERR,
3134 					  CNTR_INVALID_VL));
3135 	rsp->port_rcv_switch_relay_errors = 0;
3136 	rsp->port_xmit_discards =
3137 		cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_DSCD,
3138 					   CNTR_INVALID_VL));
3139 	rsp->port_xmit_constraint_errors =
3140 		cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_CSTR_ERR,
3141 					   CNTR_INVALID_VL));
3142 	rsp->port_rcv_constraint_errors =
3143 		cpu_to_be64(read_port_cntr(ppd, C_SW_RCV_CSTR_ERR,
3144 					   CNTR_INVALID_VL));
3145 	rsp->local_link_integrity_errors =
3146 		cpu_to_be64(read_dev_cntr(dd, C_DC_RX_REPLAY,
3147 					  CNTR_INVALID_VL));
3148 	rsp->excessive_buffer_overruns =
3149 		cpu_to_be64(read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL));
3150 }
3151 
3152 static int pma_get_opa_porterrors(struct opa_pma_mad *pmp,
3153 				  struct ib_device *ibdev,
3154 				  u32 port, u32 *resp_len)
3155 {
3156 	size_t response_data_size;
3157 	struct _port_ectrs *rsp;
3158 	u32 port_num;
3159 	struct opa_port_error_counters64_msg *req;
3160 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
3161 	u32 num_ports;
3162 	u8 num_pslm;
3163 	u8 num_vls;
3164 	struct hfi1_ibport *ibp;
3165 	struct hfi1_pportdata *ppd;
3166 	struct _vls_ectrs *vlinfo;
3167 	unsigned long vl;
3168 	u64 port_mask, tmp;
3169 	unsigned long vl_select_mask;
3170 	int vfi;
3171 
3172 	req = (struct opa_port_error_counters64_msg *)pmp->data;
3173 
3174 	num_ports = be32_to_cpu(pmp->mad_hdr.attr_mod) >> 24;
3175 
3176 	num_pslm = hweight64(be64_to_cpu(req->port_select_mask[3]));
3177 	num_vls = hweight32(be32_to_cpu(req->vl_select_mask));
3178 
3179 	if (num_ports != 1 || num_ports != num_pslm) {
3180 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3181 		return reply((struct ib_mad_hdr *)pmp);
3182 	}
3183 
3184 	response_data_size = struct_size(req, port.vls, num_vls);
3185 
3186 	if (response_data_size > sizeof(pmp->data)) {
3187 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3188 		return reply((struct ib_mad_hdr *)pmp);
3189 	}
3190 	/*
3191 	 * The bit set in the mask needs to be consistent with the
3192 	 * port the request came in on.
3193 	 */
3194 	port_mask = be64_to_cpu(req->port_select_mask[3]);
3195 	port_num = find_first_bit((unsigned long *)&port_mask,
3196 				  sizeof(port_mask) * 8);
3197 
3198 	if (port_num != port) {
3199 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3200 		return reply((struct ib_mad_hdr *)pmp);
3201 	}
3202 
3203 	rsp = &req->port;
3204 
3205 	ibp = to_iport(ibdev, port_num);
3206 	ppd = ppd_from_ibp(ibp);
3207 
3208 	memset(rsp, 0, sizeof(*rsp));
3209 	rsp->port_number = port_num;
3210 
3211 	pma_get_opa_port_ectrs(ibdev, rsp, port_num);
3212 
3213 	rsp->port_rcv_remote_physical_errors =
3214 		cpu_to_be64(read_dev_cntr(dd, C_DC_RMT_PHY_ERR,
3215 					  CNTR_INVALID_VL));
3216 	rsp->fm_config_errors =
3217 		cpu_to_be64(read_dev_cntr(dd, C_DC_FM_CFG_ERR,
3218 					  CNTR_INVALID_VL));
3219 	tmp = read_dev_cntr(dd, C_DC_UNC_ERR, CNTR_INVALID_VL);
3220 
3221 	rsp->uncorrectable_errors = tmp < 0x100 ? (tmp & 0xff) : 0xff;
3222 	rsp->port_rcv_errors =
3223 		cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_ERR, CNTR_INVALID_VL));
3224 	vlinfo = &rsp->vls[0];
3225 	vfi = 0;
3226 	vl_select_mask = be32_to_cpu(req->vl_select_mask);
3227 	for_each_set_bit(vl, &vl_select_mask, BITS_PER_LONG) {
3228 		memset(vlinfo, 0, sizeof(*vlinfo));
3229 		rsp->vls[vfi].port_vl_xmit_discards =
3230 			cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_DSCD_VL,
3231 						   idx_from_vl(vl)));
3232 		vlinfo += 1;
3233 		vfi++;
3234 	}
3235 
3236 	if (resp_len)
3237 		*resp_len += response_data_size;
3238 
3239 	return reply((struct ib_mad_hdr *)pmp);
3240 }
3241 
3242 static int pma_get_ib_portcounters(struct ib_pma_mad *pmp,
3243 				   struct ib_device *ibdev, u32 port)
3244 {
3245 	struct ib_pma_portcounters *p = (struct ib_pma_portcounters *)
3246 		pmp->data;
3247 	struct _port_ectrs rsp;
3248 	u64 temp_link_overrun_errors;
3249 	u64 temp_64;
3250 	u32 temp_32;
3251 
3252 	memset(&rsp, 0, sizeof(rsp));
3253 	pma_get_opa_port_ectrs(ibdev, &rsp, port);
3254 
3255 	if (pmp->mad_hdr.attr_mod != 0 || p->port_select != port) {
3256 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3257 		goto bail;
3258 	}
3259 
3260 	p->symbol_error_counter = 0; /* N/A for OPA */
3261 
3262 	temp_32 = be32_to_cpu(rsp.link_error_recovery);
3263 	if (temp_32 > 0xFFUL)
3264 		p->link_error_recovery_counter = 0xFF;
3265 	else
3266 		p->link_error_recovery_counter = (u8)temp_32;
3267 
3268 	temp_32 = be32_to_cpu(rsp.link_downed);
3269 	if (temp_32 > 0xFFUL)
3270 		p->link_downed_counter = 0xFF;
3271 	else
3272 		p->link_downed_counter = (u8)temp_32;
3273 
3274 	temp_64 = be64_to_cpu(rsp.port_rcv_errors);
3275 	if (temp_64 > 0xFFFFUL)
3276 		p->port_rcv_errors = cpu_to_be16(0xFFFF);
3277 	else
3278 		p->port_rcv_errors = cpu_to_be16((u16)temp_64);
3279 
3280 	temp_64 = be64_to_cpu(rsp.port_rcv_remote_physical_errors);
3281 	if (temp_64 > 0xFFFFUL)
3282 		p->port_rcv_remphys_errors = cpu_to_be16(0xFFFF);
3283 	else
3284 		p->port_rcv_remphys_errors = cpu_to_be16((u16)temp_64);
3285 
3286 	temp_64 = be64_to_cpu(rsp.port_rcv_switch_relay_errors);
3287 	p->port_rcv_switch_relay_errors = cpu_to_be16((u16)temp_64);
3288 
3289 	temp_64 = be64_to_cpu(rsp.port_xmit_discards);
3290 	if (temp_64 > 0xFFFFUL)
3291 		p->port_xmit_discards = cpu_to_be16(0xFFFF);
3292 	else
3293 		p->port_xmit_discards = cpu_to_be16((u16)temp_64);
3294 
3295 	temp_64 = be64_to_cpu(rsp.port_xmit_constraint_errors);
3296 	if (temp_64 > 0xFFUL)
3297 		p->port_xmit_constraint_errors = 0xFF;
3298 	else
3299 		p->port_xmit_constraint_errors = (u8)temp_64;
3300 
3301 	temp_64 = be64_to_cpu(rsp.port_rcv_constraint_errors);
3302 	if (temp_64 > 0xFFUL)
3303 		p->port_rcv_constraint_errors = 0xFFUL;
3304 	else
3305 		p->port_rcv_constraint_errors = (u8)temp_64;
3306 
3307 	/* LocalLink: 7:4, BufferOverrun: 3:0 */
3308 	temp_64 = be64_to_cpu(rsp.local_link_integrity_errors);
3309 	if (temp_64 > 0xFUL)
3310 		temp_64 = 0xFUL;
3311 
3312 	temp_link_overrun_errors = temp_64 << 4;
3313 
3314 	temp_64 = be64_to_cpu(rsp.excessive_buffer_overruns);
3315 	if (temp_64 > 0xFUL)
3316 		temp_64 = 0xFUL;
3317 	temp_link_overrun_errors |= temp_64;
3318 
3319 	p->link_overrun_errors = (u8)temp_link_overrun_errors;
3320 
3321 	p->vl15_dropped = 0; /* N/A for OPA */
3322 
3323 bail:
3324 	return reply((struct ib_mad_hdr *)pmp);
3325 }
3326 
3327 static int pma_get_opa_errorinfo(struct opa_pma_mad *pmp,
3328 				 struct ib_device *ibdev,
3329 				 u32 port, u32 *resp_len)
3330 {
3331 	size_t response_data_size;
3332 	struct _port_ei *rsp;
3333 	struct opa_port_error_info_msg *req;
3334 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
3335 	u64 port_mask;
3336 	u32 num_ports;
3337 	u32 port_num;
3338 	u8 num_pslm;
3339 	u64 reg;
3340 
3341 	req = (struct opa_port_error_info_msg *)pmp->data;
3342 	rsp = &req->port;
3343 
3344 	num_ports = OPA_AM_NPORT(be32_to_cpu(pmp->mad_hdr.attr_mod));
3345 	num_pslm = hweight64(be64_to_cpu(req->port_select_mask[3]));
3346 
3347 	memset(rsp, 0, sizeof(*rsp));
3348 
3349 	if (num_ports != 1 || num_ports != num_pslm) {
3350 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3351 		return reply((struct ib_mad_hdr *)pmp);
3352 	}
3353 
3354 	/* Sanity check */
3355 	response_data_size = sizeof(struct opa_port_error_info_msg);
3356 
3357 	if (response_data_size > sizeof(pmp->data)) {
3358 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3359 		return reply((struct ib_mad_hdr *)pmp);
3360 	}
3361 
3362 	/*
3363 	 * The bit set in the mask needs to be consistent with the port
3364 	 * the request came in on.
3365 	 */
3366 	port_mask = be64_to_cpu(req->port_select_mask[3]);
3367 	port_num = find_first_bit((unsigned long *)&port_mask,
3368 				  sizeof(port_mask) * 8);
3369 
3370 	if (port_num != port) {
3371 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3372 		return reply((struct ib_mad_hdr *)pmp);
3373 	}
3374 	rsp->port_number = port;
3375 
3376 	/* PortRcvErrorInfo */
3377 	rsp->port_rcv_ei.status_and_code =
3378 		dd->err_info_rcvport.status_and_code;
3379 	memcpy(&rsp->port_rcv_ei.ei.ei1to12.packet_flit1,
3380 	       &dd->err_info_rcvport.packet_flit1, sizeof(u64));
3381 	memcpy(&rsp->port_rcv_ei.ei.ei1to12.packet_flit2,
3382 	       &dd->err_info_rcvport.packet_flit2, sizeof(u64));
3383 
3384 	/* ExcessiverBufferOverrunInfo */
3385 	reg = read_csr(dd, RCV_ERR_INFO);
3386 	if (reg & RCV_ERR_INFO_RCV_EXCESS_BUFFER_OVERRUN_SMASK) {
3387 		/*
3388 		 * if the RcvExcessBufferOverrun bit is set, save SC of
3389 		 * first pkt that encountered an excess buffer overrun
3390 		 */
3391 		u8 tmp = (u8)reg;
3392 
3393 		tmp &=  RCV_ERR_INFO_RCV_EXCESS_BUFFER_OVERRUN_SC_SMASK;
3394 		tmp <<= 2;
3395 		rsp->excessive_buffer_overrun_ei.status_and_sc = tmp;
3396 		/* set the status bit */
3397 		rsp->excessive_buffer_overrun_ei.status_and_sc |= 0x80;
3398 	}
3399 
3400 	rsp->port_xmit_constraint_ei.status =
3401 		dd->err_info_xmit_constraint.status;
3402 	rsp->port_xmit_constraint_ei.pkey =
3403 		cpu_to_be16(dd->err_info_xmit_constraint.pkey);
3404 	rsp->port_xmit_constraint_ei.slid =
3405 		cpu_to_be32(dd->err_info_xmit_constraint.slid);
3406 
3407 	rsp->port_rcv_constraint_ei.status =
3408 		dd->err_info_rcv_constraint.status;
3409 	rsp->port_rcv_constraint_ei.pkey =
3410 		cpu_to_be16(dd->err_info_rcv_constraint.pkey);
3411 	rsp->port_rcv_constraint_ei.slid =
3412 		cpu_to_be32(dd->err_info_rcv_constraint.slid);
3413 
3414 	/* UncorrectableErrorInfo */
3415 	rsp->uncorrectable_ei.status_and_code = dd->err_info_uncorrectable;
3416 
3417 	/* FMConfigErrorInfo */
3418 	rsp->fm_config_ei.status_and_code = dd->err_info_fmconfig;
3419 
3420 	if (resp_len)
3421 		*resp_len += response_data_size;
3422 
3423 	return reply((struct ib_mad_hdr *)pmp);
3424 }
3425 
3426 static int pma_set_opa_portstatus(struct opa_pma_mad *pmp,
3427 				  struct ib_device *ibdev,
3428 				  u32 port, u32 *resp_len)
3429 {
3430 	struct opa_clear_port_status *req =
3431 		(struct opa_clear_port_status *)pmp->data;
3432 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
3433 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
3434 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3435 	u32 nports = be32_to_cpu(pmp->mad_hdr.attr_mod) >> 24;
3436 	u64 portn = be64_to_cpu(req->port_select_mask[3]);
3437 	u32 counter_select = be32_to_cpu(req->counter_select_mask);
3438 	unsigned long vl_select_mask = VL_MASK_ALL; /* clear all per-vl cnts */
3439 	unsigned long vl;
3440 
3441 	if ((nports != 1) || (portn != 1 << port)) {
3442 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3443 		return reply((struct ib_mad_hdr *)pmp);
3444 	}
3445 	/*
3446 	 * only counters returned by pma_get_opa_portstatus() are
3447 	 * handled, so when pma_get_opa_portstatus() gets a fix,
3448 	 * the corresponding change should be made here as well.
3449 	 */
3450 
3451 	if (counter_select & CS_PORT_XMIT_DATA)
3452 		write_dev_cntr(dd, C_DC_XMIT_FLITS, CNTR_INVALID_VL, 0);
3453 
3454 	if (counter_select & CS_PORT_RCV_DATA)
3455 		write_dev_cntr(dd, C_DC_RCV_FLITS, CNTR_INVALID_VL, 0);
3456 
3457 	if (counter_select & CS_PORT_XMIT_PKTS)
3458 		write_dev_cntr(dd, C_DC_XMIT_PKTS, CNTR_INVALID_VL, 0);
3459 
3460 	if (counter_select & CS_PORT_RCV_PKTS)
3461 		write_dev_cntr(dd, C_DC_RCV_PKTS, CNTR_INVALID_VL, 0);
3462 
3463 	if (counter_select & CS_PORT_MCAST_XMIT_PKTS)
3464 		write_dev_cntr(dd, C_DC_MC_XMIT_PKTS, CNTR_INVALID_VL, 0);
3465 
3466 	if (counter_select & CS_PORT_MCAST_RCV_PKTS)
3467 		write_dev_cntr(dd, C_DC_MC_RCV_PKTS, CNTR_INVALID_VL, 0);
3468 
3469 	if (counter_select & CS_PORT_XMIT_WAIT) {
3470 		write_port_cntr(ppd, C_TX_WAIT, CNTR_INVALID_VL, 0);
3471 		ppd->port_vl_xmit_wait_last[C_VL_COUNT] = 0;
3472 		ppd->vl_xmit_flit_cnt[C_VL_COUNT] = 0;
3473 	}
3474 	/* ignore cs_sw_portCongestion for HFIs */
3475 
3476 	if (counter_select & CS_PORT_RCV_FECN)
3477 		write_dev_cntr(dd, C_DC_RCV_FCN, CNTR_INVALID_VL, 0);
3478 
3479 	if (counter_select & CS_PORT_RCV_BECN)
3480 		write_dev_cntr(dd, C_DC_RCV_BCN, CNTR_INVALID_VL, 0);
3481 
3482 	/* ignore cs_port_xmit_time_cong for HFIs */
3483 	/* ignore cs_port_xmit_wasted_bw for now */
3484 	/* ignore cs_port_xmit_wait_data for now */
3485 	if (counter_select & CS_PORT_RCV_BUBBLE)
3486 		write_dev_cntr(dd, C_DC_RCV_BBL, CNTR_INVALID_VL, 0);
3487 
3488 	/* Only applicable for switch */
3489 	/* if (counter_select & CS_PORT_MARK_FECN)
3490 	 *	write_csr(dd, DCC_PRF_PORT_MARK_FECN_CNT, 0);
3491 	 */
3492 
3493 	if (counter_select & CS_PORT_RCV_CONSTRAINT_ERRORS)
3494 		write_port_cntr(ppd, C_SW_RCV_CSTR_ERR, CNTR_INVALID_VL, 0);
3495 
3496 	/* ignore cs_port_rcv_switch_relay_errors for HFIs */
3497 	if (counter_select & CS_PORT_XMIT_DISCARDS)
3498 		write_port_cntr(ppd, C_SW_XMIT_DSCD, CNTR_INVALID_VL, 0);
3499 
3500 	if (counter_select & CS_PORT_XMIT_CONSTRAINT_ERRORS)
3501 		write_port_cntr(ppd, C_SW_XMIT_CSTR_ERR, CNTR_INVALID_VL, 0);
3502 
3503 	if (counter_select & CS_PORT_RCV_REMOTE_PHYSICAL_ERRORS)
3504 		write_dev_cntr(dd, C_DC_RMT_PHY_ERR, CNTR_INVALID_VL, 0);
3505 
3506 	if (counter_select & CS_LOCAL_LINK_INTEGRITY_ERRORS)
3507 		write_dev_cntr(dd, C_DC_RX_REPLAY, CNTR_INVALID_VL, 0);
3508 
3509 	if (counter_select & CS_LINK_ERROR_RECOVERY) {
3510 		write_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL, 0);
3511 		write_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT,
3512 			       CNTR_INVALID_VL, 0);
3513 	}
3514 
3515 	if (counter_select & CS_PORT_RCV_ERRORS)
3516 		write_dev_cntr(dd, C_DC_RCV_ERR, CNTR_INVALID_VL, 0);
3517 
3518 	if (counter_select & CS_EXCESSIVE_BUFFER_OVERRUNS) {
3519 		write_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL, 0);
3520 		dd->rcv_ovfl_cnt = 0;
3521 	}
3522 
3523 	if (counter_select & CS_FM_CONFIG_ERRORS)
3524 		write_dev_cntr(dd, C_DC_FM_CFG_ERR, CNTR_INVALID_VL, 0);
3525 
3526 	if (counter_select & CS_LINK_DOWNED)
3527 		write_port_cntr(ppd, C_SW_LINK_DOWN, CNTR_INVALID_VL, 0);
3528 
3529 	if (counter_select & CS_UNCORRECTABLE_ERRORS)
3530 		write_dev_cntr(dd, C_DC_UNC_ERR, CNTR_INVALID_VL, 0);
3531 
3532 	for_each_set_bit(vl, &vl_select_mask, BITS_PER_LONG) {
3533 		if (counter_select & CS_PORT_XMIT_DATA)
3534 			write_port_cntr(ppd, C_TX_FLIT_VL, idx_from_vl(vl), 0);
3535 
3536 		if (counter_select & CS_PORT_RCV_DATA)
3537 			write_dev_cntr(dd, C_DC_RX_FLIT_VL, idx_from_vl(vl), 0);
3538 
3539 		if (counter_select & CS_PORT_XMIT_PKTS)
3540 			write_port_cntr(ppd, C_TX_PKT_VL, idx_from_vl(vl), 0);
3541 
3542 		if (counter_select & CS_PORT_RCV_PKTS)
3543 			write_dev_cntr(dd, C_DC_RX_PKT_VL, idx_from_vl(vl), 0);
3544 
3545 		if (counter_select & CS_PORT_XMIT_WAIT) {
3546 			write_port_cntr(ppd, C_TX_WAIT_VL, idx_from_vl(vl), 0);
3547 			ppd->port_vl_xmit_wait_last[idx_from_vl(vl)] = 0;
3548 			ppd->vl_xmit_flit_cnt[idx_from_vl(vl)] = 0;
3549 		}
3550 
3551 		/* sw_port_vl_congestion is 0 for HFIs */
3552 		if (counter_select & CS_PORT_RCV_FECN)
3553 			write_dev_cntr(dd, C_DC_RCV_FCN_VL, idx_from_vl(vl), 0);
3554 
3555 		if (counter_select & CS_PORT_RCV_BECN)
3556 			write_dev_cntr(dd, C_DC_RCV_BCN_VL, idx_from_vl(vl), 0);
3557 
3558 		/* port_vl_xmit_time_cong is 0 for HFIs */
3559 		/* port_vl_xmit_wasted_bw ??? */
3560 		/* port_vl_xmit_wait_data - TXE (table 13-9 HFI spec) ??? */
3561 		if (counter_select & CS_PORT_RCV_BUBBLE)
3562 			write_dev_cntr(dd, C_DC_RCV_BBL_VL, idx_from_vl(vl), 0);
3563 
3564 		/* if (counter_select & CS_PORT_MARK_FECN)
3565 		 *     write_csr(dd, DCC_PRF_PORT_VL_MARK_FECN_CNT + offset, 0);
3566 		 */
3567 		if (counter_select & C_SW_XMIT_DSCD_VL)
3568 			write_port_cntr(ppd, C_SW_XMIT_DSCD_VL,
3569 					idx_from_vl(vl), 0);
3570 	}
3571 
3572 	if (resp_len)
3573 		*resp_len += sizeof(*req);
3574 
3575 	return reply((struct ib_mad_hdr *)pmp);
3576 }
3577 
3578 static int pma_set_opa_errorinfo(struct opa_pma_mad *pmp,
3579 				 struct ib_device *ibdev,
3580 				 u32 port, u32 *resp_len)
3581 {
3582 	struct _port_ei *rsp;
3583 	struct opa_port_error_info_msg *req;
3584 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
3585 	u64 port_mask;
3586 	u32 num_ports;
3587 	u32 port_num;
3588 	u8 num_pslm;
3589 	u32 error_info_select;
3590 
3591 	req = (struct opa_port_error_info_msg *)pmp->data;
3592 	rsp = &req->port;
3593 
3594 	num_ports = OPA_AM_NPORT(be32_to_cpu(pmp->mad_hdr.attr_mod));
3595 	num_pslm = hweight64(be64_to_cpu(req->port_select_mask[3]));
3596 
3597 	memset(rsp, 0, sizeof(*rsp));
3598 
3599 	if (num_ports != 1 || num_ports != num_pslm) {
3600 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3601 		return reply((struct ib_mad_hdr *)pmp);
3602 	}
3603 
3604 	/*
3605 	 * The bit set in the mask needs to be consistent with the port
3606 	 * the request came in on.
3607 	 */
3608 	port_mask = be64_to_cpu(req->port_select_mask[3]);
3609 	port_num = find_first_bit((unsigned long *)&port_mask,
3610 				  sizeof(port_mask) * 8);
3611 
3612 	if (port_num != port) {
3613 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3614 		return reply((struct ib_mad_hdr *)pmp);
3615 	}
3616 
3617 	error_info_select = be32_to_cpu(req->error_info_select_mask);
3618 
3619 	/* PortRcvErrorInfo */
3620 	if (error_info_select & ES_PORT_RCV_ERROR_INFO)
3621 		/* turn off status bit */
3622 		dd->err_info_rcvport.status_and_code &= ~OPA_EI_STATUS_SMASK;
3623 
3624 	/* ExcessiverBufferOverrunInfo */
3625 	if (error_info_select & ES_EXCESSIVE_BUFFER_OVERRUN_INFO)
3626 		/*
3627 		 * status bit is essentially kept in the h/w - bit 5 of
3628 		 * RCV_ERR_INFO
3629 		 */
3630 		write_csr(dd, RCV_ERR_INFO,
3631 			  RCV_ERR_INFO_RCV_EXCESS_BUFFER_OVERRUN_SMASK);
3632 
3633 	if (error_info_select & ES_PORT_XMIT_CONSTRAINT_ERROR_INFO)
3634 		dd->err_info_xmit_constraint.status &= ~OPA_EI_STATUS_SMASK;
3635 
3636 	if (error_info_select & ES_PORT_RCV_CONSTRAINT_ERROR_INFO)
3637 		dd->err_info_rcv_constraint.status &= ~OPA_EI_STATUS_SMASK;
3638 
3639 	/* UncorrectableErrorInfo */
3640 	if (error_info_select & ES_UNCORRECTABLE_ERROR_INFO)
3641 		/* turn off status bit */
3642 		dd->err_info_uncorrectable &= ~OPA_EI_STATUS_SMASK;
3643 
3644 	/* FMConfigErrorInfo */
3645 	if (error_info_select & ES_FM_CONFIG_ERROR_INFO)
3646 		/* turn off status bit */
3647 		dd->err_info_fmconfig &= ~OPA_EI_STATUS_SMASK;
3648 
3649 	if (resp_len)
3650 		*resp_len += sizeof(*req);
3651 
3652 	return reply((struct ib_mad_hdr *)pmp);
3653 }
3654 
/*
 * Payload layout used by __subn_get_opa_cong_info() for its reply.
 * The structure is packed; congestion_info is a big-endian 16-bit field.
 */
struct opa_congestion_info_attr {
	/* reported as 0 by __subn_get_opa_cong_info() */
	__be16 congestion_info;
	u8 control_table_cap;	/* Multiple of 64 entry unit CCTs */
	/* reported as OPA_CONG_LOG_ELEMS by __subn_get_opa_cong_info() */
	u8 congestion_log_length;
} __packed;
3660 
3661 static int __subn_get_opa_cong_info(struct opa_smp *smp, u32 am, u8 *data,
3662 				    struct ib_device *ibdev, u32 port,
3663 				    u32 *resp_len, u32 max_len)
3664 {
3665 	struct opa_congestion_info_attr *p =
3666 		(struct opa_congestion_info_attr *)data;
3667 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
3668 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3669 
3670 	if (smp_length_check(sizeof(*p), max_len)) {
3671 		smp->status |= IB_SMP_INVALID_FIELD;
3672 		return reply((struct ib_mad_hdr *)smp);
3673 	}
3674 
3675 	p->congestion_info = 0;
3676 	p->control_table_cap = ppd->cc_max_table_entries;
3677 	p->congestion_log_length = OPA_CONG_LOG_ELEMS;
3678 
3679 	if (resp_len)
3680 		*resp_len += sizeof(*p);
3681 
3682 	return reply((struct ib_mad_hdr *)smp);
3683 }
3684 
3685 static int __subn_get_opa_cong_setting(struct opa_smp *smp, u32 am,
3686 				       u8 *data, struct ib_device *ibdev,
3687 				       u32 port, u32 *resp_len, u32 max_len)
3688 {
3689 	int i;
3690 	struct opa_congestion_setting_attr *p =
3691 		(struct opa_congestion_setting_attr *)data;
3692 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
3693 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3694 	struct opa_congestion_setting_entry_shadow *entries;
3695 	struct cc_state *cc_state;
3696 
3697 	if (smp_length_check(sizeof(*p), max_len)) {
3698 		smp->status |= IB_SMP_INVALID_FIELD;
3699 		return reply((struct ib_mad_hdr *)smp);
3700 	}
3701 
3702 	rcu_read_lock();
3703 
3704 	cc_state = get_cc_state(ppd);
3705 
3706 	if (!cc_state) {
3707 		rcu_read_unlock();
3708 		return reply((struct ib_mad_hdr *)smp);
3709 	}
3710 
3711 	entries = cc_state->cong_setting.entries;
3712 	p->port_control = cpu_to_be16(cc_state->cong_setting.port_control);
3713 	p->control_map = cpu_to_be32(cc_state->cong_setting.control_map);
3714 	for (i = 0; i < OPA_MAX_SLS; i++) {
3715 		p->entries[i].ccti_increase = entries[i].ccti_increase;
3716 		p->entries[i].ccti_timer = cpu_to_be16(entries[i].ccti_timer);
3717 		p->entries[i].trigger_threshold =
3718 			entries[i].trigger_threshold;
3719 		p->entries[i].ccti_min = entries[i].ccti_min;
3720 	}
3721 
3722 	rcu_read_unlock();
3723 
3724 	if (resp_len)
3725 		*resp_len += sizeof(*p);
3726 
3727 	return reply((struct ib_mad_hdr *)smp);
3728 }
3729 
3730 /*
3731  * Apply congestion control information stored in the ppd to the
3732  * active structure.
3733  */
3734 static void apply_cc_state(struct hfi1_pportdata *ppd)
3735 {
3736 	struct cc_state *old_cc_state, *new_cc_state;
3737 
3738 	new_cc_state = kzalloc_obj(*new_cc_state);
3739 	if (!new_cc_state)
3740 		return;
3741 
3742 	/*
3743 	 * Hold the lock for updating *and* to prevent ppd information
3744 	 * from changing during the update.
3745 	 */
3746 	spin_lock(&ppd->cc_state_lock);
3747 
3748 	old_cc_state = get_cc_state_protected(ppd);
3749 	if (!old_cc_state) {
3750 		/* never active, or shutting down */
3751 		spin_unlock(&ppd->cc_state_lock);
3752 		kfree(new_cc_state);
3753 		return;
3754 	}
3755 
3756 	*new_cc_state = *old_cc_state;
3757 
3758 	if (ppd->total_cct_entry)
3759 		new_cc_state->cct.ccti_limit = ppd->total_cct_entry - 1;
3760 	else
3761 		new_cc_state->cct.ccti_limit = 0;
3762 
3763 	memcpy(new_cc_state->cct.entries, ppd->ccti_entries,
3764 	       ppd->total_cct_entry * sizeof(struct ib_cc_table_entry));
3765 
3766 	new_cc_state->cong_setting.port_control = IB_CC_CCS_PC_SL_BASED;
3767 	new_cc_state->cong_setting.control_map = ppd->cc_sl_control_map;
3768 	memcpy(new_cc_state->cong_setting.entries, ppd->congestion_entries,
3769 	       OPA_MAX_SLS * sizeof(struct opa_congestion_setting_entry));
3770 
3771 	rcu_assign_pointer(ppd->cc_state, new_cc_state);
3772 
3773 	spin_unlock(&ppd->cc_state_lock);
3774 
3775 	kfree_rcu(old_cc_state, rcu);
3776 }
3777 
3778 static int __subn_set_opa_cong_setting(struct opa_smp *smp, u32 am, u8 *data,
3779 				       struct ib_device *ibdev, u32 port,
3780 				       u32 *resp_len, u32 max_len)
3781 {
3782 	struct opa_congestion_setting_attr *p =
3783 		(struct opa_congestion_setting_attr *)data;
3784 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
3785 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3786 	struct opa_congestion_setting_entry_shadow *entries;
3787 	int i;
3788 
3789 	if (smp_length_check(sizeof(*p), max_len)) {
3790 		smp->status |= IB_SMP_INVALID_FIELD;
3791 		return reply((struct ib_mad_hdr *)smp);
3792 	}
3793 
3794 	/*
3795 	 * Save details from packet into the ppd.  Hold the cc_state_lock so
3796 	 * our information is consistent with anyone trying to apply the state.
3797 	 */
3798 	spin_lock(&ppd->cc_state_lock);
3799 	ppd->cc_sl_control_map = be32_to_cpu(p->control_map);
3800 
3801 	entries = ppd->congestion_entries;
3802 	for (i = 0; i < OPA_MAX_SLS; i++) {
3803 		entries[i].ccti_increase = p->entries[i].ccti_increase;
3804 		entries[i].ccti_timer = be16_to_cpu(p->entries[i].ccti_timer);
3805 		entries[i].trigger_threshold =
3806 			p->entries[i].trigger_threshold;
3807 		entries[i].ccti_min = p->entries[i].ccti_min;
3808 	}
3809 	spin_unlock(&ppd->cc_state_lock);
3810 
3811 	/* now apply the information */
3812 	apply_cc_state(ppd);
3813 
3814 	return __subn_get_opa_cong_setting(smp, am, data, ibdev, port,
3815 					   resp_len, max_len);
3816 }
3817 
3818 static int __subn_get_opa_hfi1_cong_log(struct opa_smp *smp, u32 am,
3819 					u8 *data, struct ib_device *ibdev,
3820 					u32 port, u32 *resp_len, u32 max_len)
3821 {
3822 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
3823 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3824 	struct opa_hfi1_cong_log *cong_log = (struct opa_hfi1_cong_log *)data;
3825 	u64 ts;
3826 	int i;
3827 
3828 	if (am || smp_length_check(sizeof(*cong_log), max_len)) {
3829 		smp->status |= IB_SMP_INVALID_FIELD;
3830 		return reply((struct ib_mad_hdr *)smp);
3831 	}
3832 
3833 	spin_lock_irq(&ppd->cc_log_lock);
3834 
3835 	cong_log->log_type = OPA_CC_LOG_TYPE_HFI;
3836 	cong_log->congestion_flags = 0;
3837 	cong_log->threshold_event_counter =
3838 		cpu_to_be16(ppd->threshold_event_counter);
3839 	memcpy(cong_log->threshold_cong_event_map,
3840 	       ppd->threshold_cong_event_map,
3841 	       sizeof(cong_log->threshold_cong_event_map));
3842 	/* keep timestamp in units of 1.024 usec */
3843 	ts = ktime_get_ns() / 1024;
3844 	cong_log->current_time_stamp = cpu_to_be32(ts);
3845 	for (i = 0; i < OPA_CONG_LOG_ELEMS; i++) {
3846 		struct opa_hfi1_cong_log_event_internal *cce =
3847 			&ppd->cc_events[ppd->cc_mad_idx++];
3848 		if (ppd->cc_mad_idx == OPA_CONG_LOG_ELEMS)
3849 			ppd->cc_mad_idx = 0;
3850 		/*
3851 		 * Entries which are older than twice the time
3852 		 * required to wrap the counter are supposed to
3853 		 * be zeroed (CA10-49 IBTA, release 1.2.1, V1).
3854 		 */
3855 		if ((ts - cce->timestamp) / 2 > U32_MAX)
3856 			continue;
3857 		memcpy(cong_log->events[i].local_qp_cn_entry, &cce->lqpn, 3);
3858 		memcpy(cong_log->events[i].remote_qp_number_cn_entry,
3859 		       &cce->rqpn, 3);
3860 		cong_log->events[i].sl_svc_type_cn_entry =
3861 			((cce->sl & 0x1f) << 3) | (cce->svc_type & 0x7);
3862 		cong_log->events[i].remote_lid_cn_entry =
3863 			cpu_to_be32(cce->rlid);
3864 		cong_log->events[i].timestamp_cn_entry =
3865 			cpu_to_be32(cce->timestamp);
3866 	}
3867 
3868 	/*
3869 	 * Reset threshold_cong_event_map, and threshold_event_counter
3870 	 * to 0 when log is read.
3871 	 */
3872 	memset(ppd->threshold_cong_event_map, 0x0,
3873 	       sizeof(ppd->threshold_cong_event_map));
3874 	ppd->threshold_event_counter = 0;
3875 
3876 	spin_unlock_irq(&ppd->cc_log_lock);
3877 
3878 	if (resp_len)
3879 		*resp_len += sizeof(struct opa_hfi1_cong_log);
3880 
3881 	return reply((struct ib_mad_hdr *)smp);
3882 }
3883 
3884 static int __subn_get_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data,
3885 				   struct ib_device *ibdev, u32 port,
3886 				   u32 *resp_len, u32 max_len)
3887 {
3888 	struct ib_cc_table_attr *cc_table_attr =
3889 		(struct ib_cc_table_attr *)data;
3890 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
3891 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3892 	u32 start_block = OPA_AM_START_BLK(am);
3893 	u32 n_blocks = OPA_AM_NBLK(am);
3894 	struct ib_cc_table_entry_shadow *entries;
3895 	int i, j;
3896 	u32 sentry, eentry;
3897 	struct cc_state *cc_state;
3898 	u32 size = sizeof(u16) * (IB_CCT_ENTRIES * n_blocks + 1);
3899 
3900 	/* sanity check n_blocks, start_block */
3901 	if (n_blocks == 0 || smp_length_check(size, max_len) ||
3902 	    start_block + n_blocks > ppd->cc_max_table_entries) {
3903 		smp->status |= IB_SMP_INVALID_FIELD;
3904 		return reply((struct ib_mad_hdr *)smp);
3905 	}
3906 
3907 	rcu_read_lock();
3908 
3909 	cc_state = get_cc_state(ppd);
3910 
3911 	if (!cc_state) {
3912 		rcu_read_unlock();
3913 		return reply((struct ib_mad_hdr *)smp);
3914 	}
3915 
3916 	sentry = start_block * IB_CCT_ENTRIES;
3917 	eentry = sentry + (IB_CCT_ENTRIES * n_blocks);
3918 
3919 	cc_table_attr->ccti_limit = cpu_to_be16(cc_state->cct.ccti_limit);
3920 
3921 	entries = cc_state->cct.entries;
3922 
3923 	/* return n_blocks, though the last block may not be full */
3924 	for (j = 0, i = sentry; i < eentry; j++, i++)
3925 		cc_table_attr->ccti_entries[j].entry =
3926 			cpu_to_be16(entries[i].entry);
3927 
3928 	rcu_read_unlock();
3929 
3930 	if (resp_len)
3931 		*resp_len += size;
3932 
3933 	return reply((struct ib_mad_hdr *)smp);
3934 }
3935 
3936 static int __subn_set_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data,
3937 				   struct ib_device *ibdev, u32 port,
3938 				   u32 *resp_len, u32 max_len)
3939 {
3940 	struct ib_cc_table_attr *p = (struct ib_cc_table_attr *)data;
3941 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
3942 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3943 	u32 start_block = OPA_AM_START_BLK(am);
3944 	u32 n_blocks = OPA_AM_NBLK(am);
3945 	struct ib_cc_table_entry_shadow *entries;
3946 	int i, j;
3947 	u32 sentry, eentry;
3948 	u16 ccti_limit;
3949 	u32 size = sizeof(u16) * (IB_CCT_ENTRIES * n_blocks + 1);
3950 
3951 	/* sanity check n_blocks, start_block */
3952 	if (n_blocks == 0 || smp_length_check(size, max_len) ||
3953 	    start_block + n_blocks > ppd->cc_max_table_entries) {
3954 		smp->status |= IB_SMP_INVALID_FIELD;
3955 		return reply((struct ib_mad_hdr *)smp);
3956 	}
3957 
3958 	sentry = start_block * IB_CCT_ENTRIES;
3959 	eentry = sentry + ((n_blocks - 1) * IB_CCT_ENTRIES) +
3960 		 (be16_to_cpu(p->ccti_limit)) % IB_CCT_ENTRIES + 1;
3961 
3962 	/* sanity check ccti_limit */
3963 	ccti_limit = be16_to_cpu(p->ccti_limit);
3964 	if (ccti_limit + 1 > eentry) {
3965 		smp->status |= IB_SMP_INVALID_FIELD;
3966 		return reply((struct ib_mad_hdr *)smp);
3967 	}
3968 
3969 	/*
3970 	 * Save details from packet into the ppd.  Hold the cc_state_lock so
3971 	 * our information is consistent with anyone trying to apply the state.
3972 	 */
3973 	spin_lock(&ppd->cc_state_lock);
3974 	ppd->total_cct_entry = ccti_limit + 1;
3975 	entries = ppd->ccti_entries;
3976 	for (j = 0, i = sentry; i < eentry; j++, i++)
3977 		entries[i].entry = be16_to_cpu(p->ccti_entries[j].entry);
3978 	spin_unlock(&ppd->cc_state_lock);
3979 
3980 	/* now apply the information */
3981 	apply_cc_state(ppd);
3982 
3983 	return __subn_get_opa_cc_table(smp, am, data, ibdev, port, resp_len,
3984 				       max_len);
3985 }
3986 
/*
 * Payload layout used by the Get/Set LedInfo handlers in this file.
 * Both fields are big-endian on the wire.
 */
struct opa_led_info {
	/* bit OPA_LED_SHIFT carries the LED (beaconing) on/off state */
	__be32 rsvd_led_mask;
	__be32 rsvd;
};

/* Bit position and mask of the LED on/off flag within rsvd_led_mask. */
#define OPA_LED_SHIFT	31
#define OPA_LED_MASK	BIT(OPA_LED_SHIFT)
3994 
3995 static int __subn_get_opa_led_info(struct opa_smp *smp, u32 am, u8 *data,
3996 				   struct ib_device *ibdev, u32 port,
3997 				   u32 *resp_len, u32 max_len)
3998 {
3999 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
4000 	struct hfi1_pportdata *ppd = dd->pport;
4001 	struct opa_led_info *p = (struct opa_led_info *)data;
4002 	u32 nport = OPA_AM_NPORT(am);
4003 	u32 is_beaconing_active;
4004 
4005 	if (nport != 1 || smp_length_check(sizeof(*p), max_len)) {
4006 		smp->status |= IB_SMP_INVALID_FIELD;
4007 		return reply((struct ib_mad_hdr *)smp);
4008 	}
4009 
4010 	/*
4011 	 * This pairs with the memory barrier in hfi1_start_led_override to
4012 	 * ensure that we read the correct state of LED beaconing represented
4013 	 * by led_override_timer_active
4014 	 */
4015 	smp_rmb();
4016 	is_beaconing_active = !!atomic_read(&ppd->led_override_timer_active);
4017 	p->rsvd_led_mask = cpu_to_be32(is_beaconing_active << OPA_LED_SHIFT);
4018 
4019 	if (resp_len)
4020 		*resp_len += sizeof(struct opa_led_info);
4021 
4022 	return reply((struct ib_mad_hdr *)smp);
4023 }
4024 
4025 static int __subn_set_opa_led_info(struct opa_smp *smp, u32 am, u8 *data,
4026 				   struct ib_device *ibdev, u32 port,
4027 				   u32 *resp_len, u32 max_len)
4028 {
4029 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
4030 	struct opa_led_info *p = (struct opa_led_info *)data;
4031 	u32 nport = OPA_AM_NPORT(am);
4032 	int on = !!(be32_to_cpu(p->rsvd_led_mask) & OPA_LED_MASK);
4033 
4034 	if (nport != 1 || smp_length_check(sizeof(*p), max_len)) {
4035 		smp->status |= IB_SMP_INVALID_FIELD;
4036 		return reply((struct ib_mad_hdr *)smp);
4037 	}
4038 
4039 	if (on)
4040 		hfi1_start_led_override(dd->pport, 2000, 1500);
4041 	else
4042 		shutdown_led_override(dd->pport);
4043 
4044 	return __subn_get_opa_led_info(smp, am, data, ibdev, port, resp_len,
4045 				       max_len);
4046 }
4047 
4048 static int subn_get_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am,
4049 			    u8 *data, struct ib_device *ibdev, u32 port,
4050 			    u32 *resp_len, u32 max_len)
4051 {
4052 	int ret;
4053 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
4054 
4055 	switch (attr_id) {
4056 	case IB_SMP_ATTR_NODE_DESC:
4057 		ret = __subn_get_opa_nodedesc(smp, am, data, ibdev, port,
4058 					      resp_len, max_len);
4059 		break;
4060 	case IB_SMP_ATTR_NODE_INFO:
4061 		ret = __subn_get_opa_nodeinfo(smp, am, data, ibdev, port,
4062 					      resp_len, max_len);
4063 		break;
4064 	case IB_SMP_ATTR_PORT_INFO:
4065 		ret = __subn_get_opa_portinfo(smp, am, data, ibdev, port,
4066 					      resp_len, max_len);
4067 		break;
4068 	case IB_SMP_ATTR_PKEY_TABLE:
4069 		ret = __subn_get_opa_pkeytable(smp, am, data, ibdev, port,
4070 					       resp_len, max_len);
4071 		break;
4072 	case OPA_ATTRIB_ID_SL_TO_SC_MAP:
4073 		ret = __subn_get_opa_sl_to_sc(smp, am, data, ibdev, port,
4074 					      resp_len, max_len);
4075 		break;
4076 	case OPA_ATTRIB_ID_SC_TO_SL_MAP:
4077 		ret = __subn_get_opa_sc_to_sl(smp, am, data, ibdev, port,
4078 					      resp_len, max_len);
4079 		break;
4080 	case OPA_ATTRIB_ID_SC_TO_VLT_MAP:
4081 		ret = __subn_get_opa_sc_to_vlt(smp, am, data, ibdev, port,
4082 					       resp_len, max_len);
4083 		break;
4084 	case OPA_ATTRIB_ID_SC_TO_VLNT_MAP:
4085 		ret = __subn_get_opa_sc_to_vlnt(smp, am, data, ibdev, port,
4086 						resp_len, max_len);
4087 		break;
4088 	case OPA_ATTRIB_ID_PORT_STATE_INFO:
4089 		ret = __subn_get_opa_psi(smp, am, data, ibdev, port,
4090 					 resp_len, max_len);
4091 		break;
4092 	case OPA_ATTRIB_ID_BUFFER_CONTROL_TABLE:
4093 		ret = __subn_get_opa_bct(smp, am, data, ibdev, port,
4094 					 resp_len, max_len);
4095 		break;
4096 	case OPA_ATTRIB_ID_CABLE_INFO:
4097 		ret = __subn_get_opa_cable_info(smp, am, data, ibdev, port,
4098 						resp_len, max_len);
4099 		break;
4100 	case IB_SMP_ATTR_VL_ARB_TABLE:
4101 		ret = __subn_get_opa_vl_arb(smp, am, data, ibdev, port,
4102 					    resp_len, max_len);
4103 		break;
4104 	case OPA_ATTRIB_ID_CONGESTION_INFO:
4105 		ret = __subn_get_opa_cong_info(smp, am, data, ibdev, port,
4106 					       resp_len, max_len);
4107 		break;
4108 	case OPA_ATTRIB_ID_HFI_CONGESTION_SETTING:
4109 		ret = __subn_get_opa_cong_setting(smp, am, data, ibdev,
4110 						  port, resp_len, max_len);
4111 		break;
4112 	case OPA_ATTRIB_ID_HFI_CONGESTION_LOG:
4113 		ret = __subn_get_opa_hfi1_cong_log(smp, am, data, ibdev,
4114 						   port, resp_len, max_len);
4115 		break;
4116 	case OPA_ATTRIB_ID_CONGESTION_CONTROL_TABLE:
4117 		ret = __subn_get_opa_cc_table(smp, am, data, ibdev, port,
4118 					      resp_len, max_len);
4119 		break;
4120 	case IB_SMP_ATTR_LED_INFO:
4121 		ret = __subn_get_opa_led_info(smp, am, data, ibdev, port,
4122 					      resp_len, max_len);
4123 		break;
4124 	case IB_SMP_ATTR_SM_INFO:
4125 		if (ibp->rvp.port_cap_flags & IB_PORT_SM_DISABLED)
4126 			return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
4127 		if (ibp->rvp.port_cap_flags & IB_PORT_SM)
4128 			return IB_MAD_RESULT_SUCCESS;
4129 		fallthrough;
4130 	default:
4131 		smp->status |= IB_SMP_UNSUP_METH_ATTR;
4132 		ret = reply((struct ib_mad_hdr *)smp);
4133 		break;
4134 	}
4135 	return ret;
4136 }
4137 
4138 static int subn_set_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am,
4139 			    u8 *data, struct ib_device *ibdev, u32 port,
4140 			    u32 *resp_len, u32 max_len, int local_mad)
4141 {
4142 	int ret;
4143 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
4144 
4145 	switch (attr_id) {
4146 	case IB_SMP_ATTR_PORT_INFO:
4147 		ret = __subn_set_opa_portinfo(smp, am, data, ibdev, port,
4148 					      resp_len, max_len, local_mad);
4149 		break;
4150 	case IB_SMP_ATTR_PKEY_TABLE:
4151 		ret = __subn_set_opa_pkeytable(smp, am, data, ibdev, port,
4152 					       resp_len, max_len);
4153 		break;
4154 	case OPA_ATTRIB_ID_SL_TO_SC_MAP:
4155 		ret = __subn_set_opa_sl_to_sc(smp, am, data, ibdev, port,
4156 					      resp_len, max_len);
4157 		break;
4158 	case OPA_ATTRIB_ID_SC_TO_SL_MAP:
4159 		ret = __subn_set_opa_sc_to_sl(smp, am, data, ibdev, port,
4160 					      resp_len, max_len);
4161 		break;
4162 	case OPA_ATTRIB_ID_SC_TO_VLT_MAP:
4163 		ret = __subn_set_opa_sc_to_vlt(smp, am, data, ibdev, port,
4164 					       resp_len, max_len);
4165 		break;
4166 	case OPA_ATTRIB_ID_SC_TO_VLNT_MAP:
4167 		ret = __subn_set_opa_sc_to_vlnt(smp, am, data, ibdev, port,
4168 						resp_len, max_len);
4169 		break;
4170 	case OPA_ATTRIB_ID_PORT_STATE_INFO:
4171 		ret = __subn_set_opa_psi(smp, am, data, ibdev, port,
4172 					 resp_len, max_len, local_mad);
4173 		break;
4174 	case OPA_ATTRIB_ID_BUFFER_CONTROL_TABLE:
4175 		ret = __subn_set_opa_bct(smp, am, data, ibdev, port,
4176 					 resp_len, max_len);
4177 		break;
4178 	case IB_SMP_ATTR_VL_ARB_TABLE:
4179 		ret = __subn_set_opa_vl_arb(smp, am, data, ibdev, port,
4180 					    resp_len, max_len);
4181 		break;
4182 	case OPA_ATTRIB_ID_HFI_CONGESTION_SETTING:
4183 		ret = __subn_set_opa_cong_setting(smp, am, data, ibdev,
4184 						  port, resp_len, max_len);
4185 		break;
4186 	case OPA_ATTRIB_ID_CONGESTION_CONTROL_TABLE:
4187 		ret = __subn_set_opa_cc_table(smp, am, data, ibdev, port,
4188 					      resp_len, max_len);
4189 		break;
4190 	case IB_SMP_ATTR_LED_INFO:
4191 		ret = __subn_set_opa_led_info(smp, am, data, ibdev, port,
4192 					      resp_len, max_len);
4193 		break;
4194 	case IB_SMP_ATTR_SM_INFO:
4195 		if (ibp->rvp.port_cap_flags & IB_PORT_SM_DISABLED)
4196 			return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
4197 		if (ibp->rvp.port_cap_flags & IB_PORT_SM)
4198 			return IB_MAD_RESULT_SUCCESS;
4199 		fallthrough;
4200 	default:
4201 		smp->status |= IB_SMP_UNSUP_METH_ATTR;
4202 		ret = reply((struct ib_mad_hdr *)smp);
4203 		break;
4204 	}
4205 	return ret;
4206 }
4207 
4208 static inline void set_aggr_error(struct opa_aggregate *ag)
4209 {
4210 	ag->err_reqlength |= cpu_to_be16(0x8000);
4211 }
4212 
4213 static int subn_get_opa_aggregate(struct opa_smp *smp,
4214 				  struct ib_device *ibdev, u32 port,
4215 				  u32 *resp_len)
4216 {
4217 	int i;
4218 	u32 num_attr = be32_to_cpu(smp->attr_mod) & 0x000000ff;
4219 	u8 *next_smp = opa_get_smp_data(smp);
4220 
4221 	if (num_attr < 1 || num_attr > 117) {
4222 		smp->status |= IB_SMP_INVALID_FIELD;
4223 		return reply((struct ib_mad_hdr *)smp);
4224 	}
4225 
4226 	for (i = 0; i < num_attr; i++) {
4227 		struct opa_aggregate *agg;
4228 		size_t agg_data_len;
4229 		size_t agg_size;
4230 		u32 am;
4231 
4232 		agg = (struct opa_aggregate *)next_smp;
4233 		agg_data_len = (be16_to_cpu(agg->err_reqlength) & 0x007f) * 8;
4234 		agg_size = sizeof(*agg) + agg_data_len;
4235 		am = be32_to_cpu(agg->attr_mod);
4236 
4237 		*resp_len += agg_size;
4238 
4239 		if (next_smp + agg_size > ((u8 *)smp) + sizeof(*smp)) {
4240 			smp->status |= IB_SMP_INVALID_FIELD;
4241 			return reply((struct ib_mad_hdr *)smp);
4242 		}
4243 
4244 		/* zero the payload for this segment */
4245 		memset(next_smp + sizeof(*agg), 0, agg_data_len);
4246 
4247 		(void)subn_get_opa_sma(agg->attr_id, smp, am, agg->data,
4248 				       ibdev, port, NULL, (u32)agg_data_len);
4249 
4250 		if (smp->status & IB_SMP_INVALID_FIELD)
4251 			break;
4252 		if (smp->status & ~IB_SMP_DIRECTION) {
4253 			set_aggr_error(agg);
4254 			return reply((struct ib_mad_hdr *)smp);
4255 		}
4256 		next_smp += agg_size;
4257 	}
4258 
4259 	return reply((struct ib_mad_hdr *)smp);
4260 }
4261 
4262 static int subn_set_opa_aggregate(struct opa_smp *smp,
4263 				  struct ib_device *ibdev, u32 port,
4264 				  u32 *resp_len, int local_mad)
4265 {
4266 	int i;
4267 	u32 num_attr = be32_to_cpu(smp->attr_mod) & 0x000000ff;
4268 	u8 *next_smp = opa_get_smp_data(smp);
4269 
4270 	if (num_attr < 1 || num_attr > 117) {
4271 		smp->status |= IB_SMP_INVALID_FIELD;
4272 		return reply((struct ib_mad_hdr *)smp);
4273 	}
4274 
4275 	for (i = 0; i < num_attr; i++) {
4276 		struct opa_aggregate *agg;
4277 		size_t agg_data_len;
4278 		size_t agg_size;
4279 		u32 am;
4280 
4281 		agg = (struct opa_aggregate *)next_smp;
4282 		agg_data_len = (be16_to_cpu(agg->err_reqlength) & 0x007f) * 8;
4283 		agg_size = sizeof(*agg) + agg_data_len;
4284 		am = be32_to_cpu(agg->attr_mod);
4285 
4286 		*resp_len += agg_size;
4287 
4288 		if (next_smp + agg_size > ((u8 *)smp) + sizeof(*smp)) {
4289 			smp->status |= IB_SMP_INVALID_FIELD;
4290 			return reply((struct ib_mad_hdr *)smp);
4291 		}
4292 
4293 		(void)subn_set_opa_sma(agg->attr_id, smp, am, agg->data,
4294 				       ibdev, port, NULL, (u32)agg_data_len,
4295 				       local_mad);
4296 
4297 		if (smp->status & IB_SMP_INVALID_FIELD)
4298 			break;
4299 		if (smp->status & ~IB_SMP_DIRECTION) {
4300 			set_aggr_error(agg);
4301 			return reply((struct ib_mad_hdr *)smp);
4302 		}
4303 		next_smp += agg_size;
4304 	}
4305 
4306 	return reply((struct ib_mad_hdr *)smp);
4307 }
4308 
4309 /*
4310  * OPAv1 specifies that, on the transition to link up, these counters
4311  * are cleared:
4312  *   PortRcvErrors [*]
4313  *   LinkErrorRecovery
4314  *   LocalLinkIntegrityErrors
4315  *   ExcessiveBufferOverruns [*]
4316  *
4317  * [*] Error info associated with these counters is retained, but the
4318  * error info status is reset to 0.
4319  */
4320 void clear_linkup_counters(struct hfi1_devdata *dd)
4321 {
4322 	/* PortRcvErrors */
4323 	write_dev_cntr(dd, C_DC_RCV_ERR, CNTR_INVALID_VL, 0);
4324 	dd->err_info_rcvport.status_and_code &= ~OPA_EI_STATUS_SMASK;
4325 	/* LinkErrorRecovery */
4326 	write_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL, 0);
4327 	write_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT, CNTR_INVALID_VL, 0);
4328 	/* LocalLinkIntegrityErrors */
4329 	write_dev_cntr(dd, C_DC_RX_REPLAY, CNTR_INVALID_VL, 0);
4330 	/* ExcessiveBufferOverruns */
4331 	write_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL, 0);
4332 	dd->rcv_ovfl_cnt = 0;
4333 	dd->err_info_xmit_constraint.status &= ~OPA_EI_STATUS_SMASK;
4334 }
4335 
4336 static int is_full_mgmt_pkey_in_table(struct hfi1_ibport *ibp)
4337 {
4338 	unsigned int i;
4339 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
4340 
4341 	for (i = 0; i < ARRAY_SIZE(ppd->pkeys); ++i)
4342 		if (ppd->pkeys[i] == FULL_MGMT_P_KEY)
4343 			return 1;
4344 
4345 	return 0;
4346 }
4347 
4348 /*
4349  * is_local_mad() returns 1 if 'mad' is sent from, and destined to the
4350  * local node, 0 otherwise.
4351  */
4352 static int is_local_mad(struct hfi1_ibport *ibp, const struct opa_mad *mad,
4353 			const struct ib_wc *in_wc)
4354 {
4355 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
4356 	const struct opa_smp *smp = (const struct opa_smp *)mad;
4357 
4358 	if (smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
4359 		return (smp->hop_cnt == 0 &&
4360 			smp->route.dr.dr_slid == OPA_LID_PERMISSIVE &&
4361 			smp->route.dr.dr_dlid == OPA_LID_PERMISSIVE);
4362 	}
4363 
4364 	return (in_wc->slid == ppd->lid);
4365 }
4366 
4367 /*
4368  * opa_local_smp_check() should only be called on MADs for which
4369  * is_local_mad() returns true. It applies the SMP checks that are
4370  * specific to SMPs which are sent from, and destined to this node.
4371  * opa_local_smp_check() returns 0 if the SMP passes its checks, 1
4372  * otherwise.
4373  *
4374  * SMPs which arrive from other nodes are instead checked by
4375  * opa_smp_check().
4376  */
4377 static int opa_local_smp_check(struct hfi1_ibport *ibp,
4378 			       const struct ib_wc *in_wc)
4379 {
4380 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
4381 	u16 pkey;
4382 
4383 	if (in_wc->pkey_index >= ARRAY_SIZE(ppd->pkeys))
4384 		return 1;
4385 
4386 	pkey = ppd->pkeys[in_wc->pkey_index];
4387 	/*
4388 	 * We need to do the "node-local" checks specified in OPAv1,
4389 	 * rev 0.90, section 9.10.26, which are:
4390 	 *   - pkey is 0x7fff, or 0xffff
4391 	 *   - Source QPN == 0 || Destination QPN == 0
4392 	 *   - the MAD header's management class is either
4393 	 *     IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE or
4394 	 *     IB_MGMT_CLASS_SUBN_LID_ROUTED
4395 	 *   - SLID != 0
4396 	 *
4397 	 * However, we know (and so don't need to check again) that,
4398 	 * for local SMPs, the MAD stack passes MADs with:
4399 	 *   - Source QPN of 0
4400 	 *   - MAD mgmt_class is IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE
4401 	 *   - SLID is either: OPA_LID_PERMISSIVE (0xFFFFFFFF), or
4402 	 *     our own port's lid
4403 	 *
4404 	 */
4405 	if (pkey == LIM_MGMT_P_KEY || pkey == FULL_MGMT_P_KEY)
4406 		return 0;
4407 	ingress_pkey_table_fail(ppd, pkey, in_wc->slid);
4408 	return 1;
4409 }
4410 
4411 /**
4412  * hfi1_pkey_validation_pma - It validates PKEYs for incoming PMA MAD packets.
4413  * @ibp: IB port data
4414  * @in_mad: MAD packet with header and data
4415  * @in_wc: Work completion data such as source LID, port number, etc.
4416  *
4417  * These are all the possible logic rules for validating a pkey:
4418  *
4419  * a) If pkey neither FULL_MGMT_P_KEY nor LIM_MGMT_P_KEY,
4420  *    and NOT self-originated packet:
4421  *     Drop MAD packet as it should always be part of the
4422  *     management partition unless it's a self-originated packet.
4423  *
4424  * b) If pkey_index -> FULL_MGMT_P_KEY, and LIM_MGMT_P_KEY in pkey table:
4425  *     The packet is coming from a management node and the receiving node
4426  *     is also a management node, so it is safe for the packet to go through.
4427  *
4428  * c) If pkey_index -> FULL_MGMT_P_KEY, and LIM_MGMT_P_KEY is NOT in pkey table:
4429  *     Drop the packet as LIM_MGMT_P_KEY should always be in the pkey table.
4430  *     It could be an FM misconfiguration.
4431  *
4432  * d) If pkey_index -> LIM_MGMT_P_KEY and FULL_MGMT_P_KEY is NOT in pkey table:
4433  *     It is safe for the packet to go through since a non-management node is
4434  *     talking to another non-management node.
4435  *
4436  * e) If pkey_index -> LIM_MGMT_P_KEY and FULL_MGMT_P_KEY in pkey table:
4437  *     Drop the packet because a non-management node is talking to a
4438  *     management node, and it could be an attack.
4439  *
4440  * For the implementation, these rules can be simplied to only checking
4441  * for (a) and (e). There's no need to check for rule (b) as
4442  * the packet doesn't need to be dropped. Rule (c) is not possible in
4443  * the driver as LIM_MGMT_P_KEY is always in the pkey table.
4444  *
4445  * Return:
4446  * 0 - pkey is okay, -EINVAL it's a bad pkey
4447  */
4448 static int hfi1_pkey_validation_pma(struct hfi1_ibport *ibp,
4449 				    const struct opa_mad *in_mad,
4450 				    const struct ib_wc *in_wc)
4451 {
4452 	u16 pkey_value = hfi1_lookup_pkey_value(ibp, in_wc->pkey_index);
4453 
4454 	/* Rule (a) from above */
4455 	if (!is_local_mad(ibp, in_mad, in_wc) &&
4456 	    pkey_value != LIM_MGMT_P_KEY &&
4457 	    pkey_value != FULL_MGMT_P_KEY)
4458 		return -EINVAL;
4459 
4460 	/* Rule (e) from above */
4461 	if (pkey_value == LIM_MGMT_P_KEY &&
4462 	    is_full_mgmt_pkey_in_table(ibp))
4463 		return -EINVAL;
4464 
4465 	return 0;
4466 }
4467 
4468 static int process_subn_opa(struct ib_device *ibdev, int mad_flags,
4469 			    u32 port, const struct opa_mad *in_mad,
4470 			    struct opa_mad *out_mad,
4471 			    u32 *resp_len, int local_mad)
4472 {
4473 	struct opa_smp *smp = (struct opa_smp *)out_mad;
4474 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
4475 	u8 *data;
4476 	u32 am, data_size;
4477 	__be16 attr_id;
4478 	int ret;
4479 
4480 	*out_mad = *in_mad;
4481 	data = opa_get_smp_data(smp);
4482 	data_size = (u32)opa_get_smp_data_size(smp);
4483 
4484 	am = be32_to_cpu(smp->attr_mod);
4485 	attr_id = smp->attr_id;
4486 	if (smp->class_version != OPA_SM_CLASS_VERSION) {
4487 		smp->status |= IB_SMP_UNSUP_VERSION;
4488 		ret = reply((struct ib_mad_hdr *)smp);
4489 		return ret;
4490 	}
4491 	ret = check_mkey(ibp, (struct ib_mad_hdr *)smp, mad_flags, smp->mkey,
4492 			 smp->route.dr.dr_slid, smp->route.dr.return_path,
4493 			 smp->hop_cnt);
4494 	if (ret) {
4495 		u32 port_num = be32_to_cpu(smp->attr_mod);
4496 
4497 		/*
4498 		 * If this is a get/set portinfo, we already check the
4499 		 * M_Key if the MAD is for another port and the M_Key
4500 		 * is OK on the receiving port. This check is needed
4501 		 * to increment the error counters when the M_Key
4502 		 * fails to match on *both* ports.
4503 		 */
4504 		if (attr_id == IB_SMP_ATTR_PORT_INFO &&
4505 		    (smp->method == IB_MGMT_METHOD_GET ||
4506 		     smp->method == IB_MGMT_METHOD_SET) &&
4507 		    port_num && port_num <= ibdev->phys_port_cnt &&
4508 		    port != port_num)
4509 			(void)check_mkey(to_iport(ibdev, port_num),
4510 					  (struct ib_mad_hdr *)smp, 0,
4511 					  smp->mkey, smp->route.dr.dr_slid,
4512 					  smp->route.dr.return_path,
4513 					  smp->hop_cnt);
4514 		ret = IB_MAD_RESULT_FAILURE;
4515 		return ret;
4516 	}
4517 
4518 	*resp_len = opa_get_smp_header_size(smp);
4519 
4520 	switch (smp->method) {
4521 	case IB_MGMT_METHOD_GET:
4522 		switch (attr_id) {
4523 		default:
4524 			clear_opa_smp_data(smp);
4525 			ret = subn_get_opa_sma(attr_id, smp, am, data,
4526 					       ibdev, port, resp_len,
4527 					       data_size);
4528 			break;
4529 		case OPA_ATTRIB_ID_AGGREGATE:
4530 			ret = subn_get_opa_aggregate(smp, ibdev, port,
4531 						     resp_len);
4532 			break;
4533 		}
4534 		break;
4535 	case IB_MGMT_METHOD_SET:
4536 		switch (attr_id) {
4537 		default:
4538 			ret = subn_set_opa_sma(attr_id, smp, am, data,
4539 					       ibdev, port, resp_len,
4540 					       data_size, local_mad);
4541 			break;
4542 		case OPA_ATTRIB_ID_AGGREGATE:
4543 			ret = subn_set_opa_aggregate(smp, ibdev, port,
4544 						     resp_len, local_mad);
4545 			break;
4546 		}
4547 		break;
4548 	case IB_MGMT_METHOD_TRAP:
4549 	case IB_MGMT_METHOD_REPORT:
4550 	case IB_MGMT_METHOD_REPORT_RESP:
4551 	case IB_MGMT_METHOD_GET_RESP:
4552 		/*
4553 		 * The ib_mad module will call us to process responses
4554 		 * before checking for other consumers.
4555 		 * Just tell the caller to process it normally.
4556 		 */
4557 		ret = IB_MAD_RESULT_SUCCESS;
4558 		break;
4559 	case IB_MGMT_METHOD_TRAP_REPRESS:
4560 		subn_handle_opa_trap_repress(ibp, smp);
4561 		/* Always successful */
4562 		ret = IB_MAD_RESULT_SUCCESS;
4563 		break;
4564 	default:
4565 		smp->status |= IB_SMP_UNSUP_METHOD;
4566 		ret = reply((struct ib_mad_hdr *)smp);
4567 		break;
4568 	}
4569 
4570 	return ret;
4571 }
4572 
4573 static int process_subn(struct ib_device *ibdev, int mad_flags,
4574 			u32 port, const struct ib_mad *in_mad,
4575 			struct ib_mad *out_mad)
4576 {
4577 	struct ib_smp *smp = (struct ib_smp *)out_mad;
4578 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
4579 	int ret;
4580 
4581 	*out_mad = *in_mad;
4582 	if (smp->class_version != 1) {
4583 		smp->status |= IB_SMP_UNSUP_VERSION;
4584 		ret = reply((struct ib_mad_hdr *)smp);
4585 		return ret;
4586 	}
4587 
4588 	ret = check_mkey(ibp, (struct ib_mad_hdr *)smp, mad_flags,
4589 			 smp->mkey, (__force __be32)smp->dr_slid,
4590 			 smp->return_path, smp->hop_cnt);
4591 	if (ret) {
4592 		u32 port_num = be32_to_cpu(smp->attr_mod);
4593 
4594 		/*
4595 		 * If this is a get/set portinfo, we already check the
4596 		 * M_Key if the MAD is for another port and the M_Key
4597 		 * is OK on the receiving port. This check is needed
4598 		 * to increment the error counters when the M_Key
4599 		 * fails to match on *both* ports.
4600 		 */
4601 		if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO &&
4602 		    (smp->method == IB_MGMT_METHOD_GET ||
4603 		     smp->method == IB_MGMT_METHOD_SET) &&
4604 		    port_num && port_num <= ibdev->phys_port_cnt &&
4605 		    port != port_num)
4606 			(void)check_mkey(to_iport(ibdev, port_num),
4607 					 (struct ib_mad_hdr *)smp, 0,
4608 					 smp->mkey,
4609 					 (__force __be32)smp->dr_slid,
4610 					 smp->return_path, smp->hop_cnt);
4611 		ret = IB_MAD_RESULT_FAILURE;
4612 		return ret;
4613 	}
4614 
4615 	switch (smp->method) {
4616 	case IB_MGMT_METHOD_GET:
4617 		switch (smp->attr_id) {
4618 		case IB_SMP_ATTR_NODE_INFO:
4619 			ret = subn_get_nodeinfo(smp, ibdev, port);
4620 			break;
4621 		default:
4622 			smp->status |= IB_SMP_UNSUP_METH_ATTR;
4623 			ret = reply((struct ib_mad_hdr *)smp);
4624 			break;
4625 		}
4626 		break;
4627 	}
4628 
4629 	return ret;
4630 }
4631 
4632 static int process_perf(struct ib_device *ibdev, u32 port,
4633 			const struct ib_mad *in_mad,
4634 			struct ib_mad *out_mad)
4635 {
4636 	struct ib_pma_mad *pmp = (struct ib_pma_mad *)out_mad;
4637 	struct ib_class_port_info *cpi = (struct ib_class_port_info *)
4638 						&pmp->data;
4639 	int ret = IB_MAD_RESULT_FAILURE;
4640 
4641 	*out_mad = *in_mad;
4642 	if (pmp->mad_hdr.class_version != 1) {
4643 		pmp->mad_hdr.status |= IB_SMP_UNSUP_VERSION;
4644 		ret = reply((struct ib_mad_hdr *)pmp);
4645 		return ret;
4646 	}
4647 
4648 	switch (pmp->mad_hdr.method) {
4649 	case IB_MGMT_METHOD_GET:
4650 		switch (pmp->mad_hdr.attr_id) {
4651 		case IB_PMA_PORT_COUNTERS:
4652 			ret = pma_get_ib_portcounters(pmp, ibdev, port);
4653 			break;
4654 		case IB_PMA_PORT_COUNTERS_EXT:
4655 			ret = pma_get_ib_portcounters_ext(pmp, ibdev, port);
4656 			break;
4657 		case IB_PMA_CLASS_PORT_INFO:
4658 			cpi->capability_mask = IB_PMA_CLASS_CAP_EXT_WIDTH;
4659 			ret = reply((struct ib_mad_hdr *)pmp);
4660 			break;
4661 		default:
4662 			pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
4663 			ret = reply((struct ib_mad_hdr *)pmp);
4664 			break;
4665 		}
4666 		break;
4667 
4668 	case IB_MGMT_METHOD_SET:
4669 		if (pmp->mad_hdr.attr_id) {
4670 			pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
4671 			ret = reply((struct ib_mad_hdr *)pmp);
4672 		}
4673 		break;
4674 
4675 	case IB_MGMT_METHOD_TRAP:
4676 	case IB_MGMT_METHOD_GET_RESP:
4677 		/*
4678 		 * The ib_mad module will call us to process responses
4679 		 * before checking for other consumers.
4680 		 * Just tell the caller to process it normally.
4681 		 */
4682 		ret = IB_MAD_RESULT_SUCCESS;
4683 		break;
4684 
4685 	default:
4686 		pmp->mad_hdr.status |= IB_SMP_UNSUP_METHOD;
4687 		ret = reply((struct ib_mad_hdr *)pmp);
4688 		break;
4689 	}
4690 
4691 	return ret;
4692 }
4693 
4694 static int process_perf_opa(struct ib_device *ibdev, u32 port,
4695 			    const struct opa_mad *in_mad,
4696 			    struct opa_mad *out_mad, u32 *resp_len)
4697 {
4698 	struct opa_pma_mad *pmp = (struct opa_pma_mad *)out_mad;
4699 	int ret;
4700 
4701 	*out_mad = *in_mad;
4702 
4703 	if (pmp->mad_hdr.class_version != OPA_SM_CLASS_VERSION) {
4704 		pmp->mad_hdr.status |= IB_SMP_UNSUP_VERSION;
4705 		return reply((struct ib_mad_hdr *)pmp);
4706 	}
4707 
4708 	*resp_len = sizeof(pmp->mad_hdr);
4709 
4710 	switch (pmp->mad_hdr.method) {
4711 	case IB_MGMT_METHOD_GET:
4712 		switch (pmp->mad_hdr.attr_id) {
4713 		case IB_PMA_CLASS_PORT_INFO:
4714 			ret = pma_get_opa_classportinfo(pmp, ibdev, resp_len);
4715 			break;
4716 		case OPA_PM_ATTRIB_ID_PORT_STATUS:
4717 			ret = pma_get_opa_portstatus(pmp, ibdev, port,
4718 						     resp_len);
4719 			break;
4720 		case OPA_PM_ATTRIB_ID_DATA_PORT_COUNTERS:
4721 			ret = pma_get_opa_datacounters(pmp, ibdev, port,
4722 						       resp_len);
4723 			break;
4724 		case OPA_PM_ATTRIB_ID_ERROR_PORT_COUNTERS:
4725 			ret = pma_get_opa_porterrors(pmp, ibdev, port,
4726 						     resp_len);
4727 			break;
4728 		case OPA_PM_ATTRIB_ID_ERROR_INFO:
4729 			ret = pma_get_opa_errorinfo(pmp, ibdev, port,
4730 						    resp_len);
4731 			break;
4732 		default:
4733 			pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
4734 			ret = reply((struct ib_mad_hdr *)pmp);
4735 			break;
4736 		}
4737 		break;
4738 
4739 	case IB_MGMT_METHOD_SET:
4740 		switch (pmp->mad_hdr.attr_id) {
4741 		case OPA_PM_ATTRIB_ID_CLEAR_PORT_STATUS:
4742 			ret = pma_set_opa_portstatus(pmp, ibdev, port,
4743 						     resp_len);
4744 			break;
4745 		case OPA_PM_ATTRIB_ID_ERROR_INFO:
4746 			ret = pma_set_opa_errorinfo(pmp, ibdev, port,
4747 						    resp_len);
4748 			break;
4749 		default:
4750 			pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
4751 			ret = reply((struct ib_mad_hdr *)pmp);
4752 			break;
4753 		}
4754 		break;
4755 
4756 	case IB_MGMT_METHOD_TRAP:
4757 	case IB_MGMT_METHOD_GET_RESP:
4758 		/*
4759 		 * The ib_mad module will call us to process responses
4760 		 * before checking for other consumers.
4761 		 * Just tell the caller to process it normally.
4762 		 */
4763 		ret = IB_MAD_RESULT_SUCCESS;
4764 		break;
4765 
4766 	default:
4767 		pmp->mad_hdr.status |= IB_SMP_UNSUP_METHOD;
4768 		ret = reply((struct ib_mad_hdr *)pmp);
4769 		break;
4770 	}
4771 
4772 	return ret;
4773 }
4774 
4775 static int hfi1_process_opa_mad(struct ib_device *ibdev, int mad_flags,
4776 				u32 port, const struct ib_wc *in_wc,
4777 				const struct ib_grh *in_grh,
4778 				const struct opa_mad *in_mad,
4779 				struct opa_mad *out_mad, size_t *out_mad_size,
4780 				u16 *out_mad_pkey_index)
4781 {
4782 	int ret;
4783 	int pkey_idx;
4784 	int local_mad = 0;
4785 	u32 resp_len = in_wc->byte_len - sizeof(*in_grh);
4786 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
4787 
4788 	pkey_idx = hfi1_lookup_pkey_idx(ibp, LIM_MGMT_P_KEY);
4789 	if (pkey_idx < 0) {
4790 		pr_warn("failed to find limited mgmt pkey, defaulting 0x%x\n",
4791 			hfi1_get_pkey(ibp, 1));
4792 		pkey_idx = 1;
4793 	}
4794 	*out_mad_pkey_index = (u16)pkey_idx;
4795 
4796 	switch (in_mad->mad_hdr.mgmt_class) {
4797 	case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
4798 	case IB_MGMT_CLASS_SUBN_LID_ROUTED:
4799 		local_mad = is_local_mad(ibp, in_mad, in_wc);
4800 		if (local_mad) {
4801 			ret = opa_local_smp_check(ibp, in_wc);
4802 			if (ret)
4803 				return IB_MAD_RESULT_FAILURE;
4804 		}
4805 		ret = process_subn_opa(ibdev, mad_flags, port, in_mad,
4806 				       out_mad, &resp_len, local_mad);
4807 		goto bail;
4808 	case IB_MGMT_CLASS_PERF_MGMT:
4809 		ret = hfi1_pkey_validation_pma(ibp, in_mad, in_wc);
4810 		if (ret)
4811 			return IB_MAD_RESULT_FAILURE;
4812 
4813 		ret = process_perf_opa(ibdev, port, in_mad, out_mad, &resp_len);
4814 		goto bail;
4815 
4816 	default:
4817 		ret = IB_MAD_RESULT_SUCCESS;
4818 	}
4819 
4820 bail:
4821 	if (ret & IB_MAD_RESULT_REPLY)
4822 		*out_mad_size = round_up(resp_len, 8);
4823 	else if (ret & IB_MAD_RESULT_SUCCESS)
4824 		*out_mad_size = in_wc->byte_len - sizeof(struct ib_grh);
4825 
4826 	return ret;
4827 }
4828 
4829 static int hfi1_process_ib_mad(struct ib_device *ibdev, int mad_flags, u32 port,
4830 			       const struct ib_wc *in_wc,
4831 			       const struct ib_grh *in_grh,
4832 			       const struct ib_mad *in_mad,
4833 			       struct ib_mad *out_mad)
4834 {
4835 	int ret;
4836 
4837 	switch (in_mad->mad_hdr.mgmt_class) {
4838 	case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
4839 	case IB_MGMT_CLASS_SUBN_LID_ROUTED:
4840 		ret = process_subn(ibdev, mad_flags, port, in_mad, out_mad);
4841 		break;
4842 	case IB_MGMT_CLASS_PERF_MGMT:
4843 		ret = process_perf(ibdev, port, in_mad, out_mad);
4844 		break;
4845 	default:
4846 		ret = IB_MAD_RESULT_SUCCESS;
4847 		break;
4848 	}
4849 
4850 	return ret;
4851 }
4852 
4853 /**
4854  * hfi1_process_mad - process an incoming MAD packet
4855  * @ibdev: the infiniband device this packet came in on
4856  * @mad_flags: MAD flags
4857  * @port: the port number this packet came in on
4858  * @in_wc: the work completion entry for this packet
4859  * @in_grh: the global route header for this packet
4860  * @in_mad: the incoming MAD
4861  * @out_mad: any outgoing MAD reply
4862  * @out_mad_size: size of the outgoing MAD reply
4863  * @out_mad_pkey_index: used to apss back the packet key index
4864  *
4865  * Returns IB_MAD_RESULT_SUCCESS if this is a MAD that we are not
4866  * interested in processing.
4867  *
4868  * Note that the verbs framework has already done the MAD sanity checks,
4869  * and hop count/pointer updating for IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE
4870  * MADs.
4871  *
4872  * This is called by the ib_mad module.
4873  */
4874 int hfi1_process_mad(struct ib_device *ibdev, int mad_flags, u32 port,
4875 		     const struct ib_wc *in_wc, const struct ib_grh *in_grh,
4876 		     const struct ib_mad *in_mad, struct ib_mad *out_mad,
4877 		     size_t *out_mad_size, u16 *out_mad_pkey_index)
4878 {
4879 	switch (in_mad->mad_hdr.base_version) {
4880 	case OPA_MGMT_BASE_VERSION:
4881 		return hfi1_process_opa_mad(ibdev, mad_flags, port,
4882 					    in_wc, in_grh,
4883 					    (struct opa_mad *)in_mad,
4884 					    (struct opa_mad *)out_mad,
4885 					    out_mad_size,
4886 					    out_mad_pkey_index);
4887 	case IB_MGMT_BASE_VERSION:
4888 		return hfi1_process_ib_mad(ibdev, mad_flags, port, in_wc,
4889 					   in_grh, in_mad, out_mad);
4890 	default:
4891 		break;
4892 	}
4893 
4894 	return IB_MAD_RESULT_FAILURE;
4895 }
4896