xref: /linux/drivers/infiniband/hw/hfi1/mad.c (revision 98838d95075a5295f3478ceba18bcccf472e30f4)
1 /*
2  * Copyright(c) 2015, 2016 Intel Corporation.
3  *
4  * This file is provided under a dual BSD/GPLv2 license.  When using or
5  * redistributing this file, you may do so under either license.
6  *
7  * GPL LICENSE SUMMARY
8  *
9  * This program is free software; you can redistribute it and/or modify
10  * it under the terms of version 2 of the GNU General Public License as
11  * published by the Free Software Foundation.
12  *
13  * This program is distributed in the hope that it will be useful, but
14  * WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16  * General Public License for more details.
17  *
18  * BSD LICENSE
19  *
20  * Redistribution and use in source and binary forms, with or without
21  * modification, are permitted provided that the following conditions
22  * are met:
23  *
24  *  - Redistributions of source code must retain the above copyright
25  *    notice, this list of conditions and the following disclaimer.
26  *  - Redistributions in binary form must reproduce the above copyright
27  *    notice, this list of conditions and the following disclaimer in
28  *    the documentation and/or other materials provided with the
29  *    distribution.
30  *  - Neither the name of Intel Corporation nor the names of its
31  *    contributors may be used to endorse or promote products derived
32  *    from this software without specific prior written permission.
33  *
34  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
35  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
36  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
37  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
38  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
39  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
40  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
41  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
42  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
43  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
44  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
45  *
46  */
47 
48 #include <linux/net.h>
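/*
 * Number of full pkey blocks (each OPA_PARTITION_TABLE_BLK_SIZE u16
 * entries) that fit in the data area of a single directed-route SMP.
 */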
49 #define OPA_NUM_PKEY_BLOCKS_PER_SMP (OPA_SMP_DR_DATA_SIZE \
50 			/ (OPA_PARTITION_TABLE_BLK_SIZE * sizeof(u16)))
51 
52 #include "hfi.h"
53 #include "mad.h"
54 #include "trace.h"
55 #include "qp.h"
56 
57 /* the reset value from the FM should be 0xffff; handle the old 0x0fff too */
58 #define OPA_LINK_WIDTH_RESET_OLD 0x0fff
59 #define OPA_LINK_WIDTH_RESET 0xffff
60 
61 static int reply(struct ib_mad_hdr *smp)
62 {
63 	/*
64 	 * The verbs framework will handle the directed/LID route
65 	 * packet changes.
66 	 */
67 	smp->method = IB_MGMT_METHOD_GET_RESP;
68 	if (smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
69 		smp->status |= IB_SMP_DIRECTION;
70 	return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
71 }
72 
73 static inline void clear_opa_smp_data(struct opa_smp *smp)
74 {
75 	void *data = opa_get_smp_data(smp);
76 	size_t size = opa_get_smp_data_size(smp);
77 
78 	memset(data, 0, size);
79 }
80 
81 void hfi1_event_pkey_change(struct hfi1_devdata *dd, u8 port)
82 {
83 	struct ib_event event;
84 
85 	event.event = IB_EVENT_PKEY_CHANGE;
86 	event.device = &dd->verbs_dev.rdi.ibdev;
87 	event.element.port_num = port;
88 	ib_dispatch_event(&event);
89 }
90 
91 static void send_trap(struct hfi1_ibport *ibp, void *data, unsigned len)
92 {
93 	struct ib_mad_send_buf *send_buf;
94 	struct ib_mad_agent *agent;
95 	struct opa_smp *smp;
96 	int ret;
97 	unsigned long flags;
98 	unsigned long timeout;
99 	int pkey_idx;
100 	u32 qpn = ppd_from_ibp(ibp)->sm_trap_qp;
101 
102 	agent = ibp->rvp.send_agent;
103 	if (!agent)
104 		return;
105 
106 	/* o14-3.2.1: only send traps while the port is ACTIVE */
107 	if (ppd_from_ibp(ibp)->lstate != IB_PORT_ACTIVE)
108 		return;
109 
110 	/* o14-2: don't resend a trap before the previous one times out */
111 	if (ibp->rvp.trap_timeout && time_before(jiffies,
112 						 ibp->rvp.trap_timeout))
113 		return;
114 
115 	pkey_idx = hfi1_lookup_pkey_idx(ibp, LIM_MGMT_P_KEY);
116 	if (pkey_idx < 0) {
117 		pr_warn("%s: failed to find limited mgmt pkey, defaulting to 0x%x\n",
118 			__func__, hfi1_get_pkey(ibp, 1));
119 		pkey_idx = 1;
120 	}
121 
122 	send_buf = ib_create_send_mad(agent, qpn, pkey_idx, 0,
123 				      IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
124 				      GFP_ATOMIC, IB_MGMT_BASE_VERSION);
125 	if (IS_ERR(send_buf))
126 		return;
127 
128 	smp = send_buf->mad;
129 	smp->base_version = OPA_MGMT_BASE_VERSION;
130 	smp->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
131 	smp->class_version = OPA_SMI_CLASS_VERSION;
132 	smp->method = IB_MGMT_METHOD_TRAP;
133 	ibp->rvp.tid++;
134 	smp->tid = cpu_to_be64(ibp->rvp.tid);
135 	smp->attr_id = IB_SMP_ATTR_NOTICE;
136 	/* o14-1: smp->mkey = 0; */
137 	memcpy(smp->route.lid.data, data, len);
138 
139 	spin_lock_irqsave(&ibp->rvp.lock, flags);
140 	if (!ibp->rvp.sm_ah) {
141 		if (ibp->rvp.sm_lid != be16_to_cpu(IB_LID_PERMISSIVE)) {
142 			struct ib_ah *ah;
143 
144 			ah = hfi1_create_qp0_ah(ibp, ibp->rvp.sm_lid);
145 			if (IS_ERR(ah)) {
146 				ret = PTR_ERR(ah);
147 			} else {
148 				send_buf->ah = ah;
149 				ibp->rvp.sm_ah = ibah_to_rvtah(ah);
150 				ret = 0;
151 			}
152 		} else {
153 			ret = -EINVAL;
154 		}
155 	} else {
156 		send_buf->ah = &ibp->rvp.sm_ah->ibah;
157 		ret = 0;
158 	}
159 	spin_unlock_irqrestore(&ibp->rvp.lock, flags);
160 
161 	if (!ret)
162 		ret = ib_post_send_mad(send_buf, NULL);
163 	if (!ret) {
164 		/* back off 4.096 usec * 2^subnet_timeout before the next trap */
165 		timeout = (4096 * (1UL << ibp->rvp.subnet_timeout)) / 1000;
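		/* e.g. a subnet_timeout of 18 backs off for ~1.07 seconds */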
166 		ibp->rvp.trap_timeout = jiffies + usecs_to_jiffies(timeout);
167 	} else {
168 		ib_free_send_mad(send_buf);
169 		ibp->rvp.trap_timeout = 0;
170 	}
171 }
172 
173 /*
174  * Send a bad [PQ]_Key trap (ch. 14.3.8).
175  */
176 void hfi1_bad_pqkey(struct hfi1_ibport *ibp, __be16 trap_num, u32 key, u32 sl,
177 		    u32 qp1, u32 qp2, u16 lid1, u16 lid2)
178 {
179 	struct opa_mad_notice_attr data;
180 	u32 lid = ppd_from_ibp(ibp)->lid;
181 	u32 _lid1 = lid1;
182 	u32 _lid2 = lid2;
183 
184 	memset(&data, 0, sizeof(data));
185 
186 	if (trap_num == OPA_TRAP_BAD_P_KEY)
187 		ibp->rvp.pkey_violations++;
188 	else
189 		ibp->rvp.qkey_violations++;
190 	ibp->rvp.n_pkt_drops++;
191 
192 	/* Send violation trap */
193 	data.generic_type = IB_NOTICE_TYPE_SECURITY;
194 	data.prod_type_lsb = IB_NOTICE_PROD_CA;
195 	data.trap_num = trap_num;
196 	data.issuer_lid = cpu_to_be32(lid);
197 	data.ntc_257_258.lid1 = cpu_to_be32(_lid1);
198 	data.ntc_257_258.lid2 = cpu_to_be32(_lid2);
199 	data.ntc_257_258.key = cpu_to_be32(key);
200 	data.ntc_257_258.sl = sl << 3;
201 	data.ntc_257_258.qp1 = cpu_to_be32(qp1);
202 	data.ntc_257_258.qp2 = cpu_to_be32(qp2);
203 
204 	send_trap(ibp, &data, sizeof(data));
205 }
206 
207 /*
208  * Send a bad M_Key trap (ch. 14.3.9).
209  */
210 static void bad_mkey(struct hfi1_ibport *ibp, struct ib_mad_hdr *mad,
211 		     __be64 mkey, __be32 dr_slid, u8 return_path[], u8 hop_cnt)
212 {
213 	struct opa_mad_notice_attr data;
214 	u32 lid = ppd_from_ibp(ibp)->lid;
215 
216 	memset(&data, 0, sizeof(data));
217 	/* Send violation trap */
218 	data.generic_type = IB_NOTICE_TYPE_SECURITY;
219 	data.prod_type_lsb = IB_NOTICE_PROD_CA;
220 	data.trap_num = OPA_TRAP_BAD_M_KEY;
221 	data.issuer_lid = cpu_to_be32(lid);
222 	data.ntc_256.lid = data.issuer_lid;
223 	data.ntc_256.method = mad->method;
224 	data.ntc_256.attr_id = mad->attr_id;
225 	data.ntc_256.attr_mod = mad->attr_mod;
226 	data.ntc_256.mkey = mkey;
227 	if (mad->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
228 		data.ntc_256.dr_slid = dr_slid;
229 		data.ntc_256.dr_trunc_hop = IB_NOTICE_TRAP_DR_NOTICE;
230 		if (hop_cnt > ARRAY_SIZE(data.ntc_256.dr_rtn_path)) {
231 			data.ntc_256.dr_trunc_hop |=
232 				IB_NOTICE_TRAP_DR_TRUNC;
233 			hop_cnt = ARRAY_SIZE(data.ntc_256.dr_rtn_path);
234 		}
235 		data.ntc_256.dr_trunc_hop |= hop_cnt;
236 		memcpy(data.ntc_256.dr_rtn_path, return_path,
237 		       hop_cnt);
238 	}
239 
240 	send_trap(ibp, &data, sizeof(data));
241 }
242 
243 /*
244  * Send a Port Capability Mask Changed trap (ch. 14.3.11).
245  */
246 void hfi1_cap_mask_chg(struct rvt_dev_info *rdi, u8 port_num)
247 {
248 	struct opa_mad_notice_attr data;
249 	struct hfi1_ibdev *verbs_dev = dev_from_rdi(rdi);
250 	struct hfi1_devdata *dd = dd_from_dev(verbs_dev);
251 	struct hfi1_ibport *ibp = &dd->pport[port_num - 1].ibport_data;
252 	u32 lid = ppd_from_ibp(ibp)->lid;
253 
254 	memset(&data, 0, sizeof(data));
255 
256 	data.generic_type = IB_NOTICE_TYPE_INFO;
257 	data.prod_type_lsb = IB_NOTICE_PROD_CA;
258 	data.trap_num = OPA_TRAP_CHANGE_CAPABILITY;
259 	data.issuer_lid = cpu_to_be32(lid);
260 	data.ntc_144.lid = data.issuer_lid;
261 	data.ntc_144.new_cap_mask = cpu_to_be32(ibp->rvp.port_cap_flags);
262 
263 	send_trap(ibp, &data, sizeof(data));
264 }
265 
266 /*
267  * Send a System Image GUID Changed trap (ch. 14.3.12).
268  */
269 void hfi1_sys_guid_chg(struct hfi1_ibport *ibp)
270 {
271 	struct opa_mad_notice_attr data;
272 	u32 lid = ppd_from_ibp(ibp)->lid;
273 
274 	memset(&data, 0, sizeof(data));
275 
276 	data.generic_type = IB_NOTICE_TYPE_INFO;
277 	data.prod_type_lsb = IB_NOTICE_PROD_CA;
278 	data.trap_num = OPA_TRAP_CHANGE_SYSGUID;
279 	data.issuer_lid = cpu_to_be32(lid);
280 	data.ntc_145.new_sys_guid = ib_hfi1_sys_image_guid;
281 	data.ntc_145.lid = data.issuer_lid;
282 
283 	send_trap(ibp, &data, sizeof(data));
284 }
285 
286 /*
287  * Send a Node Description Changed trap (ch. 14.3.13).
288  */
289 void hfi1_node_desc_chg(struct hfi1_ibport *ibp)
290 {
291 	struct opa_mad_notice_attr data;
292 	u32 lid = ppd_from_ibp(ibp)->lid;
293 
294 	memset(&data, 0, sizeof(data));
295 
296 	data.generic_type = IB_NOTICE_TYPE_INFO;
297 	data.prod_type_lsb = IB_NOTICE_PROD_CA;
298 	data.trap_num = OPA_TRAP_CHANGE_CAPABILITY;
299 	data.issuer_lid = cpu_to_be32(lid);
300 	data.ntc_144.lid = data.issuer_lid;
301 	data.ntc_144.change_flags =
302 		cpu_to_be16(OPA_NOTICE_TRAP_NODE_DESC_CHG);
303 
304 	send_trap(ibp, &data, sizeof(data));
305 }
306 
307 static int __subn_get_opa_nodedesc(struct opa_smp *smp, u32 am,
308 				   u8 *data, struct ib_device *ibdev,
309 				   u8 port, u32 *resp_len)
310 {
311 	struct opa_node_description *nd;
312 
313 	if (am) {
314 		smp->status |= IB_SMP_INVALID_FIELD;
315 		return reply((struct ib_mad_hdr *)smp);
316 	}
317 
318 	nd = (struct opa_node_description *)data;
319 
320 	memcpy(nd->data, ibdev->node_desc, sizeof(nd->data));
321 
322 	if (resp_len)
323 		*resp_len += sizeof(*nd);
324 
325 	return reply((struct ib_mad_hdr *)smp);
326 }
327 
328 static int __subn_get_opa_nodeinfo(struct opa_smp *smp, u32 am, u8 *data,
329 				   struct ib_device *ibdev, u8 port,
330 				   u32 *resp_len)
331 {
332 	struct opa_node_info *ni;
333 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
334 	unsigned pidx = port - 1; /* IB number port from 1, hw from 0 */
335 
336 	ni = (struct opa_node_info *)data;
337 
338 	/* GUID 0 is illegal */
339 	if (am || pidx >= dd->num_pports || dd->pport[pidx].guid == 0) {
340 		smp->status |= IB_SMP_INVALID_FIELD;
341 		return reply((struct ib_mad_hdr *)smp);
342 	}
343 
344 	ni->port_guid = cpu_to_be64(dd->pport[pidx].guid);
345 	ni->base_version = OPA_MGMT_BASE_VERSION;
346 	ni->class_version = OPA_SMI_CLASS_VERSION;
347 	ni->node_type = 1;     /* channel adapter */
348 	ni->num_ports = ibdev->phys_port_cnt;
349 	/* This is already in network order */
350 	ni->system_image_guid = ib_hfi1_sys_image_guid;
351 	/* Use first-port GUID as node GUID */
352 	ni->node_guid = cpu_to_be64(dd->pport->guid);
353 	ni->partition_cap = cpu_to_be16(hfi1_get_npkeys(dd));
354 	ni->device_id = cpu_to_be16(dd->pcidev->device);
355 	ni->revision = cpu_to_be32(dd->minrev);
356 	ni->local_port_num = port;
357 	ni->vendor_id[0] = dd->oui1;
358 	ni->vendor_id[1] = dd->oui2;
359 	ni->vendor_id[2] = dd->oui3;
360 
361 	if (resp_len)
362 		*resp_len += sizeof(*ni);
363 
364 	return reply((struct ib_mad_hdr *)smp);
365 }
366 
367 static int subn_get_nodeinfo(struct ib_smp *smp, struct ib_device *ibdev,
368 			     u8 port)
369 {
370 	struct ib_node_info *nip = (struct ib_node_info *)&smp->data;
371 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
372 	unsigned pidx = port - 1; /* IB number port from 1, hw from 0 */
373 
374 	/* GUID 0 is illegal */
375 	if (smp->attr_mod || pidx >= dd->num_pports ||
376 	    dd->pport[pidx].guid == 0)
377 		smp->status |= IB_SMP_INVALID_FIELD;
378 	else
379 		nip->port_guid = cpu_to_be64(dd->pport[pidx].guid);
380 
381 	nip->base_version = OPA_MGMT_BASE_VERSION;
382 	nip->class_version = OPA_SMI_CLASS_VERSION;
383 	nip->node_type = 1;     /* channel adapter */
384 	nip->num_ports = ibdev->phys_port_cnt;
385 	/* This is already in network order */
386 	nip->sys_guid = ib_hfi1_sys_image_guid;
387 	/* Use first-port GUID as node GUID */
388 	nip->node_guid = cpu_to_be64(dd->pport->guid);
389 	nip->partition_cap = cpu_to_be16(hfi1_get_npkeys(dd));
390 	nip->device_id = cpu_to_be16(dd->pcidev->device);
391 	nip->revision = cpu_to_be32(dd->minrev);
392 	nip->local_port_num = port;
393 	nip->vendor_id[0] = dd->oui1;
394 	nip->vendor_id[1] = dd->oui2;
395 	nip->vendor_id[2] = dd->oui3;
396 
397 	return reply((struct ib_mad_hdr *)smp);
398 }
399 
400 static void set_link_width_enabled(struct hfi1_pportdata *ppd, u32 w)
401 {
402 	(void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_LWID_ENB, w);
403 }
404 
405 static void set_link_width_downgrade_enabled(struct hfi1_pportdata *ppd, u32 w)
406 {
407 	(void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_LWID_DG_ENB, w);
408 }
409 
410 static void set_link_speed_enabled(struct hfi1_pportdata *ppd, u32 s)
411 {
412 	(void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_SPD_ENB, s);
413 }
414 
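/*
 * Returns 0 if the MAD passes the M_Key check and may be processed
 * further, 1 if it must be dropped (a bad-mkey trap may have been sent).
 */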
415 static int check_mkey(struct hfi1_ibport *ibp, struct ib_mad_hdr *mad,
416 		      int mad_flags, __be64 mkey, __be32 dr_slid,
417 		      u8 return_path[], u8 hop_cnt)
418 {
419 	int valid_mkey = 0;
420 	int ret = 0;
421 
422 	/* Is the mkey in the process of expiring? */
423 	if (ibp->rvp.mkey_lease_timeout &&
424 	    time_after_eq(jiffies, ibp->rvp.mkey_lease_timeout)) {
425 		/* Clear timeout and mkey protection field. */
426 		ibp->rvp.mkey_lease_timeout = 0;
427 		ibp->rvp.mkeyprot = 0;
428 	}
429 
430 	if ((mad_flags & IB_MAD_IGNORE_MKEY) ||  ibp->rvp.mkey == 0 ||
431 	    ibp->rvp.mkey == mkey)
432 		valid_mkey = 1;
433 
434 	/* Unset lease timeout on any valid Get/Set/TrapRepress */
435 	if (valid_mkey && ibp->rvp.mkey_lease_timeout &&
436 	    (mad->method == IB_MGMT_METHOD_GET ||
437 	     mad->method == IB_MGMT_METHOD_SET ||
438 	     mad->method == IB_MGMT_METHOD_TRAP_REPRESS))
439 		ibp->rvp.mkey_lease_timeout = 0;
440 
441 	if (!valid_mkey) {
442 		switch (mad->method) {
443 		case IB_MGMT_METHOD_GET:
444 			/* Bad mkey not a violation below level 2 */
445 			if (ibp->rvp.mkeyprot < 2)
446 				break;
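			/* fall through */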
447 		case IB_MGMT_METHOD_SET:
448 		case IB_MGMT_METHOD_TRAP_REPRESS:
449 			if (ibp->rvp.mkey_violations != 0xFFFF)
450 				++ibp->rvp.mkey_violations;
451 			if (!ibp->rvp.mkey_lease_timeout &&
452 			    ibp->rvp.mkey_lease_period)
453 				ibp->rvp.mkey_lease_timeout = jiffies +
454 					ibp->rvp.mkey_lease_period * HZ;
455 			/* Generate a trap notice. */
456 			bad_mkey(ibp, mad, mkey, dr_slid, return_path,
457 				 hop_cnt);
458 			ret = 1;
459 		}
460 	}
461 
462 	return ret;
463 }
464 
465 /*
466  * The SMA caches reads from LCB registers in case the LCB is unavailable.
467  * (The LCB is unavailable in certain link states, for example.)
468  */
469 struct lcb_datum {
470 	u32 off;
471 	u64 val;
472 };
473 
474 static struct lcb_datum lcb_cache[] = {
475 	{ DC_LCB_STS_ROUND_TRIP_LTP_CNT, 0 },
476 };
477 
478 static int write_lcb_cache(u32 off, u64 val)
479 {
480 	int i;
481 
482 	for (i = 0; i < ARRAY_SIZE(lcb_cache); i++) {
483 		if (lcb_cache[i].off == off) {
484 			lcb_cache[i].val = val;
485 			return 0;
486 		}
487 	}
488 
489 	pr_warn("%s bad offset 0x%x\n", __func__, off);
490 	return -1;
491 }
492 
493 static int read_lcb_cache(u32 off, u64 *val)
494 {
495 	int i;
496 
497 	for (i = 0; i < ARRAY_SIZE(lcb_cache); i++) {
498 		if (lcb_cache[i].off == off) {
499 			*val = lcb_cache[i].val;
500 			return 0;
501 		}
502 	}
503 
504 	pr_warn("%s bad offset 0x%x\n", __func__, off);
505 	return -1;
506 }
507 
508 void read_ltp_rtt(struct hfi1_devdata *dd)
509 {
510 	u64 reg;
511 
512 	if (read_lcb_csr(dd, DC_LCB_STS_ROUND_TRIP_LTP_CNT, &reg))
513 		dd_dev_err(dd, "%s: unable to read LTP RTT\n", __func__);
514 	else
515 		write_lcb_cache(DC_LCB_STS_ROUND_TRIP_LTP_CNT, reg);
516 }
517 
518 static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
519 				   struct ib_device *ibdev, u8 port,
520 				   u32 *resp_len)
521 {
522 	int i;
523 	struct hfi1_devdata *dd;
524 	struct hfi1_pportdata *ppd;
525 	struct hfi1_ibport *ibp;
526 	struct opa_port_info *pi = (struct opa_port_info *)data;
527 	u8 mtu;
528 	u8 credit_rate;
529 	u8 is_beaconing_active;
530 	u32 state;
531 	u32 num_ports = OPA_AM_NPORT(am);
532 	u32 start_of_sm_config = OPA_AM_START_SM_CFG(am);
533 	u32 buffer_units;
534 	u64 tmp = 0;
535 
536 	if (num_ports != 1) {
537 		smp->status |= IB_SMP_INVALID_FIELD;
538 		return reply((struct ib_mad_hdr *)smp);
539 	}
540 
541 	dd = dd_from_ibdev(ibdev);
542 	/* IB numbers ports from 1, hw from 0 */
543 	ppd = dd->pport + (port - 1);
544 	ibp = &ppd->ibport_data;
545 
546 	if (ppd->vls_supported / 2 > ARRAY_SIZE(pi->neigh_mtu.pvlx_to_mtu) ||
547 	    ppd->vls_supported > ARRAY_SIZE(dd->vld)) {
548 		smp->status |= IB_SMP_INVALID_FIELD;
549 		return reply((struct ib_mad_hdr *)smp);
550 	}
551 
552 	pi->lid = cpu_to_be32(ppd->lid);
553 
554 	/* Only return the mkey if the protection field allows it. */
555 	if (!(smp->method == IB_MGMT_METHOD_GET &&
556 	      ibp->rvp.mkey != smp->mkey &&
557 	      ibp->rvp.mkeyprot == 1))
558 		pi->mkey = ibp->rvp.mkey;
559 
560 	pi->subnet_prefix = ibp->rvp.gid_prefix;
561 	pi->sm_lid = cpu_to_be32(ibp->rvp.sm_lid);
562 	pi->ib_cap_mask = cpu_to_be32(ibp->rvp.port_cap_flags);
563 	pi->mkey_lease_period = cpu_to_be16(ibp->rvp.mkey_lease_period);
564 	pi->sm_trap_qp = cpu_to_be32(ppd->sm_trap_qp);
565 	pi->sa_qp = cpu_to_be32(ppd->sa_qp);
566 
567 	pi->link_width.enabled = cpu_to_be16(ppd->link_width_enabled);
568 	pi->link_width.supported = cpu_to_be16(ppd->link_width_supported);
569 	pi->link_width.active = cpu_to_be16(ppd->link_width_active);
570 
571 	pi->link_width_downgrade.supported =
572 			cpu_to_be16(ppd->link_width_downgrade_supported);
573 	pi->link_width_downgrade.enabled =
574 			cpu_to_be16(ppd->link_width_downgrade_enabled);
575 	pi->link_width_downgrade.tx_active =
576 			cpu_to_be16(ppd->link_width_downgrade_tx_active);
577 	pi->link_width_downgrade.rx_active =
578 			cpu_to_be16(ppd->link_width_downgrade_rx_active);
579 
580 	pi->link_speed.supported = cpu_to_be16(ppd->link_speed_supported);
581 	pi->link_speed.active = cpu_to_be16(ppd->link_speed_active);
582 	pi->link_speed.enabled = cpu_to_be16(ppd->link_speed_enabled);
583 
584 	state = driver_lstate(ppd);
585 
586 	if (start_of_sm_config && (state == IB_PORT_INIT))
587 		ppd->is_sm_config_started = 1;
588 
589 	pi->port_phys_conf = (ppd->port_type & 0xf);
590 
591 	pi->port_states.ledenable_offlinereason = ppd->neighbor_normal << 4;
592 	pi->port_states.ledenable_offlinereason |=
593 		ppd->is_sm_config_started << 5;
594 	/*
595 	 * This pairs with the memory barrier in hfi1_start_led_override to
596 	 * ensure that we read the correct state of LED beaconing represented
597 	 * by led_override_timer_active
598 	 */
599 	smp_rmb();
600 	is_beaconing_active = !!atomic_read(&ppd->led_override_timer_active);
601 	pi->port_states.ledenable_offlinereason |= is_beaconing_active << 6;
602 	pi->port_states.ledenable_offlinereason |=
603 		ppd->offline_disabled_reason;
604 
605 	pi->port_states.portphysstate_portstate =
606 		(hfi1_ibphys_portstate(ppd) << 4) | state;
607 
608 	pi->mkeyprotect_lmc = (ibp->rvp.mkeyprot << 6) | ppd->lmc;
609 
610 	memset(pi->neigh_mtu.pvlx_to_mtu, 0, sizeof(pi->neigh_mtu.pvlx_to_mtu));
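	/* two VLs per byte: even VL in the high nibble, odd VL in the low */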
611 	for (i = 0; i < ppd->vls_supported; i++) {
612 		mtu = mtu_to_enum(dd->vld[i].mtu, HFI1_DEFAULT_ACTIVE_MTU);
613 		if ((i % 2) == 0)
614 			pi->neigh_mtu.pvlx_to_mtu[i / 2] |= (mtu << 4);
615 		else
616 			pi->neigh_mtu.pvlx_to_mtu[i / 2] |= mtu;
617 	}
618 	/* don't forget VL 15 */
619 	mtu = mtu_to_enum(dd->vld[15].mtu, 2048);
620 	pi->neigh_mtu.pvlx_to_mtu[15 / 2] |= mtu;
621 	pi->smsl = ibp->rvp.sm_sl & OPA_PI_MASK_SMSL;
622 	pi->operational_vls = hfi1_get_ib_cfg(ppd, HFI1_IB_CFG_OP_VLS);
623 	pi->partenforce_filterraw |=
624 		(ppd->linkinit_reason & OPA_PI_MASK_LINKINIT_REASON);
625 	if (ppd->part_enforce & HFI1_PART_ENFORCE_IN)
626 		pi->partenforce_filterraw |= OPA_PI_MASK_PARTITION_ENFORCE_IN;
627 	if (ppd->part_enforce & HFI1_PART_ENFORCE_OUT)
628 		pi->partenforce_filterraw |= OPA_PI_MASK_PARTITION_ENFORCE_OUT;
629 	pi->mkey_violations = cpu_to_be16(ibp->rvp.mkey_violations);
630 	/* P_KeyViolations are counted by hardware. */
631 	pi->pkey_violations = cpu_to_be16(ibp->rvp.pkey_violations);
632 	pi->qkey_violations = cpu_to_be16(ibp->rvp.qkey_violations);
633 
634 	pi->vl.cap = ppd->vls_supported;
635 	pi->vl.high_limit = cpu_to_be16(ibp->rvp.vl_high_limit);
636 	pi->vl.arb_high_cap = (u8)hfi1_get_ib_cfg(ppd, HFI1_IB_CFG_VL_HIGH_CAP);
637 	pi->vl.arb_low_cap = (u8)hfi1_get_ib_cfg(ppd, HFI1_IB_CFG_VL_LOW_CAP);
638 
639 	pi->clientrereg_subnettimeout = ibp->rvp.subnet_timeout;
640 
641 	pi->port_link_mode  = cpu_to_be16(OPA_PORT_LINK_MODE_OPA << 10 |
642 					  OPA_PORT_LINK_MODE_OPA << 5 |
643 					  OPA_PORT_LINK_MODE_OPA);
644 
645 	pi->port_ltp_crc_mode = cpu_to_be16(ppd->port_ltp_crc_mode);
646 
647 	pi->port_mode = cpu_to_be16(
648 				ppd->is_active_optimize_enabled ?
649 					OPA_PI_MASK_PORT_ACTIVE_OPTOMIZE : 0);
650 
651 	pi->port_packet_format.supported =
652 		cpu_to_be16(OPA_PORT_PACKET_FORMAT_9B);
653 	pi->port_packet_format.enabled =
654 		cpu_to_be16(OPA_PORT_PACKET_FORMAT_9B);
655 
656 	/* flit_control.interleave is (OPA V1, version .76):
657 	 * bits		use
658 	 * ----		---
659 	 * 2		res
660 	 * 2		DistanceSupported
661 	 * 2		DistanceEnabled
662 	 * 5		MaxNestLevelTxEnabled
663 	 * 5		MaxNestLevelRxSupported
664 	 *
665 	 * HFI supports only "distance mode 1" (see OPA V1, version .76,
666 	 * section 9.6.2), so set DistanceSupported, DistanceEnabled
667 	 * to 0x1.
668 	 */
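	/*
	 * With that layout, DistanceSupported and DistanceEnabled of 0x1
	 * land in bits 13:12 and 11:10: (0x1 << 12) | (0x1 << 10) == 0x1400.
	 */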
669 	pi->flit_control.interleave = cpu_to_be16(0x1400);
670 
671 	pi->link_down_reason = ppd->local_link_down_reason.sma;
672 	pi->neigh_link_down_reason = ppd->neigh_link_down_reason.sma;
673 	pi->port_error_action = cpu_to_be32(ppd->port_error_action);
674 	pi->mtucap = mtu_to_enum(hfi1_max_mtu, IB_MTU_4096);
675 
676 	/* 32.768 usec. response time (guessing) */
677 	pi->resptimevalue = 3;
678 
679 	pi->local_port_num = port;
680 
681 	/* buffer info for FM */
682 	pi->overall_buffer_space = cpu_to_be16(dd->link_credits);
683 
684 	pi->neigh_node_guid = cpu_to_be64(ppd->neighbor_guid);
685 	pi->neigh_port_num = ppd->neighbor_port_number;
686 	pi->port_neigh_mode =
687 		(ppd->neighbor_type & OPA_PI_MASK_NEIGH_NODE_TYPE) |
688 		(ppd->mgmt_allowed ? OPA_PI_MASK_NEIGH_MGMT_ALLOWED : 0) |
689 		(ppd->neighbor_fm_security ?
690 			OPA_PI_MASK_NEIGH_FW_AUTH_BYPASS : 0);
691 
692 	/* HFIs shall always return VL15 credits to their
693 	 * neighbor in a timely manner, without any credit return pacing.
694 	 */
695 	credit_rate = 0;
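	/*
	 * buffer_units field offsets follow the shifts below: vAU at bit 0,
	 * vCU at bit 3, the VL15 credit rate at bit 6 and vl15_init at
	 * bit 11, each masked to its OPA_PI_MASK_BUF_UNIT_* width.
	 */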
696 	buffer_units  = (dd->vau) & OPA_PI_MASK_BUF_UNIT_BUF_ALLOC;
697 	buffer_units |= (dd->vcu << 3) & OPA_PI_MASK_BUF_UNIT_CREDIT_ACK;
698 	buffer_units |= (credit_rate << 6) &
699 				OPA_PI_MASK_BUF_UNIT_VL15_CREDIT_RATE;
700 	buffer_units |= (dd->vl15_init << 11) & OPA_PI_MASK_BUF_UNIT_VL15_INIT;
701 	pi->buffer_units = cpu_to_be32(buffer_units);
702 
703 	pi->opa_cap_mask = cpu_to_be16(OPA_CAP_MASK3_IsSharedSpaceSupported);
704 
705 	/* HFI supports a replay buffer 128 LTPs in size */
706 	pi->replay_depth.buffer = 0x80;
707 	/* read the cached value of DC_LCB_STS_ROUND_TRIP_LTP_CNT */
708 	read_lcb_cache(DC_LCB_STS_ROUND_TRIP_LTP_CNT, &tmp);
709 
710 	/*
711 	 * this counter is 16 bits wide, but the replay_depth.wire
712 	 * variable is only 8 bits
713 	 */
714 	if (tmp > 0xff)
715 		tmp = 0xff;
716 	pi->replay_depth.wire = tmp;
717 
718 	if (resp_len)
719 		*resp_len += sizeof(struct opa_port_info);
720 
721 	return reply((struct ib_mad_hdr *)smp);
722 }
723 
724 /**
725  * get_pkeys - return the PKEY table
726  * @dd: the hfi1_ib device
727  * @port: the IB port number
728  * @pkeys: the pkey table is placed here
729  */
730 static int get_pkeys(struct hfi1_devdata *dd, u8 port, u16 *pkeys)
731 {
732 	struct hfi1_pportdata *ppd = dd->pport + port - 1;
733 
734 	memcpy(pkeys, ppd->pkeys, sizeof(ppd->pkeys));
735 
736 	return 0;
737 }
738 
739 static int __subn_get_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data,
740 				    struct ib_device *ibdev, u8 port,
741 				    u32 *resp_len)
742 {
743 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
744 	u32 n_blocks_req = OPA_AM_NBLK(am);
745 	u32 start_block = am & 0x7ff;
746 	__be16 *p;
747 	u16 *q;
748 	int i;
749 	u16 n_blocks_avail;
750 	unsigned npkeys = hfi1_get_npkeys(dd);
751 	size_t size;
752 
753 	if (n_blocks_req == 0) {
754 		pr_warn("OPA Get PKey AM Invalid : P = %d; B = 0x%x; N = 0x%x\n",
755 			port, start_block, n_blocks_req);
756 		smp->status |= IB_SMP_INVALID_FIELD;
757 		return reply((struct ib_mad_hdr *)smp);
758 	}
759 
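	/* round up: a partial trailing block still counts as available */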
760 	n_blocks_avail = (u16)(npkeys / OPA_PARTITION_TABLE_BLK_SIZE) + 1;
761 
762 	size = (n_blocks_req * OPA_PARTITION_TABLE_BLK_SIZE) * sizeof(u16);
763 
764 	if (start_block + n_blocks_req > n_blocks_avail ||
765 	    n_blocks_req > OPA_NUM_PKEY_BLOCKS_PER_SMP) {
766 		pr_warn("OPA Get PKey AM Invalid : s 0x%x; req 0x%x; avail 0x%x; blk/smp 0x%lx\n",
768 			start_block, n_blocks_req, n_blocks_avail,
769 			OPA_NUM_PKEY_BLOCKS_PER_SMP);
770 		smp->status |= IB_SMP_INVALID_FIELD;
771 		return reply((struct ib_mad_hdr *)smp);
772 	}
773 
774 	p = (__be16 *)data;
775 	q = (u16 *)data;
776 	/* get the real pkeys if we are requesting the first block */
777 	if (start_block == 0) {
778 		get_pkeys(dd, port, q);
779 		for (i = 0; i < npkeys; i++)
780 			p[i] = cpu_to_be16(q[i]);
781 		if (resp_len)
782 			*resp_len += size;
783 	} else {
784 		smp->status |= IB_SMP_INVALID_FIELD;
785 	}
786 	return reply((struct ib_mad_hdr *)smp);
787 }
788 
789 enum {
790 	HFI_TRANSITION_DISALLOWED,
791 	HFI_TRANSITION_IGNORED,
792 	HFI_TRANSITION_ALLOWED,
793 	HFI_TRANSITION_UNDEFINED,
794 };
795 
796 /*
797  * Use shortened names to improve readability of
798  * {logical,physical}_state_transitions
799  */
800 enum {
801 	__D = HFI_TRANSITION_DISALLOWED,
802 	__I = HFI_TRANSITION_IGNORED,
803 	__A = HFI_TRANSITION_ALLOWED,
804 	__U = HFI_TRANSITION_UNDEFINED,
805 };
806 
807 /*
808  * IB_PORTPHYSSTATE_POLLING (2) through OPA_PORTPHYSSTATE_MAX (11) are
809  * represented in physical_state_transitions.
810  */
811 #define __N_PHYSTATES (OPA_PORTPHYSSTATE_MAX - IB_PORTPHYSSTATE_POLLING + 1)
812 
813 /*
814  * Within physical_state_transitions, rows represent "old" states,
815  * columns "new" states, and physical_state_transitions.allowed[old][new]
816  * indicates if the transition from old state to new state is legal (see
817  * OPAg1v1, Table 6-4).
818  */
819 static const struct {
820 	u8 allowed[__N_PHYSTATES][__N_PHYSTATES];
821 } physical_state_transitions = {
822 	{
823 		/* 2    3    4    5    6    7    8    9   10   11 */
824 	/* 2 */	{ __A, __A, __D, __D, __D, __D, __D, __D, __D, __D },
825 	/* 3 */	{ __A, __I, __D, __D, __D, __D, __D, __D, __D, __A },
826 	/* 4 */	{ __U, __U, __U, __U, __U, __U, __U, __U, __U, __U },
827 	/* 5 */	{ __A, __A, __D, __I, __D, __D, __D, __D, __D, __D },
828 	/* 6 */	{ __U, __U, __U, __U, __U, __U, __U, __U, __U, __U },
829 	/* 7 */	{ __D, __A, __D, __D, __D, __I, __D, __D, __D, __D },
830 	/* 8 */	{ __U, __U, __U, __U, __U, __U, __U, __U, __U, __U },
831 	/* 9 */	{ __I, __A, __D, __D, __D, __D, __D, __I, __D, __D },
832 	/*10 */	{ __U, __U, __U, __U, __U, __U, __U, __U, __U, __U },
833 	/*11 */	{ __D, __A, __D, __D, __D, __D, __D, __D, __D, __I },
834 	}
835 };
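
/*
 * Rows and columns are offset by IB_PORTPHYSSTATE_POLLING (2): e.g. a
 * request to move from Disabled (3) to state 11 reads allowed[1][9],
 * which is __A (allowed).
 */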
836 
837 /*
838  * IB_PORT_DOWN (1) through IB_PORT_ACTIVE_DEFER (5) are represented
839  * in logical_state_transitions.
840  */
841 
842 #define __N_LOGICAL_STATES (IB_PORT_ACTIVE_DEFER - IB_PORT_DOWN + 1)
843 
844 /*
845  * Within logical_state_transitions rows represent "old" states,
846  * columns "new" states, and logical_state_transitions.allowed[old][new]
847  * indicates if the transition from old state to new state is legal (see
848  * OPAg1v1, Table 9-12).
849  */
850 static const struct {
851 	u8 allowed[__N_LOGICAL_STATES][__N_LOGICAL_STATES];
852 } logical_state_transitions = {
853 	{
854 		/* 1    2    3    4    5 */
855 	/* 1 */	{ __I, __D, __D, __D, __U},
856 	/* 2 */	{ __D, __I, __A, __D, __U},
857 	/* 3 */	{ __D, __D, __I, __A, __U},
858 	/* 4 */	{ __D, __D, __I, __I, __U},
859 	/* 5 */	{ __U, __U, __U, __U, __U},
860 	}
861 };
862 
863 static int logical_transition_allowed(int old, int new)
864 {
865 	if (old < IB_PORT_NOP || old > IB_PORT_ACTIVE_DEFER ||
866 	    new < IB_PORT_NOP || new > IB_PORT_ACTIVE_DEFER) {
867 		pr_warn("invalid logical state(s) (old %d new %d)\n",
868 			old, new);
869 		return HFI_TRANSITION_UNDEFINED;
870 	}
871 
872 	if (new == IB_PORT_NOP)
873 		return HFI_TRANSITION_ALLOWED; /* always allowed */
874 
875 	/* adjust states for indexing into logical_state_transitions */
876 	old -= IB_PORT_DOWN;
877 	new -= IB_PORT_DOWN;
878 
879 	if (old < 0 || new < 0)
880 		return HFI_TRANSITION_UNDEFINED;
881 	return logical_state_transitions.allowed[old][new];
882 }
883 
884 static int physical_transition_allowed(int old, int new)
885 {
886 	if (old < IB_PORTPHYSSTATE_NOP || old > OPA_PORTPHYSSTATE_MAX ||
887 	    new < IB_PORTPHYSSTATE_NOP || new > OPA_PORTPHYSSTATE_MAX) {
888 		pr_warn("invalid physical state(s) (old %d new %d)\n",
889 			old, new);
890 		return HFI_TRANSITION_UNDEFINED;
891 	}
892 
893 	if (new == IB_PORTPHYSSTATE_NOP)
894 		return HFI_TRANSITION_ALLOWED; /* always allowed */
895 
896 	/* adjust states for indexing into physical_state_transitions */
897 	old -= IB_PORTPHYSSTATE_POLLING;
898 	new -= IB_PORTPHYSSTATE_POLLING;
899 
900 	if (old < 0 || new < 0)
901 		return HFI_TRANSITION_UNDEFINED;
902 	return physical_state_transitions.allowed[old][new];
903 }
904 
905 static int port_states_transition_allowed(struct hfi1_pportdata *ppd,
906 					  u32 logical_new, u32 physical_new)
907 {
908 	u32 physical_old = driver_physical_state(ppd);
909 	u32 logical_old = driver_logical_state(ppd);
910 	int ret, logical_allowed, physical_allowed;
911 
912 	ret = logical_transition_allowed(logical_old, logical_new);
913 	logical_allowed = ret;
914 
915 	if (ret == HFI_TRANSITION_DISALLOWED ||
916 	    ret == HFI_TRANSITION_UNDEFINED) {
917 		pr_warn("invalid logical state transition %s -> %s\n",
918 			opa_lstate_name(logical_old),
919 			opa_lstate_name(logical_new));
920 		return ret;
921 	}
922 
923 	ret = physical_transition_allowed(physical_old, physical_new);
924 	physical_allowed = ret;
925 
926 	if (ret == HFI_TRANSITION_DISALLOWED ||
927 	    ret == HFI_TRANSITION_UNDEFINED) {
928 		pr_warn("invalid physical state transition %s -> %s\n",
929 			opa_pstate_name(physical_old),
930 			opa_pstate_name(physical_new));
931 		return ret;
932 	}
933 
934 	if (logical_allowed == HFI_TRANSITION_IGNORED &&
935 	    physical_allowed == HFI_TRANSITION_IGNORED)
936 		return HFI_TRANSITION_IGNORED;
937 
938 	/*
939 	 * A change request of Physical Port State from
940 	 * 'Offline' to 'Polling' should be ignored.
941 	 */
942 	if ((physical_old == OPA_PORTPHYSSTATE_OFFLINE) &&
943 	    (physical_new == IB_PORTPHYSSTATE_POLLING))
944 		return HFI_TRANSITION_IGNORED;
945 
946 	/*
947 	 * Either physical_allowed or logical_allowed is
948 	 * HFI_TRANSITION_ALLOWED.
949 	 */
950 	return HFI_TRANSITION_ALLOWED;
951 }
952 
953 static int set_port_states(struct hfi1_pportdata *ppd, struct opa_smp *smp,
954 			   u32 logical_state, u32 phys_state,
955 			   int suppress_idle_sma)
956 {
957 	struct hfi1_devdata *dd = ppd->dd;
958 	u32 link_state;
959 	int ret;
960 
961 	ret = port_states_transition_allowed(ppd, logical_state, phys_state);
962 	if (ret == HFI_TRANSITION_DISALLOWED ||
963 	    ret == HFI_TRANSITION_UNDEFINED) {
964 		/* error message emitted above */
965 		smp->status |= IB_SMP_INVALID_FIELD;
966 		return 0;
967 	}
968 
969 	if (ret == HFI_TRANSITION_IGNORED)
970 		return 0;
971 
972 	if ((phys_state != IB_PORTPHYSSTATE_NOP) &&
973 	    !(logical_state == IB_PORT_DOWN ||
974 	      logical_state == IB_PORT_NOP)) {
975 		pr_warn("SubnSet(OPA_PortInfo) port state invalid: logical_state 0x%x physical_state 0x%x\n",
976 			logical_state, phys_state);
977 		smp->status |= IB_SMP_INVALID_FIELD;
978 	}
979 
980 	/*
981 	 * Logical state changes are summarized in OPAv1g1 spec.,
982 	 * Table 9-12; physical state changes are summarized in
983 	 * OPAv1g1 spec., Table 6.4.
984 	 */
985 	switch (logical_state) {
986 	case IB_PORT_NOP:
987 		if (phys_state == IB_PORTPHYSSTATE_NOP)
988 			break;
989 		/* FALLTHROUGH */
990 	case IB_PORT_DOWN:
991 		if (phys_state == IB_PORTPHYSSTATE_NOP) {
992 			link_state = HLS_DN_DOWNDEF;
993 		} else if (phys_state == IB_PORTPHYSSTATE_POLLING) {
994 			link_state = HLS_DN_POLL;
995 			set_link_down_reason(ppd, OPA_LINKDOWN_REASON_FM_BOUNCE,
996 					     0, OPA_LINKDOWN_REASON_FM_BOUNCE);
997 		} else if (phys_state == IB_PORTPHYSSTATE_DISABLED) {
998 			link_state = HLS_DN_DISABLE;
999 		} else {
1000 			pr_warn("SubnSet(OPA_PortInfo) invalid physical state 0x%x\n",
1001 				phys_state);
1002 			smp->status |= IB_SMP_INVALID_FIELD;
1003 			break;
1004 		}
1005 
1006 		if ((link_state == HLS_DN_POLL ||
1007 		     link_state == HLS_DN_DOWNDEF)) {
1008 			/*
1009 			 * Going to poll.  No matter what the current state,
1010 			 * always move offline first, then tune and start the
1011 			 * link.  This correctly handles a FM link bounce and
1012 			 * a link enable.  Going offline is a no-op if already
1013 			 * offline.
1014 			 */
1015 			set_link_state(ppd, HLS_DN_OFFLINE);
1016 			start_link(ppd);
1017 		} else {
1018 			set_link_state(ppd, link_state);
1019 		}
1020 		if (link_state == HLS_DN_DISABLE &&
1021 		    (ppd->offline_disabled_reason >
1022 		     HFI1_ODR_MASK(OPA_LINKDOWN_REASON_SMA_DISABLED) ||
1023 		     ppd->offline_disabled_reason ==
1024 		     HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE)))
1025 			ppd->offline_disabled_reason =
1026 			HFI1_ODR_MASK(OPA_LINKDOWN_REASON_SMA_DISABLED);
1027 		/*
1028 		 * Don't send a reply if the response would be sent
1029 		 * through the disabled port.
1030 		 */
1031 		if (link_state == HLS_DN_DISABLE && smp->hop_cnt)
1032 			return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
1033 		break;
1034 	case IB_PORT_ARMED:
1035 		ret = set_link_state(ppd, HLS_UP_ARMED);
1036 		if ((ret == 0) && (suppress_idle_sma == 0))
1037 			send_idle_sma(dd, SMA_IDLE_ARM);
1038 		break;
1039 	case IB_PORT_ACTIVE:
1040 		if (ppd->neighbor_normal) {
1041 			ret = set_link_state(ppd, HLS_UP_ACTIVE);
1042 			if (ret == 0)
1043 				send_idle_sma(dd, SMA_IDLE_ACTIVE);
1044 		} else {
1045 			pr_warn("SubnSet(OPA_PortInfo) Cannot move to Active with NeighborNormal 0\n");
1046 			smp->status |= IB_SMP_INVALID_FIELD;
1047 		}
1048 		break;
1049 	default:
1050 		pr_warn("SubnSet(OPA_PortInfo) invalid logical state 0x%x\n",
1051 			logical_state);
1052 		smp->status |= IB_SMP_INVALID_FIELD;
1053 	}
1054 
1055 	return 0;
1056 }
1057 
1058 /**
1059  * subn_set_opa_portinfo - set port information
1060  * @smp: the incoming SM packet
1061  * @ibdev: the infiniband device
1062  * @port: the port on the device
1063  *
1064  */
1065 static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
1066 				   struct ib_device *ibdev, u8 port,
1067 				   u32 *resp_len)
1068 {
1069 	struct opa_port_info *pi = (struct opa_port_info *)data;
1070 	struct ib_event event;
1071 	struct hfi1_devdata *dd;
1072 	struct hfi1_pportdata *ppd;
1073 	struct hfi1_ibport *ibp;
1074 	u8 clientrereg;
1075 	unsigned long flags;
1076 	u32 smlid, opa_lid; /* tmp vars to hold LID values */
1077 	u16 lid;
1078 	u8 ls_old, ls_new, ps_new;
1079 	u8 vls;
1080 	u8 msl;
1081 	u8 crc_enabled;
1082 	u16 lse, lwe, mtu;
1083 	u32 num_ports = OPA_AM_NPORT(am);
1084 	u32 start_of_sm_config = OPA_AM_START_SM_CFG(am);
1085 	int ret, i, invalid = 0, call_set_mtu = 0;
1086 	int call_link_downgrade_policy = 0;
1087 
1088 	if (num_ports != 1) {
1089 		smp->status |= IB_SMP_INVALID_FIELD;
1090 		return reply((struct ib_mad_hdr *)smp);
1091 	}
1092 
1093 	opa_lid = be32_to_cpu(pi->lid);
1094 	if (opa_lid & 0xFFFF0000) {
1095 		pr_warn("OPA_PortInfo lid out of range: %X\n", opa_lid);
1096 		smp->status |= IB_SMP_INVALID_FIELD;
1097 		goto get_only;
1098 	}
1099 
1100 	lid = (u16)(opa_lid & 0x0000FFFF);
1101 
1102 	smlid = be32_to_cpu(pi->sm_lid);
1103 	if (smlid & 0xFFFF0000) {
1104 		pr_warn("OPA_PortInfo SM lid out of range: %X\n", smlid);
1105 		smp->status |= IB_SMP_INVALID_FIELD;
1106 		goto get_only;
1107 	}
1108 	smlid &= 0x0000FFFF;
1109 
1110 	clientrereg = (pi->clientrereg_subnettimeout &
1111 			OPA_PI_MASK_CLIENT_REREGISTER);
1112 
1113 	dd = dd_from_ibdev(ibdev);
1114 	/* IB numbers ports from 1, hw from 0 */
1115 	ppd = dd->pport + (port - 1);
1116 	ibp = &ppd->ibport_data;
1117 	event.device = ibdev;
1118 	event.element.port_num = port;
1119 
1120 	ls_old = driver_lstate(ppd);
1121 
1122 	ibp->rvp.mkey = pi->mkey;
1123 	ibp->rvp.gid_prefix = pi->subnet_prefix;
1124 	ibp->rvp.mkey_lease_period = be16_to_cpu(pi->mkey_lease_period);
1125 
1126 	/* Must be a valid unicast LID address. */
1127 	if ((lid == 0 && ls_old > IB_PORT_INIT) ||
1128 	    lid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) {
1129 		smp->status |= IB_SMP_INVALID_FIELD;
1130 		pr_warn("SubnSet(OPA_PortInfo) lid invalid 0x%x\n",
1131 			lid);
1132 	} else if (ppd->lid != lid ||
1133 		 ppd->lmc != (pi->mkeyprotect_lmc & OPA_PI_MASK_LMC)) {
1134 		if (ppd->lid != lid)
1135 			hfi1_set_uevent_bits(ppd, _HFI1_EVENT_LID_CHANGE_BIT);
1136 		if (ppd->lmc != (pi->mkeyprotect_lmc & OPA_PI_MASK_LMC))
1137 			hfi1_set_uevent_bits(ppd, _HFI1_EVENT_LMC_CHANGE_BIT);
1138 		hfi1_set_lid(ppd, lid, pi->mkeyprotect_lmc & OPA_PI_MASK_LMC);
1139 		event.event = IB_EVENT_LID_CHANGE;
1140 		ib_dispatch_event(&event);
1141 	}
1142 
1143 	msl = pi->smsl & OPA_PI_MASK_SMSL;
1144 	if (pi->partenforce_filterraw & OPA_PI_MASK_LINKINIT_REASON)
1145 		ppd->linkinit_reason =
1146 			(pi->partenforce_filterraw &
1147 			 OPA_PI_MASK_LINKINIT_REASON);
1148 	/* enable/disable SW pkey checking as per FM control */
1149 	if (pi->partenforce_filterraw & OPA_PI_MASK_PARTITION_ENFORCE_IN)
1150 		ppd->part_enforce |= HFI1_PART_ENFORCE_IN;
1151 	else
1152 		ppd->part_enforce &= ~HFI1_PART_ENFORCE_IN;
1153 
1154 	if (pi->partenforce_filterraw & OPA_PI_MASK_PARTITION_ENFORCE_OUT)
1155 		ppd->part_enforce |= HFI1_PART_ENFORCE_OUT;
1156 	else
1157 		ppd->part_enforce &= ~HFI1_PART_ENFORCE_OUT;
1158 
1159 	/* Must be a valid unicast LID address. */
1160 	if ((smlid == 0 && ls_old > IB_PORT_INIT) ||
1161 	    smlid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) {
1162 		smp->status |= IB_SMP_INVALID_FIELD;
1163 		pr_warn("SubnSet(OPA_PortInfo) smlid invalid 0x%x\n", smlid);
1164 	} else if (smlid != ibp->rvp.sm_lid || msl != ibp->rvp.sm_sl) {
1165 		pr_warn("SubnSet(OPA_PortInfo) smlid 0x%x\n", smlid);
1166 		spin_lock_irqsave(&ibp->rvp.lock, flags);
1167 		if (ibp->rvp.sm_ah) {
1168 			if (smlid != ibp->rvp.sm_lid)
1169 				ibp->rvp.sm_ah->attr.dlid = smlid;
1170 			if (msl != ibp->rvp.sm_sl)
1171 				ibp->rvp.sm_ah->attr.sl = msl;
1172 		}
1173 		spin_unlock_irqrestore(&ibp->rvp.lock, flags);
1174 		if (smlid != ibp->rvp.sm_lid)
1175 			ibp->rvp.sm_lid = smlid;
1176 		if (msl != ibp->rvp.sm_sl)
1177 			ibp->rvp.sm_sl = msl;
1178 		event.event = IB_EVENT_SM_CHANGE;
1179 		ib_dispatch_event(&event);
1180 	}
1181 
1182 	if (pi->link_down_reason == 0) {
1183 		ppd->local_link_down_reason.sma = 0;
1184 		ppd->local_link_down_reason.latest = 0;
1185 	}
1186 
1187 	if (pi->neigh_link_down_reason == 0) {
1188 		ppd->neigh_link_down_reason.sma = 0;
1189 		ppd->neigh_link_down_reason.latest = 0;
1190 	}
1191 
1192 	ppd->sm_trap_qp = be32_to_cpu(pi->sm_trap_qp);
1193 	ppd->sa_qp = be32_to_cpu(pi->sa_qp);
1194 
1195 	ppd->port_error_action = be32_to_cpu(pi->port_error_action);
1196 	lwe = be16_to_cpu(pi->link_width.enabled);
1197 	if (lwe) {
1198 		if (lwe == OPA_LINK_WIDTH_RESET ||
1199 		    lwe == OPA_LINK_WIDTH_RESET_OLD)
1200 			set_link_width_enabled(ppd, ppd->link_width_supported);
1201 		else if ((lwe & ~ppd->link_width_supported) == 0)
1202 			set_link_width_enabled(ppd, lwe);
1203 		else
1204 			smp->status |= IB_SMP_INVALID_FIELD;
1205 	}
1206 	lwe = be16_to_cpu(pi->link_width_downgrade.enabled);
1207 	/* LWD.E is always applied - 0 means "disabled" */
1208 	if (lwe == OPA_LINK_WIDTH_RESET ||
1209 	    lwe == OPA_LINK_WIDTH_RESET_OLD) {
1210 		set_link_width_downgrade_enabled(ppd,
1211 				ppd->link_width_downgrade_supported);
1214 	} else if ((lwe & ~ppd->link_width_downgrade_supported) == 0) {
1215 		/* only set and apply if something changed */
1216 		if (lwe != ppd->link_width_downgrade_enabled) {
1217 			set_link_width_downgrade_enabled(ppd, lwe);
1218 			call_link_downgrade_policy = 1;
1219 		}
1220 	} else {
1221 		smp->status |= IB_SMP_INVALID_FIELD;
1222 	}
1223 	lse = be16_to_cpu(pi->link_speed.enabled);
1224 	if (lse) {
1225 		if (lse & be16_to_cpu(pi->link_speed.supported))
1226 			set_link_speed_enabled(ppd, lse);
1227 		else
1228 			smp->status |= IB_SMP_INVALID_FIELD;
1229 	}
1230 
1231 	ibp->rvp.mkeyprot =
1232 		(pi->mkeyprotect_lmc & OPA_PI_MASK_MKEY_PROT_BIT) >> 6;
1233 	ibp->rvp.vl_high_limit = be16_to_cpu(pi->vl.high_limit) & 0xFF;
1234 	(void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_VL_HIGH_LIMIT,
1235 				    ibp->rvp.vl_high_limit);
1236 
1237 	if (ppd->vls_supported / 2 > ARRAY_SIZE(pi->neigh_mtu.pvlx_to_mtu) ||
1238 	    ppd->vls_supported > ARRAY_SIZE(dd->vld)) {
1239 		smp->status |= IB_SMP_INVALID_FIELD;
1240 		return reply((struct ib_mad_hdr *)smp);
1241 	}
1242 	for (i = 0; i < ppd->vls_supported; i++) {
1243 		if ((i % 2) == 0)
1244 			mtu = enum_to_mtu((pi->neigh_mtu.pvlx_to_mtu[i / 2] >>
1245 					   4) & 0xF);
1246 		else
1247 			mtu = enum_to_mtu(pi->neigh_mtu.pvlx_to_mtu[i / 2] &
1248 					  0xF);
1249 		if (mtu == 0xffff) {
1250 			pr_warn("SubnSet(OPA_PortInfo) mtu invalid %d (0x%x)\n",
1251 				mtu,
1252 				(pi->neigh_mtu.pvlx_to_mtu[0] >> 4) & 0xF);
1253 			smp->status |= IB_SMP_INVALID_FIELD;
1254 			mtu = hfi1_max_mtu; /* use a valid MTU */
1255 		}
1256 		if (dd->vld[i].mtu != mtu) {
1257 			dd_dev_info(dd,
1258 				    "MTU change on vl %d from %d to %d\n",
1259 				    i, dd->vld[i].mtu, mtu);
1260 			dd->vld[i].mtu = mtu;
1261 			call_set_mtu++;
1262 		}
1263 	}
1264 	/* As per OPAV1 spec: VL15 must support and be configured
1265 	 * for operation with a 2048 or larger MTU.
1266 	 */
1267 	mtu = enum_to_mtu(pi->neigh_mtu.pvlx_to_mtu[15 / 2] & 0xF);
1268 	if (mtu < 2048 || mtu == 0xffff)
1269 		mtu = 2048;
1270 	if (dd->vld[15].mtu != mtu) {
1271 		dd_dev_info(dd,
1272 			    "MTU change on vl 15 from %d to %d\n",
1273 			    dd->vld[15].mtu, mtu);
1274 		dd->vld[15].mtu = mtu;
1275 		call_set_mtu++;
1276 	}
1277 	if (call_set_mtu)
1278 		set_mtu(ppd);
1279 
1280 	/* Set operational VLs */
1281 	vls = pi->operational_vls & OPA_PI_MASK_OPERATIONAL_VL;
1282 	if (vls) {
1283 		if (vls > ppd->vls_supported) {
1284 			pr_warn("SubnSet(OPA_PortInfo) VL's supported invalid %d\n",
1285 				pi->operational_vls);
1286 			smp->status |= IB_SMP_INVALID_FIELD;
1287 		} else {
1288 			if (hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_OP_VLS,
1289 					    vls) == -EINVAL)
1290 				smp->status |= IB_SMP_INVALID_FIELD;
1291 		}
1292 	}
1293 
1294 	if (pi->mkey_violations == 0)
1295 		ibp->rvp.mkey_violations = 0;
1296 
1297 	if (pi->pkey_violations == 0)
1298 		ibp->rvp.pkey_violations = 0;
1299 
1300 	if (pi->qkey_violations == 0)
1301 		ibp->rvp.qkey_violations = 0;
1302 
1303 	ibp->rvp.subnet_timeout =
1304 		pi->clientrereg_subnettimeout & OPA_PI_MASK_SUBNET_TIMEOUT;
1305 
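	/* the enabled LTP CRC mode lives in bits 7:4 of port_ltp_crc_mode */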
1306 	crc_enabled = be16_to_cpu(pi->port_ltp_crc_mode);
1307 	crc_enabled >>= 4;
1308 	crc_enabled &= 0xf;
1309 
1310 	if (crc_enabled != 0)
1311 		ppd->port_crc_mode_enabled = port_ltp_to_cap(crc_enabled);
1312 
1313 	ppd->is_active_optimize_enabled =
1314 			!!(be16_to_cpu(pi->port_mode)
1315 					& OPA_PI_MASK_PORT_ACTIVE_OPTOMIZE);
1316 
1317 	ls_new = pi->port_states.portphysstate_portstate &
1318 			OPA_PI_MASK_PORT_STATE;
1319 	ps_new = (pi->port_states.portphysstate_portstate &
1320 			OPA_PI_MASK_PORT_PHYSICAL_STATE) >> 4;
1321 
1322 	if (ls_old == IB_PORT_INIT) {
1323 		if (start_of_sm_config) {
1324 			if (ls_new == ls_old || (ls_new == IB_PORT_ARMED))
1325 				ppd->is_sm_config_started = 1;
1326 		} else if (ls_new == IB_PORT_ARMED) {
1327 			if (ppd->is_sm_config_started == 0)
1328 				invalid = 1;
1329 		}
1330 	}
1331 
1332 	/* Handle CLIENT_REREGISTER event b/c SM asked us for it */
1333 	if (clientrereg) {
1334 		event.event = IB_EVENT_CLIENT_REREGISTER;
1335 		ib_dispatch_event(&event);
1336 	}
1337 
1338 	/*
1339 	 * Do the port state change now that the other link parameters
1340 	 * have been set.
1341 	 * Changing the port physical state only makes sense if the link
1342 	 * is down or is being set to down.
1343 	 */
1344 
1345 	ret = set_port_states(ppd, smp, ls_new, ps_new, invalid);
1346 	if (ret)
1347 		return ret;
1348 
1349 	ret = __subn_get_opa_portinfo(smp, am, data, ibdev, port, resp_len);
1350 
1351 	/* restore re-reg bit per o14-12.2.1 */
1352 	pi->clientrereg_subnettimeout |= clientrereg;
1353 
1354 	/*
1355 	 * Apply the new link downgrade policy.  This may result in a link
1356 	 * bounce.  Do this after everything else so things are settled.
1357 	 * Possible problem: if setting the port state above fails, then
1358 	 * the policy change is not applied.
1359 	 */
1360 	if (call_link_downgrade_policy)
1361 		apply_link_downgrade_policy(ppd, 0);
1362 
1363 	return ret;
1364 
1365 get_only:
1366 	return __subn_get_opa_portinfo(smp, am, data, ibdev, port, resp_len);
1367 }
1368 
1369 /**
1370  * set_pkeys - set the PKEY table for ctxt 0
1371  * @dd: the hfi1_ib device
1372  * @port: the IB port number
1373  * @pkeys: the PKEY table
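 *
 * Return: 0 on success; 1 if the new table does not include LIM_MGMT_P_KEY,
 * in which case the update is rejected.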
1374  */
1375 static int set_pkeys(struct hfi1_devdata *dd, u8 port, u16 *pkeys)
1376 {
1377 	struct hfi1_pportdata *ppd;
1378 	int i;
1379 	int changed = 0;
1380 	int update_includes_mgmt_partition = 0;
1381 
1382 	/*
1383 	 * IB port one/two always maps to context zero/one,
1384 	 * always a kernel context, so no locking is needed.
1385 	 * If we get here with ppd set up, there is no need to check
1386 	 * that rcd is valid.
1387 	 */
1388 	ppd = dd->pport + (port - 1);
1389 	/*
1390 	 * If the update does not include the management pkey, don't do it.
1391 	 */
1392 	for (i = 0; i < ARRAY_SIZE(ppd->pkeys); i++) {
1393 		if (pkeys[i] == LIM_MGMT_P_KEY) {
1394 			update_includes_mgmt_partition = 1;
1395 			break;
1396 		}
1397 	}
1398 
1399 	if (!update_includes_mgmt_partition)
1400 		return 1;
1401 
1402 	for (i = 0; i < ARRAY_SIZE(ppd->pkeys); i++) {
1403 		u16 key = pkeys[i];
1404 		u16 okey = ppd->pkeys[i];
1405 
1406 		if (key == okey)
1407 			continue;
1408 		/*
1409 		 * The SM gives us the complete PKey table. We have
1410 		 * to ensure that we put the PKeys in the matching
1411 		 * slots.
1412 		 */
1413 		ppd->pkeys[i] = key;
1414 		changed = 1;
1415 	}
1416 
1417 	if (changed) {
1418 		(void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0);
1419 		hfi1_event_pkey_change(dd, port);
1420 	}
1421 
1422 	return 0;
1423 }
1424 
1425 static int __subn_set_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data,
1426 				    struct ib_device *ibdev, u8 port,
1427 				    u32 *resp_len)
1428 {
1429 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
1430 	u32 n_blocks_sent = OPA_AM_NBLK(am);
1431 	u32 start_block = am & 0x7ff;
1432 	u16 *p = (u16 *)data;
1433 	__be16 *q = (__be16 *)data;
1434 	int i;
1435 	u16 n_blocks_avail;
1436 	unsigned npkeys = hfi1_get_npkeys(dd);
1437 
1438 	if (n_blocks_sent == 0) {
1439 		pr_warn("OPA Set PKey AM Invalid : P = %d; B = 0x%x; N = 0x%x\n",
1440 			port, start_block, n_blocks_sent);
1441 		smp->status |= IB_SMP_INVALID_FIELD;
1442 		return reply((struct ib_mad_hdr *)smp);
1443 	}
1444 
1445 	n_blocks_avail = (u16)(npkeys / OPA_PARTITION_TABLE_BLK_SIZE) + 1;
1446 
1447 	if (start_block + n_blocks_sent > n_blocks_avail ||
1448 	    n_blocks_sent > OPA_NUM_PKEY_BLOCKS_PER_SMP) {
1449 		pr_warn("OPA Set PKey AM Invalid : s 0x%x; req 0x%x; avail 0x%x; blk/smp 0x%lx\n",
1450 			start_block, n_blocks_sent, n_blocks_avail,
1451 			OPA_NUM_PKEY_BLOCKS_PER_SMP);
1452 		smp->status |= IB_SMP_INVALID_FIELD;
1453 		return reply((struct ib_mad_hdr *)smp);
1454 	}
1455 
1456 	for (i = 0; i < n_blocks_sent * OPA_PARTITION_TABLE_BLK_SIZE; i++)
1457 		p[i] = be16_to_cpu(q[i]);
1458 
1459 	if (start_block == 0 && set_pkeys(dd, port, p) != 0) {
1460 		smp->status |= IB_SMP_INVALID_FIELD;
1461 		return reply((struct ib_mad_hdr *)smp);
1462 	}
1463 
1464 	return __subn_get_opa_pkeytable(smp, am, data, ibdev, port, resp_len);
1465 }
1466 
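/*
 * The SC-to-VLt map is spread across four 64-bit CSRs, one byte per SC
 * entry; the get/set helpers below copy all four in order.
 */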
1467 static int get_sc2vlt_tables(struct hfi1_devdata *dd, void *data)
1468 {
1469 	u64 *val = data;
1470 
1471 	*val++ = read_csr(dd, SEND_SC2VLT0);
1472 	*val++ = read_csr(dd, SEND_SC2VLT1);
1473 	*val++ = read_csr(dd, SEND_SC2VLT2);
1474 	*val++ = read_csr(dd, SEND_SC2VLT3);
1475 	return 0;
1476 }
1477 
1478 #define ILLEGAL_VL 12
1479 /*
1480  * filter_sc2vlt changes mappings to VL15 to ILLEGAL_VL (except
1481  * for SC15, which must map to VL15). If we don't remap things this
1482  * way it is possible for VL15 counters to increment when we try to
1483  * send on a SC which is mapped to an invalid VL.
1484  */
1485 static void filter_sc2vlt(void *data)
1486 {
1487 	int i;
1488 	u8 *pd = data;
1489 
1490 	for (i = 0; i < OPA_MAX_SCS; i++) {
1491 		if (i == 15)
1492 			continue;
1493 		if ((pd[i] & 0x1f) == 0xf)
1494 			pd[i] = ILLEGAL_VL;
1495 	}
1496 }
1497 
1498 static int set_sc2vlt_tables(struct hfi1_devdata *dd, void *data)
1499 {
1500 	u64 *val = data;
1501 
1502 	filter_sc2vlt(data);
1503 
1504 	write_csr(dd, SEND_SC2VLT0, *val++);
1505 	write_csr(dd, SEND_SC2VLT1, *val++);
1506 	write_csr(dd, SEND_SC2VLT2, *val++);
1507 	write_csr(dd, SEND_SC2VLT3, *val++);
1508 	write_seqlock_irq(&dd->sc2vl_lock);
1509 	memcpy(dd->sc2vl, data, sizeof(dd->sc2vl));
1510 	write_sequnlock_irq(&dd->sc2vl_lock);
1511 	return 0;
1512 }
1513 
1514 static int __subn_get_opa_sl_to_sc(struct opa_smp *smp, u32 am, u8 *data,
1515 				   struct ib_device *ibdev, u8 port,
1516 				   u32 *resp_len)
1517 {
1518 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
1519 	u8 *p = data;
1520 	size_t size = ARRAY_SIZE(ibp->sl_to_sc); /* == 32 */
1521 	unsigned i;
1522 
1523 	if (am) {
1524 		smp->status |= IB_SMP_INVALID_FIELD;
1525 		return reply((struct ib_mad_hdr *)smp);
1526 	}
1527 
1528 	for (i = 0; i < ARRAY_SIZE(ibp->sl_to_sc); i++)
1529 		*p++ = ibp->sl_to_sc[i];
1530 
1531 	if (resp_len)
1532 		*resp_len += size;
1533 
1534 	return reply((struct ib_mad_hdr *)smp);
1535 }
1536 
1537 static int __subn_set_opa_sl_to_sc(struct opa_smp *smp, u32 am, u8 *data,
1538 				   struct ib_device *ibdev, u8 port,
1539 				   u32 *resp_len)
1540 {
1541 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
1542 	u8 *p = data;
1543 	int i;
1544 	u8 sc;
1545 
1546 	if (am) {
1547 		smp->status |= IB_SMP_INVALID_FIELD;
1548 		return reply((struct ib_mad_hdr *)smp);
1549 	}
1550 
1551 	for (i = 0; i <  ARRAY_SIZE(ibp->sl_to_sc); i++) {
1552 		sc = *p++;
1553 		if (ibp->sl_to_sc[i] != sc) {
1554 			ibp->sl_to_sc[i] = sc;
1555 
1556 			/* Put all stale qps into error state */
1557 			hfi1_error_port_qps(ibp, i);
1558 		}
1559 	}
1560 
1561 	return __subn_get_opa_sl_to_sc(smp, am, data, ibdev, port, resp_len);
1562 }
1563 
1564 static int __subn_get_opa_sc_to_sl(struct opa_smp *smp, u32 am, u8 *data,
1565 				   struct ib_device *ibdev, u8 port,
1566 				   u32 *resp_len)
1567 {
1568 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
1569 	u8 *p = data;
1570 	size_t size = ARRAY_SIZE(ibp->sc_to_sl); /* == 32 */
1571 	unsigned i;
1572 
1573 	if (am) {
1574 		smp->status |= IB_SMP_INVALID_FIELD;
1575 		return reply((struct ib_mad_hdr *)smp);
1576 	}
1577 
1578 	for (i = 0; i < ARRAY_SIZE(ibp->sc_to_sl); i++)
1579 		*p++ = ibp->sc_to_sl[i];
1580 
1581 	if (resp_len)
1582 		*resp_len += size;
1583 
1584 	return reply((struct ib_mad_hdr *)smp);
1585 }
1586 
1587 static int __subn_set_opa_sc_to_sl(struct opa_smp *smp, u32 am, u8 *data,
1588 				   struct ib_device *ibdev, u8 port,
1589 				   u32 *resp_len)
1590 {
1591 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
1592 	u8 *p = data;
1593 	int i;
1594 
1595 	if (am) {
1596 		smp->status |= IB_SMP_INVALID_FIELD;
1597 		return reply((struct ib_mad_hdr *)smp);
1598 	}
1599 
1600 	for (i = 0; i < ARRAY_SIZE(ibp->sc_to_sl); i++)
1601 		ibp->sc_to_sl[i] = *p++;
1602 
1603 	return __subn_get_opa_sc_to_sl(smp, am, data, ibdev, port, resp_len);
1604 }
1605 
1606 static int __subn_get_opa_sc_to_vlt(struct opa_smp *smp, u32 am, u8 *data,
1607 				    struct ib_device *ibdev, u8 port,
1608 				    u32 *resp_len)
1609 {
1610 	u32 n_blocks = OPA_AM_NBLK(am);
1611 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
1612 	void *vp = (void *)data;
1613 	size_t size = 4 * sizeof(u64);
1614 
1615 	if (n_blocks != 1) {
1616 		smp->status |= IB_SMP_INVALID_FIELD;
1617 		return reply((struct ib_mad_hdr *)smp);
1618 	}
1619 
1620 	get_sc2vlt_tables(dd, vp);
1621 
1622 	if (resp_len)
1623 		*resp_len += size;
1624 
1625 	return reply((struct ib_mad_hdr *)smp);
1626 }
1627 
1628 static int __subn_set_opa_sc_to_vlt(struct opa_smp *smp, u32 am, u8 *data,
1629 				    struct ib_device *ibdev, u8 port,
1630 				    u32 *resp_len)
1631 {
1632 	u32 n_blocks = OPA_AM_NBLK(am);
1633 	int async_update = OPA_AM_ASYNC(am);
1634 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
1635 	void *vp = (void *)data;
1636 	struct hfi1_pportdata *ppd;
1637 	int lstate;
1638 
1639 	if (n_blocks != 1 || async_update) {
1640 		smp->status |= IB_SMP_INVALID_FIELD;
1641 		return reply((struct ib_mad_hdr *)smp);
1642 	}
1643 
1644 	/* IB numbers ports from 1, hw from 0 */
1645 	ppd = dd->pport + (port - 1);
1646 	lstate = driver_lstate(ppd);
1647 	/*
1648 	 * it's known that async_update is 0 by this point, but include
1649 	 * the explicit check for clarity
1650 	 */
1651 	if (!async_update &&
1652 	    (lstate == IB_PORT_ARMED || lstate == IB_PORT_ACTIVE)) {
1653 		smp->status |= IB_SMP_INVALID_FIELD;
1654 		return reply((struct ib_mad_hdr *)smp);
1655 	}
1656 
1657 	set_sc2vlt_tables(dd, vp);
1658 
1659 	return __subn_get_opa_sc_to_vlt(smp, am, data, ibdev, port, resp_len);
1660 }
1661 
1662 static int __subn_get_opa_sc_to_vlnt(struct opa_smp *smp, u32 am, u8 *data,
1663 				     struct ib_device *ibdev, u8 port,
1664 				     u32 *resp_len)
1665 {
1666 	u32 n_blocks = OPA_AM_NPORT(am);
1667 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
1668 	struct hfi1_pportdata *ppd;
1669 	void *vp = (void *)data;
1670 	int size;
1671 
1672 	if (n_blocks != 1) {
1673 		smp->status |= IB_SMP_INVALID_FIELD;
1674 		return reply((struct ib_mad_hdr *)smp);
1675 	}
1676 
1677 	ppd = dd->pport + (port - 1);
1678 
1679 	size = fm_get_table(ppd, FM_TBL_SC2VLNT, vp);
1680 
1681 	if (resp_len)
1682 		*resp_len += size;
1683 
1684 	return reply((struct ib_mad_hdr *)smp);
1685 }
1686 
1687 static int __subn_set_opa_sc_to_vlnt(struct opa_smp *smp, u32 am, u8 *data,
1688 				     struct ib_device *ibdev, u8 port,
1689 				     u32 *resp_len)
1690 {
1691 	u32 n_blocks = OPA_AM_NPORT(am);
1692 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
1693 	struct hfi1_pportdata *ppd;
1694 	void *vp = (void *)data;
1695 	int lstate;
1696 
1697 	if (n_blocks != 1) {
1698 		smp->status |= IB_SMP_INVALID_FIELD;
1699 		return reply((struct ib_mad_hdr *)smp);
1700 	}
1701 
1702 	/* IB numbers ports from 1, hw from 0 */
1703 	ppd = dd->pport + (port - 1);
1704 	lstate = driver_lstate(ppd);
1705 	if (lstate == IB_PORT_ARMED || lstate == IB_PORT_ACTIVE) {
1706 		smp->status |= IB_SMP_INVALID_FIELD;
1707 		return reply((struct ib_mad_hdr *)smp);
1708 	}
1709 
1712 	fm_set_table(ppd, FM_TBL_SC2VLNT, vp);
1713 
1714 	return __subn_get_opa_sc_to_vlnt(smp, am, data, ibdev, port,
1715 					 resp_len);
1716 }
1717 
1718 static int __subn_get_opa_psi(struct opa_smp *smp, u32 am, u8 *data,
1719 			      struct ib_device *ibdev, u8 port,
1720 			      u32 *resp_len)
1721 {
1722 	u32 nports = OPA_AM_NPORT(am);
1723 	u32 start_of_sm_config = OPA_AM_START_SM_CFG(am);
1724 	u32 lstate;
1725 	struct hfi1_ibport *ibp;
1726 	struct hfi1_pportdata *ppd;
1727 	struct opa_port_state_info *psi = (struct opa_port_state_info *)data;
1728 
1729 	if (nports != 1) {
1730 		smp->status |= IB_SMP_INVALID_FIELD;
1731 		return reply((struct ib_mad_hdr *)smp);
1732 	}
1733 
1734 	ibp = to_iport(ibdev, port);
1735 	ppd = ppd_from_ibp(ibp);
1736 
1737 	lstate = driver_lstate(ppd);
1738 
1739 	if (start_of_sm_config && (lstate == IB_PORT_INIT))
1740 		ppd->is_sm_config_started = 1;
1741 
1742 	psi->port_states.ledenable_offlinereason = ppd->neighbor_normal << 4;
1743 	psi->port_states.ledenable_offlinereason |=
1744 		ppd->is_sm_config_started << 5;
1745 	psi->port_states.ledenable_offlinereason |=
1746 		ppd->offline_disabled_reason;
1747 
1748 	psi->port_states.portphysstate_portstate =
1749 		(hfi1_ibphys_portstate(ppd) << 4) | (lstate & 0xf);
1750 	psi->link_width_downgrade_tx_active =
1751 		cpu_to_be16(ppd->link_width_downgrade_tx_active);
1752 	psi->link_width_downgrade_rx_active =
1753 		cpu_to_be16(ppd->link_width_downgrade_rx_active);
1754 	if (resp_len)
1755 		*resp_len += sizeof(struct opa_port_state_info);
1756 
1757 	return reply((struct ib_mad_hdr *)smp);
1758 }
1759 
1760 static int __subn_set_opa_psi(struct opa_smp *smp, u32 am, u8 *data,
1761 			      struct ib_device *ibdev, u8 port,
1762 			      u32 *resp_len)
1763 {
1764 	u32 nports = OPA_AM_NPORT(am);
1765 	u32 start_of_sm_config = OPA_AM_START_SM_CFG(am);
1766 	u32 ls_old;
1767 	u8 ls_new, ps_new;
1768 	struct hfi1_ibport *ibp;
1769 	struct hfi1_pportdata *ppd;
1770 	struct opa_port_state_info *psi = (struct opa_port_state_info *)data;
1771 	int ret, invalid = 0;
1772 
1773 	if (nports != 1) {
1774 		smp->status |= IB_SMP_INVALID_FIELD;
1775 		return reply((struct ib_mad_hdr *)smp);
1776 	}
1777 
1778 	ibp = to_iport(ibdev, port);
1779 	ppd = ppd_from_ibp(ibp);
1780 
1781 	ls_old = driver_lstate(ppd);
1782 
1783 	ls_new = port_states_to_logical_state(&psi->port_states);
1784 	ps_new = port_states_to_phys_state(&psi->port_states);
1785 
1786 	if (ls_old == IB_PORT_INIT) {
1787 		if (start_of_sm_config) {
1788 			if (ls_new == ls_old || (ls_new == IB_PORT_ARMED))
1789 				ppd->is_sm_config_started = 1;
1790 		} else if (ls_new == IB_PORT_ARMED) {
1791 			if (ppd->is_sm_config_started == 0)
1792 				invalid = 1;
1793 		}
1794 	}
1795 
1796 	ret = set_port_states(ppd, smp, ls_new, ps_new, invalid);
1797 	if (ret)
1798 		return ret;
1799 
1800 	if (invalid)
1801 		smp->status |= IB_SMP_INVALID_FIELD;
1802 
1803 	return __subn_get_opa_psi(smp, am, data, ibdev, port, resp_len);
1804 }
1805 
1806 static int __subn_get_opa_cable_info(struct opa_smp *smp, u32 am, u8 *data,
1807 				     struct ib_device *ibdev, u8 port,
1808 				     u32 *resp_len)
1809 {
1810 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
1811 	u32 addr = OPA_AM_CI_ADDR(am);
1812 	u32 len = OPA_AM_CI_LEN(am) + 1;
1813 	int ret;
1814 
1815 	if (dd->pport->port_type != PORT_TYPE_QSFP) {
1816 		smp->status |= IB_SMP_INVALID_FIELD;
1817 		return reply((struct ib_mad_hdr *)smp);
1818 	}
1819 
1820 #define __CI_PAGE_SIZE BIT(7) /* 128 bytes */
1821 #define __CI_PAGE_MASK ~(__CI_PAGE_SIZE - 1)
1822 #define __CI_PAGE_NUM(a) ((a) & __CI_PAGE_MASK)
1823 
1824 	/*
1825 	 * check that addr is within spec, and
1826 	 * addr and (addr + len - 1) are on the same "page"
1827 	 */
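	/*
	 * Illustrative example: with __CI_PAGE_SIZE == 128, a request of
	 * addr = 120, len = 16 covers bytes 120..135; __CI_PAGE_NUM(120)
	 * is 0 while __CI_PAGE_NUM(135) is 128, so the request crosses a
	 * page boundary and is rejected.
	 */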
1828 	if (addr >= 4096 ||
1829 	    (__CI_PAGE_NUM(addr) != __CI_PAGE_NUM(addr + len - 1))) {
1830 		smp->status |= IB_SMP_INVALID_FIELD;
1831 		return reply((struct ib_mad_hdr *)smp);
1832 	}
1833 
1834 	ret = get_cable_info(dd, port, addr, len, data);
1835 
1836 	if (ret == -ENODEV) {
1837 		smp->status |= IB_SMP_UNSUP_METH_ATTR;
1838 		return reply((struct ib_mad_hdr *)smp);
1839 	}
1840 
1841 	/* The address range for the CableInfo SMA query is wider than the
1842 	 * memory available on the QSFP cable. We want to return a valid
1843 	 * response, albeit zeroed out, for address ranges beyond available
1844 	 * memory but that are within the CableInfo query spec
1845 	 */
1846 	if (ret < 0 && ret != -ERANGE) {
1847 		smp->status |= IB_SMP_INVALID_FIELD;
1848 		return reply((struct ib_mad_hdr *)smp);
1849 	}
1850 
1851 	if (resp_len)
1852 		*resp_len += len;
1853 
1854 	return reply((struct ib_mad_hdr *)smp);
1855 }
1856 
1857 static int __subn_get_opa_bct(struct opa_smp *smp, u32 am, u8 *data,
1858 			      struct ib_device *ibdev, u8 port, u32 *resp_len)
1859 {
1860 	u32 num_ports = OPA_AM_NPORT(am);
1861 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
1862 	struct hfi1_pportdata *ppd;
1863 	struct buffer_control *p = (struct buffer_control *)data;
1864 	int size;
1865 
1866 	if (num_ports != 1) {
1867 		smp->status |= IB_SMP_INVALID_FIELD;
1868 		return reply((struct ib_mad_hdr *)smp);
1869 	}
1870 
1871 	ppd = dd->pport + (port - 1);
1872 	size = fm_get_table(ppd, FM_TBL_BUFFER_CONTROL, p);
1873 	trace_bct_get(dd, p);
1874 	if (resp_len)
1875 		*resp_len += size;
1876 
1877 	return reply((struct ib_mad_hdr *)smp);
1878 }
1879 
1880 static int __subn_set_opa_bct(struct opa_smp *smp, u32 am, u8 *data,
1881 			      struct ib_device *ibdev, u8 port, u32 *resp_len)
1882 {
1883 	u32 num_ports = OPA_AM_NPORT(am);
1884 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
1885 	struct hfi1_pportdata *ppd;
1886 	struct buffer_control *p = (struct buffer_control *)data;
1887 
1888 	if (num_ports != 1) {
1889 		smp->status |= IB_SMP_INVALID_FIELD;
1890 		return reply((struct ib_mad_hdr *)smp);
1891 	}
1892 	ppd = dd->pport + (port - 1);
1893 	trace_bct_set(dd, p);
1894 	if (fm_set_table(ppd, FM_TBL_BUFFER_CONTROL, p) < 0) {
1895 		smp->status |= IB_SMP_INVALID_FIELD;
1896 		return reply((struct ib_mad_hdr *)smp);
1897 	}
1898 
1899 	return __subn_get_opa_bct(smp, am, data, ibdev, port, resp_len);
1900 }
1901 
1902 static int __subn_get_opa_vl_arb(struct opa_smp *smp, u32 am, u8 *data,
1903 				 struct ib_device *ibdev, u8 port,
1904 				 u32 *resp_len)
1905 {
1906 	struct hfi1_pportdata *ppd = ppd_from_ibp(to_iport(ibdev, port));
1907 	u32 num_ports = OPA_AM_NPORT(am);
1908 	u8 section = (am & 0x00ff0000) >> 16;
1909 	u8 *p = data;
1910 	int size = 0;
1911 
1912 	if (num_ports != 1) {
1913 		smp->status |= IB_SMP_INVALID_FIELD;
1914 		return reply((struct ib_mad_hdr *)smp);
1915 	}
1916 
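	/*
	 * Bits 16-23 of the attribute modifier select which VLArbitration
	 * section is being addressed (one of the OPA_VLARB_* values below).
	 */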
1917 	switch (section) {
1918 	case OPA_VLARB_LOW_ELEMENTS:
1919 		size = fm_get_table(ppd, FM_TBL_VL_LOW_ARB, p);
1920 		break;
1921 	case OPA_VLARB_HIGH_ELEMENTS:
1922 		size = fm_get_table(ppd, FM_TBL_VL_HIGH_ARB, p);
1923 		break;
1924 	case OPA_VLARB_PREEMPT_ELEMENTS:
1925 		size = fm_get_table(ppd, FM_TBL_VL_PREEMPT_ELEMS, p);
1926 		break;
1927 	case OPA_VLARB_PREEMPT_MATRIX:
1928 		size = fm_get_table(ppd, FM_TBL_VL_PREEMPT_MATRIX, p);
1929 		break;
1930 	default:
1931 		pr_warn("OPA SubnGet(VL Arb) AM Invalid : 0x%x\n",
1932 			be32_to_cpu(smp->attr_mod));
1933 		smp->status |= IB_SMP_INVALID_FIELD;
1934 		break;
1935 	}
1936 
1937 	if (size > 0 && resp_len)
1938 		*resp_len += size;
1939 
1940 	return reply((struct ib_mad_hdr *)smp);
1941 }
1942 
1943 static int __subn_set_opa_vl_arb(struct opa_smp *smp, u32 am, u8 *data,
1944 				 struct ib_device *ibdev, u8 port,
1945 				 u32 *resp_len)
1946 {
1947 	struct hfi1_pportdata *ppd = ppd_from_ibp(to_iport(ibdev, port));
1948 	u32 num_ports = OPA_AM_NPORT(am);
1949 	u8 section = (am & 0x00ff0000) >> 16;
1950 	u8 *p = data;
1951 
1952 	if (num_ports != 1) {
1953 		smp->status |= IB_SMP_INVALID_FIELD;
1954 		return reply((struct ib_mad_hdr *)smp);
1955 	}
1956 
1957 	switch (section) {
1958 	case OPA_VLARB_LOW_ELEMENTS:
1959 		(void)fm_set_table(ppd, FM_TBL_VL_LOW_ARB, p);
1960 		break;
1961 	case OPA_VLARB_HIGH_ELEMENTS:
1962 		(void)fm_set_table(ppd, FM_TBL_VL_HIGH_ARB, p);
1963 		break;
1964 	/*
1965 	 * neither OPA_VLARB_PREEMPT_ELEMENTS nor OPA_VLARB_PREEMPT_MATRIX
1966 	 * can be changed from the default values
1967 	 */
1968 	case OPA_VLARB_PREEMPT_ELEMENTS:
1969 		/* FALLTHROUGH */
1970 	case OPA_VLARB_PREEMPT_MATRIX:
1971 		smp->status |= IB_SMP_UNSUP_METH_ATTR;
1972 		break;
1973 	default:
1974 		pr_warn("OPA SubnSet(VL Arb) AM Invalid : 0x%x\n",
1975 			be32_to_cpu(smp->attr_mod));
1976 		smp->status |= IB_SMP_INVALID_FIELD;
1977 		break;
1978 	}
1979 
1980 	return __subn_get_opa_vl_arb(smp, am, data, ibdev, port, resp_len);
1981 }
1982 
1983 struct opa_pma_mad {
1984 	struct ib_mad_hdr mad_hdr;
1985 	u8 data[2024];
1986 } __packed;
1987 
1988 struct opa_class_port_info {
1989 	u8 base_version;
1990 	u8 class_version;
1991 	__be16 cap_mask;
1992 	__be32 cap_mask2_resp_time;
1993 
1994 	u8 redirect_gid[16];
1995 	__be32 redirect_tc_fl;
1996 	__be32 redirect_lid;
1997 	__be32 redirect_sl_qp;
1998 	__be32 redirect_qkey;
1999 
2000 	u8 trap_gid[16];
2001 	__be32 trap_tc_fl;
2002 	__be32 trap_lid;
2003 	__be32 trap_hl_qp;
2004 	__be32 trap_qkey;
2005 
2006 	__be16 trap_pkey;
2007 	__be16 redirect_pkey;
2008 
2009 	u8 trap_sl_rsvd;
2010 	u8 reserved[3];
2011 } __packed;
2012 
2013 struct opa_port_status_req {
2014 	__u8 port_num;
2015 	__u8 reserved[3];
2016 	__be32 vl_select_mask;
2017 };
2018 
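/* VL select mask with bits 0-7 and bit 15 set, i.e. data VLs 0-7 plus VL15 */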
2019 #define VL_MASK_ALL		0x000080ff
2020 
2021 struct opa_port_status_rsp {
2022 	__u8 port_num;
2023 	__u8 reserved[3];
2024 	__be32  vl_select_mask;
2025 
2026 	/* Data counters */
2027 	__be64 port_xmit_data;
2028 	__be64 port_rcv_data;
2029 	__be64 port_xmit_pkts;
2030 	__be64 port_rcv_pkts;
2031 	__be64 port_multicast_xmit_pkts;
2032 	__be64 port_multicast_rcv_pkts;
2033 	__be64 port_xmit_wait;
2034 	__be64 sw_port_congestion;
2035 	__be64 port_rcv_fecn;
2036 	__be64 port_rcv_becn;
2037 	__be64 port_xmit_time_cong;
2038 	__be64 port_xmit_wasted_bw;
2039 	__be64 port_xmit_wait_data;
2040 	__be64 port_rcv_bubble;
2041 	__be64 port_mark_fecn;
2042 	/* Error counters */
2043 	__be64 port_rcv_constraint_errors;
2044 	__be64 port_rcv_switch_relay_errors;
2045 	__be64 port_xmit_discards;
2046 	__be64 port_xmit_constraint_errors;
2047 	__be64 port_rcv_remote_physical_errors;
2048 	__be64 local_link_integrity_errors;
2049 	__be64 port_rcv_errors;
2050 	__be64 excessive_buffer_overruns;
2051 	__be64 fm_config_errors;
2052 	__be32 link_error_recovery;
2053 	__be32 link_downed;
2054 	u8 uncorrectable_errors;
2055 
2056 	u8 link_quality_indicator; /* 5res, 3bit */
2057 	u8 res2[6];
2058 	struct _vls_pctrs {
2059 		/* per-VL Data counters */
2060 		__be64 port_vl_xmit_data;
2061 		__be64 port_vl_rcv_data;
2062 		__be64 port_vl_xmit_pkts;
2063 		__be64 port_vl_rcv_pkts;
2064 		__be64 port_vl_xmit_wait;
2065 		__be64 sw_port_vl_congestion;
2066 		__be64 port_vl_rcv_fecn;
2067 		__be64 port_vl_rcv_becn;
2068 		__be64 port_xmit_time_cong;
2069 		__be64 port_vl_xmit_wasted_bw;
2070 		__be64 port_vl_xmit_wait_data;
2071 		__be64 port_vl_rcv_bubble;
2072 		__be64 port_vl_mark_fecn;
2073 		__be64 port_vl_xmit_discards;
2074 	} vls[0]; /* real array size defined by # bits set in vl_select_mask */
2075 };
2076 
2077 enum counter_selects {
2078 	CS_PORT_XMIT_DATA			= (1 << 31),
2079 	CS_PORT_RCV_DATA			= (1 << 30),
2080 	CS_PORT_XMIT_PKTS			= (1 << 29),
2081 	CS_PORT_RCV_PKTS			= (1 << 28),
2082 	CS_PORT_MCAST_XMIT_PKTS			= (1 << 27),
2083 	CS_PORT_MCAST_RCV_PKTS			= (1 << 26),
2084 	CS_PORT_XMIT_WAIT			= (1 << 25),
2085 	CS_SW_PORT_CONGESTION			= (1 << 24),
2086 	CS_PORT_RCV_FECN			= (1 << 23),
2087 	CS_PORT_RCV_BECN			= (1 << 22),
2088 	CS_PORT_XMIT_TIME_CONG			= (1 << 21),
2089 	CS_PORT_XMIT_WASTED_BW			= (1 << 20),
2090 	CS_PORT_XMIT_WAIT_DATA			= (1 << 19),
2091 	CS_PORT_RCV_BUBBLE			= (1 << 18),
2092 	CS_PORT_MARK_FECN			= (1 << 17),
2093 	CS_PORT_RCV_CONSTRAINT_ERRORS		= (1 << 16),
2094 	CS_PORT_RCV_SWITCH_RELAY_ERRORS		= (1 << 15),
2095 	CS_PORT_XMIT_DISCARDS			= (1 << 14),
2096 	CS_PORT_XMIT_CONSTRAINT_ERRORS		= (1 << 13),
2097 	CS_PORT_RCV_REMOTE_PHYSICAL_ERRORS	= (1 << 12),
2098 	CS_LOCAL_LINK_INTEGRITY_ERRORS		= (1 << 11),
2099 	CS_PORT_RCV_ERRORS			= (1 << 10),
2100 	CS_EXCESSIVE_BUFFER_OVERRUNS		= (1 << 9),
2101 	CS_FM_CONFIG_ERRORS			= (1 << 8),
2102 	CS_LINK_ERROR_RECOVERY			= (1 << 7),
2103 	CS_LINK_DOWNED				= (1 << 6),
2104 	CS_UNCORRECTABLE_ERRORS			= (1 << 5),
2105 };
2106 
2107 struct opa_clear_port_status {
2108 	__be64 port_select_mask[4];
2109 	__be32 counter_select_mask;
2110 };
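
/*
 * For a ClearPortStatus request, port_select_mask is a 256-bit port mask
 * (only the low quadword, port_select_mask[3], is examined for an HFI) and
 * counter_select_mask is built from the CS_* bits above; see
 * pma_set_opa_portstatus().
 */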
2111 
2112 struct opa_aggregate {
2113 	__be16 attr_id;
2114 	__be16 err_reqlength;	/* 1 bit, 8 res, 7 bit */
2115 	__be32 attr_mod;
2116 	u8 data[0];
2117 };
2118 
2119 #define MSK_LLI 0x000000f0
2120 #define MSK_LLI_SFT 4
2121 #define MSK_LER 0x0000000f
2122 #define MSK_LER_SFT 0
2123 #define ADD_LLI 8
2124 #define ADD_LER 2
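
/*
 * The PortDataCounters resolution field packs two 4-bit resolutions:
 * bits 4-7 (MSK_LLI) for LocalLinkIntegrityErrors and bits 0-3 (MSK_LER)
 * for LinkErrorRecovery.  When non-zero, ADD_LLI/ADD_LER are added to the
 * requested resolution before the corresponding counters are right-shifted;
 * see pma_get_opa_datacounters() and get_error_counter_summary().
 */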
2125 
2126 /* Request contains first three fields, response contains those plus the rest */
2127 struct opa_port_data_counters_msg {
2128 	__be64 port_select_mask[4];
2129 	__be32 vl_select_mask;
2130 	__be32 resolution;
2131 
2132 	/* Response fields follow */
2133 	struct _port_dctrs {
2134 		u8 port_number;
2135 		u8 reserved2[3];
2136 		__be32 link_quality_indicator; /* 29res, 3bit */
2137 
2138 		/* Data counters */
2139 		__be64 port_xmit_data;
2140 		__be64 port_rcv_data;
2141 		__be64 port_xmit_pkts;
2142 		__be64 port_rcv_pkts;
2143 		__be64 port_multicast_xmit_pkts;
2144 		__be64 port_multicast_rcv_pkts;
2145 		__be64 port_xmit_wait;
2146 		__be64 sw_port_congestion;
2147 		__be64 port_rcv_fecn;
2148 		__be64 port_rcv_becn;
2149 		__be64 port_xmit_time_cong;
2150 		__be64 port_xmit_wasted_bw;
2151 		__be64 port_xmit_wait_data;
2152 		__be64 port_rcv_bubble;
2153 		__be64 port_mark_fecn;
2154 
2155 		__be64 port_error_counter_summary;
2156 		/* Sum of error counts/port */
2157 
2158 		struct _vls_dctrs {
2159 			/* per-VL Data counters */
2160 			__be64 port_vl_xmit_data;
2161 			__be64 port_vl_rcv_data;
2162 			__be64 port_vl_xmit_pkts;
2163 			__be64 port_vl_rcv_pkts;
2164 			__be64 port_vl_xmit_wait;
2165 			__be64 sw_port_vl_congestion;
2166 			__be64 port_vl_rcv_fecn;
2167 			__be64 port_vl_rcv_becn;
2168 			__be64 port_xmit_time_cong;
2169 			__be64 port_vl_xmit_wasted_bw;
2170 			__be64 port_vl_xmit_wait_data;
2171 			__be64 port_vl_rcv_bubble;
2172 			__be64 port_vl_mark_fecn;
2173 		} vls[0];
2174 		/* array size defined by #bits set in vl_select_mask*/
2175 	} port[1]; /* array size defined by  #ports in attribute modifier */
2176 };
2177 
2178 struct opa_port_error_counters64_msg {
2179 	/*
2180 	 * Request contains the first two fields; the response contains
2181 	 * the entire structure.
2182 	 */
2183 	__be64 port_select_mask[4];
2184 	__be32 vl_select_mask;
2185 
2186 	/* Response-only fields follow */
2187 	__be32 reserved1;
2188 	struct _port_ectrs {
2189 		u8 port_number;
2190 		u8 reserved2[7];
2191 		__be64 port_rcv_constraint_errors;
2192 		__be64 port_rcv_switch_relay_errors;
2193 		__be64 port_xmit_discards;
2194 		__be64 port_xmit_constraint_errors;
2195 		__be64 port_rcv_remote_physical_errors;
2196 		__be64 local_link_integrity_errors;
2197 		__be64 port_rcv_errors;
2198 		__be64 excessive_buffer_overruns;
2199 		__be64 fm_config_errors;
2200 		__be32 link_error_recovery;
2201 		__be32 link_downed;
2202 		u8 uncorrectable_errors;
2203 		u8 reserved3[7];
2204 		struct _vls_ectrs {
2205 			__be64 port_vl_xmit_discards;
2206 		} vls[0];
2207 		/* array size defined by #bits set in vl_select_mask */
2208 	} port[1]; /* array size defined by #ports in attribute modifier */
2209 };
2210 
2211 struct opa_port_error_info_msg {
2212 	__be64 port_select_mask[4];
2213 	__be32 error_info_select_mask;
2214 	__be32 reserved1;
2215 	struct _port_ei {
2216 		u8 port_number;
2217 		u8 reserved2[7];
2218 
2219 		/* PortRcvErrorInfo */
2220 		struct {
2221 			u8 status_and_code;
2222 			union {
2223 				u8 raw[17];
2224 				struct {
2225 					/* EI1to12 format */
2226 					u8 packet_flit1[8];
2227 					u8 packet_flit2[8];
2228 					u8 remaining_flit_bits12;
2229 				} ei1to12;
2230 				struct {
2231 					u8 packet_bytes[8];
2232 					u8 remaining_flit_bits;
2233 				} ei13;
2234 			} ei;
2235 			u8 reserved3[6];
2236 		} __packed port_rcv_ei;
2237 
2238 		/* ExcessiveBufferOverrunInfo */
2239 		struct {
2240 			u8 status_and_sc;
2241 			u8 reserved4[7];
2242 		} __packed excessive_buffer_overrun_ei;
2243 
2244 		/* PortXmitConstraintErrorInfo */
2245 		struct {
2246 			u8 status;
2247 			u8 reserved5;
2248 			__be16 pkey;
2249 			__be32 slid;
2250 		} __packed port_xmit_constraint_ei;
2251 
2252 		/* PortRcvConstraintErrorInfo */
2253 		struct {
2254 			u8 status;
2255 			u8 reserved6;
2256 			__be16 pkey;
2257 			__be32 slid;
2258 		} __packed port_rcv_constraint_ei;
2259 
2260 		/* PortRcvSwitchRelayErrorInfo */
2261 		struct {
2262 			u8 status_and_code;
2263 			u8 reserved7[3];
2264 			__u32 error_info;
2265 		} __packed port_rcv_switch_relay_ei;
2266 
2267 		/* UncorrectableErrorInfo */
2268 		struct {
2269 			u8 status_and_code;
2270 			u8 reserved8;
2271 		} __packed uncorrectable_ei;
2272 
2273 		/* FMConfigErrorInfo */
2274 		struct {
2275 			u8 status_and_code;
2276 			u8 error_info;
2277 		} __packed fm_config_ei;
2278 		__u32 reserved9;
2279 	} port[1]; /* actual array size defined by #ports in attr modifier */
2280 };
2281 
2282 /* opa_port_error_info_msg error_info_select_mask bit definitions */
2283 enum error_info_selects {
2284 	ES_PORT_RCV_ERROR_INFO			= (1 << 31),
2285 	ES_EXCESSIVE_BUFFER_OVERRUN_INFO	= (1 << 30),
2286 	ES_PORT_XMIT_CONSTRAINT_ERROR_INFO	= (1 << 29),
2287 	ES_PORT_RCV_CONSTRAINT_ERROR_INFO	= (1 << 28),
2288 	ES_PORT_RCV_SWITCH_RELAY_ERROR_INFO	= (1 << 27),
2289 	ES_UNCORRECTABLE_ERROR_INFO		= (1 << 26),
2290 	ES_FM_CONFIG_ERROR_INFO			= (1 << 25)
2291 };
2292 
2293 static int pma_get_opa_classportinfo(struct opa_pma_mad *pmp,
2294 				     struct ib_device *ibdev, u32 *resp_len)
2295 {
2296 	struct opa_class_port_info *p =
2297 		(struct opa_class_port_info *)pmp->data;
2298 
2299 	memset(pmp->data, 0, sizeof(pmp->data));
2300 
2301 	if (pmp->mad_hdr.attr_mod != 0)
2302 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2303 
2304 	p->base_version = OPA_MGMT_BASE_VERSION;
2305 	p->class_version = OPA_SMI_CLASS_VERSION;
2306 	/*
2307 	 * Expected response time is 4.096 usec. * 2^18 == 1.073741824 sec.
2308 	 */
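	/* the RespTimeValue lives in the low 5 bits of cap_mask2_resp_time */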
2309 	p->cap_mask2_resp_time = cpu_to_be32(18);
2310 
2311 	if (resp_len)
2312 		*resp_len += sizeof(*p);
2313 
2314 	return reply((struct ib_mad_hdr *)pmp);
2315 }
2316 
2317 static void a0_portstatus(struct hfi1_pportdata *ppd,
2318 			  struct opa_port_status_rsp *rsp, u32 vl_select_mask)
2319 {
2320 	if (!is_bx(ppd->dd)) {
2321 		unsigned long vl;
2322 		u64 sum_vl_xmit_wait = 0;
2323 		u32 vl_all_mask = VL_MASK_ALL;
2324 
2325 		for_each_set_bit(vl, (unsigned long *)&(vl_all_mask),
2326 				 8 * sizeof(vl_all_mask)) {
2327 			u64 tmp = sum_vl_xmit_wait +
2328 				  read_port_cntr(ppd, C_TX_WAIT_VL,
2329 						 idx_from_vl(vl));
2330 			if (tmp < sum_vl_xmit_wait) {
2331 				/* we wrapped */
2332 				sum_vl_xmit_wait = (u64)~0;
2333 				break;
2334 			}
2335 			sum_vl_xmit_wait = tmp;
2336 		}
2337 		if (be64_to_cpu(rsp->port_xmit_wait) > sum_vl_xmit_wait)
2338 			rsp->port_xmit_wait = cpu_to_be64(sum_vl_xmit_wait);
2339 	}
2340 }
2341 
2342 static int pma_get_opa_portstatus(struct opa_pma_mad *pmp,
2343 				  struct ib_device *ibdev,
2344 				  u8 port, u32 *resp_len)
2345 {
2346 	struct opa_port_status_req *req =
2347 		(struct opa_port_status_req *)pmp->data;
2348 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
2349 	struct opa_port_status_rsp *rsp;
2350 	u32 vl_select_mask = be32_to_cpu(req->vl_select_mask);
2351 	unsigned long vl;
2352 	size_t response_data_size;
2353 	u32 nports = be32_to_cpu(pmp->mad_hdr.attr_mod) >> 24;
2354 	u8 port_num = req->port_num;
2355 	u8 num_vls = hweight32(vl_select_mask);
2356 	struct _vls_pctrs *vlinfo;
2357 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
2358 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
2359 	int vfi;
2360 	u64 tmp, tmp2;
2361 
2362 	response_data_size = sizeof(struct opa_port_status_rsp) +
2363 				num_vls * sizeof(struct _vls_pctrs);
2364 	if (response_data_size > sizeof(pmp->data)) {
2365 		pmp->mad_hdr.status |= OPA_PM_STATUS_REQUEST_TOO_LARGE;
2366 		return reply((struct ib_mad_hdr *)pmp);
2367 	}
2368 
2369 	if (nports != 1 || (port_num && port_num != port) ||
2370 	    num_vls > OPA_MAX_VLS || (vl_select_mask & ~VL_MASK_ALL)) {
2371 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2372 		return reply((struct ib_mad_hdr *)pmp);
2373 	}
2374 
2375 	memset(pmp->data, 0, sizeof(pmp->data));
2376 
2377 	rsp = (struct opa_port_status_rsp *)pmp->data;
2378 	if (port_num)
2379 		rsp->port_num = port_num;
2380 	else
2381 		rsp->port_num = port;
2382 
2383 	rsp->port_rcv_constraint_errors =
2384 		cpu_to_be64(read_port_cntr(ppd, C_SW_RCV_CSTR_ERR,
2385 					   CNTR_INVALID_VL));
2386 
2387 	hfi1_read_link_quality(dd, &rsp->link_quality_indicator);
2388 
2389 	rsp->vl_select_mask = cpu_to_be32(vl_select_mask);
2390 	rsp->port_xmit_data = cpu_to_be64(read_dev_cntr(dd, C_DC_XMIT_FLITS,
2391 					  CNTR_INVALID_VL));
2392 	rsp->port_rcv_data = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FLITS,
2393 					 CNTR_INVALID_VL));
2394 	rsp->port_xmit_pkts = cpu_to_be64(read_dev_cntr(dd, C_DC_XMIT_PKTS,
2395 					  CNTR_INVALID_VL));
2396 	rsp->port_rcv_pkts = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_PKTS,
2397 					 CNTR_INVALID_VL));
2398 	rsp->port_multicast_xmit_pkts =
2399 		cpu_to_be64(read_dev_cntr(dd, C_DC_MC_XMIT_PKTS,
2400 					  CNTR_INVALID_VL));
2401 	rsp->port_multicast_rcv_pkts =
2402 		cpu_to_be64(read_dev_cntr(dd, C_DC_MC_RCV_PKTS,
2403 					  CNTR_INVALID_VL));
2404 	rsp->port_xmit_wait =
2405 		cpu_to_be64(read_port_cntr(ppd, C_TX_WAIT, CNTR_INVALID_VL));
2406 	rsp->port_rcv_fecn =
2407 		cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FCN, CNTR_INVALID_VL));
2408 	rsp->port_rcv_becn =
2409 		cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_BCN, CNTR_INVALID_VL));
2410 	rsp->port_xmit_discards =
2411 		cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_DSCD,
2412 					   CNTR_INVALID_VL));
2413 	rsp->port_xmit_constraint_errors =
2414 		cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_CSTR_ERR,
2415 					   CNTR_INVALID_VL));
2416 	rsp->port_rcv_remote_physical_errors =
2417 		cpu_to_be64(read_dev_cntr(dd, C_DC_RMT_PHY_ERR,
2418 					  CNTR_INVALID_VL));
2419 	rsp->local_link_integrity_errors =
2420 		cpu_to_be64(read_dev_cntr(dd, C_DC_RX_REPLAY,
2421 					  CNTR_INVALID_VL));
2422 	tmp = read_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL);
2423 	tmp2 = tmp + read_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT,
2424 				   CNTR_INVALID_VL);
2425 	if (tmp2 > (u32)UINT_MAX || tmp2 < tmp) {
2426 		/* overflow/wrapped */
2427 		rsp->link_error_recovery = cpu_to_be32(~0);
2428 	} else {
2429 		rsp->link_error_recovery = cpu_to_be32(tmp2);
2430 	}
2431 	rsp->port_rcv_errors =
2432 		cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_ERR, CNTR_INVALID_VL));
2433 	rsp->excessive_buffer_overruns =
2434 		cpu_to_be64(read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL));
2435 	rsp->fm_config_errors =
2436 		cpu_to_be64(read_dev_cntr(dd, C_DC_FM_CFG_ERR,
2437 					  CNTR_INVALID_VL));
2438 	rsp->link_downed = cpu_to_be32(read_port_cntr(ppd, C_SW_LINK_DOWN,
2439 						      CNTR_INVALID_VL));
2440 
2441 	/* rsp->uncorrectable_errors is 8 bits wide, and it pegs at 0xff */
2442 	tmp = read_dev_cntr(dd, C_DC_UNC_ERR, CNTR_INVALID_VL);
2443 	rsp->uncorrectable_errors = tmp < 0x100 ? (tmp & 0xff) : 0xff;
2444 
2445 	vlinfo = &rsp->vls[0];
2446 	vfi = 0;
2447 	/* The vl_select_mask has been checked above, and we know
2448 	 * that it contains only entries which represent valid VLs.
2449 	 * So in the for_each_set_bit() loop below, we don't need
2450 	 * any additional checks for vl.
2451 	 */
2452 	for_each_set_bit(vl, (unsigned long *)&(vl_select_mask),
2453 			 8 * sizeof(vl_select_mask)) {
2454 		memset(vlinfo, 0, sizeof(*vlinfo));
2455 
2456 		tmp = read_dev_cntr(dd, C_DC_RX_FLIT_VL, idx_from_vl(vl));
2457 		rsp->vls[vfi].port_vl_rcv_data = cpu_to_be64(tmp);
2458 
2459 		rsp->vls[vfi].port_vl_rcv_pkts =
2460 			cpu_to_be64(read_dev_cntr(dd, C_DC_RX_PKT_VL,
2461 						  idx_from_vl(vl)));
2462 
2463 		rsp->vls[vfi].port_vl_xmit_data =
2464 			cpu_to_be64(read_port_cntr(ppd, C_TX_FLIT_VL,
2465 						   idx_from_vl(vl)));
2466 
2467 		rsp->vls[vfi].port_vl_xmit_pkts =
2468 			cpu_to_be64(read_port_cntr(ppd, C_TX_PKT_VL,
2469 						   idx_from_vl(vl)));
2470 
2471 		rsp->vls[vfi].port_vl_xmit_wait =
2472 			cpu_to_be64(read_port_cntr(ppd, C_TX_WAIT_VL,
2473 						   idx_from_vl(vl)));
2474 
2475 		rsp->vls[vfi].port_vl_rcv_fecn =
2476 			cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FCN_VL,
2477 						  idx_from_vl(vl)));
2478 
2479 		rsp->vls[vfi].port_vl_rcv_becn =
2480 			cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_BCN_VL,
2481 						  idx_from_vl(vl)));
2482 
2483 		rsp->vls[vfi].port_vl_xmit_discards =
2484 			cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_DSCD_VL,
2485 						   idx_from_vl(vl)));
2486 		vlinfo++;
2487 		vfi++;
2488 	}
2489 
2490 	a0_portstatus(ppd, rsp, vl_select_mask);
2491 
2492 	if (resp_len)
2493 		*resp_len += response_data_size;
2494 
2495 	return reply((struct ib_mad_hdr *)pmp);
2496 }
2497 
2498 static u64 get_error_counter_summary(struct ib_device *ibdev, u8 port,
2499 				     u8 res_lli, u8 res_ler)
2500 {
2501 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
2502 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
2503 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
2504 	u64 error_counter_summary = 0, tmp;
2505 
2506 	error_counter_summary += read_port_cntr(ppd, C_SW_RCV_CSTR_ERR,
2507 						CNTR_INVALID_VL);
2508 	/* port_rcv_switch_relay_errors is 0 for HFIs */
2509 	error_counter_summary += read_port_cntr(ppd, C_SW_XMIT_DSCD,
2510 						CNTR_INVALID_VL);
2511 	error_counter_summary += read_port_cntr(ppd, C_SW_XMIT_CSTR_ERR,
2512 						CNTR_INVALID_VL);
2513 	error_counter_summary += read_dev_cntr(dd, C_DC_RMT_PHY_ERR,
2514 					       CNTR_INVALID_VL);
2515 	/* local link integrity must be right-shifted by the lli resolution */
2516 	error_counter_summary += (read_dev_cntr(dd, C_DC_RX_REPLAY,
2517 						CNTR_INVALID_VL) >> res_lli);
2518 	/* link error recovery must be right-shifted by the ler resolution */
2519 	tmp = read_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL);
2520 	tmp += read_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT, CNTR_INVALID_VL);
2521 	error_counter_summary += (tmp >> res_ler);
2522 	error_counter_summary += read_dev_cntr(dd, C_DC_RCV_ERR,
2523 					       CNTR_INVALID_VL);
2524 	error_counter_summary += read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL);
2525 	error_counter_summary += read_dev_cntr(dd, C_DC_FM_CFG_ERR,
2526 					       CNTR_INVALID_VL);
2527 	/* ppd->link_downed is a 32-bit value */
2528 	error_counter_summary += read_port_cntr(ppd, C_SW_LINK_DOWN,
2529 						CNTR_INVALID_VL);
2530 	tmp = read_dev_cntr(dd, C_DC_UNC_ERR, CNTR_INVALID_VL);
2531 	/* this is an 8-bit quantity */
2532 	error_counter_summary += tmp < 0x100 ? (tmp & 0xff) : 0xff;
2533 
2534 	return error_counter_summary;
2535 }
2536 
2537 static void a0_datacounters(struct hfi1_pportdata *ppd, struct _port_dctrs *rsp,
2538 			    u32 vl_select_mask)
2539 {
2540 	if (!is_bx(ppd->dd)) {
2541 		unsigned long vl;
2542 		u64 sum_vl_xmit_wait = 0;
2543 		u32 vl_all_mask = VL_MASK_ALL;
2544 
2545 		for_each_set_bit(vl, (unsigned long *)&(vl_all_mask),
2546 				 8 * sizeof(vl_all_mask)) {
2547 			u64 tmp = sum_vl_xmit_wait +
2548 				  read_port_cntr(ppd, C_TX_WAIT_VL,
2549 						 idx_from_vl(vl));
2550 			if (tmp < sum_vl_xmit_wait) {
2551 				/* we wrapped */
2552 				sum_vl_xmit_wait = (u64)~0;
2553 				break;
2554 			}
2555 			sum_vl_xmit_wait = tmp;
2556 		}
2557 		if (be64_to_cpu(rsp->port_xmit_wait) > sum_vl_xmit_wait)
2558 			rsp->port_xmit_wait = cpu_to_be64(sum_vl_xmit_wait);
2559 	}
2560 }
2561 
2562 static void pma_get_opa_port_dctrs(struct ib_device *ibdev,
2563 				   struct _port_dctrs *rsp)
2564 {
2565 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
2566 
2567 	rsp->port_xmit_data = cpu_to_be64(read_dev_cntr(dd, C_DC_XMIT_FLITS,
2568 						CNTR_INVALID_VL));
2569 	rsp->port_rcv_data = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FLITS,
2570 						CNTR_INVALID_VL));
2571 	rsp->port_xmit_pkts = cpu_to_be64(read_dev_cntr(dd, C_DC_XMIT_PKTS,
2572 						CNTR_INVALID_VL));
2573 	rsp->port_rcv_pkts = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_PKTS,
2574 						CNTR_INVALID_VL));
2575 	rsp->port_multicast_xmit_pkts =
2576 		cpu_to_be64(read_dev_cntr(dd, C_DC_MC_XMIT_PKTS,
2577 					  CNTR_INVALID_VL));
2578 	rsp->port_multicast_rcv_pkts =
2579 		cpu_to_be64(read_dev_cntr(dd, C_DC_MC_RCV_PKTS,
2580 					  CNTR_INVALID_VL));
2581 }
2582 
2583 static int pma_get_opa_datacounters(struct opa_pma_mad *pmp,
2584 				    struct ib_device *ibdev,
2585 				    u8 port, u32 *resp_len)
2586 {
2587 	struct opa_port_data_counters_msg *req =
2588 		(struct opa_port_data_counters_msg *)pmp->data;
2589 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
2590 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
2591 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
2592 	struct _port_dctrs *rsp;
2593 	struct _vls_dctrs *vlinfo;
2594 	size_t response_data_size;
2595 	u32 num_ports;
2596 	u8 num_pslm;
2597 	u8 lq, num_vls;
2598 	u8 res_lli, res_ler;
2599 	u64 port_mask;
2600 	u8 port_num;
2601 	unsigned long vl;
2602 	u32 vl_select_mask;
2603 	int vfi;
2604 
2605 	num_ports = be32_to_cpu(pmp->mad_hdr.attr_mod) >> 24;
2606 	num_pslm = hweight64(be64_to_cpu(req->port_select_mask[3]));
2607 	num_vls = hweight32(be32_to_cpu(req->vl_select_mask));
2608 	vl_select_mask = be32_to_cpu(req->vl_select_mask);
2609 	res_lli = (u8)(be32_to_cpu(req->resolution) & MSK_LLI) >> MSK_LLI_SFT;
2610 	res_lli = res_lli ? res_lli + ADD_LLI : 0;
2611 	res_ler = (u8)(be32_to_cpu(req->resolution) & MSK_LER) >> MSK_LER_SFT;
2612 	res_ler = res_ler ? res_ler + ADD_LER : 0;
2613 
2614 	if (num_ports != 1 || (vl_select_mask & ~VL_MASK_ALL)) {
2615 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2616 		return reply((struct ib_mad_hdr *)pmp);
2617 	}
2618 
2619 	/* Sanity check */
2620 	response_data_size = sizeof(struct opa_port_data_counters_msg) +
2621 				num_vls * sizeof(struct _vls_dctrs);
2622 
2623 	if (response_data_size > sizeof(pmp->data)) {
2624 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2625 		return reply((struct ib_mad_hdr *)pmp);
2626 	}
2627 
2628 	/*
2629 	 * The bit set in the mask needs to be consistent with the
2630 	 * port the request came in on.
2631 	 */
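	/*
	 * e.g. a request arriving on port 1 must have bit 1 (value 0x2) as
	 * the lowest set bit of the low quadword of port_select_mask.
	 */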
2632 	port_mask = be64_to_cpu(req->port_select_mask[3]);
2633 	port_num = find_first_bit((unsigned long *)&port_mask,
2634 				  sizeof(port_mask) * 8);
2635 
2636 	if (port_num != port) {
2637 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2638 		return reply((struct ib_mad_hdr *)pmp);
2639 	}
2640 
2641 	rsp = &req->port[0];
2642 	memset(rsp, 0, sizeof(*rsp));
2643 
2644 	rsp->port_number = port;
2645 	/*
2646 	 * Note that link_quality_indicator is a 32 bit quantity in
2647 	 * 'datacounters' queries (as opposed to 'portinfo' queries,
2648 	 * where it's a byte).
2649 	 */
2650 	hfi1_read_link_quality(dd, &lq);
2651 	rsp->link_quality_indicator = cpu_to_be32((u32)lq);
2652 	pma_get_opa_port_dctrs(ibdev, rsp);
2653 
2654 	rsp->port_xmit_wait =
2655 		cpu_to_be64(read_port_cntr(ppd, C_TX_WAIT, CNTR_INVALID_VL));
2656 	rsp->port_rcv_fecn =
2657 		cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FCN, CNTR_INVALID_VL));
2658 	rsp->port_rcv_becn =
2659 		cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_BCN, CNTR_INVALID_VL));
2660 	rsp->port_error_counter_summary =
2661 		cpu_to_be64(get_error_counter_summary(ibdev, port,
2662 						      res_lli, res_ler));
2663 
2664 	vlinfo = &rsp->vls[0];
2665 	vfi = 0;
2666 	/* The vl_select_mask has been checked above, and we know
2667 	 * that it contains only entries which represent valid VLs.
2668 	 * So in the for_each_set_bit() loop below, we don't need
2669 	 * any additional checks for vl.
2670 	 */
2671 	for_each_set_bit(vl, (unsigned long *)&(vl_select_mask),
2672 			 8 * sizeof(req->vl_select_mask)) {
2673 		memset(vlinfo, 0, sizeof(*vlinfo));
2674 
2675 		rsp->vls[vfi].port_vl_xmit_data =
2676 			cpu_to_be64(read_port_cntr(ppd, C_TX_FLIT_VL,
2677 						   idx_from_vl(vl)));
2678 
2679 		rsp->vls[vfi].port_vl_rcv_data =
2680 			cpu_to_be64(read_dev_cntr(dd, C_DC_RX_FLIT_VL,
2681 						  idx_from_vl(vl)));
2682 
2683 		rsp->vls[vfi].port_vl_xmit_pkts =
2684 			cpu_to_be64(read_port_cntr(ppd, C_TX_PKT_VL,
2685 						   idx_from_vl(vl)));
2686 
2687 		rsp->vls[vfi].port_vl_rcv_pkts =
2688 			cpu_to_be64(read_dev_cntr(dd, C_DC_RX_PKT_VL,
2689 						  idx_from_vl(vl)));
2690 
2691 		rsp->vls[vfi].port_vl_xmit_wait =
2692 			cpu_to_be64(read_port_cntr(ppd, C_TX_WAIT_VL,
2693 						   idx_from_vl(vl)));
2694 
2695 		rsp->vls[vfi].port_vl_rcv_fecn =
2696 			cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FCN_VL,
2697 						  idx_from_vl(vl)));
2698 		rsp->vls[vfi].port_vl_rcv_becn =
2699 			cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_BCN_VL,
2700 						  idx_from_vl(vl)));
2701 
2702 		/* rsp->port_vl_xmit_time_cong is 0 for HFIs */
2703 		/* rsp->port_vl_xmit_wasted_bw ??? */
2704 		/* port_vl_xmit_wait_data - TXE (table 13-9 HFI spec) ???
2705 		 * does this differ from rsp->vls[vfi].port_vl_xmit_wait
2706 		 */
2707 		/*rsp->vls[vfi].port_vl_mark_fecn =
2708 		 *	cpu_to_be64(read_csr(dd, DCC_PRF_PORT_VL_MARK_FECN_CNT
2709 		 *		+ offset));
2710 		 */
2711 		vlinfo++;
2712 		vfi++;
2713 	}
2714 
2715 	a0_datacounters(ppd, rsp, vl_select_mask);
2716 
2717 	if (resp_len)
2718 		*resp_len += response_data_size;
2719 
2720 	return reply((struct ib_mad_hdr *)pmp);
2721 }
2722 
2723 static int pma_get_ib_portcounters_ext(struct ib_pma_mad *pmp,
2724 				       struct ib_device *ibdev, u8 port)
2725 {
2726 	struct ib_pma_portcounters_ext *p = (struct ib_pma_portcounters_ext *)
2727 						pmp->data;
2728 	struct _port_dctrs rsp;
2729 
2730 	if (pmp->mad_hdr.attr_mod != 0 || p->port_select != port) {
2731 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2732 		goto bail;
2733 	}
2734 
2735 	memset(&rsp, 0, sizeof(rsp));
2736 	pma_get_opa_port_dctrs(ibdev, &rsp);
2737 
2738 	p->port_xmit_data = rsp.port_xmit_data;
2739 	p->port_rcv_data = rsp.port_rcv_data;
2740 	p->port_xmit_packets = rsp.port_xmit_pkts;
2741 	p->port_rcv_packets = rsp.port_rcv_pkts;
2742 	p->port_unicast_xmit_packets = 0;
2743 	p->port_unicast_rcv_packets = 0;
2744 	p->port_multicast_xmit_packets = rsp.port_multicast_xmit_pkts;
2745 	p->port_multicast_rcv_packets = rsp.port_multicast_rcv_pkts;
2746 
2747 bail:
2748 	return reply((struct ib_mad_hdr *)pmp);
2749 }
2750 
2751 static void pma_get_opa_port_ectrs(struct ib_device *ibdev,
2752 				   struct _port_ectrs *rsp, u8 port)
2753 {
2754 	u64 tmp, tmp2;
2755 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
2756 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
2757 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
2758 
2759 	tmp = read_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL);
2760 	tmp2 = tmp + read_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT,
2761 					CNTR_INVALID_VL);
2762 	if (tmp2 > (u32)UINT_MAX || tmp2 < tmp) {
2763 		/* overflow/wrapped */
2764 		rsp->link_error_recovery = cpu_to_be32(~0);
2765 	} else {
2766 		rsp->link_error_recovery = cpu_to_be32(tmp2);
2767 	}
2768 
2769 	rsp->link_downed = cpu_to_be32(read_port_cntr(ppd, C_SW_LINK_DOWN,
2770 						CNTR_INVALID_VL));
2771 	rsp->port_rcv_errors =
2772 		cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_ERR, CNTR_INVALID_VL));
2773 	rsp->port_rcv_remote_physical_errors =
2774 		cpu_to_be64(read_dev_cntr(dd, C_DC_RMT_PHY_ERR,
2775 					  CNTR_INVALID_VL));
2776 	rsp->port_rcv_switch_relay_errors = 0;
2777 	rsp->port_xmit_discards =
2778 		cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_DSCD,
2779 					   CNTR_INVALID_VL));
2780 	rsp->port_xmit_constraint_errors =
2781 		cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_CSTR_ERR,
2782 					   CNTR_INVALID_VL));
2783 	rsp->port_rcv_constraint_errors =
2784 		cpu_to_be64(read_port_cntr(ppd, C_SW_RCV_CSTR_ERR,
2785 					   CNTR_INVALID_VL));
2786 	rsp->local_link_integrity_errors =
2787 		cpu_to_be64(read_dev_cntr(dd, C_DC_RX_REPLAY,
2788 					  CNTR_INVALID_VL));
2789 	rsp->excessive_buffer_overruns =
2790 		cpu_to_be64(read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL));
2791 }
2792 
2793 static int pma_get_opa_porterrors(struct opa_pma_mad *pmp,
2794 				  struct ib_device *ibdev,
2795 				  u8 port, u32 *resp_len)
2796 {
2797 	size_t response_data_size;
2798 	struct _port_ectrs *rsp;
2799 	u8 port_num;
2800 	struct opa_port_error_counters64_msg *req;
2801 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
2802 	u32 num_ports;
2803 	u8 num_pslm;
2804 	u8 num_vls;
2805 	struct hfi1_ibport *ibp;
2806 	struct hfi1_pportdata *ppd;
2807 	struct _vls_ectrs *vlinfo;
2808 	unsigned long vl;
2809 	u64 port_mask, tmp;
2810 	u32 vl_select_mask;
2811 	int vfi;
2812 
2813 	req = (struct opa_port_error_counters64_msg *)pmp->data;
2814 
2815 	num_ports = be32_to_cpu(pmp->mad_hdr.attr_mod) >> 24;
2816 
2817 	num_pslm = hweight64(be64_to_cpu(req->port_select_mask[3]));
2818 	num_vls = hweight32(be32_to_cpu(req->vl_select_mask));
2819 
2820 	if (num_ports != 1 || num_ports != num_pslm) {
2821 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2822 		return reply((struct ib_mad_hdr *)pmp);
2823 	}
2824 
2825 	response_data_size = sizeof(struct opa_port_error_counters64_msg) +
2826 				num_vls * sizeof(struct _vls_ectrs);
2827 
2828 	if (response_data_size > sizeof(pmp->data)) {
2829 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2830 		return reply((struct ib_mad_hdr *)pmp);
2831 	}
2832 	/*
2833 	 * The bit set in the mask needs to be consistent with the
2834 	 * port the request came in on.
2835 	 */
2836 	port_mask = be64_to_cpu(req->port_select_mask[3]);
2837 	port_num = find_first_bit((unsigned long *)&port_mask,
2838 				  sizeof(port_mask) * 8);
2839 
2840 	if (port_num != port) {
2841 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2842 		return reply((struct ib_mad_hdr *)pmp);
2843 	}
2844 
2845 	rsp = &req->port[0];
2846 
2847 	ibp = to_iport(ibdev, port_num);
2848 	ppd = ppd_from_ibp(ibp);
2849 
2850 	memset(rsp, 0, sizeof(*rsp));
2851 	rsp->port_number = port_num;
2852 
2853 	pma_get_opa_port_ectrs(ibdev, rsp, port_num);
2854 
2855 	rsp->port_rcv_remote_physical_errors =
2856 		cpu_to_be64(read_dev_cntr(dd, C_DC_RMT_PHY_ERR,
2857 					  CNTR_INVALID_VL));
2858 	rsp->fm_config_errors =
2859 		cpu_to_be64(read_dev_cntr(dd, C_DC_FM_CFG_ERR,
2860 					  CNTR_INVALID_VL));
2861 	tmp = read_dev_cntr(dd, C_DC_UNC_ERR, CNTR_INVALID_VL);
2862 
2863 	rsp->uncorrectable_errors = tmp < 0x100 ? (tmp & 0xff) : 0xff;
2864 	rsp->port_rcv_errors =
2865 		cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_ERR, CNTR_INVALID_VL));
2866 	vlinfo = &rsp->vls[0];
2867 	vfi = 0;
2868 	vl_select_mask = be32_to_cpu(req->vl_select_mask);
2869 	for_each_set_bit(vl, (unsigned long *)&(vl_select_mask),
2870 			 8 * sizeof(req->vl_select_mask)) {
2871 		memset(vlinfo, 0, sizeof(*vlinfo));
2872 		rsp->vls[vfi].port_vl_xmit_discards =
2873 			cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_DSCD_VL,
2874 						   idx_from_vl(vl)));
2875 		vlinfo += 1;
2876 		vfi++;
2877 	}
2878 
2879 	if (resp_len)
2880 		*resp_len += response_data_size;
2881 
2882 	return reply((struct ib_mad_hdr *)pmp);
2883 }
2884 
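/*
 * Convert the 64-bit OPA error counters into the narrower IB PortCounters
 * fields, saturating at each field's maximum (0xFF or 0xFFFF) rather than
 * truncating.
 */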
2885 static int pma_get_ib_portcounters(struct ib_pma_mad *pmp,
2886 				   struct ib_device *ibdev, u8 port)
2887 {
2888 	struct ib_pma_portcounters *p = (struct ib_pma_portcounters *)
2889 		pmp->data;
2890 	struct _port_ectrs rsp;
2891 	u64 temp_link_overrun_errors;
2892 	u64 temp_64;
2893 	u32 temp_32;
2894 
2895 	memset(&rsp, 0, sizeof(rsp));
2896 	pma_get_opa_port_ectrs(ibdev, &rsp, port);
2897 
2898 	if (pmp->mad_hdr.attr_mod != 0 || p->port_select != port) {
2899 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2900 		goto bail;
2901 	}
2902 
2903 	p->symbol_error_counter = 0; /* N/A for OPA */
2904 
2905 	temp_32 = be32_to_cpu(rsp.link_error_recovery);
2906 	if (temp_32 > 0xFFUL)
2907 		p->link_error_recovery_counter = 0xFF;
2908 	else
2909 		p->link_error_recovery_counter = (u8)temp_32;
2910 
2911 	temp_32 = be32_to_cpu(rsp.link_downed);
2912 	if (temp_32 > 0xFFUL)
2913 		p->link_downed_counter = 0xFF;
2914 	else
2915 		p->link_downed_counter = (u8)temp_32;
2916 
2917 	temp_64 = be64_to_cpu(rsp.port_rcv_errors);
2918 	if (temp_64 > 0xFFFFUL)
2919 		p->port_rcv_errors = cpu_to_be16(0xFFFF);
2920 	else
2921 		p->port_rcv_errors = cpu_to_be16((u16)temp_64);
2922 
2923 	temp_64 = be64_to_cpu(rsp.port_rcv_remote_physical_errors);
2924 	if (temp_64 > 0xFFFFUL)
2925 		p->port_rcv_remphys_errors = cpu_to_be16(0xFFFF);
2926 	else
2927 		p->port_rcv_remphys_errors = cpu_to_be16((u16)temp_64);
2928 
2929 	temp_64 = be64_to_cpu(rsp.port_rcv_switch_relay_errors);
2930 	p->port_rcv_switch_relay_errors = cpu_to_be16((u16)temp_64);
2931 
2932 	temp_64 = be64_to_cpu(rsp.port_xmit_discards);
2933 	if (temp_64 > 0xFFFFUL)
2934 		p->port_xmit_discards = cpu_to_be16(0xFFFF);
2935 	else
2936 		p->port_xmit_discards = cpu_to_be16((u16)temp_64);
2937 
2938 	temp_64 = be64_to_cpu(rsp.port_xmit_constraint_errors);
2939 	if (temp_64 > 0xFFUL)
2940 		p->port_xmit_constraint_errors = 0xFF;
2941 	else
2942 		p->port_xmit_constraint_errors = (u8)temp_64;
2943 
2944 	temp_64 = be64_to_cpu(rsp.port_rcv_constraint_errors);
2945 	if (temp_64 > 0xFFUL)
2946 		p->port_rcv_constraint_errors = 0xFFUL;
2947 	else
2948 		p->port_rcv_constraint_errors = (u8)temp_64;
2949 
2950 	/* LocalLink: 7:4, BufferOverrun: 3:0 */
2951 	temp_64 = be64_to_cpu(rsp.local_link_integrity_errors);
2952 	if (temp_64 > 0xFUL)
2953 		temp_64 = 0xFUL;
2954 
2955 	temp_link_overrun_errors = temp_64 << 4;
2956 
2957 	temp_64 = be64_to_cpu(rsp.excessive_buffer_overruns);
2958 	if (temp_64 > 0xFUL)
2959 		temp_64 = 0xFUL;
2960 	temp_link_overrun_errors |= temp_64;
2961 
2962 	p->link_overrun_errors = (u8)temp_link_overrun_errors;
2963 
2964 	p->vl15_dropped = 0; /* N/A for OPA */
2965 
2966 bail:
2967 	return reply((struct ib_mad_hdr *)pmp);
2968 }
2969 
2970 static int pma_get_opa_errorinfo(struct opa_pma_mad *pmp,
2971 				 struct ib_device *ibdev,
2972 				 u8 port, u32 *resp_len)
2973 {
2974 	size_t response_data_size;
2975 	struct _port_ei *rsp;
2976 	struct opa_port_error_info_msg *req;
2977 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
2978 	u64 port_mask;
2979 	u32 num_ports;
2980 	u8 port_num;
2981 	u8 num_pslm;
2982 	u64 reg;
2983 
2984 	req = (struct opa_port_error_info_msg *)pmp->data;
2985 	rsp = &req->port[0];
2986 
2987 	num_ports = OPA_AM_NPORT(be32_to_cpu(pmp->mad_hdr.attr_mod));
2988 	num_pslm = hweight64(be64_to_cpu(req->port_select_mask[3]));
2989 
2990 	memset(rsp, 0, sizeof(*rsp));
2991 
2992 	if (num_ports != 1 || num_ports != num_pslm) {
2993 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2994 		return reply((struct ib_mad_hdr *)pmp);
2995 	}
2996 
2997 	/* Sanity check */
2998 	response_data_size = sizeof(struct opa_port_error_info_msg);
2999 
3000 	if (response_data_size > sizeof(pmp->data)) {
3001 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3002 		return reply((struct ib_mad_hdr *)pmp);
3003 	}
3004 
3005 	/*
3006 	 * The bit set in the mask needs to be consistent with the port
3007 	 * the request came in on.
3008 	 */
3009 	port_mask = be64_to_cpu(req->port_select_mask[3]);
3010 	port_num = find_first_bit((unsigned long *)&port_mask,
3011 				  sizeof(port_mask) * 8);
3012 
3013 	if (port_num != port) {
3014 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3015 		return reply((struct ib_mad_hdr *)pmp);
3016 	}
3017 
3018 	/* PortRcvErrorInfo */
3019 	rsp->port_rcv_ei.status_and_code =
3020 		dd->err_info_rcvport.status_and_code;
3021 	memcpy(&rsp->port_rcv_ei.ei.ei1to12.packet_flit1,
3022 	       &dd->err_info_rcvport.packet_flit1, sizeof(u64));
3023 	memcpy(&rsp->port_rcv_ei.ei.ei1to12.packet_flit2,
3024 	       &dd->err_info_rcvport.packet_flit2, sizeof(u64));
3025 
3026 	/* ExcessiveBufferOverrunInfo */
3027 	reg = read_csr(dd, RCV_ERR_INFO);
3028 	if (reg & RCV_ERR_INFO_RCV_EXCESS_BUFFER_OVERRUN_SMASK) {
3029 		/*
3030 		 * if the RcvExcessBufferOverrun bit is set, save SC of
3031 		 * first pkt that encountered an excess buffer overrun
3032 		 */
3033 		u8 tmp = (u8)reg;
3034 
3035 		tmp &= RCV_ERR_INFO_RCV_EXCESS_BUFFER_OVERRUN_SC_SMASK;
3036 		tmp <<= 2;
3037 		rsp->excessive_buffer_overrun_ei.status_and_sc = tmp;
3038 		/* set the status bit */
3039 		rsp->excessive_buffer_overrun_ei.status_and_sc |= 0x80;
3040 	}
3041 
3042 	rsp->port_xmit_constraint_ei.status =
3043 		dd->err_info_xmit_constraint.status;
3044 	rsp->port_xmit_constraint_ei.pkey =
3045 		cpu_to_be16(dd->err_info_xmit_constraint.pkey);
3046 	rsp->port_xmit_constraint_ei.slid =
3047 		cpu_to_be32(dd->err_info_xmit_constraint.slid);
3048 
3049 	rsp->port_rcv_constraint_ei.status =
3050 		dd->err_info_rcv_constraint.status;
3051 	rsp->port_rcv_constraint_ei.pkey =
3052 		cpu_to_be16(dd->err_info_rcv_constraint.pkey);
3053 	rsp->port_rcv_constraint_ei.slid =
3054 		cpu_to_be32(dd->err_info_rcv_constraint.slid);
3055 
3056 	/* UncorrectableErrorInfo */
3057 	rsp->uncorrectable_ei.status_and_code = dd->err_info_uncorrectable;
3058 
3059 	/* FMConfigErrorInfo */
3060 	rsp->fm_config_ei.status_and_code = dd->err_info_fmconfig;
3061 
3062 	if (resp_len)
3063 		*resp_len += response_data_size;
3064 
3065 	return reply((struct ib_mad_hdr *)pmp);
3066 }
3067 
3068 static int pma_set_opa_portstatus(struct opa_pma_mad *pmp,
3069 				  struct ib_device *ibdev,
3070 				  u8 port, u32 *resp_len)
3071 {
3072 	struct opa_clear_port_status *req =
3073 		(struct opa_clear_port_status *)pmp->data;
3074 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
3075 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
3076 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3077 	u32 nports = be32_to_cpu(pmp->mad_hdr.attr_mod) >> 24;
3078 	u64 portn = be64_to_cpu(req->port_select_mask[3]);
3079 	u32 counter_select = be32_to_cpu(req->counter_select_mask);
3080 	u32 vl_select_mask = VL_MASK_ALL; /* clear all per-vl cnts */
3081 	unsigned long vl;
3082 
3083 	if ((nports != 1) || (portn != 1 << port)) {
3084 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3085 		return reply((struct ib_mad_hdr *)pmp);
3086 	}
3087 	/*
3088 	 * only counters returned by pma_get_opa_portstatus() are
3089 	 * handled, so when pma_get_opa_portstatus() gets a fix,
3090 	 * the corresponding change should be made here as well.
3091 	 */
3092 
3093 	if (counter_select & CS_PORT_XMIT_DATA)
3094 		write_dev_cntr(dd, C_DC_XMIT_FLITS, CNTR_INVALID_VL, 0);
3095 
3096 	if (counter_select & CS_PORT_RCV_DATA)
3097 		write_dev_cntr(dd, C_DC_RCV_FLITS, CNTR_INVALID_VL, 0);
3098 
3099 	if (counter_select & CS_PORT_XMIT_PKTS)
3100 		write_dev_cntr(dd, C_DC_XMIT_PKTS, CNTR_INVALID_VL, 0);
3101 
3102 	if (counter_select & CS_PORT_RCV_PKTS)
3103 		write_dev_cntr(dd, C_DC_RCV_PKTS, CNTR_INVALID_VL, 0);
3104 
3105 	if (counter_select & CS_PORT_MCAST_XMIT_PKTS)
3106 		write_dev_cntr(dd, C_DC_MC_XMIT_PKTS, CNTR_INVALID_VL, 0);
3107 
3108 	if (counter_select & CS_PORT_MCAST_RCV_PKTS)
3109 		write_dev_cntr(dd, C_DC_MC_RCV_PKTS, CNTR_INVALID_VL, 0);
3110 
3111 	if (counter_select & CS_PORT_XMIT_WAIT)
3112 		write_port_cntr(ppd, C_TX_WAIT, CNTR_INVALID_VL, 0);
3113 
3114 	/* ignore cs_sw_portCongestion for HFIs */
3115 
3116 	if (counter_select & CS_PORT_RCV_FECN)
3117 		write_dev_cntr(dd, C_DC_RCV_FCN, CNTR_INVALID_VL, 0);
3118 
3119 	if (counter_select & CS_PORT_RCV_BECN)
3120 		write_dev_cntr(dd, C_DC_RCV_BCN, CNTR_INVALID_VL, 0);
3121 
3122 	/* ignore cs_port_xmit_time_cong for HFIs */
3123 	/* ignore cs_port_xmit_wasted_bw for now */
3124 	/* ignore cs_port_xmit_wait_data for now */
3125 	if (counter_select & CS_PORT_RCV_BUBBLE)
3126 		write_dev_cntr(dd, C_DC_RCV_BBL, CNTR_INVALID_VL, 0);
3127 
3128 	/* Only applicable for switch */
3129 	/* if (counter_select & CS_PORT_MARK_FECN)
3130 	 *	write_csr(dd, DCC_PRF_PORT_MARK_FECN_CNT, 0);
3131 	 */
3132 
3133 	if (counter_select & CS_PORT_RCV_CONSTRAINT_ERRORS)
3134 		write_port_cntr(ppd, C_SW_RCV_CSTR_ERR, CNTR_INVALID_VL, 0);
3135 
3136 	/* ignore cs_port_rcv_switch_relay_errors for HFIs */
3137 	if (counter_select & CS_PORT_XMIT_DISCARDS)
3138 		write_port_cntr(ppd, C_SW_XMIT_DSCD, CNTR_INVALID_VL, 0);
3139 
3140 	if (counter_select & CS_PORT_XMIT_CONSTRAINT_ERRORS)
3141 		write_port_cntr(ppd, C_SW_XMIT_CSTR_ERR, CNTR_INVALID_VL, 0);
3142 
3143 	if (counter_select & CS_PORT_RCV_REMOTE_PHYSICAL_ERRORS)
3144 		write_dev_cntr(dd, C_DC_RMT_PHY_ERR, CNTR_INVALID_VL, 0);
3145 
3146 	if (counter_select & CS_LOCAL_LINK_INTEGRITY_ERRORS)
3147 		write_dev_cntr(dd, C_DC_RX_REPLAY, CNTR_INVALID_VL, 0);
3148 
3149 	if (counter_select & CS_LINK_ERROR_RECOVERY) {
3150 		write_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL, 0);
3151 		write_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT,
3152 			       CNTR_INVALID_VL, 0);
3153 	}
3154 
3155 	if (counter_select & CS_PORT_RCV_ERRORS)
3156 		write_dev_cntr(dd, C_DC_RCV_ERR, CNTR_INVALID_VL, 0);
3157 
3158 	if (counter_select & CS_EXCESSIVE_BUFFER_OVERRUNS) {
3159 		write_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL, 0);
3160 		dd->rcv_ovfl_cnt = 0;
3161 	}
3162 
3163 	if (counter_select & CS_FM_CONFIG_ERRORS)
3164 		write_dev_cntr(dd, C_DC_FM_CFG_ERR, CNTR_INVALID_VL, 0);
3165 
3166 	if (counter_select & CS_LINK_DOWNED)
3167 		write_port_cntr(ppd, C_SW_LINK_DOWN, CNTR_INVALID_VL, 0);
3168 
3169 	if (counter_select & CS_UNCORRECTABLE_ERRORS)
3170 		write_dev_cntr(dd, C_DC_UNC_ERR, CNTR_INVALID_VL, 0);
3171 
3172 	for_each_set_bit(vl, (unsigned long *)&(vl_select_mask),
3173 			 8 * sizeof(vl_select_mask)) {
3174 		if (counter_select & CS_PORT_XMIT_DATA)
3175 			write_port_cntr(ppd, C_TX_FLIT_VL, idx_from_vl(vl), 0);
3176 
3177 		if (counter_select & CS_PORT_RCV_DATA)
3178 			write_dev_cntr(dd, C_DC_RX_FLIT_VL, idx_from_vl(vl), 0);
3179 
3180 		if (counter_select & CS_PORT_XMIT_PKTS)
3181 			write_port_cntr(ppd, C_TX_PKT_VL, idx_from_vl(vl), 0);
3182 
3183 		if (counter_select & CS_PORT_RCV_PKTS)
3184 			write_dev_cntr(dd, C_DC_RX_PKT_VL, idx_from_vl(vl), 0);
3185 
3186 		if (counter_select & CS_PORT_XMIT_WAIT)
3187 			write_port_cntr(ppd, C_TX_WAIT_VL, idx_from_vl(vl), 0);
3188 
3189 		/* sw_port_vl_congestion is 0 for HFIs */
3190 		if (counter_select & CS_PORT_RCV_FECN)
3191 			write_dev_cntr(dd, C_DC_RCV_FCN_VL, idx_from_vl(vl), 0);
3192 
3193 		if (counter_select & CS_PORT_RCV_BECN)
3194 			write_dev_cntr(dd, C_DC_RCV_BCN_VL, idx_from_vl(vl), 0);
3195 
3196 		/* port_vl_xmit_time_cong is 0 for HFIs */
3197 		/* port_vl_xmit_wasted_bw ??? */
3198 		/* port_vl_xmit_wait_data - TXE (table 13-9 HFI spec) ??? */
3199 		if (counter_select & CS_PORT_RCV_BUBBLE)
3200 			write_dev_cntr(dd, C_DC_RCV_BBL_VL, idx_from_vl(vl), 0);
3201 
3202 		/* if (counter_select & CS_PORT_MARK_FECN)
3203 		 *     write_csr(dd, DCC_PRF_PORT_VL_MARK_FECN_CNT + offset, 0);
3204 		 */
3205 		if (counter_select & CS_PORT_XMIT_DISCARDS)
3206 			write_port_cntr(ppd, C_SW_XMIT_DSCD_VL,
3207 					idx_from_vl(vl), 0);
3208 	}
3209 
3210 	if (resp_len)
3211 		*resp_len += sizeof(*req);
3212 
3213 	return reply((struct ib_mad_hdr *)pmp);
3214 }
3215 
3216 static int pma_set_opa_errorinfo(struct opa_pma_mad *pmp,
3217 				 struct ib_device *ibdev,
3218 				 u8 port, u32 *resp_len)
3219 {
3220 	struct _port_ei *rsp;
3221 	struct opa_port_error_info_msg *req;
3222 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
3223 	u64 port_mask;
3224 	u32 num_ports;
3225 	u8 port_num;
3226 	u8 num_pslm;
3227 	u32 error_info_select;
3228 
3229 	req = (struct opa_port_error_info_msg *)pmp->data;
3230 	rsp = &req->port[0];
3231 
3232 	num_ports = OPA_AM_NPORT(be32_to_cpu(pmp->mad_hdr.attr_mod));
3233 	num_pslm = hweight64(be64_to_cpu(req->port_select_mask[3]));
3234 
3235 	memset(rsp, 0, sizeof(*rsp));
3236 
3237 	if (num_ports != 1 || num_ports != num_pslm) {
3238 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3239 		return reply((struct ib_mad_hdr *)pmp);
3240 	}
3241 
3242 	/*
3243 	 * The bit set in the mask needs to be consistent with the port
3244 	 * the request came in on.
3245 	 */
3246 	port_mask = be64_to_cpu(req->port_select_mask[3]);
3247 	port_num = find_first_bit((unsigned long *)&port_mask,
3248 				  sizeof(port_mask) * 8);
3249 
3250 	if (port_num != port) {
3251 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3252 		return reply((struct ib_mad_hdr *)pmp);
3253 	}
3254 
3255 	error_info_select = be32_to_cpu(req->error_info_select_mask);
3256 
3257 	/* PortRcvErrorInfo */
3258 	if (error_info_select & ES_PORT_RCV_ERROR_INFO)
3259 		/* turn off status bit */
3260 		dd->err_info_rcvport.status_and_code &= ~OPA_EI_STATUS_SMASK;
3261 
3262 	/* ExcessiveBufferOverrunInfo */
3263 	if (error_info_select & ES_EXCESSIVE_BUFFER_OVERRUN_INFO)
3264 		/*
3265 		 * status bit is essentially kept in the h/w - bit 5 of
3266 		 * RCV_ERR_INFO
3267 		 */
3268 		write_csr(dd, RCV_ERR_INFO,
3269 			  RCV_ERR_INFO_RCV_EXCESS_BUFFER_OVERRUN_SMASK);
3270 
3271 	if (error_info_select & ES_PORT_XMIT_CONSTRAINT_ERROR_INFO)
3272 		dd->err_info_xmit_constraint.status &= ~OPA_EI_STATUS_SMASK;
3273 
3274 	if (error_info_select & ES_PORT_RCV_CONSTRAINT_ERROR_INFO)
3275 		dd->err_info_rcv_constraint.status &= ~OPA_EI_STATUS_SMASK;
3276 
3277 	/* UncorrectableErrorInfo */
3278 	if (error_info_select & ES_UNCORRECTABLE_ERROR_INFO)
3279 		/* turn off status bit */
3280 		dd->err_info_uncorrectable &= ~OPA_EI_STATUS_SMASK;
3281 
3282 	/* FMConfigErrorInfo */
3283 	if (error_info_select & ES_FM_CONFIG_ERROR_INFO)
3284 		/* turn off status bit */
3285 		dd->err_info_fmconfig &= ~OPA_EI_STATUS_SMASK;
3286 
3287 	if (resp_len)
3288 		*resp_len += sizeof(*req);
3289 
3290 	return reply((struct ib_mad_hdr *)pmp);
3291 }
3292 
3293 struct opa_congestion_info_attr {
3294 	__be16 congestion_info;
3295 	u8 control_table_cap;	/* Multiple of 64 entry unit CCTs */
3296 	u8 congestion_log_length;
3297 } __packed;
3298 
3299 static int __subn_get_opa_cong_info(struct opa_smp *smp, u32 am, u8 *data,
3300 				    struct ib_device *ibdev, u8 port,
3301 				    u32 *resp_len)
3302 {
3303 	struct opa_congestion_info_attr *p =
3304 		(struct opa_congestion_info_attr *)data;
3305 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
3306 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3307 
3308 	p->congestion_info = 0;
3309 	p->control_table_cap = ppd->cc_max_table_entries;
3310 	p->congestion_log_length = OPA_CONG_LOG_ELEMS;
3311 
3312 	if (resp_len)
3313 		*resp_len += sizeof(*p);
3314 
3315 	return reply((struct ib_mad_hdr *)smp);
3316 }
3317 
3318 static int __subn_get_opa_cong_setting(struct opa_smp *smp, u32 am,
3319 				       u8 *data, struct ib_device *ibdev,
3320 				       u8 port, u32 *resp_len)
3321 {
3322 	int i;
3323 	struct opa_congestion_setting_attr *p =
3324 		(struct opa_congestion_setting_attr *)data;
3325 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
3326 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3327 	struct opa_congestion_setting_entry_shadow *entries;
3328 	struct cc_state *cc_state;
3329 
3330 	rcu_read_lock();
3331 
3332 	cc_state = get_cc_state(ppd);
3333 
3334 	if (!cc_state) {
3335 		rcu_read_unlock();
3336 		return reply((struct ib_mad_hdr *)smp);
3337 	}
3338 
3339 	entries = cc_state->cong_setting.entries;
3340 	p->port_control = cpu_to_be16(cc_state->cong_setting.port_control);
3341 	p->control_map = cpu_to_be32(cc_state->cong_setting.control_map);
3342 	for (i = 0; i < OPA_MAX_SLS; i++) {
3343 		p->entries[i].ccti_increase = entries[i].ccti_increase;
3344 		p->entries[i].ccti_timer = cpu_to_be16(entries[i].ccti_timer);
3345 		p->entries[i].trigger_threshold =
3346 			entries[i].trigger_threshold;
3347 		p->entries[i].ccti_min = entries[i].ccti_min;
3348 	}
3349 
3350 	rcu_read_unlock();
3351 
3352 	if (resp_len)
3353 		*resp_len += sizeof(*p);
3354 
3355 	return reply((struct ib_mad_hdr *)smp);
3356 }
3357 
3358 /*
3359  * Apply congestion control information stored in the ppd to the
3360  * active structure.
3361  */
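/*
 * Update scheme, as implemented below: a new cc_state is built and published
 * under cc_state_lock with rcu_assign_pointer(), readers such as
 * __subn_get_opa_cong_setting() dereference ppd->cc_state under
 * rcu_read_lock(), and the old state is freed after a grace period via
 * kfree_rcu().
 */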
3362 static void apply_cc_state(struct hfi1_pportdata *ppd)
3363 {
3364 	struct cc_state *old_cc_state, *new_cc_state;
3365 
3366 	new_cc_state = kzalloc(sizeof(*new_cc_state), GFP_KERNEL);
3367 	if (!new_cc_state)
3368 		return;
3369 
3370 	/*
3371 	 * Hold the lock for updating *and* to prevent ppd information
3372 	 * from changing during the update.
3373 	 */
3374 	spin_lock(&ppd->cc_state_lock);
3375 
3376 	old_cc_state = get_cc_state_protected(ppd);
3377 	if (!old_cc_state) {
3378 		/* never active, or shutting down */
3379 		spin_unlock(&ppd->cc_state_lock);
3380 		kfree(new_cc_state);
3381 		return;
3382 	}
3383 
3384 	*new_cc_state = *old_cc_state;
3385 
3386 	new_cc_state->cct.ccti_limit = ppd->total_cct_entry - 1;
3387 	memcpy(new_cc_state->cct.entries, ppd->ccti_entries,
3388 	       ppd->total_cct_entry * sizeof(struct ib_cc_table_entry));
3389 
3390 	new_cc_state->cong_setting.port_control = IB_CC_CCS_PC_SL_BASED;
3391 	new_cc_state->cong_setting.control_map = ppd->cc_sl_control_map;
3392 	memcpy(new_cc_state->cong_setting.entries, ppd->congestion_entries,
3393 	       OPA_MAX_SLS * sizeof(struct opa_congestion_setting_entry));
3394 
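	/*
	 * Publish the new state.  Readers under rcu_read_lock() see either
	 * the old or the new copy; the old copy is freed only after a grace
	 * period by kfree_rcu() below.
	 */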
3395 	rcu_assign_pointer(ppd->cc_state, new_cc_state);
3396 
3397 	spin_unlock(&ppd->cc_state_lock);
3398 
3399 	kfree_rcu(old_cc_state, rcu);
3400 }
3401 
3402 static int __subn_set_opa_cong_setting(struct opa_smp *smp, u32 am, u8 *data,
3403 				       struct ib_device *ibdev, u8 port,
3404 				       u32 *resp_len)
3405 {
3406 	struct opa_congestion_setting_attr *p =
3407 		(struct opa_congestion_setting_attr *)data;
3408 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
3409 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3410 	struct opa_congestion_setting_entry_shadow *entries;
3411 	int i;
3412 
3413 	/*
3414 	 * Save details from packet into the ppd.  Hold the cc_state_lock so
3415 	 * our information is consistent with anyone trying to apply the state.
3416 	 */
3417 	spin_lock(&ppd->cc_state_lock);
3418 	ppd->cc_sl_control_map = be32_to_cpu(p->control_map);
3419 
3420 	entries = ppd->congestion_entries;
3421 	for (i = 0; i < OPA_MAX_SLS; i++) {
3422 		entries[i].ccti_increase = p->entries[i].ccti_increase;
3423 		entries[i].ccti_timer = be16_to_cpu(p->entries[i].ccti_timer);
3424 		entries[i].trigger_threshold =
3425 			p->entries[i].trigger_threshold;
3426 		entries[i].ccti_min = p->entries[i].ccti_min;
3427 	}
3428 	spin_unlock(&ppd->cc_state_lock);
3429 
3430 	/* now apply the information */
3431 	apply_cc_state(ppd);
3432 
3433 	return __subn_get_opa_cong_setting(smp, am, data, ibdev, port,
3434 					   resp_len);
3435 }
3436 
3437 static int __subn_get_opa_hfi1_cong_log(struct opa_smp *smp, u32 am,
3438 					u8 *data, struct ib_device *ibdev,
3439 					u8 port, u32 *resp_len)
3440 {
3441 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
3442 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3443 	struct opa_hfi1_cong_log *cong_log = (struct opa_hfi1_cong_log *)data;
3444 	s64 ts;
3445 	int i;
3446 
3447 	if (am != 0) {
3448 		smp->status |= IB_SMP_INVALID_FIELD;
3449 		return reply((struct ib_mad_hdr *)smp);
3450 	}
3451 
3452 	spin_lock_irq(&ppd->cc_log_lock);
3453 
3454 	cong_log->log_type = OPA_CC_LOG_TYPE_HFI;
3455 	cong_log->congestion_flags = 0;
3456 	cong_log->threshold_event_counter =
3457 		cpu_to_be16(ppd->threshold_event_counter);
3458 	memcpy(cong_log->threshold_cong_event_map,
3459 	       ppd->threshold_cong_event_map,
3460 	       sizeof(cong_log->threshold_cong_event_map));
3461 	/* keep timestamp in units of 1.024 usec */
3462 	ts = ktime_to_ns(ktime_get()) / 1024;
3463 	cong_log->current_time_stamp = cpu_to_be32(ts);
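	/*
	 * cc_events is a circular buffer; start at the saved per-port read
	 * index cc_mad_idx and wrap so that every element is visited once.
	 */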
3464 	for (i = 0; i < OPA_CONG_LOG_ELEMS; i++) {
3465 		struct opa_hfi1_cong_log_event_internal *cce =
3466 			&ppd->cc_events[ppd->cc_mad_idx++];
3467 		if (ppd->cc_mad_idx == OPA_CONG_LOG_ELEMS)
3468 			ppd->cc_mad_idx = 0;
3469 		/*
3470 		 * Entries which are older than twice the time
3471 		 * required to wrap the counter are supposed to
3472 		 * be zeroed (CA10-49 IBTA, release 1.2.1, V1).
3473 		 */
3474 		if ((u64)(ts - cce->timestamp) > (2 * UINT_MAX))
3475 			continue;
3476 		memcpy(cong_log->events[i].local_qp_cn_entry, &cce->lqpn, 3);
3477 		memcpy(cong_log->events[i].remote_qp_number_cn_entry,
3478 		       &cce->rqpn, 3);
3479 		cong_log->events[i].sl_svc_type_cn_entry =
3480 			((cce->sl & 0x1f) << 3) | (cce->svc_type & 0x7);
3481 		cong_log->events[i].remote_lid_cn_entry =
3482 			cpu_to_be32(cce->rlid);
3483 		cong_log->events[i].timestamp_cn_entry =
3484 			cpu_to_be32(cce->timestamp);
3485 	}
3486 
3487 	/*
3488 	 * Reset threshold_cong_event_map and threshold_event_counter
3489 	 * to 0 when the log is read.
3490 	 */
3491 	memset(ppd->threshold_cong_event_map, 0x0,
3492 	       sizeof(ppd->threshold_cong_event_map));
3493 	ppd->threshold_event_counter = 0;
3494 
3495 	spin_unlock_irq(&ppd->cc_log_lock);
3496 
3497 	if (resp_len)
3498 		*resp_len += sizeof(struct opa_hfi1_cong_log);
3499 
3500 	return reply((struct ib_mad_hdr *)smp);
3501 }
3502 
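/*
 * The Congestion Control Table is addressed in IB_CCT_ENTRIES-entry
 * blocks; the starting block and block count are carried in the
 * attribute modifier (OPA_AM_START_BLK / OPA_AM_NBLK).
 */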
3503 static int __subn_get_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data,
3504 				   struct ib_device *ibdev, u8 port,
3505 				   u32 *resp_len)
3506 {
3507 	struct ib_cc_table_attr *cc_table_attr =
3508 		(struct ib_cc_table_attr *)data;
3509 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
3510 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3511 	u32 start_block = OPA_AM_START_BLK(am);
3512 	u32 n_blocks = OPA_AM_NBLK(am);
3513 	struct ib_cc_table_entry_shadow *entries;
3514 	int i, j;
3515 	u32 sentry, eentry;
3516 	struct cc_state *cc_state;
3517 
3518 	/* sanity check n_blocks, start_block */
3519 	if (n_blocks == 0 ||
3520 	    start_block + n_blocks > ppd->cc_max_table_entries) {
3521 		smp->status |= IB_SMP_INVALID_FIELD;
3522 		return reply((struct ib_mad_hdr *)smp);
3523 	}
3524 
3525 	rcu_read_lock();
3526 
3527 	cc_state = get_cc_state(ppd);
3528 
3529 	if (!cc_state) {
3530 		rcu_read_unlock();
3531 		return reply((struct ib_mad_hdr *)smp);
3532 	}
3533 
3534 	sentry = start_block * IB_CCT_ENTRIES;
3535 	eentry = sentry + (IB_CCT_ENTRIES * n_blocks);
3536 
3537 	cc_table_attr->ccti_limit = cpu_to_be16(cc_state->cct.ccti_limit);
3538 
3539 	entries = cc_state->cct.entries;
3540 
3541 	/* return n_blocks, though the last block may not be full */
3542 	for (j = 0, i = sentry; i < eentry; j++, i++)
3543 		cc_table_attr->ccti_entries[j].entry =
3544 			cpu_to_be16(entries[i].entry);
3545 
3546 	rcu_read_unlock();
3547 
3548 	if (resp_len)
3549 		*resp_len += sizeof(u16) * (IB_CCT_ENTRIES * n_blocks + 1);
3550 
3551 	return reply((struct ib_mad_hdr *)smp);
3552 }
3553 
3554 static int __subn_set_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data,
3555 				   struct ib_device *ibdev, u8 port,
3556 				   u32 *resp_len)
3557 {
3558 	struct ib_cc_table_attr *p = (struct ib_cc_table_attr *)data;
3559 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
3560 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3561 	u32 start_block = OPA_AM_START_BLK(am);
3562 	u32 n_blocks = OPA_AM_NBLK(am);
3563 	struct ib_cc_table_entry_shadow *entries;
3564 	int i, j;
3565 	u32 sentry, eentry;
3566 	u16 ccti_limit;
3567 
3568 	/* sanity check n_blocks, start_block */
3569 	if (n_blocks == 0 ||
3570 	    start_block + n_blocks > ppd->cc_max_table_entries) {
3571 		smp->status |= IB_SMP_INVALID_FIELD;
3572 		return reply((struct ib_mad_hdr *)smp);
3573 	}
3574 
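	/*
	 * ccti_limit is the index of the last valid table entry, so the
	 * final block may be only partially filled: its tail holds
	 * ccti_limit % IB_CCT_ENTRIES + 1 entries.
	 */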
3575 	sentry = start_block * IB_CCT_ENTRIES;
3576 	eentry = sentry + ((n_blocks - 1) * IB_CCT_ENTRIES) +
3577 		 (be16_to_cpu(p->ccti_limit)) % IB_CCT_ENTRIES + 1;
3578 
3579 	/* sanity check ccti_limit */
3580 	ccti_limit = be16_to_cpu(p->ccti_limit);
3581 	if (ccti_limit + 1 > eentry) {
3582 		smp->status |= IB_SMP_INVALID_FIELD;
3583 		return reply((struct ib_mad_hdr *)smp);
3584 	}
3585 
3586 	/*
3587 	 * Save details from packet into the ppd.  Hold the cc_state_lock so
3588 	 * our information is consistent with anyone trying to apply the state.
3589 	 */
3590 	spin_lock(&ppd->cc_state_lock);
3591 	ppd->total_cct_entry = ccti_limit + 1;
3592 	entries = ppd->ccti_entries;
3593 	for (j = 0, i = sentry; i < eentry; j++, i++)
3594 		entries[i].entry = be16_to_cpu(p->ccti_entries[j].entry);
3595 	spin_unlock(&ppd->cc_state_lock);
3596 
3597 	/* now apply the information */
3598 	apply_cc_state(ppd);
3599 
3600 	return __subn_get_opa_cc_table(smp, am, data, ibdev, port, resp_len);
3601 }
3602 
3603 struct opa_led_info {
3604 	__be32 rsvd_led_mask;
3605 	__be32 rsvd;
3606 };
3607 
3608 #define OPA_LED_SHIFT	31
3609 #define OPA_LED_MASK	BIT(OPA_LED_SHIFT)
3610 
3611 static int __subn_get_opa_led_info(struct opa_smp *smp, u32 am, u8 *data,
3612 				   struct ib_device *ibdev, u8 port,
3613 				   u32 *resp_len)
3614 {
3615 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
3616 	struct hfi1_pportdata *ppd = dd->pport;
3617 	struct opa_led_info *p = (struct opa_led_info *)data;
3618 	u32 nport = OPA_AM_NPORT(am);
3619 	u32 is_beaconing_active;
3620 
3621 	if (nport != 1) {
3622 		smp->status |= IB_SMP_INVALID_FIELD;
3623 		return reply((struct ib_mad_hdr *)smp);
3624 	}
3625 
3626 	/*
3627 	 * This pairs with the memory barrier in hfi1_start_led_override to
3628 	 * ensure that we read the correct state of LED beaconing represented
3629 	 * by led_override_timer_active
3630 	 */
3631 	smp_rmb();
3632 	is_beaconing_active = !!atomic_read(&ppd->led_override_timer_active);
3633 	p->rsvd_led_mask = cpu_to_be32(is_beaconing_active << OPA_LED_SHIFT);
3634 
3635 	if (resp_len)
3636 		*resp_len += sizeof(struct opa_led_info);
3637 
3638 	return reply((struct ib_mad_hdr *)smp);
3639 }
3640 
3641 static int __subn_set_opa_led_info(struct opa_smp *smp, u32 am, u8 *data,
3642 				   struct ib_device *ibdev, u8 port,
3643 				   u32 *resp_len)
3644 {
3645 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
3646 	struct opa_led_info *p = (struct opa_led_info *)data;
3647 	u32 nport = OPA_AM_NPORT(am);
3648 	int on = !!(be32_to_cpu(p->rsvd_led_mask) & OPA_LED_MASK);
3649 
3650 	if (nport != 1) {
3651 		smp->status |= IB_SMP_INVALID_FIELD;
3652 		return reply((struct ib_mad_hdr *)smp);
3653 	}
3654 
3655 	if (on)
3656 		hfi1_start_led_override(dd->pport, 2000, 1500);
3657 	else
3658 		shutdown_led_override(dd->pport);
3659 
3660 	return __subn_get_opa_led_info(smp, am, data, ibdev, port, resp_len);
3661 }
3662 
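/*
 * Dispatch an OPA SubnGet to the per-attribute __subn_get_opa_*()
 * handler for the requested attribute ID.
 */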
3663 static int subn_get_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am,
3664 			    u8 *data, struct ib_device *ibdev, u8 port,
3665 			    u32 *resp_len)
3666 {
3667 	int ret;
3668 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
3669 
3670 	switch (attr_id) {
3671 	case IB_SMP_ATTR_NODE_DESC:
3672 		ret = __subn_get_opa_nodedesc(smp, am, data, ibdev, port,
3673 					      resp_len);
3674 		break;
3675 	case IB_SMP_ATTR_NODE_INFO:
3676 		ret = __subn_get_opa_nodeinfo(smp, am, data, ibdev, port,
3677 					      resp_len);
3678 		break;
3679 	case IB_SMP_ATTR_PORT_INFO:
3680 		ret = __subn_get_opa_portinfo(smp, am, data, ibdev, port,
3681 					      resp_len);
3682 		break;
3683 	case IB_SMP_ATTR_PKEY_TABLE:
3684 		ret = __subn_get_opa_pkeytable(smp, am, data, ibdev, port,
3685 					       resp_len);
3686 		break;
3687 	case OPA_ATTRIB_ID_SL_TO_SC_MAP:
3688 		ret = __subn_get_opa_sl_to_sc(smp, am, data, ibdev, port,
3689 					      resp_len);
3690 		break;
3691 	case OPA_ATTRIB_ID_SC_TO_SL_MAP:
3692 		ret = __subn_get_opa_sc_to_sl(smp, am, data, ibdev, port,
3693 					      resp_len);
3694 		break;
3695 	case OPA_ATTRIB_ID_SC_TO_VLT_MAP:
3696 		ret = __subn_get_opa_sc_to_vlt(smp, am, data, ibdev, port,
3697 					       resp_len);
3698 		break;
3699 	case OPA_ATTRIB_ID_SC_TO_VLNT_MAP:
3700 		ret = __subn_get_opa_sc_to_vlnt(smp, am, data, ibdev, port,
3701 						resp_len);
3702 		break;
3703 	case OPA_ATTRIB_ID_PORT_STATE_INFO:
3704 		ret = __subn_get_opa_psi(smp, am, data, ibdev, port,
3705 					 resp_len);
3706 		break;
3707 	case OPA_ATTRIB_ID_BUFFER_CONTROL_TABLE:
3708 		ret = __subn_get_opa_bct(smp, am, data, ibdev, port,
3709 					 resp_len);
3710 		break;
3711 	case OPA_ATTRIB_ID_CABLE_INFO:
3712 		ret = __subn_get_opa_cable_info(smp, am, data, ibdev, port,
3713 						resp_len);
3714 		break;
3715 	case IB_SMP_ATTR_VL_ARB_TABLE:
3716 		ret = __subn_get_opa_vl_arb(smp, am, data, ibdev, port,
3717 					    resp_len);
3718 		break;
3719 	case OPA_ATTRIB_ID_CONGESTION_INFO:
3720 		ret = __subn_get_opa_cong_info(smp, am, data, ibdev, port,
3721 					       resp_len);
3722 		break;
3723 	case OPA_ATTRIB_ID_HFI_CONGESTION_SETTING:
3724 		ret = __subn_get_opa_cong_setting(smp, am, data, ibdev,
3725 						  port, resp_len);
3726 		break;
3727 	case OPA_ATTRIB_ID_HFI_CONGESTION_LOG:
3728 		ret = __subn_get_opa_hfi1_cong_log(smp, am, data, ibdev,
3729 						   port, resp_len);
3730 		break;
3731 	case OPA_ATTRIB_ID_CONGESTION_CONTROL_TABLE:
3732 		ret = __subn_get_opa_cc_table(smp, am, data, ibdev, port,
3733 					      resp_len);
3734 		break;
3735 	case IB_SMP_ATTR_LED_INFO:
3736 		ret = __subn_get_opa_led_info(smp, am, data, ibdev, port,
3737 					      resp_len);
3738 		break;
3739 	case IB_SMP_ATTR_SM_INFO:
3740 		if (ibp->rvp.port_cap_flags & IB_PORT_SM_DISABLED)
3741 			return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
3742 		if (ibp->rvp.port_cap_flags & IB_PORT_SM)
3743 			return IB_MAD_RESULT_SUCCESS;
3744 		/* FALLTHROUGH */
3745 	default:
3746 		smp->status |= IB_SMP_UNSUP_METH_ATTR;
3747 		ret = reply((struct ib_mad_hdr *)smp);
3748 		break;
3749 	}
3750 	return ret;
3751 }
3752 
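/*
 * Dispatch an OPA SubnSet to the per-attribute __subn_set_opa_*()
 * handler for the requested attribute ID.
 */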
3753 static int subn_set_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am,
3754 			    u8 *data, struct ib_device *ibdev, u8 port,
3755 			    u32 *resp_len)
3756 {
3757 	int ret;
3758 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
3759 
3760 	switch (attr_id) {
3761 	case IB_SMP_ATTR_PORT_INFO:
3762 		ret = __subn_set_opa_portinfo(smp, am, data, ibdev, port,
3763 					      resp_len);
3764 		break;
3765 	case IB_SMP_ATTR_PKEY_TABLE:
3766 		ret = __subn_set_opa_pkeytable(smp, am, data, ibdev, port,
3767 					       resp_len);
3768 		break;
3769 	case OPA_ATTRIB_ID_SL_TO_SC_MAP:
3770 		ret = __subn_set_opa_sl_to_sc(smp, am, data, ibdev, port,
3771 					      resp_len);
3772 		break;
3773 	case OPA_ATTRIB_ID_SC_TO_SL_MAP:
3774 		ret = __subn_set_opa_sc_to_sl(smp, am, data, ibdev, port,
3775 					      resp_len);
3776 		break;
3777 	case OPA_ATTRIB_ID_SC_TO_VLT_MAP:
3778 		ret = __subn_set_opa_sc_to_vlt(smp, am, data, ibdev, port,
3779 					       resp_len);
3780 		break;
3781 	case OPA_ATTRIB_ID_SC_TO_VLNT_MAP:
3782 		ret = __subn_set_opa_sc_to_vlnt(smp, am, data, ibdev, port,
3783 						resp_len);
3784 		break;
3785 	case OPA_ATTRIB_ID_PORT_STATE_INFO:
3786 		ret = __subn_set_opa_psi(smp, am, data, ibdev, port,
3787 					 resp_len);
3788 		break;
3789 	case OPA_ATTRIB_ID_BUFFER_CONTROL_TABLE:
3790 		ret = __subn_set_opa_bct(smp, am, data, ibdev, port,
3791 					 resp_len);
3792 		break;
3793 	case IB_SMP_ATTR_VL_ARB_TABLE:
3794 		ret = __subn_set_opa_vl_arb(smp, am, data, ibdev, port,
3795 					    resp_len);
3796 		break;
3797 	case OPA_ATTRIB_ID_HFI_CONGESTION_SETTING:
3798 		ret = __subn_set_opa_cong_setting(smp, am, data, ibdev,
3799 						  port, resp_len);
3800 		break;
3801 	case OPA_ATTRIB_ID_CONGESTION_CONTROL_TABLE:
3802 		ret = __subn_set_opa_cc_table(smp, am, data, ibdev, port,
3803 					      resp_len);
3804 		break;
3805 	case IB_SMP_ATTR_LED_INFO:
3806 		ret = __subn_set_opa_led_info(smp, am, data, ibdev, port,
3807 					      resp_len);
3808 		break;
3809 	case IB_SMP_ATTR_SM_INFO:
3810 		if (ibp->rvp.port_cap_flags & IB_PORT_SM_DISABLED)
3811 			return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
3812 		if (ibp->rvp.port_cap_flags & IB_PORT_SM)
3813 			return IB_MAD_RESULT_SUCCESS;
3814 		/* FALLTHROUGH */
3815 	default:
3816 		smp->status |= IB_SMP_UNSUP_METH_ATTR;
3817 		ret = reply((struct ib_mad_hdr *)smp);
3818 		break;
3819 	}
3820 	return ret;
3821 }
3822 
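/*
 * An OPA Aggregate attribute packs a sequence of member attributes into
 * the SMP data area.  Each member begins with an opa_aggregate header;
 * the low 7 bits of err_reqlength give the member's payload length in
 * 8-byte units, and bit 15 is the per-member error flag set by
 * set_aggr_error() when processing of that member fails.
 */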
3823 static inline void set_aggr_error(struct opa_aggregate *ag)
3824 {
3825 	ag->err_reqlength |= cpu_to_be16(0x8000);
3826 }
3827 
3828 static int subn_get_opa_aggregate(struct opa_smp *smp,
3829 				  struct ib_device *ibdev, u8 port,
3830 				  u32 *resp_len)
3831 {
3832 	int i;
3833 	u32 num_attr = be32_to_cpu(smp->attr_mod) & 0x000000ff;
3834 	u8 *next_smp = opa_get_smp_data(smp);
3835 
3836 	if (num_attr < 1 || num_attr > 117) {
3837 		smp->status |= IB_SMP_INVALID_FIELD;
3838 		return reply((struct ib_mad_hdr *)smp);
3839 	}
3840 
3841 	for (i = 0; i < num_attr; i++) {
3842 		struct opa_aggregate *agg;
3843 		size_t agg_data_len;
3844 		size_t agg_size;
3845 		u32 am;
3846 
3847 		agg = (struct opa_aggregate *)next_smp;
3848 		agg_data_len = (be16_to_cpu(agg->err_reqlength) & 0x007f) * 8;
3849 		agg_size = sizeof(*agg) + agg_data_len;
3850 		am = be32_to_cpu(agg->attr_mod);
3851 
3852 		*resp_len += agg_size;
3853 
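		/* the member (header + payload) must not extend past the SMP */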
3854 		if (next_smp + agg_size > ((u8 *)smp) + sizeof(*smp)) {
3855 			smp->status |= IB_SMP_INVALID_FIELD;
3856 			return reply((struct ib_mad_hdr *)smp);
3857 		}
3858 
3859 		/* zero the payload for this segment */
3860 		memset(next_smp + sizeof(*agg), 0, agg_data_len);
3861 
3862 		(void)subn_get_opa_sma(agg->attr_id, smp, am, agg->data,
3863 					ibdev, port, NULL);
3864 		if (smp->status & ~IB_SMP_DIRECTION) {
3865 			set_aggr_error(agg);
3866 			return reply((struct ib_mad_hdr *)smp);
3867 		}
3868 		next_smp += agg_size;
3869 	}
3870 
3871 	return reply((struct ib_mad_hdr *)smp);
3872 }
3873 
3874 static int subn_set_opa_aggregate(struct opa_smp *smp,
3875 				  struct ib_device *ibdev, u8 port,
3876 				  u32 *resp_len)
3877 {
3878 	int i;
3879 	u32 num_attr = be32_to_cpu(smp->attr_mod) & 0x000000ff;
3880 	u8 *next_smp = opa_get_smp_data(smp);
3881 
3882 	if (num_attr < 1 || num_attr > 117) {
3883 		smp->status |= IB_SMP_INVALID_FIELD;
3884 		return reply((struct ib_mad_hdr *)smp);
3885 	}
3886 
3887 	for (i = 0; i < num_attr; i++) {
3888 		struct opa_aggregate *agg;
3889 		size_t agg_data_len;
3890 		size_t agg_size;
3891 		u32 am;
3892 
3893 		agg = (struct opa_aggregate *)next_smp;
3894 		agg_data_len = (be16_to_cpu(agg->err_reqlength) & 0x007f) * 8;
3895 		agg_size = sizeof(*agg) + agg_data_len;
3896 		am = be32_to_cpu(agg->attr_mod);
3897 
3898 		*resp_len += agg_size;
3899 
3900 		if (next_smp + agg_size > ((u8 *)smp) + sizeof(*smp)) {
3901 			smp->status |= IB_SMP_INVALID_FIELD;
3902 			return reply((struct ib_mad_hdr *)smp);
3903 		}
3904 
3905 		(void)subn_set_opa_sma(agg->attr_id, smp, am, agg->data,
3906 					ibdev, port, NULL);
3907 		if (smp->status & ~IB_SMP_DIRECTION) {
3908 			set_aggr_error(agg);
3909 			return reply((struct ib_mad_hdr *)smp);
3910 		}
3911 		next_smp += agg_size;
3912 	}
3913 
3914 	return reply((struct ib_mad_hdr *)smp);
3915 }
3916 
3917 /*
3918  * OPAv1 specifies that, on the transition to link up, these counters
3919  * are cleared:
3920  *   PortRcvErrors [*]
3921  *   LinkErrorRecovery
3922  *   LocalLinkIntegrityErrors
3923  *   ExcessiveBufferOverruns [*]
3924  *
3925  * [*] Error info associated with these counters is retained, but the
3926  * error info status is reset to 0.
3927  */
3928 void clear_linkup_counters(struct hfi1_devdata *dd)
3929 {
3930 	/* PortRcvErrors */
3931 	write_dev_cntr(dd, C_DC_RCV_ERR, CNTR_INVALID_VL, 0);
3932 	dd->err_info_rcvport.status_and_code &= ~OPA_EI_STATUS_SMASK;
3933 	/* LinkErrorRecovery */
3934 	write_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL, 0);
3935 	write_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT, CNTR_INVALID_VL, 0);
3936 	/* LocalLinkIntegrityErrors */
3937 	write_dev_cntr(dd, C_DC_RX_REPLAY, CNTR_INVALID_VL, 0);
3938 	/* ExcessiveBufferOverruns */
3939 	write_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL, 0);
3940 	dd->rcv_ovfl_cnt = 0;
3941 	dd->err_info_xmit_constraint.status &= ~OPA_EI_STATUS_SMASK;
3942 }
3943 
3944 /*
3945  * is_local_mad() returns 1 if 'mad' is sent from, and destined to, the
3946  * local node; 0 otherwise.
3947  */
3948 static int is_local_mad(struct hfi1_ibport *ibp, const struct opa_mad *mad,
3949 			const struct ib_wc *in_wc)
3950 {
3951 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3952 	const struct opa_smp *smp = (const struct opa_smp *)mad;
3953 
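	/*
	 * A directed route SMP is local only if it was injected here
	 * (hop count 0) with permissive DR SLID and DLID.
	 */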
3954 	if (smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
3955 		return (smp->hop_cnt == 0 &&
3956 			smp->route.dr.dr_slid == OPA_LID_PERMISSIVE &&
3957 			smp->route.dr.dr_dlid == OPA_LID_PERMISSIVE);
3958 	}
3959 
3960 	return (in_wc->slid == ppd->lid);
3961 }
3962 
3963 /*
3964  * opa_local_smp_check() should only be called on MADs for which
3965  * is_local_mad() returns true. It applies the SMP checks that are
3966  * specific to SMPs which are sent from, and destined to, this node.
3967  * opa_local_smp_check() returns 0 if the SMP passes its checks, 1
3968  * otherwise.
3969  *
3970  * SMPs which arrive from other nodes are instead checked by
3971  * opa_smp_check().
3972  */
3973 static int opa_local_smp_check(struct hfi1_ibport *ibp,
3974 			       const struct ib_wc *in_wc)
3975 {
3976 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3977 	u16 slid = in_wc->slid;
3978 	u16 pkey;
3979 
3980 	if (in_wc->pkey_index >= ARRAY_SIZE(ppd->pkeys))
3981 		return 1;
3982 
3983 	pkey = ppd->pkeys[in_wc->pkey_index];
3984 	/*
3985 	 * We need to do the "node-local" checks specified in OPAv1,
3986 	 * rev 0.90, section 9.10.26, which are:
3987 	 *   - pkey is 0x7fff, or 0xffff
3988 	 *   - Source QPN == 0 || Destination QPN == 0
3989 	 *   - the MAD header's management class is either
3990 	 *     IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE or
3991 	 *     IB_MGMT_CLASS_SUBN_LID_ROUTED
3992 	 *   - SLID != 0
3993 	 *
3994 	 * However, we know (and so don't need to check again) that,
3995 	 * for local SMPs, the MAD stack passes MADs with:
3996 	 *   - Source QPN of 0
3997 	 *   - MAD mgmt_class is IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE
3998 	 *   - SLID is either: OPA_LID_PERMISSIVE (0xFFFFFFFF), or
3999 	 *     our own port's lid
4000 	 *
4001 	 */
4002 	if (pkey == LIM_MGMT_P_KEY || pkey == FULL_MGMT_P_KEY)
4003 		return 0;
4004 	ingress_pkey_table_fail(ppd, pkey, slid);
4005 	return 1;
4006 }
4007 
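/*
 * Process an OPA subnet management packet: validate the class version and
 * M_Key, then dispatch Get/Set (including the Aggregate attribute) to the
 * handlers above.  Responses and traps are handed back to the ib_mad core
 * to be processed normally.
 */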
4008 static int process_subn_opa(struct ib_device *ibdev, int mad_flags,
4009 			    u8 port, const struct opa_mad *in_mad,
4010 			    struct opa_mad *out_mad,
4011 			    u32 *resp_len)
4012 {
4013 	struct opa_smp *smp = (struct opa_smp *)out_mad;
4014 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
4015 	u8 *data;
4016 	u32 am;
4017 	__be16 attr_id;
4018 	int ret;
4019 
4020 	*out_mad = *in_mad;
4021 	data = opa_get_smp_data(smp);
4022 
4023 	am = be32_to_cpu(smp->attr_mod);
4024 	attr_id = smp->attr_id;
4025 	if (smp->class_version != OPA_SMI_CLASS_VERSION) {
4026 		smp->status |= IB_SMP_UNSUP_VERSION;
4027 		ret = reply((struct ib_mad_hdr *)smp);
4028 		return ret;
4029 	}
4030 	ret = check_mkey(ibp, (struct ib_mad_hdr *)smp, mad_flags, smp->mkey,
4031 			 smp->route.dr.dr_slid, smp->route.dr.return_path,
4032 			 smp->hop_cnt);
4033 	if (ret) {
4034 		u32 port_num = be32_to_cpu(smp->attr_mod);
4035 
4036 		/*
4037 		 * If this is a get/set portinfo, we already check the
4038 		 * M_Key if the MAD is for another port and the M_Key
4039 		 * is OK on the receiving port. This check is needed
4040 		 * to increment the error counters when the M_Key
4041 		 * fails to match on *both* ports.
4042 		 */
4043 		if (attr_id == IB_SMP_ATTR_PORT_INFO &&
4044 		    (smp->method == IB_MGMT_METHOD_GET ||
4045 		     smp->method == IB_MGMT_METHOD_SET) &&
4046 		    port_num && port_num <= ibdev->phys_port_cnt &&
4047 		    port != port_num)
4048 			(void)check_mkey(to_iport(ibdev, port_num),
4049 					  (struct ib_mad_hdr *)smp, 0,
4050 					  smp->mkey, smp->route.dr.dr_slid,
4051 					  smp->route.dr.return_path,
4052 					  smp->hop_cnt);
4053 		ret = IB_MAD_RESULT_FAILURE;
4054 		return ret;
4055 	}
4056 
4057 	*resp_len = opa_get_smp_header_size(smp);
4058 
4059 	switch (smp->method) {
4060 	case IB_MGMT_METHOD_GET:
4061 		switch (attr_id) {
4062 		default:
4063 			clear_opa_smp_data(smp);
4064 			ret = subn_get_opa_sma(attr_id, smp, am, data,
4065 					       ibdev, port, resp_len);
4066 			break;
4067 		case OPA_ATTRIB_ID_AGGREGATE:
4068 			ret = subn_get_opa_aggregate(smp, ibdev, port,
4069 						     resp_len);
4070 			break;
4071 		}
4072 		break;
4073 	case IB_MGMT_METHOD_SET:
4074 		switch (attr_id) {
4075 		default:
4076 			ret = subn_set_opa_sma(attr_id, smp, am, data,
4077 					       ibdev, port, resp_len);
4078 			break;
4079 		case OPA_ATTRIB_ID_AGGREGATE:
4080 			ret = subn_set_opa_aggregate(smp, ibdev, port,
4081 						     resp_len);
4082 			break;
4083 		}
4084 		break;
4085 	case IB_MGMT_METHOD_TRAP:
4086 	case IB_MGMT_METHOD_REPORT:
4087 	case IB_MGMT_METHOD_REPORT_RESP:
4088 	case IB_MGMT_METHOD_GET_RESP:
4089 		/*
4090 		 * The ib_mad module will call us to process responses
4091 		 * before checking for other consumers.
4092 		 * Just tell the caller to process it normally.
4093 		 */
4094 		ret = IB_MAD_RESULT_SUCCESS;
4095 		break;
4096 	default:
4097 		smp->status |= IB_SMP_UNSUP_METHOD;
4098 		ret = reply((struct ib_mad_hdr *)smp);
4099 		break;
4100 	}
4101 
4102 	return ret;
4103 }
4104 
4105 static int process_subn(struct ib_device *ibdev, int mad_flags,
4106 			u8 port, const struct ib_mad *in_mad,
4107 			struct ib_mad *out_mad)
4108 {
4109 	struct ib_smp *smp = (struct ib_smp *)out_mad;
4110 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
4111 	int ret = IB_MAD_RESULT_FAILURE;
4112 
4113 	*out_mad = *in_mad;
4114 	if (smp->class_version != 1) {
4115 		smp->status |= IB_SMP_UNSUP_VERSION;
4116 		ret = reply((struct ib_mad_hdr *)smp);
4117 		return ret;
4118 	}
4119 
4120 	ret = check_mkey(ibp, (struct ib_mad_hdr *)smp, mad_flags,
4121 			 smp->mkey, (__force __be32)smp->dr_slid,
4122 			 smp->return_path, smp->hop_cnt);
4123 	if (ret) {
4124 		u32 port_num = be32_to_cpu(smp->attr_mod);
4125 
4126 		/*
4127 		 * If this is a get/set portinfo, we already check the
4128 		 * M_Key if the MAD is for another port and the M_Key
4129 		 * is OK on the receiving port. This check is needed
4130 		 * to increment the error counters when the M_Key
4131 		 * fails to match on *both* ports.
4132 		 */
4133 		if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO &&
4134 		    (smp->method == IB_MGMT_METHOD_GET ||
4135 		     smp->method == IB_MGMT_METHOD_SET) &&
4136 		    port_num && port_num <= ibdev->phys_port_cnt &&
4137 		    port != port_num)
4138 			(void)check_mkey(to_iport(ibdev, port_num),
4139 					 (struct ib_mad_hdr *)smp, 0,
4140 					 smp->mkey,
4141 					 (__force __be32)smp->dr_slid,
4142 					 smp->return_path, smp->hop_cnt);
4143 		ret = IB_MAD_RESULT_FAILURE;
4144 		return ret;
4145 	}
4146 
4147 	switch (smp->method) {
4148 	case IB_MGMT_METHOD_GET:
4149 		switch (smp->attr_id) {
4150 		case IB_SMP_ATTR_NODE_INFO:
4151 			ret = subn_get_nodeinfo(smp, ibdev, port);
4152 			break;
4153 		default:
4154 			smp->status |= IB_SMP_UNSUP_METH_ATTR;
4155 			ret = reply((struct ib_mad_hdr *)smp);
4156 			break;
4157 		}
4158 		break;
4159 	}
4160 
4161 	return ret;
4162 }
4163 
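/*
 * Process an IB (non-OPA) performance management MAD: Get of
 * ClassPortInfo, PortCounters and PortCountersExt are supported.
 */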
4164 static int process_perf(struct ib_device *ibdev, u8 port,
4165 			const struct ib_mad *in_mad,
4166 			struct ib_mad *out_mad)
4167 {
4168 	struct ib_pma_mad *pmp = (struct ib_pma_mad *)out_mad;
4169 	struct ib_class_port_info *cpi = (struct ib_class_port_info *)
4170 						&pmp->data;
4171 	int ret = IB_MAD_RESULT_FAILURE;
4172 
4173 	*out_mad = *in_mad;
4174 	if (pmp->mad_hdr.class_version != 1) {
4175 		pmp->mad_hdr.status |= IB_SMP_UNSUP_VERSION;
4176 		ret = reply((struct ib_mad_hdr *)pmp);
4177 		return ret;
4178 	}
4179 
4180 	switch (pmp->mad_hdr.method) {
4181 	case IB_MGMT_METHOD_GET:
4182 		switch (pmp->mad_hdr.attr_id) {
4183 		case IB_PMA_PORT_COUNTERS:
4184 			ret = pma_get_ib_portcounters(pmp, ibdev, port);
4185 			break;
4186 		case IB_PMA_PORT_COUNTERS_EXT:
4187 			ret = pma_get_ib_portcounters_ext(pmp, ibdev, port);
4188 			break;
4189 		case IB_PMA_CLASS_PORT_INFO:
4190 			cpi->capability_mask = IB_PMA_CLASS_CAP_EXT_WIDTH;
4191 			ret = reply((struct ib_mad_hdr *)pmp);
4192 			break;
4193 		default:
4194 			pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
4195 			ret = reply((struct ib_mad_hdr *)pmp);
4196 			break;
4197 		}
4198 		break;
4199 
4200 	case IB_MGMT_METHOD_SET:
4201 		if (pmp->mad_hdr.attr_id) {
4202 			pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
4203 			ret = reply((struct ib_mad_hdr *)pmp);
4204 		}
4205 		break;
4206 
4207 	case IB_MGMT_METHOD_TRAP:
4208 	case IB_MGMT_METHOD_GET_RESP:
4209 		/*
4210 		 * The ib_mad module will call us to process responses
4211 		 * before checking for other consumers.
4212 		 * Just tell the caller to process it normally.
4213 		 */
4214 		ret = IB_MAD_RESULT_SUCCESS;
4215 		break;
4216 
4217 	default:
4218 		pmp->mad_hdr.status |= IB_SMP_UNSUP_METHOD;
4219 		ret = reply((struct ib_mad_hdr *)pmp);
4220 		break;
4221 	}
4222 
4223 	return ret;
4224 }
4225 
4226 static int process_perf_opa(struct ib_device *ibdev, u8 port,
4227 			    const struct opa_mad *in_mad,
4228 			    struct opa_mad *out_mad, u32 *resp_len)
4229 {
4230 	struct opa_pma_mad *pmp = (struct opa_pma_mad *)out_mad;
4231 	int ret;
4232 
4233 	*out_mad = *in_mad;
4234 
4235 	if (pmp->mad_hdr.class_version != OPA_SMI_CLASS_VERSION) {
4236 		pmp->mad_hdr.status |= IB_SMP_UNSUP_VERSION;
4237 		return reply((struct ib_mad_hdr *)pmp);
4238 	}
4239 
4240 	*resp_len = sizeof(pmp->mad_hdr);
4241 
4242 	switch (pmp->mad_hdr.method) {
4243 	case IB_MGMT_METHOD_GET:
4244 		switch (pmp->mad_hdr.attr_id) {
4245 		case IB_PMA_CLASS_PORT_INFO:
4246 			ret = pma_get_opa_classportinfo(pmp, ibdev, resp_len);
4247 			break;
4248 		case OPA_PM_ATTRIB_ID_PORT_STATUS:
4249 			ret = pma_get_opa_portstatus(pmp, ibdev, port,
4250 						     resp_len);
4251 			break;
4252 		case OPA_PM_ATTRIB_ID_DATA_PORT_COUNTERS:
4253 			ret = pma_get_opa_datacounters(pmp, ibdev, port,
4254 						       resp_len);
4255 			break;
4256 		case OPA_PM_ATTRIB_ID_ERROR_PORT_COUNTERS:
4257 			ret = pma_get_opa_porterrors(pmp, ibdev, port,
4258 						     resp_len);
4259 			break;
4260 		case OPA_PM_ATTRIB_ID_ERROR_INFO:
4261 			ret = pma_get_opa_errorinfo(pmp, ibdev, port,
4262 						    resp_len);
4263 			break;
4264 		default:
4265 			pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
4266 			ret = reply((struct ib_mad_hdr *)pmp);
4267 			break;
4268 		}
4269 		break;
4270 
4271 	case IB_MGMT_METHOD_SET:
4272 		switch (pmp->mad_hdr.attr_id) {
4273 		case OPA_PM_ATTRIB_ID_CLEAR_PORT_STATUS:
4274 			ret = pma_set_opa_portstatus(pmp, ibdev, port,
4275 						     resp_len);
4276 			break;
4277 		case OPA_PM_ATTRIB_ID_ERROR_INFO:
4278 			ret = pma_set_opa_errorinfo(pmp, ibdev, port,
4279 						    resp_len);
4280 			break;
4281 		default:
4282 			pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
4283 			ret = reply((struct ib_mad_hdr *)pmp);
4284 			break;
4285 		}
4286 		break;
4287 
4288 	case IB_MGMT_METHOD_TRAP:
4289 	case IB_MGMT_METHOD_GET_RESP:
4290 		/*
4291 		 * The ib_mad module will call us to process responses
4292 		 * before checking for other consumers.
4293 		 * Just tell the caller to process it normally.
4294 		 */
4295 		ret = IB_MAD_RESULT_SUCCESS;
4296 		break;
4297 
4298 	default:
4299 		pmp->mad_hdr.status |= IB_SMP_UNSUP_METHOD;
4300 		ret = reply((struct ib_mad_hdr *)pmp);
4301 		break;
4302 	}
4303 
4304 	return ret;
4305 }
4306 
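/*
 * Entry point for OPA-format MADs: choose the P_Key index used for the
 * reply (LIM_MGMT_P_KEY when present), apply the local SMP checks for
 * locally originated SMPs, and dispatch by management class.
 */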
4307 static int hfi1_process_opa_mad(struct ib_device *ibdev, int mad_flags,
4308 				u8 port, const struct ib_wc *in_wc,
4309 				const struct ib_grh *in_grh,
4310 				const struct opa_mad *in_mad,
4311 				struct opa_mad *out_mad, size_t *out_mad_size,
4312 				u16 *out_mad_pkey_index)
4313 {
4314 	int ret;
4315 	int pkey_idx;
4316 	u32 resp_len = 0;
4317 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
4318 
4319 	pkey_idx = hfi1_lookup_pkey_idx(ibp, LIM_MGMT_P_KEY);
4320 	if (pkey_idx < 0) {
4321 		pr_warn("failed to find limited mgmt pkey, defaulting 0x%x\n",
4322 			hfi1_get_pkey(ibp, 1));
4323 		pkey_idx = 1;
4324 	}
4325 	*out_mad_pkey_index = (u16)pkey_idx;
4326 
4327 	switch (in_mad->mad_hdr.mgmt_class) {
4328 	case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
4329 	case IB_MGMT_CLASS_SUBN_LID_ROUTED:
4330 		if (is_local_mad(ibp, in_mad, in_wc)) {
4331 			ret = opa_local_smp_check(ibp, in_wc);
4332 			if (ret)
4333 				return IB_MAD_RESULT_FAILURE;
4334 		}
4335 		ret = process_subn_opa(ibdev, mad_flags, port, in_mad,
4336 				       out_mad, &resp_len);
4337 		goto bail;
4338 	case IB_MGMT_CLASS_PERF_MGMT:
4339 		ret = process_perf_opa(ibdev, port, in_mad, out_mad,
4340 				       &resp_len);
4341 		goto bail;
4342 
4343 	default:
4344 		ret = IB_MAD_RESULT_SUCCESS;
4345 	}
4346 
4347 bail:
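	/*
	 * If a reply was generated, its size is the accumulated response
	 * length rounded up to 8 bytes; otherwise echo the incoming MAD size.
	 */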
4348 	if (ret & IB_MAD_RESULT_REPLY)
4349 		*out_mad_size = round_up(resp_len, 8);
4350 	else if (ret & IB_MAD_RESULT_SUCCESS)
4351 		*out_mad_size = in_wc->byte_len - sizeof(struct ib_grh);
4352 
4353 	return ret;
4354 }
4355 
4356 static int hfi1_process_ib_mad(struct ib_device *ibdev, int mad_flags, u8 port,
4357 			       const struct ib_wc *in_wc,
4358 			       const struct ib_grh *in_grh,
4359 			       const struct ib_mad *in_mad,
4360 			       struct ib_mad *out_mad)
4361 {
4362 	int ret;
4363 
4364 	switch (in_mad->mad_hdr.mgmt_class) {
4365 	case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
4366 	case IB_MGMT_CLASS_SUBN_LID_ROUTED:
4367 		ret = process_subn(ibdev, mad_flags, port, in_mad, out_mad);
4368 		break;
4369 	case IB_MGMT_CLASS_PERF_MGMT:
4370 		ret = process_perf(ibdev, port, in_mad, out_mad);
4371 		break;
4372 	default:
4373 		ret = IB_MAD_RESULT_SUCCESS;
4374 		break;
4375 	}
4376 
4377 	return ret;
4378 }
4379 
4380 /**
4381  * hfi1_process_mad - process an incoming MAD packet
4382  * @ibdev: the infiniband device this packet came in on
4383  * @mad_flags: MAD flags
4384  * @port: the port number this packet came in on
4385  * @in_wc: the work completion entry for this packet
4386  * @in_grh: the global route header for this packet
4387  * @in_mad: the incoming MAD
4388  * @out_mad: any outgoing MAD reply
4389  *
4390  * Returns IB_MAD_RESULT_SUCCESS if this is a MAD that we are not
4391  * interested in processing.
4392  *
4393  * Note that the verbs framework has already done the MAD sanity checks,
4394  * and hop count/pointer updating for IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE
4395  * MADs.
4396  *
4397  * This is called by the ib_mad module.
4398  */
4399 int hfi1_process_mad(struct ib_device *ibdev, int mad_flags, u8 port,
4400 		     const struct ib_wc *in_wc, const struct ib_grh *in_grh,
4401 		     const struct ib_mad_hdr *in_mad, size_t in_mad_size,
4402 		     struct ib_mad_hdr *out_mad, size_t *out_mad_size,
4403 		     u16 *out_mad_pkey_index)
4404 {
4405 	switch (in_mad->base_version) {
4406 	case OPA_MGMT_BASE_VERSION:
4407 		if (unlikely(in_mad_size != sizeof(struct opa_mad))) {
4408 			dev_err(ibdev->dma_device, "invalid in_mad_size\n");
4409 			return IB_MAD_RESULT_FAILURE;
4410 		}
4411 		return hfi1_process_opa_mad(ibdev, mad_flags, port,
4412 					    in_wc, in_grh,
4413 					    (struct opa_mad *)in_mad,
4414 					    (struct opa_mad *)out_mad,
4415 					    out_mad_size,
4416 					    out_mad_pkey_index);
4417 	case IB_MGMT_BASE_VERSION:
4418 		return hfi1_process_ib_mad(ibdev, mad_flags, port,
4419 					  in_wc, in_grh,
4420 					  (const struct ib_mad *)in_mad,
4421 					  (struct ib_mad *)out_mad);
4422 	default:
4423 		break;
4424 	}
4425 
4426 	return IB_MAD_RESULT_FAILURE;
4427 }
4428