1 /*
2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
5 * 1.0 of the CDDL.
6 *
7 * A full copy of the text of the CDDL should have accompanied this
8 * source. A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
10 */
11
12 /*
13 * Copyright (c) 2021, the University of Queensland
14 * Copyright 2020 RackTop Systems, Inc.
15 * Copyright 2023 MNX Cloud, Inc.
16 */
17
18 /*
19 * Mellanox Connect-X 4/5/6 driver.
20 */
21
22 #include <sys/modctl.h>
23 #include <sys/conf.h>
24 #include <sys/devops.h>
25 #include <sys/sysmacros.h>
26 #include <sys/vlan.h>
27
28 #include <sys/pattr.h>
29 #include <sys/dlpi.h>
30
31 #include <sys/mac_provider.h>
32
33 /* Need these for mac_vlan_header_info() */
34 #include <sys/mac_client.h>
35 #include <sys/mac_client_priv.h>
36
37 #include <mlxcx.h>
38
39 static char *mlxcx_priv_props[] = {
40 NULL
41 };
42
43 #define MBITS 1000000ULL
44 #define GBITS (1000ULL * MBITS)
45
46 static uint64_t
mlxcx_speed_to_bits(mlxcx_eth_proto_t proto,mlxcx_ext_eth_proto_t ext_proto)47 mlxcx_speed_to_bits(mlxcx_eth_proto_t proto, mlxcx_ext_eth_proto_t ext_proto)
48 {
49 /*
50 * Older parts only used "proto", but starting with ConnectX-6, there
51 * might be speeds & link-types in an extended set of proto bits.
52 *
53 * We check the old bits first because the extended bits do not report
54 * media on links (e.g. nothing like MLXCX_EXTPROTO_100GBASE_CR2
55 * for a 50Gbit lane).
56 *
57 * In the case of, e.g., 100GBASE_CR4 both proto and ext_proto have
58 * bits set, but the extended proto bits are a generic CAUI4 indicator
59 * that could be for CR4, KR4, etc. If we get a legitimate single-bit
60 * value, we don't worry about ext_proto. This may change in the face
61 * of other HW or cabling, however.
62 */
63 switch (proto) {
64 case MLXCX_PROTO_NONE: /* Aka "0" */
65 /* Go straight to checking ext_proto. */
66 break;
67 case MLXCX_PROTO_SGMII_100BASE:
68 case MLXCX_PROTO_100BASE_TX:
69 return (100ULL * MBITS);
70 case MLXCX_PROTO_SGMII:
71 case MLXCX_PROTO_1000BASE_KX:
72 case MLXCX_PROTO_1000BASE_T:
73 return (1000ULL * MBITS);
74 case MLXCX_PROTO_10GBASE_CX4:
75 case MLXCX_PROTO_10GBASE_KX4:
76 case MLXCX_PROTO_10GBASE_KR:
77 case MLXCX_PROTO_10GBASE_CR:
78 case MLXCX_PROTO_10GBASE_SR:
79 case MLXCX_PROTO_10GBASE_ER_LR:
80 case MLXCX_PROTO_10GBASE_T:
81 return (10ULL * GBITS);
82 case MLXCX_PROTO_40GBASE_CR4:
83 case MLXCX_PROTO_40GBASE_KR4:
84 case MLXCX_PROTO_40GBASE_SR4:
85 case MLXCX_PROTO_40GBASE_LR4_ER4:
86 return (40ULL * GBITS);
87 case MLXCX_PROTO_25GBASE_CR:
88 case MLXCX_PROTO_25GBASE_KR:
89 case MLXCX_PROTO_25GBASE_SR:
90 return (25ULL * GBITS);
91 case MLXCX_PROTO_50GBASE_SR2:
92 case MLXCX_PROTO_50GBASE_CR2:
93 case MLXCX_PROTO_50GBASE_KR2:
94 return (50ULL * GBITS);
95 case MLXCX_PROTO_100GBASE_CR4:
96 case MLXCX_PROTO_100GBASE_SR4:
97 case MLXCX_PROTO_100GBASE_KR4:
98 case MLXCX_PROTO_100GBASE_LR4_ER4:
99 return (100ULL * GBITS);
100 default:
101 /*
102 * We've checked for 0 explicitly above, so don't worry here.
103 *
104 * There ARE legitimate single-bit values we don't support,
105 * and should just return 0 immediately. We will ASSERT()
106 * that it's a single-bit value, however, since the passed-in
107 * values are from the "operational" register, which is only
108 * supposed to have one bit set. If the assertion fails
109 * there's either a hardware error or a severe
110 * misunderstanding of the register.
111 */
112 ASSERT0((uint32_t)proto & ((uint32_t)proto - 1U));
113 return (0);
114 }
115
116 switch (ext_proto) {
117 case MLXCX_EXTPROTO_SGMII_100BASE:
118 return (100ULL * MBITS);
119 case MLXCX_EXTPROTO_1000BASE_X_SGMII:
120 return (1000ULL * MBITS);
121 case MLXCX_EXTPROTO_5GBASE_R:
122 return (5ULL * GBITS);
123 case MLXCX_EXTPROTO_10GBASE_XFI_XAUI_1:
124 return (10ULL * GBITS);
125 case MLXCX_EXTPROTO_40GBASE_XLAUI_4_XLPPI_4:
126 return (40ULL * GBITS);
127 case MLXCX_EXTPROTO_25GAUI_1_25GBASE_CR_KR:
128 return (25ULL * GBITS);
129 case MLXCX_EXTPROTO_50GAUI_2_LAUI_2_50GBASE_CR2_KR2:
130 case MLXCX_EXTPROTO_50GAUI_1_LAUI_1_50GBASE_CR_KR:
131 return (50ULL * GBITS);
132 case MLXCX_EXTPROTO_CAUI_4_100GBASE_CR4_KR4:
133 case MLXCX_EXTPROTO_100GAUI_2_100GBASE_CR2_KR2:
134 case MLXCX_EXTPROTO_100GAUI_1_100GBASE_CR_KR:
135 return (100ULL * GBITS);
136 case MLXCX_EXTPROTO_200GAUI_4_200GBASE_CR4_KR4:
137 case MLXCX_EXTPROTO_200GAUI_2_200GBASE_CR2_KR2:
138 return (200ULL * GBITS);
139 case MLXCX_EXTPROTO_400GAUI_8_400GBASE_CR8:
140 case MLXCX_EXTPROTO_400GAUI_4_400GBASE_CR4:
141 return (400ULL * GBITS);
142 default:
143 /*
144 * There ARE legitimate single-bit values we don't support,
145 * and should just return 0 immediately. We will ASSERT()
146 * that it's a single-bit value, however, for reasons detailed
147 * in the prior `default` case.
148 */
149 ASSERT0((uint32_t)ext_proto & ((uint32_t)ext_proto - 1U));
150 break;
151 }
152
153 return (0);
154 }
155
156 static link_fec_t
mlxcx_fec_to_link_fec(mlxcx_pplm_fec_active_t mlxcx_fec)157 mlxcx_fec_to_link_fec(mlxcx_pplm_fec_active_t mlxcx_fec)
158 {
159 if ((mlxcx_fec & MLXCX_PPLM_FEC_ACTIVE_NONE) != 0)
160 return (LINK_FEC_NONE);
161
162 if ((mlxcx_fec & MLXCX_PPLM_FEC_ACTIVE_FIRECODE) != 0)
163 return (LINK_FEC_BASE_R);
164
165 if ((mlxcx_fec & (MLXCX_PPLM_FEC_ACTIVE_RS528 |
166 MLXCX_PPLM_FEC_ACTIVE_RS271 | MLXCX_PPLM_FEC_ACTIVE_RS544 |
167 MLXCX_PPLM_FEC_ACTIVE_RS272)) != 0)
168 return (LINK_FEC_RS);
169
170 return (LINK_FEC_NONE);
171 }
172
173 static boolean_t
mlxcx_link_fec_cap(link_fec_t fec,mlxcx_pplm_fec_caps_t * pfecp)174 mlxcx_link_fec_cap(link_fec_t fec, mlxcx_pplm_fec_caps_t *pfecp)
175 {
176 mlxcx_pplm_fec_caps_t pplm_fec = 0;
177
178 if ((fec & LINK_FEC_AUTO) != 0) {
179 pplm_fec = MLXCX_PPLM_FEC_CAP_AUTO;
180 fec &= ~LINK_FEC_AUTO;
181 } else if ((fec & LINK_FEC_NONE) != 0) {
182 pplm_fec = MLXCX_PPLM_FEC_CAP_NONE;
183 fec &= ~LINK_FEC_NONE;
184 } else if ((fec & LINK_FEC_RS) != 0) {
185 pplm_fec |= MLXCX_PPLM_FEC_CAP_RS;
186 fec &= ~LINK_FEC_RS;
187 } else if ((fec & LINK_FEC_BASE_R) != 0) {
188 pplm_fec |= MLXCX_PPLM_FEC_CAP_FIRECODE;
189 fec &= ~LINK_FEC_BASE_R;
190 }
191
192 /*
193 * Only one fec option is allowed.
194 */
195 if (fec != 0)
196 return (B_FALSE);
197
198 *pfecp = pplm_fec;
199
200 return (B_TRUE);
201 }
202
203 static int
mlxcx_mac_stat_rfc_2863(mlxcx_t * mlxp,mlxcx_port_t * port,uint_t stat,uint64_t * val)204 mlxcx_mac_stat_rfc_2863(mlxcx_t *mlxp, mlxcx_port_t *port, uint_t stat,
205 uint64_t *val)
206 {
207 int ret = 0;
208 boolean_t ok;
209 mlxcx_register_data_t data;
210 mlxcx_ppcnt_rfc_2863_t *st;
211
212 ASSERT(mutex_owned(&port->mlp_mtx));
213
214 bzero(&data, sizeof (data));
215 data.mlrd_ppcnt.mlrd_ppcnt_local_port = port->mlp_num + 1;
216 data.mlrd_ppcnt.mlrd_ppcnt_grp = MLXCX_PPCNT_GRP_RFC_2863;
217 data.mlrd_ppcnt.mlrd_ppcnt_clear = MLXCX_PPCNT_NO_CLEAR;
218
219 ok = mlxcx_cmd_access_register(mlxp, MLXCX_CMD_ACCESS_REGISTER_READ,
220 MLXCX_REG_PPCNT, &data);
221 if (!ok)
222 return (EIO);
223 st = &data.mlrd_ppcnt.mlrd_ppcnt_rfc_2863;
224
225 switch (stat) {
226 case MAC_STAT_RBYTES:
227 *val = from_be64(st->mlppc_rfc_2863_in_octets);
228 break;
229 case MAC_STAT_MULTIRCV:
230 *val = from_be64(st->mlppc_rfc_2863_in_mcast_pkts);
231 break;
232 case MAC_STAT_BRDCSTRCV:
233 *val = from_be64(st->mlppc_rfc_2863_in_bcast_pkts);
234 break;
235 case MAC_STAT_MULTIXMT:
236 *val = from_be64(st->mlppc_rfc_2863_out_mcast_pkts);
237 break;
238 case MAC_STAT_BRDCSTXMT:
239 *val = from_be64(st->mlppc_rfc_2863_out_bcast_pkts);
240 break;
241 case MAC_STAT_IERRORS:
242 *val = from_be64(st->mlppc_rfc_2863_in_errors);
243 break;
244 case MAC_STAT_UNKNOWNS:
245 *val = from_be64(st->mlppc_rfc_2863_in_unknown_protos);
246 break;
247 case MAC_STAT_OERRORS:
248 *val = from_be64(st->mlppc_rfc_2863_out_errors);
249 break;
250 case MAC_STAT_OBYTES:
251 *val = from_be64(st->mlppc_rfc_2863_out_octets);
252 break;
253 default:
254 ret = ENOTSUP;
255 }
256
257 return (ret);
258 }
259
260 static int
mlxcx_mac_stat_ieee_802_3(mlxcx_t * mlxp,mlxcx_port_t * port,uint_t stat,uint64_t * val)261 mlxcx_mac_stat_ieee_802_3(mlxcx_t *mlxp, mlxcx_port_t *port, uint_t stat,
262 uint64_t *val)
263 {
264 int ret = 0;
265 boolean_t ok;
266 mlxcx_register_data_t data;
267 mlxcx_ppcnt_ieee_802_3_t *st;
268
269 ASSERT(mutex_owned(&port->mlp_mtx));
270
271 bzero(&data, sizeof (data));
272 data.mlrd_ppcnt.mlrd_ppcnt_local_port = port->mlp_num + 1;
273 data.mlrd_ppcnt.mlrd_ppcnt_grp = MLXCX_PPCNT_GRP_IEEE_802_3;
274 data.mlrd_ppcnt.mlrd_ppcnt_clear = MLXCX_PPCNT_NO_CLEAR;
275
276 ok = mlxcx_cmd_access_register(mlxp, MLXCX_CMD_ACCESS_REGISTER_READ,
277 MLXCX_REG_PPCNT, &data);
278 if (!ok)
279 return (EIO);
280 st = &data.mlrd_ppcnt.mlrd_ppcnt_ieee_802_3;
281
282 switch (stat) {
283 case MAC_STAT_IPACKETS:
284 *val = from_be64(st->mlppc_ieee_802_3_frames_rx);
285 break;
286 case MAC_STAT_OPACKETS:
287 *val = from_be64(st->mlppc_ieee_802_3_frames_tx);
288 break;
289 case ETHER_STAT_ALIGN_ERRORS:
290 *val = from_be64(st->mlppc_ieee_802_3_align_err);
291 break;
292 case ETHER_STAT_FCS_ERRORS:
293 *val = from_be64(st->mlppc_ieee_802_3_fcs_err);
294 break;
295 case ETHER_STAT_TOOLONG_ERRORS:
296 *val = from_be64(st->mlppc_ieee_802_3_frame_too_long_err);
297 break;
298 default:
299 ret = ENOTSUP;
300 }
301
302 return (ret);
303 }
304
305 static int
mlxcx_mac_stat(void * arg,uint_t stat,uint64_t * val)306 mlxcx_mac_stat(void *arg, uint_t stat, uint64_t *val)
307 {
308 mlxcx_t *mlxp = (mlxcx_t *)arg;
309 mlxcx_port_t *port = &mlxp->mlx_ports[0];
310 int ret = 0;
311
312 mutex_enter(&port->mlp_mtx);
313
314 switch (stat) {
315 case MAC_STAT_IFSPEED:
316 *val = mlxcx_speed_to_bits(port->mlp_oper_proto,
317 port->mlp_ext_oper_proto);
318 break;
319 case ETHER_STAT_LINK_DUPLEX:
320 *val = LINK_DUPLEX_FULL;
321 break;
322 case MAC_STAT_RBYTES:
323 case MAC_STAT_MULTIRCV:
324 case MAC_STAT_BRDCSTRCV:
325 case MAC_STAT_MULTIXMT:
326 case MAC_STAT_BRDCSTXMT:
327 case MAC_STAT_IERRORS:
328 case MAC_STAT_UNKNOWNS:
329 case MAC_STAT_OERRORS:
330 case MAC_STAT_OBYTES:
331 ret = mlxcx_mac_stat_rfc_2863(mlxp, port, stat, val);
332 break;
333 case MAC_STAT_IPACKETS:
334 case MAC_STAT_OPACKETS:
335 case ETHER_STAT_ALIGN_ERRORS:
336 case ETHER_STAT_FCS_ERRORS:
337 case ETHER_STAT_TOOLONG_ERRORS:
338 ret = mlxcx_mac_stat_ieee_802_3(mlxp, port, stat, val);
339 break;
340 case MAC_STAT_NORCVBUF:
341 *val = port->mlp_stats.mlps_rx_drops;
342 break;
343 default:
344 ret = ENOTSUP;
345 }
346
347 mutex_exit(&port->mlp_mtx);
348
349 return (ret);
350 }
351
352 static int
mlxcx_mac_led_set(void * arg,mac_led_mode_t mode,uint_t flags)353 mlxcx_mac_led_set(void *arg, mac_led_mode_t mode, uint_t flags)
354 {
355 mlxcx_t *mlxp = arg;
356 mlxcx_port_t *port = &mlxp->mlx_ports[0];
357 int ret = 0;
358
359 if (flags != 0) {
360 return (EINVAL);
361 }
362
363 mutex_enter(&port->mlp_mtx);
364
365 switch (mode) {
366 case MAC_LED_DEFAULT:
367 case MAC_LED_OFF:
368 if (!mlxcx_cmd_set_port_led(mlxp, port, 0)) {
369 ret = EIO;
370 break;
371 }
372 break;
373 case MAC_LED_IDENT:
374 if (!mlxcx_cmd_set_port_led(mlxp, port, UINT16_MAX)) {
375 ret = EIO;
376 break;
377 }
378 break;
379 default:
380 ret = ENOTSUP;
381 }
382
383 mutex_exit(&port->mlp_mtx);
384
385 return (ret);
386 }
387
388 static int
mlxcx_mac_txr_info(void * arg,uint_t id,mac_transceiver_info_t * infop)389 mlxcx_mac_txr_info(void *arg, uint_t id, mac_transceiver_info_t *infop)
390 {
391 mlxcx_t *mlxp = arg;
392 mlxcx_module_status_t st;
393
394 if (!mlxcx_cmd_query_module_status(mlxp, id, &st, NULL))
395 return (EIO);
396
397 if (st != MLXCX_MODULE_UNPLUGGED)
398 mac_transceiver_info_set_present(infop, B_TRUE);
399
400 if (st == MLXCX_MODULE_PLUGGED)
401 mac_transceiver_info_set_usable(infop, B_TRUE);
402
403 return (0);
404 }
405
406 static int
mlxcx_mac_txr_read(void * arg,uint_t id,uint_t page,void * vbuf,size_t nbytes,off_t offset,size_t * nread)407 mlxcx_mac_txr_read(void *arg, uint_t id, uint_t page, void *vbuf,
408 size_t nbytes, off_t offset, size_t *nread)
409 {
410 mlxcx_t *mlxp = arg;
411 mlxcx_register_data_t data;
412 uint8_t *buf = vbuf;
413 boolean_t ok;
414 size_t take, done = 0;
415 uint8_t i2c_addr;
416
417 if (id != 0 || vbuf == NULL || nbytes == 0 || nread == NULL)
418 return (EINVAL);
419
420 if (nbytes > 256 || offset >= 256 || (offset + nbytes > 256))
421 return (EINVAL);
422
423 /*
424 * The PRM is really not very clear about any of this, but it seems
425 * that the i2c_device_addr field in MCIA is the SFP+ spec "page"
426 * number shifted right by 1 bit. They're written in the SFF spec
427 * like "1010000X" so Mellanox just dropped the X.
428 *
429 * This means that if we want page 0xA0, we put 0x50 in the
430 * i2c_device_addr field.
431 *
432 * The "page_number" field in MCIA means something else. Don't ask me
433 * what. FreeBSD leaves it as zero, so we will too!
434 */
435 i2c_addr = page >> 1;
436
437 while (done < nbytes) {
438 take = nbytes - done;
439 if (take > sizeof (data.mlrd_mcia.mlrd_mcia_data))
440 take = sizeof (data.mlrd_mcia.mlrd_mcia_data);
441
442 bzero(&data, sizeof (data));
443 ASSERT3U(id, <=, 0xff);
444 data.mlrd_mcia.mlrd_mcia_module = (uint8_t)id;
445 data.mlrd_mcia.mlrd_mcia_i2c_device_addr = i2c_addr;
446 data.mlrd_mcia.mlrd_mcia_device_addr = to_be16(offset);
447 data.mlrd_mcia.mlrd_mcia_size = to_be16(take);
448
449 ok = mlxcx_cmd_access_register(mlxp,
450 MLXCX_CMD_ACCESS_REGISTER_READ, MLXCX_REG_MCIA, &data);
451 if (!ok) {
452 *nread = 0;
453 return (EIO);
454 }
455
456 if (data.mlrd_mcia.mlrd_mcia_status != MLXCX_MCIA_STATUS_OK) {
457 *nread = 0;
458 return (EIO);
459 }
460
461 bcopy(data.mlrd_mcia.mlrd_mcia_data, &buf[done], take);
462
463 done += take;
464 offset += take;
465 }
466 *nread = done;
467 return (0);
468 }
469
470 static int
mlxcx_mac_ring_stat(mac_ring_driver_t rh,uint_t stat,uint64_t * val)471 mlxcx_mac_ring_stat(mac_ring_driver_t rh, uint_t stat, uint64_t *val)
472 {
473 mlxcx_work_queue_t *wq = (mlxcx_work_queue_t *)rh;
474 (void) wq;
475
476 /*
477 * We should add support for using hw flow counters and such to
478 * get per-ring statistics. Not done yet though!
479 */
480
481 switch (stat) {
482 default:
483 *val = 0;
484 return (ENOTSUP);
485 }
486
487 return (0);
488 }
489
490 static int
mlxcx_mac_start(void * arg)491 mlxcx_mac_start(void *arg)
492 {
493 mlxcx_t *mlxp = (mlxcx_t *)arg;
494 (void) mlxp;
495 return (0);
496 }
497
498 static void
mlxcx_mac_stop(void * arg)499 mlxcx_mac_stop(void *arg)
500 {
501 mlxcx_t *mlxp = (mlxcx_t *)arg;
502 (void) mlxp;
503 }
504
/*
 * MAC transmit entry point for a single TX ring (send queue).
 *
 * Returns NULL when the packet has been consumed (enqueued to the SQ, or
 * dropped because it had no valid L2 header or the queue is being torn
 * down). Returns `mp` unchanged to tell the MAC layer we are blocked; in
 * that case we set a BLOCKED_MAC flag on the SQ and/or CQ first, so the
 * completion path knows a ring update is owed once space frees up.
 */
static mblk_t *
mlxcx_mac_ring_tx(void *arg, mblk_t *mp)
{
	mlxcx_work_queue_t *sq = (mlxcx_work_queue_t *)arg;
	mlxcx_t *mlxp = sq->mlwq_mlx;
	mlxcx_completion_queue_t *cq;
	mlxcx_buffer_t *b;
	mac_header_info_t mhi;
	mblk_t *kmp, *nmp;
	uint8_t inline_hdrs[MLXCX_MAX_INLINE_HEADERLEN];
	size_t inline_hdrlen, rem, off;
	uint32_t chkflags = 0;
	boolean_t ok;
	size_t take = 0;
	uint_t bcount;

	VERIFY(mp->b_next == NULL);

	mac_hcksum_get(mp, NULL, NULL, NULL, NULL, &chkflags);

	if (mac_vlan_header_info(mlxp->mlx_mac_hdl, mp, &mhi) != 0) {
		/*
		 * We got given a frame without a valid L2 header on it. We
		 * can't really transmit that (mlx parts don't like it), so
		 * we will just drop it on the floor.
		 */
		freemsg(mp);
		return (NULL);
	}

	inline_hdrlen = rem = mhi.mhi_hdrsize;

	/*
	 * Copy the L2 header out of the mblk chain into inline_hdrs[]; the
	 * header may span multiple mblks. On exit, kmp points at the mblk
	 * containing the first byte of payload and `take` is the offset of
	 * that byte within kmp (0 if the header ended exactly on an mblk
	 * boundary).
	 */
	kmp = mp;
	off = 0;
	while (rem > 0) {
		const ptrdiff_t sz = MBLKL(kmp);
		ASSERT3S(sz, >=, 0);
		ASSERT3U(sz, <=, SIZE_MAX);
		take = sz;
		if (take > rem)
			take = rem;
		bcopy(kmp->b_rptr, inline_hdrs + off, take);
		rem -= take;
		off += take;
		if (take == sz) {
			take = 0;
			kmp = kmp->b_cont;
		}
	}

	/* DMA-bind (or copy) the payload, starting `take` bytes into kmp. */
	bcount = mlxcx_buf_bind_or_copy(mlxp, sq, kmp, take, &b);
	if (bcount == 0) {
		/* No buffers available: ask MAC to hold the packet. */
		atomic_or_uint(&sq->mlwq_state, MLXCX_WQ_BLOCKED_MAC);
		return (mp);
	}

	mutex_enter(&sq->mlwq_mtx);
	VERIFY3U(sq->mlwq_inline_mode, <=, MLXCX_ETH_INLINE_L2);
	cq = sq->mlwq_cq;

	/*
	 * state is a single int, so read-only access without the CQ lock
	 * should be fine.
	 */
	if (cq->mlcq_state & MLXCX_CQ_TEARDOWN) {
		mutex_exit(&sq->mlwq_mtx);
		mlxcx_buf_return_chain(mlxp, b, B_FALSE);
		return (NULL);
	}

	if ((sq->mlwq_state & (MLXCX_WQ_TEARDOWN | MLXCX_WQ_STARTED)) !=
	    MLXCX_WQ_STARTED) {
		mutex_exit(&sq->mlwq_mtx);
		mlxcx_buf_return_chain(mlxp, b, B_FALSE);
		return (NULL);
	}

	/*
	 * If the completion queue buffer count is already at or above
	 * the high water mark, or the addition of this new chain will
	 * exceed the CQ ring size, then indicate we are blocked.
	 */
	if (cq->mlcq_bufcnt >= cq->mlcq_bufhwm ||
	    (cq->mlcq_bufcnt + bcount) > cq->mlcq_nents) {
		atomic_or_uint(&cq->mlcq_state, MLXCX_CQ_BLOCKED_MAC);
		goto blocked;
	}

	/* Likewise if the SQ itself is above its high water mark. */
	if (sq->mlwq_wqebb_used >= sq->mlwq_bufhwm) {
		atomic_or_uint(&sq->mlwq_state, MLXCX_WQ_BLOCKED_MAC);
		goto blocked;
	}

	ok = mlxcx_sq_add_buffer(mlxp, sq, inline_hdrs, inline_hdrlen,
	    chkflags, b);
	if (!ok) {
		atomic_or_uint(&cq->mlcq_state, MLXCX_CQ_BLOCKED_MAC);
		atomic_or_uint(&sq->mlwq_state, MLXCX_WQ_BLOCKED_MAC);
		goto blocked;
	}

	/*
	 * Now that we've successfully enqueued the rest of the packet,
	 * free any mblks that we cut off while inlining headers.
	 */
	for (; mp != kmp; mp = nmp) {
		nmp = mp->b_cont;
		freeb(mp);
	}

	mutex_exit(&sq->mlwq_mtx);

	return (NULL);

blocked:
	mutex_exit(&sq->mlwq_mtx);
	mlxcx_buf_return_chain(mlxp, b, B_TRUE);
	return (mp);
}
624
625 static int
mlxcx_mac_setpromisc(void * arg,boolean_t on)626 mlxcx_mac_setpromisc(void *arg, boolean_t on)
627 {
628 mlxcx_t *mlxp = (mlxcx_t *)arg;
629 mlxcx_port_t *port = &mlxp->mlx_ports[0];
630 mlxcx_flow_group_t *fg;
631 mlxcx_flow_entry_t *fe;
632 mlxcx_flow_table_t *ft;
633 mlxcx_ring_group_t *g;
634 int ret = 0;
635 uint_t idx;
636
637 mutex_enter(&port->mlp_mtx);
638
639 /*
640 * First, do the top-level flow entry on the root flow table for
641 * the port. This catches all traffic that doesn't match any MAC
642 * MAC filters.
643 */
644 ft = port->mlp_rx_flow;
645 mutex_enter(&ft->mlft_mtx);
646 fg = port->mlp_promisc;
647 fe = list_head(&fg->mlfg_entries);
648 if (on && !(fe->mlfe_state & MLXCX_FLOW_ENTRY_CREATED)) {
649 if (!mlxcx_cmd_set_flow_table_entry(mlxp, fe)) {
650 ret = EIO;
651 }
652 } else if (!on && (fe->mlfe_state & MLXCX_FLOW_ENTRY_CREATED)) {
653 if (!mlxcx_cmd_delete_flow_table_entry(mlxp, fe)) {
654 ret = EIO;
655 }
656 }
657 mutex_exit(&ft->mlft_mtx);
658
659 /*
660 * If we failed to change the top-level entry, don't bother with
661 * trying the per-group ones.
662 */
663 if (ret != 0) {
664 mutex_exit(&port->mlp_mtx);
665 return (ret);
666 }
667
668 /*
669 * Then, do the per-rx-group flow entries which catch traffic that
670 * matched a MAC filter but failed to match a VLAN filter.
671 */
672 for (idx = 0; idx < mlxp->mlx_rx_ngroups; ++idx) {
673 g = &mlxp->mlx_rx_groups[idx];
674
675 mutex_enter(&g->mlg_mtx);
676
677 ft = g->mlg_rx_vlan_ft;
678 mutex_enter(&ft->mlft_mtx);
679
680 fg = g->mlg_rx_vlan_promisc_fg;
681 fe = list_head(&fg->mlfg_entries);
682 if (on && !(fe->mlfe_state & MLXCX_FLOW_ENTRY_CREATED)) {
683 if (!mlxcx_cmd_set_flow_table_entry(mlxp, fe)) {
684 ret = EIO;
685 }
686 } else if (!on && (fe->mlfe_state & MLXCX_FLOW_ENTRY_CREATED)) {
687 if (!mlxcx_cmd_delete_flow_table_entry(mlxp, fe)) {
688 ret = EIO;
689 }
690 }
691
692 mutex_exit(&ft->mlft_mtx);
693 mutex_exit(&g->mlg_mtx);
694 }
695
696 mutex_exit(&port->mlp_mtx);
697 return (ret);
698 }
699
700 static int
mlxcx_mac_multicast(void * arg,boolean_t add,const uint8_t * addr)701 mlxcx_mac_multicast(void *arg, boolean_t add, const uint8_t *addr)
702 {
703 mlxcx_t *mlxp = (mlxcx_t *)arg;
704 mlxcx_port_t *port = &mlxp->mlx_ports[0];
705 mlxcx_ring_group_t *g = &mlxp->mlx_rx_groups[0];
706 int ret = 0;
707
708 mutex_enter(&port->mlp_mtx);
709 mutex_enter(&g->mlg_mtx);
710 if (add) {
711 if (!mlxcx_add_umcast_entry(mlxp, port, g, addr)) {
712 ret = EIO;
713 }
714 } else {
715 if (!mlxcx_remove_umcast_entry(mlxp, port, g, addr)) {
716 ret = EIO;
717 }
718 }
719 mutex_exit(&g->mlg_mtx);
720 mutex_exit(&port->mlp_mtx);
721 return (ret);
722 }
723
724 static int
mlxcx_group_add_mac(void * arg,const uint8_t * mac_addr)725 mlxcx_group_add_mac(void *arg, const uint8_t *mac_addr)
726 {
727 mlxcx_ring_group_t *g = arg;
728 mlxcx_t *mlxp = g->mlg_mlx;
729 mlxcx_port_t *port = g->mlg_port;
730 int ret = 0;
731
732 mutex_enter(&port->mlp_mtx);
733 mutex_enter(&g->mlg_mtx);
734 if (!mlxcx_add_umcast_entry(mlxp, port, g, mac_addr)) {
735 ret = EIO;
736 }
737 mutex_exit(&g->mlg_mtx);
738 mutex_exit(&port->mlp_mtx);
739
740 return (ret);
741 }
742
743 static int
mlxcx_group_add_vlan(mac_group_driver_t gh,uint16_t vid)744 mlxcx_group_add_vlan(mac_group_driver_t gh, uint16_t vid)
745 {
746 mlxcx_ring_group_t *g = (mlxcx_ring_group_t *)gh;
747 mlxcx_t *mlxp = g->mlg_mlx;
748 int ret = 0;
749 boolean_t tagged = B_TRUE;
750
751 if (vid == MAC_VLAN_UNTAGGED) {
752 vid = 0;
753 tagged = B_FALSE;
754 }
755
756 mutex_enter(&g->mlg_mtx);
757 if (!mlxcx_add_vlan_entry(mlxp, g, tagged, vid)) {
758 ret = EIO;
759 }
760 mutex_exit(&g->mlg_mtx);
761
762 return (ret);
763 }
764
765 static int
mlxcx_group_remove_vlan(mac_group_driver_t gh,uint16_t vid)766 mlxcx_group_remove_vlan(mac_group_driver_t gh, uint16_t vid)
767 {
768 mlxcx_ring_group_t *g = (mlxcx_ring_group_t *)gh;
769 mlxcx_t *mlxp = g->mlg_mlx;
770 int ret = 0;
771 boolean_t tagged = B_TRUE;
772
773 if (vid == MAC_VLAN_UNTAGGED) {
774 vid = 0;
775 tagged = B_FALSE;
776 }
777
778 mutex_enter(&g->mlg_mtx);
779 if (!mlxcx_remove_vlan_entry(mlxp, g, tagged, vid)) {
780 ret = EIO;
781 }
782 mutex_exit(&g->mlg_mtx);
783
784 return (ret);
785 }
786
787 static int
mlxcx_group_remove_mac(void * arg,const uint8_t * mac_addr)788 mlxcx_group_remove_mac(void *arg, const uint8_t *mac_addr)
789 {
790 mlxcx_ring_group_t *g = arg;
791 mlxcx_t *mlxp = g->mlg_mlx;
792 mlxcx_port_t *port = g->mlg_port;
793 int ret = 0;
794
795 mutex_enter(&port->mlp_mtx);
796 mutex_enter(&g->mlg_mtx);
797 if (!mlxcx_remove_umcast_entry(mlxp, port, g, mac_addr)) {
798 ret = EIO;
799 }
800 mutex_exit(&g->mlg_mtx);
801 mutex_exit(&port->mlp_mtx);
802
803 return (ret);
804 }
805
806 static int
mlxcx_mac_ring_start(mac_ring_driver_t rh,uint64_t gen_num)807 mlxcx_mac_ring_start(mac_ring_driver_t rh, uint64_t gen_num)
808 {
809 mlxcx_work_queue_t *wq = (mlxcx_work_queue_t *)rh;
810 mlxcx_completion_queue_t *cq = wq->mlwq_cq;
811 mlxcx_ring_group_t *g = wq->mlwq_group;
812 mlxcx_t *mlxp = wq->mlwq_mlx;
813
814 ASSERT(cq != NULL);
815 ASSERT(g != NULL);
816
817 ASSERT(wq->mlwq_type == MLXCX_WQ_TYPE_SENDQ ||
818 wq->mlwq_type == MLXCX_WQ_TYPE_RECVQ);
819 if (wq->mlwq_type == MLXCX_WQ_TYPE_SENDQ &&
820 !mlxcx_tx_ring_start(mlxp, g, wq))
821 return (EIO);
822 if (wq->mlwq_type == MLXCX_WQ_TYPE_RECVQ &&
823 !mlxcx_rx_ring_start(mlxp, g, wq))
824 return (EIO);
825
826 mutex_enter(&cq->mlcq_mtx);
827 cq->mlcq_mac_gen = gen_num;
828 mutex_exit(&cq->mlcq_mtx);
829
830 return (0);
831 }
832
/*
 * MAC mri_stop callback: stop the work queue behind a ring and drain and
 * destroy its buffers.
 */
static void
mlxcx_mac_ring_stop(mac_ring_driver_t rh)
{
	mlxcx_work_queue_t *wq = (mlxcx_work_queue_t *)rh;
	mlxcx_completion_queue_t *cq = wq->mlwq_cq;
	mlxcx_t *mlxp = wq->mlwq_mlx;
	mlxcx_buf_shard_t *s;
	mlxcx_buffer_t *buf;

	/*
	 * To prevent deadlocks and sleeping whilst holding either the
	 * CQ mutex or WQ mutex, we split the stop processing into two
	 * parts.
	 *
	 * With the CQ amd WQ mutexes held the appropriate WQ is stopped.
	 * The Q in the HCA is set to Reset state and flagged as no
	 * longer started. Atomic with changing this WQ state, the buffer
	 * shards are flagged as draining.
	 *
	 * Now, any requests for buffers and attempts to submit messages
	 * will fail and once we're in this state it is safe to relinquish
	 * the CQ and WQ mutexes. Allowing us to complete the ring stop
	 * by waiting for the buffer lists, with the exception of
	 * the loaned list, to drain. Buffers on the loaned list are
	 * not under our control, we will get them back when the mblk tied
	 * to the buffer is freed.
	 */

	mutex_enter(&cq->mlcq_mtx);
	mutex_enter(&wq->mlwq_mtx);

	if (wq->mlwq_state & MLXCX_WQ_STARTED) {
		/*
		 * If the stop command fails we bail out with the WQ still
		 * started; MAC may call us again.
		 */
		if (wq->mlwq_type == MLXCX_WQ_TYPE_RECVQ &&
		    !mlxcx_cmd_stop_rq(mlxp, wq)) {
			mutex_exit(&wq->mlwq_mtx);
			mutex_exit(&cq->mlcq_mtx);
			return;
		}
		if (wq->mlwq_type == MLXCX_WQ_TYPE_SENDQ &&
		    !mlxcx_cmd_stop_sq(mlxp, wq)) {
			mutex_exit(&wq->mlwq_mtx);
			mutex_exit(&cq->mlcq_mtx);
			return;
		}
	}
	ASSERT0(wq->mlwq_state & MLXCX_WQ_STARTED);

	/* Part two: flag the shards so buffer requests start failing. */
	mlxcx_shard_draining(wq->mlwq_bufs);
	if (wq->mlwq_foreign_bufs != NULL)
		mlxcx_shard_draining(wq->mlwq_foreign_bufs);


	if (wq->mlwq_state & MLXCX_WQ_BUFFERS) {
		list_t cq_buffers;

		/*
		 * Take the buffers away from the CQ. If the CQ is being
		 * processed and the WQ has been stopped, a completion
		 * which does not match to a buffer will be ignored.
		 */
		list_create(&cq_buffers, sizeof (mlxcx_buffer_t),
		    offsetof(mlxcx_buffer_t, mlb_cq_entry));

		list_move_tail(&cq_buffers, &cq->mlcq_buffers);

		/* The "b" list has its own lock. */
		mutex_enter(&cq->mlcq_bufbmtx);
		list_move_tail(&cq_buffers, &cq->mlcq_buffers_b);
		mutex_exit(&cq->mlcq_bufbmtx);

		cq->mlcq_bufcnt = 0;

		/*
		 * Safe to drop the mutexes now: the WQ is stopped and the
		 * shards are draining, so nothing new can be enqueued.
		 */
		mutex_exit(&wq->mlwq_mtx);
		mutex_exit(&cq->mlcq_mtx);

		/* Return any outstanding buffers to the free pool. */
		while ((buf = list_remove_head(&cq_buffers)) != NULL) {
			mlxcx_buf_return_chain(mlxp, buf, B_FALSE);
		}
		list_destroy(&cq_buffers);

		/*
		 * Wait for the busy lists to drain (loaned buffers return
		 * via freemsg of their mblks), then destroy the free
		 * buffers. This may sleep, which is why the CQ/WQ mutexes
		 * were dropped above.
		 */
		s = wq->mlwq_bufs;
		mutex_enter(&s->mlbs_mtx);
		while (!list_is_empty(&s->mlbs_busy))
			cv_wait(&s->mlbs_free_nonempty, &s->mlbs_mtx);
		while ((buf = list_head(&s->mlbs_free)) != NULL) {
			mlxcx_buf_destroy(mlxp, buf);
		}
		mutex_exit(&s->mlbs_mtx);

		/* And the same for the foreign (bound) buffer shard. */
		s = wq->mlwq_foreign_bufs;
		if (s != NULL) {
			mutex_enter(&s->mlbs_mtx);
			while (!list_is_empty(&s->mlbs_busy))
				cv_wait(&s->mlbs_free_nonempty, &s->mlbs_mtx);
			while ((buf = list_head(&s->mlbs_free)) != NULL) {
				mlxcx_buf_destroy(mlxp, buf);
			}
			mutex_exit(&s->mlbs_mtx);
		}

		mutex_enter(&wq->mlwq_mtx);
		wq->mlwq_state &= ~MLXCX_WQ_BUFFERS;
		mutex_exit(&wq->mlwq_mtx);
	} else {
		mutex_exit(&wq->mlwq_mtx);
		mutex_exit(&cq->mlcq_mtx);
	}
}
941
942 static int
mlxcx_mac_group_start(mac_group_driver_t gh)943 mlxcx_mac_group_start(mac_group_driver_t gh)
944 {
945 mlxcx_ring_group_t *g = (mlxcx_ring_group_t *)gh;
946 mlxcx_t *mlxp = g->mlg_mlx;
947
948 VERIFY3S(g->mlg_type, ==, MLXCX_GROUP_RX);
949 ASSERT(mlxp != NULL);
950
951 if (g->mlg_state & MLXCX_GROUP_RUNNING)
952 return (0);
953
954 if (!mlxcx_rx_group_start(mlxp, g))
955 return (EIO);
956
957 return (0);
958 }
959
960 static void
mlxcx_mac_fill_tx_ring(void * arg,mac_ring_type_t rtype,const int group_index,const int ring_index,mac_ring_info_t * infop,mac_ring_handle_t rh)961 mlxcx_mac_fill_tx_ring(void *arg, mac_ring_type_t rtype, const int group_index,
962 const int ring_index, mac_ring_info_t *infop, mac_ring_handle_t rh)
963 {
964 mlxcx_t *mlxp = (mlxcx_t *)arg;
965 mlxcx_ring_group_t *g;
966 mlxcx_work_queue_t *wq;
967 mac_intr_t *mintr = &infop->mri_intr;
968
969 if (rtype != MAC_RING_TYPE_TX)
970 return;
971 ASSERT3S(group_index, ==, -1);
972
973 g = &mlxp->mlx_tx_groups[0];
974 ASSERT(g->mlg_state & MLXCX_GROUP_INIT);
975 mutex_enter(&g->mlg_mtx);
976
977 ASSERT3S(ring_index, >=, 0);
978 ASSERT3S(ring_index, <, g->mlg_nwqs);
979
980 wq = &g->mlg_wqs[ring_index];
981
982 wq->mlwq_cq->mlcq_mac_hdl = rh;
983
984 infop->mri_driver = (mac_ring_driver_t)wq;
985 infop->mri_start = mlxcx_mac_ring_start;
986 infop->mri_stop = mlxcx_mac_ring_stop;
987 infop->mri_tx = mlxcx_mac_ring_tx;
988 infop->mri_stat = mlxcx_mac_ring_stat;
989
990 mintr->mi_ddi_handle = mlxp->mlx_intr_handles[
991 wq->mlwq_cq->mlcq_eq->mleq_intr_index];
992
993 mutex_exit(&g->mlg_mtx);
994 }
995
/*
 * MAC mi_enable callback: switch a CQ from MAC polled mode back to
 * interrupt-driven mode by clearing MLXCX_CQ_POLLING and re-arming the
 * CQ if it isn't already armed.
 */
static int
mlxcx_mac_ring_intr_enable(mac_intr_handle_t intrh)
{
	mlxcx_completion_queue_t *cq = (mlxcx_completion_queue_t *)intrh;
	mlxcx_t *mlxp = cq->mlcq_mlx;

	/*
	 * We are going to call mlxcx_arm_cq() here, so we take the arm lock
	 * as well as the CQ one to make sure we don't race against
	 * mlxcx_intr_n().
	 */
	mutex_enter(&cq->mlcq_arm_mtx);
	mutex_enter(&cq->mlcq_mtx);
	if (cq->mlcq_state & MLXCX_CQ_POLLING) {
		atomic_and_uint(&cq->mlcq_state, ~MLXCX_CQ_POLLING);
		if (!(cq->mlcq_state & MLXCX_CQ_ARMED))
			mlxcx_arm_cq(mlxp, cq);
	}
	mutex_exit(&cq->mlcq_mtx);
	mutex_exit(&cq->mlcq_arm_mtx);

	return (0);
}
1019
/*
 * MAC mi_disable callback: flag the CQ as being polled by MAC. The flag
 * is cleared (and the CQ re-armed) by mlxcx_mac_ring_intr_enable().
 */
static int
mlxcx_mac_ring_intr_disable(mac_intr_handle_t intrh)
{
	mlxcx_completion_queue_t *cq = (mlxcx_completion_queue_t *)intrh;

	mutex_enter(&cq->mlcq_mtx);
	atomic_or_uint(&cq->mlcq_state, MLXCX_CQ_POLLING);
	mutex_exit(&cq->mlcq_mtx);

	return (0);
}
1031
1032 static mblk_t *
mlxcx_mac_ring_rx_poll(void * arg,int poll_bytes)1033 mlxcx_mac_ring_rx_poll(void *arg, int poll_bytes)
1034 {
1035 mlxcx_work_queue_t *wq = (mlxcx_work_queue_t *)arg;
1036 mlxcx_completion_queue_t *cq = wq->mlwq_cq;
1037 mlxcx_t *mlxp = wq->mlwq_mlx;
1038 mblk_t *mp;
1039
1040 ASSERT(cq != NULL);
1041 ASSERT3S(poll_bytes, >, 0);
1042 if (poll_bytes == 0)
1043 return (NULL);
1044
1045 mutex_enter(&cq->mlcq_mtx);
1046 mp = mlxcx_rx_poll(mlxp, cq, poll_bytes);
1047 mutex_exit(&cq->mlcq_mtx);
1048
1049 return (mp);
1050 }
1051
1052 static void
mlxcx_mac_fill_rx_ring(void * arg,mac_ring_type_t rtype,const int group_index,const int ring_index,mac_ring_info_t * infop,mac_ring_handle_t rh)1053 mlxcx_mac_fill_rx_ring(void *arg, mac_ring_type_t rtype, const int group_index,
1054 const int ring_index, mac_ring_info_t *infop, mac_ring_handle_t rh)
1055 {
1056 mlxcx_t *mlxp = (mlxcx_t *)arg;
1057 mlxcx_ring_group_t *g;
1058 mlxcx_work_queue_t *wq;
1059 mac_intr_t *mintr = &infop->mri_intr;
1060
1061 if (rtype != MAC_RING_TYPE_RX)
1062 return;
1063 ASSERT3S(group_index, >=, 0);
1064 ASSERT3S(group_index, <, mlxp->mlx_rx_ngroups);
1065
1066 g = &mlxp->mlx_rx_groups[group_index];
1067 ASSERT(g->mlg_state & MLXCX_GROUP_INIT);
1068 mutex_enter(&g->mlg_mtx);
1069
1070 ASSERT3S(ring_index, >=, 0);
1071 ASSERT3S(ring_index, <, g->mlg_nwqs);
1072
1073 ASSERT(g->mlg_state & MLXCX_GROUP_WQS);
1074 wq = &g->mlg_wqs[ring_index];
1075
1076 wq->mlwq_cq->mlcq_mac_hdl = rh;
1077
1078 infop->mri_driver = (mac_ring_driver_t)wq;
1079 infop->mri_start = mlxcx_mac_ring_start;
1080 infop->mri_stop = mlxcx_mac_ring_stop;
1081 infop->mri_poll = mlxcx_mac_ring_rx_poll;
1082 infop->mri_stat = mlxcx_mac_ring_stat;
1083
1084 mintr->mi_handle = (mac_intr_handle_t)wq->mlwq_cq;
1085 mintr->mi_enable = mlxcx_mac_ring_intr_enable;
1086 mintr->mi_disable = mlxcx_mac_ring_intr_disable;
1087
1088 mintr->mi_ddi_handle = mlxp->mlx_intr_handles[
1089 wq->mlwq_cq->mlcq_eq->mleq_intr_index];
1090
1091 mutex_exit(&g->mlg_mtx);
1092 }
1093
1094 static void
mlxcx_mac_fill_rx_group(void * arg,mac_ring_type_t rtype,const int index,mac_group_info_t * infop,mac_group_handle_t gh)1095 mlxcx_mac_fill_rx_group(void *arg, mac_ring_type_t rtype, const int index,
1096 mac_group_info_t *infop, mac_group_handle_t gh)
1097 {
1098 mlxcx_t *mlxp = (mlxcx_t *)arg;
1099 mlxcx_ring_group_t *g;
1100
1101 if (rtype != MAC_RING_TYPE_RX)
1102 return;
1103
1104 ASSERT3S(index, >=, 0);
1105 ASSERT3S(index, <, mlxp->mlx_rx_ngroups);
1106 g = &mlxp->mlx_rx_groups[index];
1107 ASSERT(g->mlg_state & MLXCX_GROUP_INIT);
1108
1109 g->mlg_mac_hdl = gh;
1110
1111 infop->mgi_driver = (mac_group_driver_t)g;
1112 infop->mgi_start = mlxcx_mac_group_start;
1113 infop->mgi_stop = NULL;
1114 infop->mgi_addmac = mlxcx_group_add_mac;
1115 infop->mgi_remmac = mlxcx_group_remove_mac;
1116 infop->mgi_addvlan = mlxcx_group_add_vlan;
1117 infop->mgi_remvlan = mlxcx_group_remove_vlan;
1118
1119 infop->mgi_count = g->mlg_nwqs;
1120 }
1121
1122 static boolean_t
mlxcx_mac_getcapab(void * arg,mac_capab_t cap,void * cap_data)1123 mlxcx_mac_getcapab(void *arg, mac_capab_t cap, void *cap_data)
1124 {
1125 mlxcx_t *mlxp = (mlxcx_t *)arg;
1126 mac_capab_rings_t *cap_rings;
1127 mac_capab_led_t *cap_leds;
1128 mac_capab_transceiver_t *cap_txr;
1129 uint_t i, n = 0;
1130
1131 switch (cap) {
1132
1133 case MAC_CAPAB_RINGS:
1134 cap_rings = cap_data;
1135 cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC;
1136 switch (cap_rings->mr_type) {
1137 case MAC_RING_TYPE_TX:
1138 cap_rings->mr_gnum = 0;
1139 cap_rings->mr_rnum = mlxp->mlx_tx_groups[0].mlg_nwqs;
1140 cap_rings->mr_rget = mlxcx_mac_fill_tx_ring;
1141 cap_rings->mr_gget = NULL;
1142 cap_rings->mr_gaddring = NULL;
1143 cap_rings->mr_gremring = NULL;
1144 break;
1145 case MAC_RING_TYPE_RX:
1146 cap_rings->mr_gnum = mlxp->mlx_rx_ngroups;
1147 for (i = 0; i < mlxp->mlx_rx_ngroups; ++i)
1148 n += mlxp->mlx_rx_groups[i].mlg_nwqs;
1149 cap_rings->mr_rnum = n;
1150 cap_rings->mr_rget = mlxcx_mac_fill_rx_ring;
1151 cap_rings->mr_gget = mlxcx_mac_fill_rx_group;
1152 cap_rings->mr_gaddring = NULL;
1153 cap_rings->mr_gremring = NULL;
1154 break;
1155 default:
1156 return (B_FALSE);
1157 }
1158 break;
1159
1160 case MAC_CAPAB_HCKSUM:
1161 if (mlxp->mlx_caps->mlc_checksum) {
1162 *(uint32_t *)cap_data = HCKSUM_INET_FULL_V4 |
1163 HCKSUM_INET_FULL_V6 | HCKSUM_IPHDRCKSUM;
1164 }
1165 break;
1166
1167 case MAC_CAPAB_LED:
1168 cap_leds = cap_data;
1169
1170 cap_leds->mcl_flags = 0;
1171 cap_leds->mcl_modes = MAC_LED_DEFAULT | MAC_LED_OFF |
1172 MAC_LED_IDENT;
1173 cap_leds->mcl_set = mlxcx_mac_led_set;
1174 break;
1175
1176 case MAC_CAPAB_TRANSCEIVER:
1177 cap_txr = cap_data;
1178
1179 cap_txr->mct_flags = 0;
1180 cap_txr->mct_ntransceivers = 1;
1181 cap_txr->mct_info = mlxcx_mac_txr_info;
1182 cap_txr->mct_read = mlxcx_mac_txr_read;
1183 break;
1184
1185 default:
1186 return (B_FALSE);
1187 }
1188
1189 return (B_TRUE);
1190 }
1191
1192 static void
mlxcx_mac_propinfo(void * arg,const char * pr_name,mac_prop_id_t pr_num,mac_prop_info_handle_t prh)1193 mlxcx_mac_propinfo(void *arg, const char *pr_name, mac_prop_id_t pr_num,
1194 mac_prop_info_handle_t prh)
1195 {
1196 mlxcx_t *mlxp = (mlxcx_t *)arg;
1197 mlxcx_port_t *port = &mlxp->mlx_ports[0];
1198
1199 mutex_enter(&port->mlp_mtx);
1200
1201 switch (pr_num) {
1202 case MAC_PROP_DUPLEX:
1203 case MAC_PROP_SPEED:
1204 mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
1205 break;
1206 case MAC_PROP_MTU:
1207 mac_prop_info_set_perm(prh, MAC_PROP_PERM_RW);
1208 mac_prop_info_set_range_uint32(prh, MLXCX_MTU_OFFSET,
1209 port->mlp_max_mtu);
1210 mac_prop_info_set_default_uint32(prh,
1211 port->mlp_mtu - MLXCX_MTU_OFFSET);
1212 break;
1213 case MAC_PROP_AUTONEG:
1214 mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
1215 mac_prop_info_set_default_uint8(prh, 1);
1216 break;
1217 case MAC_PROP_ADV_FEC_CAP:
1218 mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
1219 mac_prop_info_set_default_fec(prh, LINK_FEC_AUTO);
1220 break;
1221 case MAC_PROP_EN_FEC_CAP:
1222 mac_prop_info_set_perm(prh, MAC_PROP_PERM_RW);
1223 mac_prop_info_set_default_fec(prh, LINK_FEC_AUTO);
1224 break;
1225 case MAC_PROP_ADV_400GFDX_CAP:
1226 case MAC_PROP_EN_400GFDX_CAP:
1227 mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
1228 mac_prop_info_set_default_uint8(prh,
1229 (port->mlp_ext_oper_proto & MLXCX_EXTPROTO_400G) != 0);
1230 break;
1231 case MAC_PROP_ADV_200GFDX_CAP:
1232 case MAC_PROP_EN_200GFDX_CAP:
1233 mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
1234 mac_prop_info_set_default_uint8(prh,
1235 (port->mlp_ext_oper_proto & MLXCX_EXTPROTO_200G) != 0);
1236 break;
1237 case MAC_PROP_ADV_100GFDX_CAP:
1238 case MAC_PROP_EN_100GFDX_CAP:
1239 mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
1240 mac_prop_info_set_default_uint8(prh,
1241 ((port->mlp_oper_proto & MLXCX_PROTO_100G) != 0 ||
1242 (port->mlp_ext_oper_proto & MLXCX_EXTPROTO_100G)) != 0);
1243 break;
1244 case MAC_PROP_ADV_50GFDX_CAP:
1245 case MAC_PROP_EN_50GFDX_CAP:
1246 mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
1247 mac_prop_info_set_default_uint8(prh,
1248 ((port->mlp_oper_proto & MLXCX_PROTO_50G) != 0 ||
1249 (port->mlp_ext_oper_proto & MLXCX_EXTPROTO_50G)) != 0);
1250 break;
1251 case MAC_PROP_ADV_40GFDX_CAP:
1252 case MAC_PROP_EN_40GFDX_CAP:
1253 mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
1254 mac_prop_info_set_default_uint8(prh,
1255 ((port->mlp_oper_proto & MLXCX_PROTO_40G) != 0 ||
1256 (port->mlp_ext_oper_proto & MLXCX_EXTPROTO_40G)) != 0);
1257 break;
1258 case MAC_PROP_ADV_25GFDX_CAP:
1259 case MAC_PROP_EN_25GFDX_CAP:
1260 mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
1261 mac_prop_info_set_default_uint8(prh,
1262 ((port->mlp_oper_proto & MLXCX_PROTO_25G) != 0 ||
1263 (port->mlp_ext_oper_proto & MLXCX_EXTPROTO_25G)) != 0);
1264 break;
1265 case MAC_PROP_ADV_10GFDX_CAP:
1266 case MAC_PROP_EN_10GFDX_CAP:
1267 mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
1268 mac_prop_info_set_default_uint8(prh,
1269 ((port->mlp_oper_proto & MLXCX_PROTO_10G) != 0 ||
1270 (port->mlp_ext_oper_proto & MLXCX_EXTPROTO_10G)) != 0);
1271 break;
1272 case MAC_PROP_ADV_1000FDX_CAP:
1273 case MAC_PROP_EN_1000FDX_CAP:
1274 mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
1275 mac_prop_info_set_default_uint8(prh,
1276 ((port->mlp_oper_proto & MLXCX_PROTO_1G) != 0 ||
1277 (port->mlp_ext_oper_proto & MLXCX_EXTPROTO_1G)) != 0);
1278 break;
1279 case MAC_PROP_ADV_100FDX_CAP:
1280 case MAC_PROP_EN_100FDX_CAP:
1281 mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
1282 mac_prop_info_set_default_uint8(prh,
1283 ((port->mlp_oper_proto & MLXCX_PROTO_100M) != 0 ||
1284 (port->mlp_ext_oper_proto & MLXCX_EXTPROTO_100M)) != 0);
1285 break;
1286 default:
1287 break;
1288 }
1289
1290 mutex_exit(&port->mlp_mtx);
1291 }
1292
1293 static int
mlxcx_mac_setprop(void * arg,const char * pr_name,mac_prop_id_t pr_num,uint_t pr_valsize,const void * pr_val)1294 mlxcx_mac_setprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
1295 uint_t pr_valsize, const void *pr_val)
1296 {
1297 mlxcx_t *mlxp = (mlxcx_t *)arg;
1298 mlxcx_port_t *port = &mlxp->mlx_ports[0];
1299 int ret = 0;
1300 uint32_t new_mtu, new_hw_mtu, old_mtu;
1301 mlxcx_buf_shard_t *sh;
1302 boolean_t allocd = B_FALSE;
1303 boolean_t relink = B_FALSE;
1304 link_fec_t fec;
1305 mlxcx_pplm_fec_caps_t cap_fec;
1306
1307 mutex_enter(&port->mlp_mtx);
1308
1309 switch (pr_num) {
1310 case MAC_PROP_MTU:
1311 bcopy(pr_val, &new_mtu, sizeof (new_mtu));
1312 new_hw_mtu = new_mtu + MLXCX_MTU_OFFSET;
1313 if (new_hw_mtu == port->mlp_mtu)
1314 break;
1315 if (new_hw_mtu > port->mlp_max_mtu) {
1316 ret = EINVAL;
1317 break;
1318 }
1319 sh = list_head(&mlxp->mlx_buf_shards);
1320 for (; sh != NULL; sh = list_next(&mlxp->mlx_buf_shards, sh)) {
1321 mutex_enter(&sh->mlbs_mtx);
1322 if (!list_is_empty(&sh->mlbs_free) ||
1323 !list_is_empty(&sh->mlbs_busy) ||
1324 !list_is_empty(&sh->mlbs_loaned)) {
1325 allocd = B_TRUE;
1326 mutex_exit(&sh->mlbs_mtx);
1327 break;
1328 }
1329 mutex_exit(&sh->mlbs_mtx);
1330 }
1331 if (allocd) {
1332 ret = EBUSY;
1333 break;
1334 }
1335 old_mtu = port->mlp_mtu;
1336 ret = mac_maxsdu_update(mlxp->mlx_mac_hdl, new_mtu);
1337 if (ret != 0)
1338 break;
1339 port->mlp_mtu = new_hw_mtu;
1340 if (!mlxcx_cmd_modify_nic_vport_ctx(mlxp, port,
1341 MLXCX_MODIFY_NIC_VPORT_CTX_MTU)) {
1342 port->mlp_mtu = old_mtu;
1343 (void) mac_maxsdu_update(mlxp->mlx_mac_hdl, old_mtu);
1344 ret = EIO;
1345 break;
1346 }
1347 if (!mlxcx_cmd_set_port_mtu(mlxp, port)) {
1348 port->mlp_mtu = old_mtu;
1349 (void) mac_maxsdu_update(mlxp->mlx_mac_hdl, old_mtu);
1350 ret = EIO;
1351 break;
1352 }
1353 break;
1354
1355 case MAC_PROP_EN_FEC_CAP:
1356 bcopy(pr_val, &fec, sizeof (fec));
1357 if (!mlxcx_link_fec_cap(fec, &cap_fec)) {
1358 ret = EINVAL;
1359 break;
1360 }
1361
1362 /*
1363 * Don't change the FEC if it is already at the requested
1364 * setting AND the port is up.
1365 * When the port is down, always set the FEC and attempt
1366 * to retrain the link.
1367 */
1368 if (fec == port->mlp_fec_requested &&
1369 fec == mlxcx_fec_to_link_fec(port->mlp_fec_active) &&
1370 port->mlp_oper_status != MLXCX_PORT_STATUS_DOWN)
1371 break;
1372
1373 /*
1374 * The most like cause of this failing is an invalid
1375 * or unsupported fec option.
1376 */
1377 if (!mlxcx_cmd_modify_port_fec(mlxp, port, cap_fec)) {
1378 ret = EINVAL;
1379 break;
1380 }
1381
1382 port->mlp_fec_requested = fec;
1383
1384 /*
1385 * For FEC to become effective, the link needs to go back
1386 * to training and negotiation state. This happens when
1387 * the link transitions from down to up, force a relink.
1388 */
1389 relink = B_TRUE;
1390 break;
1391
1392 default:
1393 ret = ENOTSUP;
1394 break;
1395 }
1396
1397 if (relink) {
1398 if (!mlxcx_cmd_modify_port_status(mlxp, port,
1399 MLXCX_PORT_STATUS_DOWN) ||
1400 !mlxcx_cmd_modify_port_status(mlxp, port,
1401 MLXCX_PORT_STATUS_UP)) {
1402 ret = EIO;
1403 }
1404 }
1405 mutex_exit(&port->mlp_mtx);
1406
1407 return (ret);
1408 }
1409
1410 static int
mlxcx_mac_getprop(void * arg,const char * pr_name,mac_prop_id_t pr_num,uint_t pr_valsize,void * pr_val)1411 mlxcx_mac_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
1412 uint_t pr_valsize, void *pr_val)
1413 {
1414 mlxcx_t *mlxp = (mlxcx_t *)arg;
1415 mlxcx_port_t *port = &mlxp->mlx_ports[0];
1416 uint64_t speed;
1417 int ret = 0;
1418
1419 mutex_enter(&port->mlp_mtx);
1420
1421 switch (pr_num) {
1422 case MAC_PROP_DUPLEX:
1423 if (pr_valsize < sizeof (link_duplex_t)) {
1424 ret = EOVERFLOW;
1425 break;
1426 }
1427 /* connectx parts only support full duplex */
1428 *(link_duplex_t *)pr_val = LINK_DUPLEX_FULL;
1429 break;
1430 case MAC_PROP_SPEED:
1431 if (pr_valsize < sizeof (uint64_t)) {
1432 ret = EOVERFLOW;
1433 break;
1434 }
1435 speed = mlxcx_speed_to_bits(port->mlp_oper_proto,
1436 port->mlp_ext_oper_proto);
1437 bcopy(&speed, pr_val, sizeof (speed));
1438 break;
1439 case MAC_PROP_STATUS:
1440 if (pr_valsize < sizeof (link_state_t)) {
1441 ret = EOVERFLOW;
1442 break;
1443 }
1444 switch (port->mlp_oper_status) {
1445 case MLXCX_PORT_STATUS_UP:
1446 case MLXCX_PORT_STATUS_UP_ONCE:
1447 *(link_state_t *)pr_val = LINK_STATE_UP;
1448 break;
1449 case MLXCX_PORT_STATUS_DOWN:
1450 *(link_state_t *)pr_val = LINK_STATE_DOWN;
1451 break;
1452 default:
1453 *(link_state_t *)pr_val = LINK_STATE_UNKNOWN;
1454 }
1455 break;
1456 case MAC_PROP_AUTONEG:
1457 if (pr_valsize < sizeof (uint8_t)) {
1458 ret = EOVERFLOW;
1459 break;
1460 }
1461 *(uint8_t *)pr_val = port->mlp_autoneg;
1462 break;
1463 case MAC_PROP_ADV_FEC_CAP:
1464 if (pr_valsize < sizeof (link_fec_t)) {
1465 ret = EOVERFLOW;
1466 break;
1467 }
1468 *(link_fec_t *)pr_val =
1469 mlxcx_fec_to_link_fec(port->mlp_fec_active);
1470 break;
1471 case MAC_PROP_EN_FEC_CAP:
1472 if (pr_valsize < sizeof (link_fec_t)) {
1473 ret = EOVERFLOW;
1474 break;
1475 }
1476 *(link_fec_t *)pr_val = port->mlp_fec_requested;
1477 break;
1478 case MAC_PROP_MTU:
1479 if (pr_valsize < sizeof (uint32_t)) {
1480 ret = EOVERFLOW;
1481 break;
1482 }
1483 *(uint32_t *)pr_val = port->mlp_mtu - MLXCX_MTU_OFFSET;
1484 break;
1485 case MAC_PROP_ADV_400GFDX_CAP:
1486 case MAC_PROP_EN_400GFDX_CAP:
1487 if (pr_valsize < sizeof (uint8_t)) {
1488 ret = EOVERFLOW;
1489 break;
1490 }
1491 *(uint8_t *)pr_val =
1492 (port->mlp_ext_max_proto & MLXCX_EXTPROTO_400G) != 0;
1493 break;
1494 case MAC_PROP_ADV_200GFDX_CAP:
1495 case MAC_PROP_EN_200GFDX_CAP:
1496 if (pr_valsize < sizeof (uint8_t)) {
1497 ret = EOVERFLOW;
1498 break;
1499 }
1500 *(uint8_t *)pr_val =
1501 (port->mlp_ext_max_proto & MLXCX_EXTPROTO_200G) != 0;
1502 break;
1503 case MAC_PROP_ADV_100GFDX_CAP:
1504 case MAC_PROP_EN_100GFDX_CAP:
1505 if (pr_valsize < sizeof (uint8_t)) {
1506 ret = EOVERFLOW;
1507 break;
1508 }
1509 *(uint8_t *)pr_val = (port->mlp_max_proto &
1510 MLXCX_PROTO_100G) != 0 ||
1511 (port->mlp_ext_max_proto & MLXCX_EXTPROTO_100G) != 0;
1512 break;
1513 case MAC_PROP_ADV_50GFDX_CAP:
1514 case MAC_PROP_EN_50GFDX_CAP:
1515 if (pr_valsize < sizeof (uint8_t)) {
1516 ret = EOVERFLOW;
1517 break;
1518 }
1519 *(uint8_t *)pr_val = (port->mlp_max_proto &
1520 MLXCX_PROTO_50G) != 0 ||
1521 (port->mlp_ext_max_proto & MLXCX_EXTPROTO_50G) != 0;
1522 break;
1523 case MAC_PROP_ADV_40GFDX_CAP:
1524 case MAC_PROP_EN_40GFDX_CAP:
1525 if (pr_valsize < sizeof (uint8_t)) {
1526 ret = EOVERFLOW;
1527 break;
1528 }
1529 *(uint8_t *)pr_val = (port->mlp_max_proto &
1530 MLXCX_PROTO_40G) != 0 ||
1531 (port->mlp_ext_max_proto & MLXCX_EXTPROTO_40G) != 0;
1532 break;
1533 case MAC_PROP_ADV_25GFDX_CAP:
1534 case MAC_PROP_EN_25GFDX_CAP:
1535 if (pr_valsize < sizeof (uint8_t)) {
1536 ret = EOVERFLOW;
1537 break;
1538 }
1539 *(uint8_t *)pr_val = (port->mlp_max_proto &
1540 MLXCX_PROTO_25G) != 0 ||
1541 (port->mlp_ext_max_proto & MLXCX_EXTPROTO_25G) != 0;
1542 break;
1543 case MAC_PROP_ADV_10GFDX_CAP:
1544 case MAC_PROP_EN_10GFDX_CAP:
1545 if (pr_valsize < sizeof (uint8_t)) {
1546 ret = EOVERFLOW;
1547 break;
1548 }
1549 *(uint8_t *)pr_val = (port->mlp_max_proto &
1550 MLXCX_PROTO_10G) != 0 ||
1551 (port->mlp_ext_max_proto & MLXCX_EXTPROTO_10G) != 0;
1552 break;
1553 case MAC_PROP_ADV_1000FDX_CAP:
1554 case MAC_PROP_EN_1000FDX_CAP:
1555 if (pr_valsize < sizeof (uint8_t)) {
1556 ret = EOVERFLOW;
1557 break;
1558 }
1559 *(uint8_t *)pr_val = (port->mlp_max_proto &
1560 MLXCX_PROTO_1G) != 0 ||
1561 (port->mlp_ext_max_proto & MLXCX_EXTPROTO_1G) != 0;
1562 break;
1563 case MAC_PROP_ADV_100FDX_CAP:
1564 case MAC_PROP_EN_100FDX_CAP:
1565 if (pr_valsize < sizeof (uint8_t)) {
1566 ret = EOVERFLOW;
1567 break;
1568 }
1569 *(uint8_t *)pr_val = (port->mlp_max_proto &
1570 MLXCX_PROTO_100M) != 0 ||
1571 (port->mlp_ext_max_proto & MLXCX_EXTPROTO_100M) != 0;
1572 break;
1573 default:
1574 ret = ENOTSUP;
1575 break;
1576 }
1577
1578 mutex_exit(&port->mlp_mtx);
1579
1580 return (ret);
1581 }
1582
/* Optional mac(9E) entry points this driver implements. */
#define	MLXCX_MAC_CALLBACK_FLAGS \
	(MC_GETCAPAB | MC_GETPROP | MC_PROPINFO | MC_SETPROP)

/*
 * mac(9E) callback table.  mc_tx and mc_unicst are NULL because this
 * driver uses the rings/groups interfaces (MAC_CAPAB_RINGS) instead of
 * the legacy single-ring transmit/unicast entry points.
 */
static mac_callbacks_t mlxcx_mac_callbacks = {
	.mc_callbacks = MLXCX_MAC_CALLBACK_FLAGS,
	.mc_getstat = mlxcx_mac_stat,
	.mc_start = mlxcx_mac_start,
	.mc_stop = mlxcx_mac_stop,
	.mc_setpromisc = mlxcx_mac_setpromisc,
	.mc_multicst = mlxcx_mac_multicast,
	.mc_ioctl = NULL,
	.mc_getcapab = mlxcx_mac_getcapab,
	.mc_setprop = mlxcx_mac_setprop,
	.mc_getprop = mlxcx_mac_getprop,
	.mc_propinfo = mlxcx_mac_propinfo,
	.mc_tx = NULL,
	.mc_unicst = NULL,
};
1601
1602 boolean_t
mlxcx_register_mac(mlxcx_t * mlxp)1603 mlxcx_register_mac(mlxcx_t *mlxp)
1604 {
1605 mac_register_t *mac = mac_alloc(MAC_VERSION);
1606 mlxcx_port_t *port;
1607 int ret;
1608
1609 if (mac == NULL)
1610 return (B_FALSE);
1611
1612 VERIFY3U(mlxp->mlx_nports, ==, 1);
1613 port = &mlxp->mlx_ports[0];
1614
1615 mutex_enter(&port->mlp_mtx);
1616
1617 mac->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
1618 mac->m_driver = mlxp;
1619 mac->m_dip = mlxp->mlx_dip;
1620 mac->m_src_addr = port->mlp_mac_address;
1621 mac->m_callbacks = &mlxcx_mac_callbacks;
1622 mac->m_min_sdu = MLXCX_MTU_OFFSET;
1623 mac->m_max_sdu = port->mlp_mtu - MLXCX_MTU_OFFSET;
1624 mac->m_margin = VLAN_TAGSZ;
1625 mac->m_priv_props = mlxcx_priv_props;
1626 mac->m_v12n = MAC_VIRT_LEVEL1;
1627
1628 ret = mac_register(mac, &mlxp->mlx_mac_hdl);
1629 if (ret != 0) {
1630 mlxcx_warn(mlxp, "mac_register() returned %d", ret);
1631 }
1632 mac_free(mac);
1633
1634 mutex_exit(&port->mlp_mtx);
1635
1636 mlxcx_update_link_state(mlxp, port);
1637
1638 return (ret == 0);
1639 }
1640