/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
 */

#include <sys/types.h>
#include <sys/errno.h>
#include <sys/sysmacros.h>
#include <sys/param.h>
#include <sys/machsystm.h>
#include <sys/stream.h>
#include <sys/strsubr.h>
#include <sys/kmem.h>
#include <sys/strsun.h>
#include <sys/callb.h>
#include <sys/sdt.h>
#include <sys/ethernet.h>
#include <sys/mach_descrip.h>
#include <sys/mdeg.h>
#include <sys/vnet.h>
#include <sys/vio_mailbox.h>
#include <sys/vio_common.h>
#include <sys/vnet_common.h>
#include <sys/vnet_mailbox.h>
#include <sys/vio_util.h>
#include <sys/vnet_gen.h>

/*
 * This file contains the implementation of the RxDringData transfer mode of
 * the VIO Protocol in vnet. The functions in this file are invoked from
 * vnet_gen.c after RxDringData mode is negotiated with the peer during the
 * attribute phase of the handshake. This file contains functions that set up
 * the transmit and receive descriptor rings and associated resources in
 * RxDringData mode. It also contains the transmit and receive data processing
 * functions that are invoked in RxDringData mode. The data processing
 * routines in this file have the suffix '_shm' to indicate the shared memory
 * mechanism used in RxDringData mode.
 */
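
/*
 * Summary of the descriptor state machine as implemented in this file (see
 * the VIO_DESC_* states): the exporter (receive side) initializes each
 * descriptor to VIO_DESC_FREE when it creates the ring; the importer
 * (transmit side) marks every descriptor VIO_DESC_DONE when it maps the
 * ring. To transmit a frame, the importer moves a DONE descriptor through
 * VIO_DESC_INITIALIZING while copying data into the shared buffer and then
 * marks it VIO_DESC_READY; the exporter consumes READY descriptors and
 * returns them to VIO_DESC_DONE.
 */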

/* Functions exported to vnet_gen.c */
int vgen_create_rx_dring(vgen_ldc_t *ldcp);
void vgen_destroy_rx_dring(vgen_ldc_t *ldcp);
int vgen_map_tx_dring(vgen_ldc_t *ldcp, void *pkt);
void vgen_unmap_tx_dring(vgen_ldc_t *ldcp);
int vgen_map_data(vgen_ldc_t *ldcp, void *pkt);
int vgen_dringsend_shm(void *arg, mblk_t *mp);
int vgen_handle_dringdata_shm(void *arg1, void *arg2);
mblk_t *vgen_poll_rcv_shm(vgen_ldc_t *ldcp, int bytes_to_pickup);
int vgen_send_dringack_shm(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp,
    uint32_t start, int32_t end, uint8_t pstate);

/* Internal functions */
static int vgen_handle_dringdata_info_shm(vgen_ldc_t *ldcp, vio_msg_tag_t *tp);
static int vgen_handle_dringdata_ack_shm(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
static int vgen_handle_dringdata_nack_shm(vgen_ldc_t *ldcp, vio_msg_tag_t *tp);
static int vgen_intr_rcv_shm(vgen_ldc_t *ldcp);
static int vgen_receive_packet(vgen_ldc_t *ldcp, mblk_t **bp, uint_t *size);
static int vgen_send_dringdata_shm(vgen_ldc_t *ldcp, uint32_t start,
    int32_t end);
static int vgen_sendmsg_shm(vgen_ldc_t *ldcp, caddr_t msg, size_t msglen);

/* Functions imported from vnet_gen.c */
extern int vgen_handle_evt_read(vgen_ldc_t *ldcp, vgen_caller_t caller);
extern int vgen_handle_evt_reset(vgen_ldc_t *ldcp, vgen_caller_t caller);
extern void vgen_handle_pkt_data(void *arg1, void *arg2, uint32_t msglen);
extern void vgen_destroy_rxpools(void *arg);

/* Tunables */
extern uint32_t vnet_num_descriptors;
extern uint32_t vgen_chain_len;
extern uint32_t vgen_ldcwr_retries;
extern uint32_t vgen_recv_delay;
extern uint32_t vgen_recv_retries;
extern uint32_t vgen_nrbufs_factor;

#ifdef DEBUG

#define DEBUG_PRINTF vgen_debug_printf

extern int vnet_dbglevel;
extern int vgen_inject_err_flag;

extern void vgen_debug_printf(const char *fname, vgen_t *vgenp,
    vgen_ldc_t *ldcp, const char *fmt, ...);
extern boolean_t vgen_inject_error(vgen_ldc_t *ldcp, int error);

#endif

/*
 * Allocate receive resources for the channel. The resources consist of a
 * receive descriptor ring and an associated receive buffer area.
 */
int
vgen_create_rx_dring(vgen_ldc_t *ldcp)
{
        int i, j;
        int rv;
        uint32_t ncookies;
        ldc_mem_info_t minfo;
        vnet_rx_dringdata_desc_t *rxdp;
        size_t data_sz;
        vio_mblk_t *vmp;
        vio_mblk_t **rxdp_to_vmp;
        uint32_t rxdsize;
        caddr_t datap = NULL;
        vgen_t *vgenp = LDC_TO_VGEN(ldcp);

        rxdsize = sizeof (vnet_rx_dringdata_desc_t);
        ldcp->num_rxds = vnet_num_descriptors;
        ldcp->num_rbufs = VGEN_RXDRING_NRBUFS;
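
        /*
         * The buffer count intentionally exceeds the descriptor count
         * (presumably scaled by the vgen_nrbufs_factor tunable), so that
         * buffers remain available to the peer while some received buffers
         * are still held by the upper stack; see the block comment above
         * the rx buffer area allocation below.
         */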

        /* Create the receive descriptor ring */
        rv = ldc_mem_dring_create(ldcp->num_rxds, rxdsize,
            &ldcp->rx_dring_handle);
        if (rv != 0) {
                DWARN(vgenp, ldcp, "ldc_mem_dring_create() failed\n");
                goto fail;
        }

        /* Get the address of the descriptor ring */
        rv = ldc_mem_dring_info(ldcp->rx_dring_handle, &minfo);
        if (rv != 0) {
                DWARN(vgenp, ldcp, "ldc_mem_dring_info() failed\n");
                goto fail;
        }
        ldcp->rxdp = (vnet_rx_dringdata_desc_t *)(minfo.vaddr);
        bzero(ldcp->rxdp, sizeof (*rxdp) * (ldcp->num_rxds));

        /*
         * Allocate a table that maps a descriptor to its associated buffer;
         * used while receiving to validate that the peer has not changed the
         * buffer offset provided in the descriptor.
         */
        rxdp_to_vmp = kmem_zalloc(ldcp->num_rxds * sizeof (uintptr_t),
            KM_SLEEP);
        ldcp->rxdp_to_vmp = rxdp_to_vmp;

        /*
         * Allocate a single large buffer that serves as the rx buffer area.
         * We allocate an ldc memory handle and export the buffer area as
         * shared memory. We send the ldc memcookie for this buffer space to
         * the peer, as part of the dring registration phase during handshake.
         * We manage this buffer area as individual buffers of max_frame_size
         * and provide specific buffer offsets in each descriptor to the peer.
         * Note that the factor used to compute the # of buffers (above) must
         * be > 1 to ensure that there are more buffers than the # of
         * descriptors. This is needed because, while the shared memory
         * buffers are sent up our stack during receive, the sender needs
         * additional buffers that can be used for further transmits. This
         * also means there is no one-to-one correspondence between the
         * descriptor index and buffer offset. The sender has to read the
         * buffer offset in the descriptor and use the specified offset to
         * copy the tx data into the shared buffer. We (receiver) manage the
         * individual buffers and their state (see VIO_MBLK_STATEs in
         * vio_util.h).
         */
        data_sz = RXDRING_DBLK_SZ(vgenp->max_frame_size);
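
        /*
         * RXDRING_DBLK_SZ is assumed to round the per-buffer size up from
         * max_frame_size to cover alignment and mblk bookkeeping; the rx
         * data area below is that rounded size times the number of buffers.
         */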

        ldcp->rx_data_sz = data_sz * ldcp->num_rbufs;
        ldcp->rx_dblk_sz = data_sz;
        datap = kmem_zalloc(ldcp->rx_data_sz, KM_SLEEP);
        ldcp->rx_datap = datap;

        /* Allocate an ldc memhandle for the entire rx data area */
        rv = ldc_mem_alloc_handle(ldcp->ldc_handle, &ldcp->rx_data_handle);
        if (rv) {
                ldcp->rx_data_handle = 0;
                goto fail;
        }

        /* Allocate memory for the data cookies */
        ldcp->rx_data_cookie = kmem_zalloc(VNET_DATA_AREA_COOKIES *
            sizeof (ldc_mem_cookie_t), KM_SLEEP);

        /*
         * Bind the ldc memhandle to the corresponding rx data area.
         */
        ncookies = 0;
        rv = ldc_mem_bind_handle(ldcp->rx_data_handle, (caddr_t)datap,
            ldcp->rx_data_sz, LDC_DIRECT_MAP, LDC_MEM_W,
            ldcp->rx_data_cookie, &ncookies);
        if (rv != 0) {
                goto fail;
        }
        if ((ncookies == 0) || (ncookies > VNET_DATA_AREA_COOKIES)) {
                goto fail;
        }
        ldcp->rx_data_ncookies = ncookies;
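
        /*
         * The data area may span multiple LDC cookies if it is not
         * physically contiguous; gather the remaining cookies below so that
         * all of them can be sent to the peer during dring registration.
         */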

        for (j = 1; j < ncookies; j++) {
                rv = ldc_mem_nextcookie(ldcp->rx_data_handle,
                    &(ldcp->rx_data_cookie[j]));
                if (rv != 0) {
                        DERR(vgenp, ldcp, "ldc_mem_nextcookie "
                            "failed rv (%d)", rv);
                        goto fail;
                }
        }

        /*
         * Successful in binding the handle to the rx data area. Now set up
         * mblks around each data buffer and set up the descriptors to point
         * to these rx data buffers. We associate each descriptor with a
         * buffer by specifying the buffer offset in the descriptor. When the
         * peer needs to transmit data, this offset is read by the peer to
         * determine the buffer in the mapped buffer area where the data to
         * be transmitted should be copied, for a specific descriptor.
         */
        rv = vio_create_mblks(ldcp->num_rbufs, data_sz, (uint8_t *)datap,
            &ldcp->rx_vmp);
        if (rv != 0) {
                goto fail;
        }

        for (i = 0; i < ldcp->num_rxds; i++) {
                rxdp = &(ldcp->rxdp[i]);
                /* allocate an mblk around this data buffer */
                vmp = vio_allocb(ldcp->rx_vmp);
                ASSERT(vmp != NULL);
                rxdp->data_buf_offset = VIO_MBLK_DATA_OFF(vmp) + VNET_IPALIGN;
                rxdp->dstate = VIO_DESC_FREE;
                rxdp_to_vmp[i] = vmp;
        }
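
        /*
         * Per the protocol, descriptors are exported in the VIO_DESC_FREE
         * state; the peer flips them to VIO_DESC_DONE once it maps the ring
         * (see vgen_map_tx_dring()), after which they cycle between READY
         * and DONE.
         */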

        /*
         * The descriptors and the associated buffers are all ready;
         * now bind the descriptor ring to the channel.
         */
        rv = ldc_mem_dring_bind(ldcp->ldc_handle, ldcp->rx_dring_handle,
            LDC_DIRECT_MAP | LDC_SHADOW_MAP, LDC_MEM_RW,
            &ldcp->rx_dring_cookie, &ncookies);
        if (rv != 0) {
                DWARN(vgenp, ldcp, "ldc_mem_dring_bind failed "
                    "rv(%x)\n", rv);
                goto fail;
        }
        ASSERT(ncookies == 1);
        ldcp->rx_dring_ncookies = ncookies;

        /* initialize rx seqnum and index */
        ldcp->next_rxseq = VNET_ISS;
        ldcp->next_rxi = 0;

        return (VGEN_SUCCESS);

fail:
        vgen_destroy_rx_dring(ldcp);
        return (VGEN_FAILURE);
}

/*
 * Free receive resources for the channel.
 */
void
vgen_destroy_rx_dring(vgen_ldc_t *ldcp)
{
        vgen_t *vgenp = LDC_TO_VGEN(ldcp);

        /* We first unbind the descriptor ring */
        if (ldcp->rx_dring_ncookies != 0) {
                (void) ldc_mem_dring_unbind(ldcp->rx_dring_handle);
                ldcp->rx_dring_ncookies = 0;
        }

        /* Destroy the mblks that are wrapped around the rx data buffers */
        if (ldcp->rx_vmp != NULL) {
                vio_clobber_pool(ldcp->rx_vmp);
                if (vio_destroy_mblks(ldcp->rx_vmp) != 0) {
                        /*
                         * If we can't destroy the rx pool for this channel,
                         * dispatch a task to retry and clean up. Note that we
                         * don't need to wait for the task to complete. If the
                         * vnet device itself gets detached, it will wait for
                         * the task to complete implicitly in
                         * ddi_taskq_destroy().
                         */
                        (void) ddi_taskq_dispatch(vgenp->rxp_taskq,
                            vgen_destroy_rxpools, ldcp->rx_vmp, DDI_SLEEP);
                }
                ldcp->rx_vmp = NULL;
        }

        /* Free rx data area cookies */
        if (ldcp->rx_data_cookie != NULL) {
                kmem_free(ldcp->rx_data_cookie, VNET_DATA_AREA_COOKIES *
                    sizeof (ldc_mem_cookie_t));
                ldcp->rx_data_cookie = NULL;
        }

        /* Unbind rx data area memhandle */
        if (ldcp->rx_data_ncookies != 0) {
                (void) ldc_mem_unbind_handle(ldcp->rx_data_handle);
                ldcp->rx_data_ncookies = 0;
        }

        /* Free rx data area memhandle */
        if (ldcp->rx_data_handle != 0) {
                (void) ldc_mem_free_handle(ldcp->rx_data_handle);
                ldcp->rx_data_handle = 0;
        }

        /* Now free the rx data area itself */
        if (ldcp->rx_datap != NULL) {
                /* prealloc'd rx data buffer */
                kmem_free(ldcp->rx_datap, ldcp->rx_data_sz);
                ldcp->rx_datap = NULL;
                ldcp->rx_data_sz = 0;
        }

        /* Finally, free the receive descriptor ring */
        if (ldcp->rx_dring_handle != 0) {
                (void) ldc_mem_dring_destroy(ldcp->rx_dring_handle);
                ldcp->rx_dring_handle = 0;
                ldcp->rxdp = NULL;
        }

        if (ldcp->rxdp_to_vmp != NULL) {
                kmem_free(ldcp->rxdp_to_vmp,
                    ldcp->num_rxds * sizeof (uintptr_t));
                ldcp->rxdp_to_vmp = NULL;
        }

        /* Reset rx index and seqnum */
        ldcp->next_rxi = 0;
        ldcp->next_rxseq = VNET_ISS;
}

/*
 * Map the receive descriptor ring exported
 * by the peer, as our transmit descriptor ring.
 */
int
vgen_map_tx_dring(vgen_ldc_t *ldcp, void *pkt)
{
        int i;
        int rv;
        ldc_mem_info_t minfo;
        ldc_mem_cookie_t dcookie;
        uint32_t ncookies;
        uint32_t num_desc;
        uint32_t desc_size;
        vnet_rx_dringdata_desc_t *txdp;
        on_trap_data_t otd;
        vio_dring_reg_msg_t *msg = pkt;

        ncookies = msg->ncookies;
        num_desc = msg->num_descriptors;
        desc_size = msg->descriptor_size;

        /*
         * Sanity check.
         */
        if (num_desc < VGEN_NUM_DESCRIPTORS_MIN ||
            desc_size < sizeof (vnet_rx_dringdata_desc_t) ||
            ncookies > 1) {
                goto fail;
        }
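
        /*
         * Note that only a single dring cookie is supported in this mode:
         * the peer is expected to describe its descriptor ring with one
         * cookie, while the data area cookies are carried separately in the
         * extended registration message (see vgen_map_data()).
         */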

        bcopy(&msg->cookie[0], &dcookie, sizeof (ldc_mem_cookie_t));

        /* Map the remote dring */
        rv = ldc_mem_dring_map(ldcp->ldc_handle, &dcookie, ncookies, num_desc,
            desc_size, LDC_DIRECT_MAP, &(ldcp->tx_dring_handle));
        if (rv != 0) {
                goto fail;
        }

        /*
         * Successfully mapped; now try to get info about the mapped dring
         */
        rv = ldc_mem_dring_info(ldcp->tx_dring_handle, &minfo);
        if (rv != 0) {
                goto fail;
        }

        /*
         * Save ring address, number of descriptors.
         */
        ldcp->mtxdp = (vnet_rx_dringdata_desc_t *)(minfo.vaddr);
        bcopy(&dcookie, &(ldcp->tx_dring_cookie), sizeof (dcookie));
        ldcp->tx_dring_ncookies = ncookies;
        ldcp->num_txds = num_desc;

        /* Initialize tx dring indexes and seqnum */
        ldcp->next_txi = ldcp->cur_txi = ldcp->resched_peer_txi = 0;
        ldcp->next_txseq = VNET_ISS - 1;
        ldcp->resched_peer = B_TRUE;
        ldcp->dring_mtype = minfo.mtype;
        ldcp->dringdata_msgid = 0;

        /* Save peer's dring_info values */
        bcopy(&dcookie, &(ldcp->peer_hparams.dring_cookie),
            sizeof (ldc_mem_cookie_t));
        ldcp->peer_hparams.num_desc = num_desc;
        ldcp->peer_hparams.desc_size = desc_size;
        ldcp->peer_hparams.dring_ncookies = ncookies;

        /* Set dring_ident for the peer */
        ldcp->peer_hparams.dring_ident = (uint64_t)ldcp->mtxdp;

        /* Return the dring_ident in ack msg */
        msg->dring_ident = (uint64_t)ldcp->mtxdp;
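
        /*
         * The dring_ident assigned above is simply the virtual address of
         * the mapped ring; dring data messages from the peer are later
         * validated against this value (see
         * vgen_handle_dringdata_info_shm()).
         */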

        /*
         * Mark the descriptor state as 'done'. This is implementation
         * specific and not required by the protocol. In our implementation,
         * we only need the descriptor to be in 'done' state to be used by
         * the transmit function and the peer is not aware of it. As the
         * protocol requires that during initial registration the exporting
         * end point mark the dstate as 'free', we change it to 'done' here.
         * After this, the dstate in our implementation will keep moving
         * between 'ready', set by our transmit function, and 'done', set by
         * the peer (per protocol) after receiving data.
         * Set up on_trap() protection before accessing the dring shared
         * memory area.
         */
        rv = LDC_ON_TRAP(&otd);
        if (rv != 0) {
                /*
                 * A data access fault occurred down the code path below
                 * while accessing the descriptors. Return failure.
                 */
                goto fail;
        }

        for (i = 0; i < num_desc; i++) {
                txdp = &ldcp->mtxdp[i];
                txdp->dstate = VIO_DESC_DONE;
        }

        (void) LDC_NO_TRAP();
        return (VGEN_SUCCESS);

fail:
        if (ldcp->tx_dring_handle != 0) {
                (void) ldc_mem_dring_unmap(ldcp->tx_dring_handle);
                ldcp->tx_dring_handle = 0;
        }
        return (VGEN_FAILURE);
}

/*
 * Unmap the transmit descriptor ring.
 */
void
vgen_unmap_tx_dring(vgen_ldc_t *ldcp)
{
        /* Unmap mapped tx data area */
        if (ldcp->tx_datap != NULL) {
                (void) ldc_mem_unmap(ldcp->tx_data_handle);
                ldcp->tx_datap = NULL;
        }

        /* Free tx data area handle */
        if (ldcp->tx_data_handle != 0) {
                (void) ldc_mem_free_handle(ldcp->tx_data_handle);
                ldcp->tx_data_handle = 0;
        }

        /* Free tx data area cookies */
        if (ldcp->tx_data_cookie != NULL) {
                kmem_free(ldcp->tx_data_cookie, ldcp->tx_data_ncookies *
                    sizeof (ldc_mem_cookie_t));
                ldcp->tx_data_cookie = NULL;
                ldcp->tx_data_ncookies = 0;
        }

        /* Unmap peer's dring */
        if (ldcp->tx_dring_handle != 0) {
                (void) ldc_mem_dring_unmap(ldcp->tx_dring_handle);
                ldcp->tx_dring_handle = 0;
        }

        /* clobber tx ring members */
        bzero(&ldcp->tx_dring_cookie, sizeof (ldcp->tx_dring_cookie));
        ldcp->mtxdp = NULL;
        ldcp->next_txi = ldcp->cur_txi = ldcp->resched_peer_txi = 0;
        ldcp->num_txds = 0;
        ldcp->next_txseq = VNET_ISS - 1;
        ldcp->resched_peer = B_TRUE;
}

/*
 * Map the shared memory data buffer area exported by the peer.
 */
int
vgen_map_data(vgen_ldc_t *ldcp, void *pkt)
{
        int rv;
        vio_dring_reg_ext_msg_t *emsg;
        vio_dring_reg_msg_t *msg = (vio_dring_reg_msg_t *)pkt;
        uint8_t *buf = (uint8_t *)msg->cookie;
        vgen_t *vgenp = LDC_TO_VGEN(ldcp);
        ldc_mem_info_t minfo;

        /* skip over the dring cookies to get to the extended message data */
        ASSERT(msg->ncookies == 1);
        buf += (msg->ncookies * sizeof (ldc_mem_cookie_t));

        emsg = (vio_dring_reg_ext_msg_t *)buf;
        if (emsg->data_ncookies > VNET_DATA_AREA_COOKIES) {
                return (VGEN_FAILURE);
        }

        /* save # of data area cookies */
        ldcp->tx_data_ncookies = emsg->data_ncookies;

        /* save data area size */
        ldcp->tx_data_sz = emsg->data_area_size;

        /* allocate an ldc mem handle for the data area */
        rv = ldc_mem_alloc_handle(ldcp->ldc_handle, &ldcp->tx_data_handle);
        if (rv != 0) {
                DWARN(vgenp, ldcp, "ldc_mem_alloc_handle() failed: %d\n", rv);
                return (VGEN_FAILURE);
        }

        /* map the data area */
        rv = ldc_mem_map(ldcp->tx_data_handle, emsg->data_cookie,
            emsg->data_ncookies, LDC_DIRECT_MAP, LDC_MEM_W,
            (caddr_t *)&ldcp->tx_datap, NULL);
        if (rv != 0) {
                DWARN(vgenp, ldcp, "ldc_mem_map() failed: %d\n", rv);
                return (VGEN_FAILURE);
        }

        /* get the map info */
        rv = ldc_mem_info(ldcp->tx_data_handle, &minfo);
        if (rv != 0) {
                DWARN(vgenp, ldcp, "ldc_mem_info() failed: %d\n", rv);
                return (VGEN_FAILURE);
        }

        if (minfo.mtype != LDC_DIRECT_MAP) {
                DWARN(vgenp, ldcp, "mtype(%d) is not direct map\n",
                    minfo.mtype);
                return (VGEN_FAILURE);
        }

        /* allocate memory for data area cookies */
        ldcp->tx_data_cookie = kmem_zalloc(emsg->data_ncookies *
            sizeof (ldc_mem_cookie_t), KM_SLEEP);

        /* save data area cookies */
        bcopy(emsg->data_cookie, ldcp->tx_data_cookie,
            emsg->data_ncookies * sizeof (ldc_mem_cookie_t));

        return (VGEN_SUCCESS);
}

/*
 * This function transmits normal data frames (non-priority) over the channel.
 * It queues the frame into the transmit descriptor ring and sends a
 * VIO_DRING_DATA message if needed, to wake up the peer to (re)start
 * processing.
 */
int
vgen_dringsend_shm(void *arg, mblk_t *mp)
{
        uint32_t next_txi;
        uint32_t txi;
        vnet_rx_dringdata_desc_t *txdp;
        struct ether_header *ehp;
        size_t mblksz;
        caddr_t dst;
        mblk_t *bp;
        size_t size;
        uint32_t buf_offset;
        on_trap_data_t otd;
        int rv = 0;
        boolean_t is_bcast = B_FALSE;
        boolean_t is_mcast = B_FALSE;
        vgen_ldc_t *ldcp = (vgen_ldc_t *)arg;
        vgen_t *vgenp = LDC_TO_VGEN(ldcp);
        vgen_stats_t *statsp = &ldcp->stats;
        vgen_hparams_t *lp = &ldcp->local_hparams;
        boolean_t resched_peer = B_FALSE;
        boolean_t tx_update = B_FALSE;

        /* Drop the packet if ldc is not up or handshake is not done */
        if (ldcp->ldc_status != LDC_UP) {
                DBG2(vgenp, ldcp, "status(%d), dropping packet\n",
                    ldcp->ldc_status);
                goto dringsend_shm_exit;
        }

        if (ldcp->hphase != VH_DONE) {
                DWARN(vgenp, ldcp, "hphase(%x), dropping packet\n",
                    ldcp->hphase);
                goto dringsend_shm_exit;
        }

        size = msgsize(mp);
        if (size > (size_t)lp->mtu) {
                DWARN(vgenp, ldcp, "invalid size(%lu)\n", size);
                goto dringsend_shm_exit;
        }
        if (size < ETHERMIN)
                size = ETHERMIN;

        ehp = (struct ether_header *)mp->b_rptr;
        is_bcast = IS_BROADCAST(ehp);
        is_mcast = IS_MULTICAST(ehp);

        /*
         * Set up on_trap() protection before accessing shared memory areas
         * (descriptor and data buffer). Note that we enable this protection
         * a little early and turn it off slightly later, rather than keeping
         * it enabled strictly at the points in the code below where the
         * descriptor and data buffer are accessed. This is done for
         * performance reasons:
         * (a) to avoid calling the trap protection code while holding a
         *     mutex.
         * (b) to avoid multiple on/off steps for descriptor and data
         *     accesses.
         */
        rv = LDC_ON_TRAP(&otd);
        if (rv != 0) {
                /*
                 * A data access fault occurred down the code path below while
                 * accessing either the descriptor or the data buffer. Release
                 * any locks that we might have acquired in the code below and
                 * return failure.
                 */
                DERR(vgenp, ldcp, "data access fault occurred\n");
                statsp->oerrors++;
                if (mutex_owned(&ldcp->txlock)) {
                        mutex_exit(&ldcp->txlock);
                }
                if (mutex_owned(&ldcp->wrlock)) {
                        mutex_exit(&ldcp->wrlock);
                }
                goto dringsend_shm_exit;
        }

        /*
         * Allocate a descriptor
         */
        mutex_enter(&ldcp->txlock);
        txi = next_txi = ldcp->next_txi;
        INCR_TXI(next_txi, ldcp);
        txdp = &(ldcp->mtxdp[txi]);
        if (txdp->dstate != VIO_DESC_DONE) {    /* out of descriptors */
                if (ldcp->tx_blocked == B_FALSE) {
                        ldcp->tx_blocked_lbolt = ddi_get_lbolt();
                        ldcp->tx_blocked = B_TRUE;
                }
                statsp->tx_no_desc++;
                mutex_exit(&ldcp->txlock);
                (void) LDC_NO_TRAP();
                return (VGEN_TX_NORESOURCES);
        } else {
                txdp->dstate = VIO_DESC_INITIALIZING;
        }

        if (ldcp->tx_blocked == B_TRUE) {
                ldcp->tx_blocked = B_FALSE;
                tx_update = B_TRUE;
        }

        /* Update descriptor ring index */
        ldcp->next_txi = next_txi;
        mutex_exit(&ldcp->txlock);

        if (tx_update == B_TRUE) {
                vio_net_tx_update_t vtx_update =
                    ldcp->portp->vcb.vio_net_tx_update;

                vtx_update(ldcp->portp->vhp);
        }

        /* Ensure load ordering of dstate (above) and data_buf_offset. */
        MEMBAR_CONSUMER();

        /* Get the offset of the buffer to be used */
        buf_offset = txdp->data_buf_offset;

        /* Access the buffer using the offset */
        dst = (caddr_t)ldcp->tx_datap + buf_offset;

        /* Copy data into mapped transmit buffer */
        for (bp = mp; bp != NULL; bp = bp->b_cont) {
                mblksz = MBLKL(bp);
                bcopy(bp->b_rptr, dst, mblksz);
                dst += mblksz;
        }

        /* Set the size of data in the descriptor */
        txdp->nbytes = size;

        /*
         * Ensure store ordering of nbytes and dstate (below); so that the
         * peer sees the right nbytes value after it checks that the dstate
         * is READY.
         */
        MEMBAR_PRODUCER();

        mutex_enter(&ldcp->wrlock);

        ASSERT(txdp->dstate == VIO_DESC_INITIALIZING);

        /* Mark the descriptor ready */
        txdp->dstate = VIO_DESC_READY;

        /* Check if peer needs wake up (handled below) */
        if (ldcp->resched_peer == B_TRUE && ldcp->resched_peer_txi == txi) {
                resched_peer = B_TRUE;
                ldcp->resched_peer = B_FALSE;
        }
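
        /*
         * resched_peer_txi is the index at which the peer reported (via a
         * STOPPED ack) that it halted processing; a dring data message is
         * sent only when that descriptor becomes READY, which avoids waking
         * the peer for descriptors it will reach on its own (see
         * vgen_handle_dringdata_ack_shm()).
         */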

        /* Update tx stats */
        statsp->opackets++;
        statsp->obytes += size;
        if (is_bcast)
                statsp->brdcstxmt++;
        else if (is_mcast)
                statsp->multixmt++;

        mutex_exit(&ldcp->wrlock);

        /*
         * We are done accessing shared memory; clear trap protection.
         */
        (void) LDC_NO_TRAP();

        /*
         * Need to wake up the peer?
         */
        if (resched_peer == B_TRUE) {
                rv = vgen_send_dringdata_shm(ldcp, (uint32_t)txi, -1);
                if (rv != 0) {
                        /* error: drop the packet */
                        DWARN(vgenp, ldcp, "failed sending dringdata msg "
                            "rv(%d) len(%lu)\n", rv, size);
                        mutex_enter(&ldcp->wrlock);
                        statsp->oerrors++;
                        ldcp->resched_peer = B_TRUE;
                        mutex_exit(&ldcp->wrlock);
                }
        }

dringsend_shm_exit:
        if (rv == ECONNRESET || rv == EACCES) {
                (void) vgen_handle_evt_reset(ldcp, VGEN_OTHER);
        }
        freemsg(mp);
        return (VGEN_TX_SUCCESS);
}

/*
 * Process dring data messages (info/ack/nack)
 */
int
vgen_handle_dringdata_shm(void *arg1, void *arg2)
{
        vgen_ldc_t *ldcp = (vgen_ldc_t *)arg1;
        vio_msg_tag_t *tagp = (vio_msg_tag_t *)arg2;
        vgen_t *vgenp = LDC_TO_VGEN(ldcp);
        int rv = 0;

        switch (tagp->vio_subtype) {

        case VIO_SUBTYPE_INFO:
                /*
                 * To reduce the locking contention, release the
                 * cblock here and re-acquire it once we are done
                 * receiving packets.
                 */
                mutex_exit(&ldcp->cblock);
                mutex_enter(&ldcp->rxlock);
                rv = vgen_handle_dringdata_info_shm(ldcp, tagp);
                mutex_exit(&ldcp->rxlock);
                mutex_enter(&ldcp->cblock);
                if (rv != 0) {
                        DWARN(vgenp, ldcp, "handle_data_info failed(%d)\n",
                            rv);
                }
                break;

        case VIO_SUBTYPE_ACK:
                rv = vgen_handle_dringdata_ack_shm(ldcp, tagp);
                if (rv != 0) {
                        DWARN(vgenp, ldcp, "handle_data_ack failed(%d)\n", rv);
                }
                break;

        case VIO_SUBTYPE_NACK:
                rv = vgen_handle_dringdata_nack_shm(ldcp, tagp);
                if (rv != 0) {
                        DWARN(vgenp, ldcp, "handle_data_nack failed(%d)\n",
                            rv);
                }
                break;
        }

        return (rv);
}

static int
vgen_handle_dringdata_info_shm(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
{
        uint32_t start;
        int32_t end;
        int rv = 0;
        vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;
        vgen_t *vgenp = LDC_TO_VGEN(ldcp);
        vgen_stats_t *statsp = &ldcp->stats;

        start = dringmsg->start_idx;
        end = dringmsg->end_idx;

        DBG1(vgenp, ldcp, "INFO: start(%d), end(%d)\n",
            start, end);

        if (!(CHECK_RXI(start, ldcp)) ||
            ((end != -1) && !(CHECK_RXI(end, ldcp)))) {
                DWARN(vgenp, ldcp, "Invalid Rx start(%d) or end(%d)\n",
                    start, end);
                /* drop the message if invalid index */
                return (0);
        }

        /* validate dring_ident */
        if (dringmsg->dring_ident != ldcp->peer_hparams.dring_ident) {
                DWARN(vgenp, ldcp, "Invalid dring ident 0x%lx\n",
                    dringmsg->dring_ident);
                /* invalid dring_ident, drop the msg */
                return (0);
        }

        statsp->dring_data_msgs_rcvd++;

        /*
         * If we are in polling mode, return from here without processing the
         * dring. We will process the dring in the context of the polling
         * thread.
         */
        if (ldcp->polling_on == B_TRUE) {
                return (0);
        }

        /*
         * Process the dring and receive packets in intr context.
         */
        rv = vgen_intr_rcv_shm(ldcp);
        if (rv != 0) {
                DWARN(vgenp, ldcp, "vgen_intr_rcv_shm() failed\n");
        }
        return (rv);
}

/*
 * Process the rx descriptor ring in the context of the interrupt thread
 * (vgen_ldc_cb() callback) and send the received packets up the stack.
 */
static int
vgen_intr_rcv_shm(vgen_ldc_t *ldcp)
{
        int rv;
        uint32_t end_ix;
        vio_dring_msg_t msg;
        uint_t mblk_sz;
        int count = 0;
        int total_count = 0;
        mblk_t *bp = NULL;
        mblk_t *bpt = NULL;
        mblk_t *mp = NULL;
        vio_net_rx_cb_t vrx_cb = ldcp->portp->vcb.vio_net_rx_cb;

        ASSERT(MUTEX_HELD(&ldcp->rxlock));

        do {
                rv = vgen_receive_packet(ldcp, &mp, &mblk_sz);
                if (rv != 0) {
                        if (rv == EINVAL) {
                                /* Invalid descriptor error; get next */
                                continue;
                        }
                        DTRACE_PROBE1(vgen_intr_nopkts, vgen_ldc_t *, ldcp);
                        break;
                }

                /* Build a chain of received packets */
                if (bp == NULL) {
                        /* first pkt */
                        bp = mp;
                        bpt = bp;
                        bpt->b_next = NULL;
                } else {
                        mp->b_next = NULL;
                        bpt->b_next = mp;
                        bpt = mp;
                }

                total_count++;
                count++;

                /*
                 * We are receiving the packets in interrupt context. If we
                 * have gathered vgen_chain_len (tunable) # of packets in the
                 * chain, send them up. (See vgen_poll_rcv_shm() for receiving
                 * in polling thread context).
                 */
                if (count == vgen_chain_len) {
                        DTRACE_PROBE2(vgen_intr_pkts, vgen_ldc_t *, ldcp,
                            int, count);
                        mutex_exit(&ldcp->rxlock);
                        vrx_cb(ldcp->portp->vhp, bp);
                        mutex_enter(&ldcp->rxlock);
                        bp = bpt = NULL;
                        count = 0;
                }

                /*
                 * Stop further processing if we processed the entire dring
                 * once; otherwise continue.
                 */
        } while (total_count < ldcp->num_rxds);

        if (bp != NULL) {
                DTRACE_PROBE2(vgen_intr_pkts, vgen_ldc_t *, ldcp, int, count);
                mutex_exit(&ldcp->rxlock);
                vrx_cb(ldcp->portp->vhp, bp);
                mutex_enter(&ldcp->rxlock);
        }

        if (ldcp->polling_on == B_FALSE) {
                /*
                 * We send a stopped message to the peer (sender) while we
                 * are in intr mode only, allowing the peer to send further
                 * data intrs (dring data msgs) to us.
                 */
                end_ix = ldcp->next_rxi;
                DECR_RXI(end_ix, ldcp);
                msg.dring_ident = ldcp->peer_hparams.dring_ident;
                rv = vgen_send_dringack_shm(ldcp, (vio_msg_tag_t *)&msg,
                    VNET_START_IDX_UNSPEC, end_ix, VIO_DP_STOPPED);
                return (rv);
        }

        return (0);
}

/*
 * Process the rx descriptor ring in the context of the mac polling thread.
 * Receive packets up to the limit specified by bytes_to_pickup or until
 * there are no more packets, whichever occurs first. Return the chain of
 * received packets.
 */
mblk_t *
vgen_poll_rcv_shm(vgen_ldc_t *ldcp, int bytes_to_pickup)
{
        uint_t mblk_sz = 0;
        uint_t sz = 0;
        mblk_t *bp = NULL;
        mblk_t *bpt = NULL;
        mblk_t *mp = NULL;
        int count = 0;
        int rv;

        mutex_enter(&ldcp->rxlock);

        if (ldcp->hphase != VH_DONE) {
                /* Channel is being reset and handshake not complete */
                mutex_exit(&ldcp->rxlock);
                return (NULL);
        }

        do {
                rv = vgen_receive_packet(ldcp, &mp, &mblk_sz);
                if (rv != 0) {
                        if (rv == EINVAL) {
                                /* Invalid descriptor error; get next */
                                continue;
                        }
                        DTRACE_PROBE1(vgen_poll_nopkts, vgen_ldc_t *, ldcp);
                        break;
                }

                /* Build a chain of received packets */
                if (bp == NULL) {
                        /* first pkt */
                        bp = mp;
                        bpt = bp;
                        bpt->b_next = NULL;
                } else {
                        mp->b_next = NULL;
                        bpt->b_next = mp;
                        bpt = mp;
                }

                /* Compute total size accumulated */
                sz += mblk_sz;
                count++;

                /* Reached the bytes limit; we are done. */
                if (sz >= bytes_to_pickup) {
                        break;
                }

                _NOTE(CONSTCOND)
        } while (1);

        /*
         * We prepend any high priority packets to the chain of packets; note
         * that if we are already at the bytes_to_pickup limit, we might
         * slightly exceed that in such cases. That should be ok, as these
         * pkts are expected to be small in size and arrive at an interval on
         * the order of a few seconds.
         */
        if (ldcp->rx_pktdata == vgen_handle_pkt_data &&
            ldcp->rx_pri_head != NULL) {
                ldcp->rx_pri_tail->b_next = bp;
                bp = ldcp->rx_pri_head;
                ldcp->rx_pri_head = ldcp->rx_pri_tail = NULL;
        }

        mutex_exit(&ldcp->rxlock);

        DTRACE_PROBE2(vgen_poll_pkts, vgen_ldc_t *, ldcp, int, count);
        DTRACE_PROBE2(vgen_poll_bytes, vgen_ldc_t *, ldcp, uint_t, sz);
        return (bp);
}

/*
 * Process the next index in the rx dring and receive the associated packet.
 *
 * Returns:
 *	bp:	Success: The received packet.
 *		Failure: NULL
 *	size:	Success: Size of received packet.
 *		Failure: 0
 *	retval:	Success: 0
 *		Failure: EAGAIN: Descriptor not ready.
 *			 EINVAL: Descriptor contents invalid.
 *			 ENOMEM: mblk allocation failed.
 */
static int
vgen_receive_packet(vgen_ldc_t *ldcp, mblk_t **bp, uint_t *size)
{
        uint32_t rxi;
        vio_mblk_t *vmp;
        vio_mblk_t *new_vmp;
        struct ether_header *ehp;
        vnet_rx_dringdata_desc_t *rxdp;
        int err = 0;
        uint32_t nbytes = 0;
        mblk_t *mp = NULL;
        mblk_t *dmp = NULL;
        vgen_stats_t *statsp = &ldcp->stats;
        vgen_hparams_t *lp = &ldcp->local_hparams;

        rxi = ldcp->next_rxi;
        rxdp = &(ldcp->rxdp[rxi]);
        vmp = ldcp->rxdp_to_vmp[rxi];

        if (rxdp->dstate != VIO_DESC_READY) {
                /*
                 * Descriptor is not ready.
                 */
                DTRACE_PROBE1(vgen_noready_rxds, vgen_ldc_t *, ldcp);
                return (EAGAIN);
        }

        /*
         * Ensure load ordering of dstate and nbytes.
         */
        MEMBAR_CONSUMER();

        nbytes = rxdp->nbytes;

        if ((nbytes < ETHERMIN) ||
            (nbytes > lp->mtu) ||
            (rxdp->data_buf_offset !=
            (VIO_MBLK_DATA_OFF(vmp) + VNET_IPALIGN))) {
                /*
                 * Descriptor contents invalid; mark it done so that it can
                 * be reused, and return EINVAL so that the caller skips to
                 * the next descriptor.
                 */
                statsp->ierrors++;
                rxdp->dstate = VIO_DESC_DONE;
                err = EINVAL;
                goto done;
        }

        /*
         * Now allocate a new buffer for this descriptor before sending up the
         * buffer being processed. If that fails, stop processing, as we are
         * out of receive buffers.
         */
        new_vmp = vio_allocb(ldcp->rx_vmp);

        /*
         * Process the current buffer being received.
         */
        mp = vmp->mp;

        if (new_vmp == NULL) {
                /*
                 * We failed to get a new mapped buffer that is needed to
                 * refill the descriptor. In that case, leave the current
                 * buffer bound to the descriptor; allocate an mblk
                 * dynamically and copy the contents of the buffer to the
                 * mblk. Then send up this mblk. This way the sender has the
                 * same buffer as before that can be used to send new data.
                 */
                statsp->norcvbuf++;
                dmp = allocb(nbytes + VNET_IPALIGN, BPRI_MED);
                if (dmp == NULL) {
                        statsp->ierrors++;
                        return (ENOMEM);
                }
                bcopy(mp->b_rptr + VNET_IPALIGN,
                    dmp->b_rptr + VNET_IPALIGN, nbytes);
                mp = dmp;
        } else {
                /* Mark the status of the current rbuf */
                vmp->state = VIO_MBLK_HAS_DATA;

                /* Set the offset of the new buffer in the descriptor */
                rxdp->data_buf_offset =
                    VIO_MBLK_DATA_OFF(new_vmp) + VNET_IPALIGN;
                ldcp->rxdp_to_vmp[rxi] = new_vmp;
        }
        mp->b_rptr += VNET_IPALIGN;
        mp->b_wptr = mp->b_rptr + nbytes;

        /*
         * Ensure store ordering of data_buf_offset and dstate; so that the
         * peer sees the right data_buf_offset after it checks that the
         * dstate is DONE.
         */
        MEMBAR_PRODUCER();

        /* Now mark the descriptor 'done' */
        rxdp->dstate = VIO_DESC_DONE;

        /* Update stats */
        statsp->ipackets++;
        statsp->rbytes += rxdp->nbytes;
        ehp = (struct ether_header *)mp->b_rptr;
        if (IS_BROADCAST(ehp))
                statsp->brdcstrcv++;
        else if (IS_MULTICAST(ehp))
                statsp->multircv++;
done:
        /* Update the next index to be processed */
        INCR_RXI(rxi, ldcp);

        /* Save the new recv index */
        ldcp->next_rxi = rxi;

        /* Return the packet received */
        *size = nbytes;
        *bp = mp;
        return (err);
}

static int
vgen_handle_dringdata_ack_shm(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
{
        uint32_t start;
        int32_t end;
        uint32_t txi;
        vgen_stats_t *statsp;
        vnet_rx_dringdata_desc_t *txdp;
        on_trap_data_t otd;
        int rv = 0;
        boolean_t ready_txd = B_FALSE;
        vgen_t *vgenp = LDC_TO_VGEN(ldcp);
        vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;

        start = dringmsg->start_idx;
        end = dringmsg->end_idx;
        statsp = &ldcp->stats;

        /*
         * Received an ack for our transmits up to a certain dring index.
         * This enables us to reclaim descriptors. We also send a new dring
         * data msg to the peer to restart processing if there are pending
         * transmit pkts.
         */
        DBG2(vgenp, ldcp, "ACK: start(%d), end(%d)\n", start, end);

        /*
         * In RxDringData mode (v1.6), a start index of -1 can be used by the
         * peer to indicate that it is unspecified. However, the end index
         * must be set correctly, indicating the last descriptor index
         * processed.
         */
        if (((start != VNET_START_IDX_UNSPEC) && !(CHECK_TXI(start, ldcp))) ||
            !(CHECK_TXI(end, ldcp))) {
                /* drop the message if invalid index */
                DWARN(vgenp, ldcp, "Invalid Tx ack start(%d) or end(%d)\n",
                    start, end);
                return (rv);
        }

        /* Validate dring_ident */
        if (dringmsg->dring_ident != ldcp->local_hparams.dring_ident) {
                /* invalid dring_ident, drop the msg */
                DWARN(vgenp, ldcp, "Invalid dring ident 0x%lx\n",
                    dringmsg->dring_ident);
                return (rv);
        }
        statsp->dring_data_acks_rcvd++;

        /*
         * Clear transmit flow control condition
         * as some descriptors should be free now.
         */
        mutex_enter(&ldcp->txlock);
        if (ldcp->tx_blocked == B_TRUE) {
                vio_net_tx_update_t vtx_update =
                    ldcp->portp->vcb.vio_net_tx_update;

                ldcp->tx_blocked = B_FALSE;
                vtx_update(ldcp->portp->vhp);
        }
        mutex_exit(&ldcp->txlock);

        if (dringmsg->dring_process_state != VIO_DP_STOPPED) {
                /*
                 * Receiver continued processing
                 * dring after sending us the ack.
                 */
                return (rv);
        }

        /*
         * Receiver stopped processing descriptors.
         */
        statsp->dring_stopped_acks_rcvd++;

        /*
         * Set up on_trap() protection before accessing the dring shared
         * memory area.
         */
        rv = LDC_ON_TRAP(&otd);
        if (rv != 0) {
                /*
                 * A data access fault occurred down the code path below while
                 * accessing the descriptors. Release any locks that we might
                 * have acquired in the code below and return failure.
                 */
                if (mutex_owned(&ldcp->wrlock)) {
                        mutex_exit(&ldcp->wrlock);
                }
                return (ECONNRESET);
        }

        /*
         * Determine if there are any pending tx descriptors ready to be
         * processed by the receiver (peer) and, if so, send a message to the
         * peer to restart receiving.
         */
        mutex_enter(&ldcp->wrlock);

        ready_txd = B_FALSE;
        txi = end;
        INCR_TXI(txi, ldcp);
        txdp = &ldcp->mtxdp[txi];
        if (txdp->dstate == VIO_DESC_READY) {
                ready_txd = B_TRUE;
        }

        /*
         * We are done accessing shared memory; clear trap protection.
         */
        (void) LDC_NO_TRAP();

        if (ready_txd == B_FALSE) {
                /*
                 * No ready tx descriptors. Set the flag to send a message to
                 * the peer when tx descriptors are ready in the transmit
                 * routine.
                 */
                ldcp->resched_peer = B_TRUE;
                ldcp->resched_peer_txi = txi;
                mutex_exit(&ldcp->wrlock);
                return (rv);
        }

        /*
         * We have some tx descriptors ready to be processed by the receiver.
         * Send a dring data message to the peer to restart processing.
         */
        ldcp->resched_peer = B_FALSE;
        mutex_exit(&ldcp->wrlock);
        rv = vgen_send_dringdata_shm(ldcp, txi, -1);
        if (rv != VGEN_SUCCESS) {
                mutex_enter(&ldcp->wrlock);
                ldcp->resched_peer = B_TRUE;
                mutex_exit(&ldcp->wrlock);
        }

        return (rv);
}

static int
vgen_handle_dringdata_nack_shm(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
{
        uint32_t start;
        int32_t end;
        uint32_t txi;
        vnet_rx_dringdata_desc_t *txdp;
        on_trap_data_t otd;
        int rv = 0;
        vgen_t *vgenp = LDC_TO_VGEN(ldcp);
        vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp;

        DBG1(vgenp, ldcp, "enter\n");
        start = dringmsg->start_idx;
        end = dringmsg->end_idx;

        /*
         * Peer sent a NACK msg (to indicate bad descriptors?). The start and
         * end correspond to the range of descriptors which are being nack'd.
         */
        DWARN(vgenp, ldcp, "NACK: start(%d), end(%d)\n", start, end);

        /*
         * In RxDringData mode (v1.6), a start index of -1 can be used by
         * the peer to indicate that it is unspecified. However, the end
         * index must be set correctly, indicating the last descriptor index
         * processed.
         */
        if (((start != VNET_START_IDX_UNSPEC) && !(CHECK_TXI(start, ldcp))) ||
            !(CHECK_TXI(end, ldcp))) {
                /* drop the message if invalid index */
                DWARN(vgenp, ldcp, "Invalid Tx nack start(%d) or end(%d)\n",
                    start, end);
                return (rv);
        }

        /* Validate dring_ident */
        if (dringmsg->dring_ident != ldcp->local_hparams.dring_ident) {
                /* invalid dring_ident, drop the msg */
                DWARN(vgenp, ldcp, "Invalid dring ident 0x%lx\n",
                    dringmsg->dring_ident);
                return (rv);
        }

        /*
         * Set up on_trap() protection before accessing the dring shared
         * memory area.
         */
        rv = LDC_ON_TRAP(&otd);
        if (rv != 0) {
                /*
                 * A data access fault occurred down the code path below
                 * while accessing the descriptors. Release the lock acquired
                 * in the code below and return failure. Note that the only
                 * faulting accesses below are to the descriptors, which
                 * happen with txlock held, so the unconditional exit here is
                 * safe.
                 */
                mutex_exit(&ldcp->txlock);
                return (ECONNRESET);
        }

        /* We just mark the descriptors as done so they can be reused */
        mutex_enter(&ldcp->txlock);
        for (txi = start; txi <= end; ) {
                txdp = &(ldcp->mtxdp[txi]);
                if (txdp->dstate == VIO_DESC_READY)
                        txdp->dstate = VIO_DESC_DONE;
                INCR_TXI(txi, ldcp);
        }

        /*
         * We are done accessing shared memory; clear trap protection.
         */
        (void) LDC_NO_TRAP();

        mutex_exit(&ldcp->txlock);

        return (rv);
}

/*
 * Send a descriptor ring data message to the peer over LDC.
 */
static int
vgen_send_dringdata_shm(vgen_ldc_t *ldcp, uint32_t start, int32_t end)
{
        vgen_t *vgenp = LDC_TO_VGEN(ldcp);
        vio_dring_msg_t dringmsg, *msgp = &dringmsg;
        vio_msg_tag_t *tagp = &msgp->tag;
        vgen_stats_t *statsp = &ldcp->stats;
        int rv;

#ifdef DEBUG
        if (vgen_inject_error(ldcp, VGEN_ERR_TXTIMEOUT)) {
                return (VGEN_SUCCESS);
        }
#endif
        bzero(msgp, sizeof (*msgp));

        tagp->vio_msgtype = VIO_TYPE_DATA;
        tagp->vio_subtype = VIO_SUBTYPE_INFO;
        tagp->vio_subtype_env = VIO_DRING_DATA;
        tagp->vio_sid = ldcp->local_sid;

        msgp->dring_ident = ldcp->local_hparams.dring_ident;
        msgp->start_idx = start;
        msgp->end_idx = end;
        msgp->seq_num = atomic_inc_32_nv(&ldcp->dringdata_msgid);

        rv = vgen_sendmsg_shm(ldcp, (caddr_t)tagp, sizeof (dringmsg));
        if (rv != VGEN_SUCCESS) {
                DWARN(vgenp, ldcp, "vgen_sendmsg_shm() failed\n");
                return (rv);
        }

        statsp->dring_data_msgs_sent++;

        DBG2(vgenp, ldcp, "DRING_DATA_SENT\n");

        return (VGEN_SUCCESS);
}

/*
 * Send a dring data ack message.
 */
int
vgen_send_dringack_shm(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp, uint32_t start,
    int32_t end, uint8_t pstate)
{
        int rv = 0;
        vgen_t *vgenp = LDC_TO_VGEN(ldcp);
        vio_dring_msg_t *msgp = (vio_dring_msg_t *)tagp;
        vgen_stats_t *statsp = &ldcp->stats;

        tagp->vio_msgtype = VIO_TYPE_DATA;
        tagp->vio_subtype = VIO_SUBTYPE_ACK;
        tagp->vio_subtype_env = VIO_DRING_DATA;
        tagp->vio_sid = ldcp->local_sid;
        msgp->start_idx = start;
        msgp->end_idx = end;
        msgp->dring_process_state = pstate;
        msgp->seq_num = atomic_inc_32_nv(&ldcp->dringdata_msgid);

        rv = vgen_sendmsg_shm(ldcp, (caddr_t)tagp, sizeof (*msgp));
        if (rv != VGEN_SUCCESS) {
                DWARN(vgenp, ldcp, "vgen_sendmsg_shm() failed\n");
        }

        statsp->dring_data_acks_sent++;
        if (pstate == VIO_DP_STOPPED) {
                statsp->dring_stopped_acks_sent++;
        }

        return (rv);
}

/*
 * Send dring data msgs (info/ack/nack) over LDC.
 */
static int
vgen_sendmsg_shm(vgen_ldc_t *ldcp, caddr_t msg, size_t msglen)
{
        int rv;
        size_t len;
        uint32_t retries = 0;
        vgen_t *vgenp = LDC_TO_VGEN(ldcp);

        len = msglen;
        if ((len == 0) || (msg == NULL))
                return (VGEN_FAILURE);

        do {
                len = msglen;
                rv = ldc_write(ldcp->ldc_handle, (caddr_t)msg, &len);
                if (retries++ >= vgen_ldcwr_retries)
                        break;
        } while (rv == EWOULDBLOCK);
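
        /*
         * ldc_write() returns the number of bytes actually written in len,
         * which is why len is reset to the full message length before each
         * retry; retries on EWOULDBLOCK are bounded by the
         * vgen_ldcwr_retries tunable.
         */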

        if (rv != 0) {
                DWARN(vgenp, ldcp, "ldc_write failed: rv(%d) msglen(%lu)\n",
                    rv, msglen);
                return (rv);
        }

        if (len != msglen) {
                DWARN(vgenp, ldcp, "ldc_write partial write: rv(%d) "
                    "msglen(%lu) written(%lu)\n", rv, msglen, len);
                return (VGEN_FAILURE);
        }

        return (VGEN_SUCCESS);
}