/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 * Copyright 2018 Joyent, Inc.
 */

#ifdef DEBUG
#define	XNB_DEBUG 1
#endif /* DEBUG */

#include "xnb.h"

#include <sys/sunddi.h>
#include <sys/sunndi.h>
#include <sys/modctl.h>
#include <sys/conf.h>
#include <sys/mac.h>
#include <sys/mac_impl.h> /* For mac_fix_cksum(). */
#include <sys/dlpi.h>
#include <sys/strsubr.h>
#include <sys/strsun.h>
#include <sys/types.h>
#include <sys/pattr.h>
#include <vm/seg_kmem.h>
#include <vm/hat_i86.h>
#include <xen/sys/xenbus_impl.h>
#include <xen/sys/xendev.h>
#include <sys/balloon_impl.h>
#include <sys/evtchn_impl.h>
#include <sys/gnttab.h>
#include <vm/vm_dep.h>
#include <sys/note.h>
#include <sys/gld.h>
#include <inet/ip.h>
#include <inet/ip_impl.h>

/*
 * The terms "transmit" and "receive" are used from the perspective of
 * the peer domU: packets originating from the peer domU are
 * "transmitted" onward to the rest of the system, and packets destined
 * for the peer domU are "received" from the rest of the system.
 */

/*
 * Should we allow guests to manipulate multicast group membership?
 */
static boolean_t	xnb_multicast_control = B_TRUE;

static boolean_t	xnb_connect_rings(dev_info_t *);
static void		xnb_disconnect_rings(dev_info_t *);
static void		xnb_oe_state_change(dev_info_t *, ddi_eventcookie_t,
    void *, void *);
static void		xnb_hp_state_change(dev_info_t *, ddi_eventcookie_t,
    void *, void *);

static int	xnb_txbuf_constructor(void *, void *, int);
static void	xnb_txbuf_destructor(void *, void *);
static void	xnb_tx_notify_peer(xnb_t *, boolean_t);
static void	xnb_tx_mark_complete(xnb_t *, RING_IDX, int16_t);

mblk_t *xnb_to_peer(xnb_t *, mblk_t *);
mblk_t *xnb_copy_to_peer(xnb_t *, mblk_t *);

static void setup_gop(xnb_t *, gnttab_copy_t *, uchar_t *,
    size_t, size_t, size_t, grant_ref_t);
#pragma inline(setup_gop)
static boolean_t is_foreign(void *);
#pragma inline(is_foreign)

#define	INVALID_GRANT_HANDLE	((grant_handle_t)-1)
#define	INVALID_GRANT_REF	((grant_ref_t)-1)

static kmutex_t	xnb_alloc_page_lock;

/*
 * On a 32 bit PAE system physical and machine addresses are larger
 * than 32 bits. ddi_btop() on such systems takes an unsigned long
 * argument, and so addresses above 4G are truncated before ddi_btop()
 * gets to see them. To avoid this, code the shift operation here.
 */
#define	xnb_btop(addr)	((addr) >> PAGESHIFT)
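 * For example, with 4 KiB pages xnb_btop(0x100000000ULL) yields 0x100000.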

/* DMA attributes for transmit and receive data */
static ddi_dma_attr_t buf_dma_attr = {
	DMA_ATTR_V0,		/* version of this structure */
	0,			/* lowest usable address */
	0xffffffffffffffffULL,	/* highest usable address */
	0x7fffffff,		/* maximum DMAable byte count */
	MMU_PAGESIZE,		/* alignment in bytes */
	0x7ff,			/* bitmap of burst sizes */
	1,			/* minimum transfer */
	0xffffffffU,		/* maximum transfer */
	0xffffffffffffffffULL,	/* maximum segment length */
	1,			/* maximum number of segments */
	1,			/* granularity */
	0,			/* flags (reserved) */
};

/* DMA access attributes for data: NOT to be byte swapped. */
static ddi_device_acc_attr_t data_accattr = {
	DDI_DEVICE_ATTR_V0,
	DDI_NEVERSWAP_ACC,
	DDI_STRICTORDER_ACC
};

/*
 * Statistics.
 */
static const char * const aux_statistics[] = {
	"rx_cksum_deferred",
	"tx_cksum_no_need",
	"rx_rsp_notok",
	"tx_notify_deferred",
	"tx_notify_sent",
	"rx_notify_deferred",
	"rx_notify_sent",
	"tx_too_early",
	"rx_too_early",
	"rx_allocb_failed",
	"tx_allocb_failed",
	"rx_foreign_page",
	"mac_full",
	"spurious_intr",
	"allocation_success",
	"allocation_failure",
	"small_allocation_success",
	"small_allocation_failure",
	"other_allocation_failure",
	"rx_pageboundary_crossed",
	"rx_cpoparea_grown",
	"csum_hardware",
	"csum_software",
	"tx_overflow_page",
	"tx_unexpected_flags",
};
static int
xnb_ks_aux_update(kstat_t *ksp, int flag)
{
	xnb_t *xnbp;
	kstat_named_t *knp;

	if (flag != KSTAT_READ)
		return (EACCES);

	xnbp = ksp->ks_private;
	knp = ksp->ks_data;

	/*
	 * Assignment order should match that of the names in
	 * aux_statistics.
	 */
	(knp++)->value.ui64 = xnbp->xnb_stat_rx_cksum_deferred;
	(knp++)->value.ui64 = xnbp->xnb_stat_tx_cksum_no_need;
	(knp++)->value.ui64 = xnbp->xnb_stat_rx_rsp_notok;
	(knp++)->value.ui64 = xnbp->xnb_stat_tx_notify_deferred;
	(knp++)->value.ui64 = xnbp->xnb_stat_tx_notify_sent;
	(knp++)->value.ui64 = xnbp->xnb_stat_rx_notify_deferred;
	(knp++)->value.ui64 = xnbp->xnb_stat_rx_notify_sent;
	(knp++)->value.ui64 = xnbp->xnb_stat_tx_too_early;
	(knp++)->value.ui64 = xnbp->xnb_stat_rx_too_early;
	(knp++)->value.ui64 = xnbp->xnb_stat_rx_allocb_failed;
	(knp++)->value.ui64 = xnbp->xnb_stat_tx_allocb_failed;
	(knp++)->value.ui64 = xnbp->xnb_stat_rx_foreign_page;
	(knp++)->value.ui64 = xnbp->xnb_stat_mac_full;
	(knp++)->value.ui64 = xnbp->xnb_stat_spurious_intr;
	(knp++)->value.ui64 = xnbp->xnb_stat_allocation_success;
	(knp++)->value.ui64 = xnbp->xnb_stat_allocation_failure;
	(knp++)->value.ui64 = xnbp->xnb_stat_small_allocation_success;
	(knp++)->value.ui64 = xnbp->xnb_stat_small_allocation_failure;
	(knp++)->value.ui64 = xnbp->xnb_stat_other_allocation_failure;
	(knp++)->value.ui64 = xnbp->xnb_stat_rx_pagebndry_crossed;
	(knp++)->value.ui64 = xnbp->xnb_stat_rx_cpoparea_grown;
	(knp++)->value.ui64 = xnbp->xnb_stat_csum_hardware;
	(knp++)->value.ui64 = xnbp->xnb_stat_csum_software;
	(knp++)->value.ui64 = xnbp->xnb_stat_tx_overflow_page;
	(knp++)->value.ui64 = xnbp->xnb_stat_tx_unexpected_flags;

	return (0);
}

static boolean_t
xnb_ks_init(xnb_t *xnbp)
{
	int nstat = sizeof (aux_statistics) /
	    sizeof (aux_statistics[0]);
	const char * const *cp = aux_statistics;
	kstat_named_t *knp;

	/*
	 * Create and initialise kstats.
	 */
	xnbp->xnb_kstat_aux = kstat_create(ddi_driver_name(xnbp->xnb_devinfo),
	    ddi_get_instance(xnbp->xnb_devinfo), "aux_statistics", "net",
	    KSTAT_TYPE_NAMED, nstat, 0);
	if (xnbp->xnb_kstat_aux == NULL)
		return (B_FALSE);

	xnbp->xnb_kstat_aux->ks_private = xnbp;
	xnbp->xnb_kstat_aux->ks_update = xnb_ks_aux_update;

	knp = xnbp->xnb_kstat_aux->ks_data;
	while (nstat > 0) {
		kstat_named_init(knp, *cp, KSTAT_DATA_UINT64);

		knp++;
		cp++;
		nstat--;
	}

	kstat_install(xnbp->xnb_kstat_aux);

	return (B_TRUE);
}

static void
xnb_ks_free(xnb_t *xnbp)
{
	kstat_delete(xnbp->xnb_kstat_aux);
}

/*
 * Calculate and insert the transport checksum for an arbitrary packet.
 */
static mblk_t *
xnb_software_csum(xnb_t *xnbp, mblk_t *mp)
{
	_NOTE(ARGUNUSED(xnbp));

	/*
	 * XXPV dme: shouldn't rely on mac_fix_cksum(), not least
	 * because it doesn't cover all of the interesting cases :-(
	 */
	mac_hcksum_set(mp, 0, 0, 0, 0, HCK_FULLCKSUM);
	mac_hw_emul(&mp, NULL, NULL, MAC_HWCKSUM_EMUL);
	return (mp);
}

mblk_t *
xnb_process_cksum_flags(xnb_t *xnbp, mblk_t *mp, uint32_t capab)
{
	struct ether_header *ehp;
	uint16_t sap;
	uint32_t offset;
	ipha_t *ipha;

	ASSERT(mp->b_next == NULL);

	/*
	 * Check that the packet is contained in a single mblk. In
	 * the "from peer" path this is true today, but may change
	 * when scatter gather support is added. In the "to peer"
	 * path we cannot be sure, but in most cases it will be true
	 * (in the xnbo case the packet has come from a MAC device
	 * which is unlikely to split packets).
	 */
	if (mp->b_cont != NULL)
		goto software;

	/*
	 * If the MAC has no hardware capability don't do any further
	 * checking.
	 */
	if (capab == 0)
		goto software;

	ASSERT(MBLKL(mp) >= sizeof (struct ether_header));
	ehp = (struct ether_header *)mp->b_rptr;

	if (ntohs(ehp->ether_type) == VLAN_TPID) {
		struct ether_vlan_header *evhp;

		ASSERT(MBLKL(mp) >= sizeof (struct ether_vlan_header));
		evhp = (struct ether_vlan_header *)mp->b_rptr;
		sap = ntohs(evhp->ether_type);
		offset = sizeof (struct ether_vlan_header);
	} else {
		sap = ntohs(ehp->ether_type);
		offset = sizeof (struct ether_header);
	}

	/*
	 * We only attempt to do IPv4 packets in hardware.
	 */
	if (sap != ETHERTYPE_IP)
		goto software;

	/*
	 * We know that this is an IPv4 packet.
	 */
	ipha = (ipha_t *)(mp->b_rptr + offset);

	switch (ipha->ipha_protocol) {
	case IPPROTO_TCP:
	case IPPROTO_UDP: {
		uint32_t start, length, stuff, cksum;
		uint16_t *stuffp;

		/*
		 * This is a TCP/IPv4 or UDP/IPv4 packet, for which we
		 * can use full IPv4 and partial checksum offload.
		 */
		if ((capab & (HCKSUM_INET_FULL_V4|HCKSUM_INET_PARTIAL)) == 0)
			break;

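		/*
		 * Note that these offsets assume a simple IPv4 header
		 * with no options.
		 */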
		start = IP_SIMPLE_HDR_LENGTH;
		length = ntohs(ipha->ipha_length);
		if (ipha->ipha_protocol == IPPROTO_TCP) {
			stuff = start + TCP_CHECKSUM_OFFSET;
			cksum = IP_TCP_CSUM_COMP;
		} else {
			stuff = start + UDP_CHECKSUM_OFFSET;
			cksum = IP_UDP_CSUM_COMP;
		}
		stuffp = (uint16_t *)(mp->b_rptr + offset + stuff);

		if (capab & HCKSUM_INET_FULL_V4) {
			/*
			 * Some devices require that the checksum
			 * field of the packet is zero for full
			 * offload.
			 */
			*stuffp = 0;

			mac_hcksum_set(mp, 0, 0, 0, 0, HCK_FULLCKSUM);

			xnbp->xnb_stat_csum_hardware++;

			return (mp);
		}

		if (capab & HCKSUM_INET_PARTIAL) {
			if (*stuffp == 0) {
				ipaddr_t src, dst;

				/*
				 * Older Solaris guests don't insert
				 * the pseudo-header checksum, so we
				 * calculate it here.
				 */
				src = ipha->ipha_src;
				dst = ipha->ipha_dst;

				cksum += (dst >> 16) + (dst & 0xFFFF);
				cksum += (src >> 16) + (src & 0xFFFF);
				cksum += length - IP_SIMPLE_HDR_LENGTH;

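				/*
				 * Fold the 32-bit sum into 16 bits;
				 * folding twice absorbs any carry
				 * produced by the first fold.
				 */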
				cksum = (cksum >> 16) + (cksum & 0xFFFF);
				cksum = (cksum >> 16) + (cksum & 0xFFFF);

				ASSERT(cksum <= 0xFFFF);

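				/*
				 * A sum of zero is stored as 0xFFFF;
				 * the two values are equivalent in
				 * ones-complement arithmetic.
				 */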
				*stuffp = (uint16_t)(cksum ? cksum : ~cksum);
			}

			mac_hcksum_set(mp, start, stuff, length, 0,
			    HCK_PARTIALCKSUM);

			xnbp->xnb_stat_csum_hardware++;

			return (mp);
		}

		/* NOTREACHED */
		break;
	}

	default:
		/* Use software. */
		break;
	}

software:
	/*
	 * We are not able to use any offload so do the whole thing in
	 * software.
	 */
	xnbp->xnb_stat_csum_software++;

	return (xnb_software_csum(xnbp, mp));
}

int
xnb_attach(dev_info_t *dip, xnb_flavour_t *flavour, void *flavour_data)
{
	xnb_t *xnbp;
	char *xsname;
	char cachename[32];

	xnbp = kmem_zalloc(sizeof (*xnbp), KM_SLEEP);

	xnbp->xnb_flavour = flavour;
	xnbp->xnb_flavour_data = flavour_data;
	xnbp->xnb_devinfo = dip;
	xnbp->xnb_evtchn = INVALID_EVTCHN;
	xnbp->xnb_irq = B_FALSE;
	xnbp->xnb_tx_ring_handle = INVALID_GRANT_HANDLE;
	xnbp->xnb_rx_ring_handle = INVALID_GRANT_HANDLE;
	xnbp->xnb_connected = B_FALSE;
	xnbp->xnb_hotplugged = B_FALSE;
	xnbp->xnb_detachable = B_FALSE;
	xnbp->xnb_peer = xvdi_get_oeid(dip);
	xnbp->xnb_be_status = XNB_STATE_INIT;
	xnbp->xnb_fe_status = XNB_STATE_INIT;

	xnbp->xnb_tx_buf_count = 0;

	xnbp->xnb_rx_hv_copy = B_FALSE;
	xnbp->xnb_multicast_control = B_FALSE;

	xnbp->xnb_rx_va = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);
	ASSERT(xnbp->xnb_rx_va != NULL);

	if (ddi_get_iblock_cookie(dip, 0, &xnbp->xnb_icookie)
	    != DDI_SUCCESS)
		goto failure;

	/* Allocated on demand, when/if we enter xnb_copy_to_peer(). */
	xnbp->xnb_rx_cpop = NULL;
	xnbp->xnb_rx_cpop_count = 0;

	mutex_init(&xnbp->xnb_tx_lock, NULL, MUTEX_DRIVER,
	    xnbp->xnb_icookie);
	mutex_init(&xnbp->xnb_rx_lock, NULL, MUTEX_DRIVER,
	    xnbp->xnb_icookie);
	mutex_init(&xnbp->xnb_state_lock, NULL, MUTEX_DRIVER,
	    xnbp->xnb_icookie);

	/* Set driver private pointer now. */
	ddi_set_driver_private(dip, xnbp);

	(void) sprintf(cachename, "xnb_tx_buf_cache_%d", ddi_get_instance(dip));
	xnbp->xnb_tx_buf_cache = kmem_cache_create(cachename,
	    sizeof (xnb_txbuf_t), 0,
	    xnb_txbuf_constructor, xnb_txbuf_destructor,
	    NULL, xnbp, NULL, 0);
	if (xnbp->xnb_tx_buf_cache == NULL)
		goto failure_0;

	if (!xnb_ks_init(xnbp))
		goto failure_1;

	/*
	 * Receive notification of changes in the state of the
	 * driver in the guest domain.
	 */
	if (xvdi_add_event_handler(dip, XS_OE_STATE, xnb_oe_state_change,
	    NULL) != DDI_SUCCESS)
		goto failure_2;

	/*
	 * Receive notification of hotplug events.
	 */
	if (xvdi_add_event_handler(dip, XS_HP_STATE, xnb_hp_state_change,
	    NULL) != DDI_SUCCESS)
		goto failure_2;

	xsname = xvdi_get_xsname(dip);

	if (xenbus_printf(XBT_NULL, xsname,
	    "feature-multicast-control", "%d",
	    xnb_multicast_control ? 1 : 0) != 0)
		goto failure_3;

	if (xenbus_printf(XBT_NULL, xsname,
	    "feature-rx-copy", "%d", 1) != 0)
		goto failure_3;
	/*
	 * Linux domUs seem to depend on "feature-rx-flip" being 0
	 * in addition to "feature-rx-copy" being 1. It seems strange
	 * to use four possible states to describe a binary decision,
	 * but we might as well play nice.
	 */
	if (xenbus_printf(XBT_NULL, xsname,
	    "feature-rx-flip", "%d", 0) != 0)
		goto failure_3;

	(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateInitWait);
	(void) xvdi_post_event(dip, XEN_HP_ADD);

	return (DDI_SUCCESS);

failure_3:
	xvdi_remove_event_handler(dip, NULL);

failure_2:
	xnb_ks_free(xnbp);

failure_1:
	kmem_cache_destroy(xnbp->xnb_tx_buf_cache);

failure_0:
	mutex_destroy(&xnbp->xnb_state_lock);
	mutex_destroy(&xnbp->xnb_rx_lock);
	mutex_destroy(&xnbp->xnb_tx_lock);

failure:
	vmem_free(heap_arena, xnbp->xnb_rx_va, PAGESIZE);
	kmem_free(xnbp, sizeof (*xnbp));
	return (DDI_FAILURE);
}

void
xnb_detach(dev_info_t *dip)
{
	xnb_t *xnbp = ddi_get_driver_private(dip);

	ASSERT(xnbp != NULL);
	ASSERT(!xnbp->xnb_connected);
	ASSERT(xnbp->xnb_tx_buf_count == 0);

	xnb_disconnect_rings(dip);

	xvdi_remove_event_handler(dip, NULL);

	xnb_ks_free(xnbp);

	kmem_cache_destroy(xnbp->xnb_tx_buf_cache);

	ddi_set_driver_private(dip, NULL);

	mutex_destroy(&xnbp->xnb_state_lock);
	mutex_destroy(&xnbp->xnb_rx_lock);
	mutex_destroy(&xnbp->xnb_tx_lock);

	if (xnbp->xnb_rx_cpop_count > 0)
		kmem_free(xnbp->xnb_rx_cpop, sizeof (xnbp->xnb_rx_cpop[0])
		    * xnbp->xnb_rx_cpop_count);

	ASSERT(xnbp->xnb_rx_va != NULL);
	vmem_free(heap_arena, xnbp->xnb_rx_va, PAGESIZE);

	kmem_free(xnbp, sizeof (*xnbp));
}

/*
 * Allocate a page from the hypervisor to be flipped to the peer.
 *
 * Try to get pages in batches to reduce the overhead of calls into
 * the balloon driver.
 */
static mfn_t
xnb_alloc_page(xnb_t *xnbp)
{
#define	WARNING_RATE_LIMIT 100
#define	BATCH_SIZE 256
	static mfn_t mfns[BATCH_SIZE];	/* common across all instances */
	static int nth = BATCH_SIZE;
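	/*
	 * mfns[] holds a batch of pre-allocated pages and nth indexes
	 * the next unclaimed entry; both are protected by
	 * xnb_alloc_page_lock.
	 */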
	mfn_t mfn;

	mutex_enter(&xnb_alloc_page_lock);
	if (nth == BATCH_SIZE) {
		if (balloon_alloc_pages(BATCH_SIZE, mfns) != BATCH_SIZE) {
			xnbp->xnb_stat_allocation_failure++;
			mutex_exit(&xnb_alloc_page_lock);

			/*
			 * Try for a single page in low memory situations.
			 */
			if (balloon_alloc_pages(1, &mfn) != 1) {
				if ((xnbp->xnb_stat_small_allocation_failure++
				    % WARNING_RATE_LIMIT) == 0)
					cmn_err(CE_WARN, "xnb_alloc_page: "
					    "Cannot allocate memory to "
					    "transfer packets to peer.");
				return (0);
			} else {
				xnbp->xnb_stat_small_allocation_success++;
				return (mfn);
			}
		}

		nth = 0;
		xnbp->xnb_stat_allocation_success++;
	}

	mfn = mfns[nth++];
	mutex_exit(&xnb_alloc_page_lock);

	ASSERT(mfn != 0);

	return (mfn);
#undef BATCH_SIZE
#undef WARNING_RATE_LIMIT
}

/*
 * Free a page back to the hypervisor.
 *
 * This happens only in the error path, so batching is not worth the
 * complication.
 */
static void
xnb_free_page(xnb_t *xnbp, mfn_t mfn)
{
	_NOTE(ARGUNUSED(xnbp));
	int r;
	pfn_t pfn;

	pfn = xen_assign_pfn(mfn);
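	/*
	 * Scrub the page before releasing it, so that its previous
	 * contents cannot leak to whichever domain receives it next.
	 */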
	pfnzero(pfn, 0, PAGESIZE);
	xen_release_pfn(pfn);

	if ((r = balloon_free_pages(1, &mfn, NULL, NULL)) != 1) {
		cmn_err(CE_WARN, "free_page: cannot decrease memory "
		    "reservation (%d): page kept but unusable (mfn = 0x%lx).",
		    r, mfn);
	}
}

/*
 * Similar to RING_HAS_UNCONSUMED_REQUESTS(&xnbp->rx_ring) but using
 * local variables. The result is the number of request slots that can
 * safely be consumed: the smaller of the unconsumed requests posted
 * by the peer and the space remaining for responses.
 * Used in both xnb_to_peer() and xnb_copy_to_peer().
 */
#define	XNB_RING_HAS_UNCONSUMED_REQUESTS(_r)	\
	((((_r)->sring->req_prod - loop) <	\
	    (RING_SIZE(_r) - (loop - prod))) ?	\
	    ((_r)->sring->req_prod - loop) :	\
	    (RING_SIZE(_r) - (loop - prod)))

/*
 * Pass packets to the peer using page flipping.
 */
mblk_t *
xnb_to_peer(xnb_t *xnbp, mblk_t *mp)
{
	mblk_t *free = mp, *prev = NULL;
	size_t len;
	gnttab_transfer_t *gop;
	boolean_t notify;
	RING_IDX loop, prod, end;

	/*
	 * For each packet the sequence of operations is:
	 *
	 * 1. get a new page from the hypervisor.
	 * 2. get a request slot from the ring.
	 * 3. copy the data into the new page.
	 * 4. transfer the page to the peer.
	 * 5. update the request slot.
	 * 6. kick the peer.
	 * 7. free mp.
	 *
	 * In order to reduce the number of hypercalls, we prepare
	 * several packets for the peer and perform a single hypercall
	 * to transfer them.
	 */

	len = 0;
	mutex_enter(&xnbp->xnb_rx_lock);

	/*
	 * If we are not connected to the peer or have not yet
	 * finished hotplug it is too early to pass packets to the
	 * peer.
	 */
	if (!(xnbp->xnb_connected && xnbp->xnb_hotplugged)) {
		mutex_exit(&xnbp->xnb_rx_lock);
		DTRACE_PROBE(flip_rx_too_early);
		xnbp->xnb_stat_rx_too_early++;
		return (mp);
	}

	loop = xnbp->xnb_rx_ring.req_cons;
	prod = xnbp->xnb_rx_ring.rsp_prod_pvt;
	gop = xnbp->xnb_rx_top;

	while ((mp != NULL) &&
	    XNB_RING_HAS_UNCONSUMED_REQUESTS(&xnbp->xnb_rx_ring)) {

		mfn_t mfn;
		pfn_t pfn;
		netif_rx_request_t *rxreq;
		netif_rx_response_t *rxresp;
		char *valoop;
		mblk_t *ml;
		uint16_t cksum_flags;

		/* 1 */
		if ((mfn = xnb_alloc_page(xnbp)) == 0) {
			xnbp->xnb_stat_rx_defer++;
			break;
		}

		/* 2 */
		rxreq = RING_GET_REQUEST(&xnbp->xnb_rx_ring, loop);

#ifdef XNB_DEBUG
		if (!(rxreq->id < NET_RX_RING_SIZE))
			cmn_err(CE_PANIC, "xnb_to_peer: "
			    "id %d out of range in request 0x%p",
			    rxreq->id, (void *)rxreq);
#endif /* XNB_DEBUG */

		/* Assign a pfn and map the new page at the allocated va. */
		pfn = xen_assign_pfn(mfn);
		hat_devload(kas.a_hat, xnbp->xnb_rx_va, PAGESIZE,
		    pfn, PROT_READ | PROT_WRITE, HAT_LOAD);

		/* 3 */
		len = 0;
		valoop = xnbp->xnb_rx_va;
		for (ml = mp; ml != NULL; ml = ml->b_cont) {
			size_t chunk = ml->b_wptr - ml->b_rptr;

			bcopy(ml->b_rptr, valoop, chunk);
			valoop += chunk;
			len += chunk;
		}

		ASSERT(len < PAGESIZE);

		/* Release the pfn. */
		hat_unload(kas.a_hat, xnbp->xnb_rx_va, PAGESIZE,
		    HAT_UNLOAD_UNMAP);
		xen_release_pfn(pfn);

		/* 4 */
		gop->mfn = mfn;
		gop->domid = xnbp->xnb_peer;
		gop->ref = rxreq->gref;

		/* 5.1 */
		rxresp = RING_GET_RESPONSE(&xnbp->xnb_rx_ring, prod);
		rxresp->offset = 0;
		rxresp->flags = 0;

		cksum_flags = xnbp->xnb_flavour->xf_cksum_to_peer(xnbp, mp);
		if (cksum_flags != 0)
			xnbp->xnb_stat_rx_cksum_deferred++;
		rxresp->flags |= cksum_flags;

		rxresp->id = RING_GET_REQUEST(&xnbp->xnb_rx_ring, prod)->id;
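		/*
		 * A non-negative status in the response carries the
		 * number of bytes delivered.
		 */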
		rxresp->status = len;

		loop++;
		prod++;
		gop++;
		prev = mp;
		mp = mp->b_next;
	}

	/*
	 * Did we actually do anything?
	 */
	if (loop == xnbp->xnb_rx_ring.req_cons) {
		mutex_exit(&xnbp->xnb_rx_lock);
		return (mp);
	}

	end = loop;

	/*
	 * Unlink the end of the 'done' list from the remainder.
	 */
	ASSERT(prev != NULL);
	prev->b_next = NULL;

	if (HYPERVISOR_grant_table_op(GNTTABOP_transfer, xnbp->xnb_rx_top,
	    loop - xnbp->xnb_rx_ring.req_cons) != 0) {
		cmn_err(CE_WARN, "xnb_to_peer: transfer operation failed");
	}

	loop = xnbp->xnb_rx_ring.req_cons;
	prod = xnbp->xnb_rx_ring.rsp_prod_pvt;
	gop = xnbp->xnb_rx_top;

	while (loop < end) {
		int16_t status = NETIF_RSP_OKAY;

		if (gop->status != 0) {
			status = NETIF_RSP_ERROR;

			/*
			 * If the status is anything other than
			 * GNTST_bad_page then we don't own the page
			 * any more, so don't try to give it back.
			 */
			if (gop->status != GNTST_bad_page)
				gop->mfn = 0;
		} else {
			/* The page is no longer ours. */
			gop->mfn = 0;
		}

		if (gop->mfn != 0)
			/*
			 * Give back the page, as we won't be using
			 * it.
			 */
			xnb_free_page(xnbp, gop->mfn);
		else
			/*
			 * We gave away a page, update our accounting
			 * now.
			 */
			balloon_drv_subtracted(1);

		/* 5.2 */
		if (status != NETIF_RSP_OKAY) {
			RING_GET_RESPONSE(&xnbp->xnb_rx_ring, prod)->status =
			    status;
		} else {
			xnbp->xnb_stat_ipackets++;
			xnbp->xnb_stat_rbytes += len;
		}

		loop++;
		prod++;
		gop++;
	}

	xnbp->xnb_rx_ring.req_cons = loop;
	xnbp->xnb_rx_ring.rsp_prod_pvt = prod;

	/* 6 */
	/* LINTED: constant in conditional context */
	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xnbp->xnb_rx_ring, notify);
	if (notify) {
		ec_notify_via_evtchn(xnbp->xnb_evtchn);
		xnbp->xnb_stat_rx_notify_sent++;
	} else {
		xnbp->xnb_stat_rx_notify_deferred++;
	}

	if (mp != NULL)
		xnbp->xnb_stat_rx_defer++;

	mutex_exit(&xnbp->xnb_rx_lock);

	/* Free mblk_t's that we consumed. */
	freemsgchain(free);

	return (mp);
}

/* Helper functions for xnb_copy_to_peer(). */

/*
 * Grow the array of copy operation descriptors.
 */
static boolean_t
grow_cpop_area(xnb_t *xnbp)
{
	size_t count;
	gnttab_copy_t *new;

	ASSERT(MUTEX_HELD(&xnbp->xnb_rx_lock));

	count = xnbp->xnb_rx_cpop_count + CPOP_DEFCNT;

	if ((new = kmem_alloc(sizeof (new[0]) * count, KM_NOSLEEP)) == NULL) {
		xnbp->xnb_stat_other_allocation_failure++;
		return (B_FALSE);
	}

	bcopy(xnbp->xnb_rx_cpop, new,
	    sizeof (xnbp->xnb_rx_cpop[0]) * xnbp->xnb_rx_cpop_count);

	kmem_free(xnbp->xnb_rx_cpop,
	    sizeof (xnbp->xnb_rx_cpop[0]) * xnbp->xnb_rx_cpop_count);

	xnbp->xnb_rx_cpop = new;
	xnbp->xnb_rx_cpop_count = count;

	xnbp->xnb_stat_rx_cpoparea_grown++;

	return (B_TRUE);
}

/*
 * Check whether an address is on a page that's foreign to this domain.
 */
static boolean_t
is_foreign(void *addr)
{
	pfn_t pfn = hat_getpfnum(kas.a_hat, addr);

	return ((pfn & PFN_IS_FOREIGN_MFN) == PFN_IS_FOREIGN_MFN);
}

/*
 * Insert a newly allocated mblk into a chain, replacing the old one.
 */
static mblk_t *
replace_msg(mblk_t *mp, size_t len, mblk_t *mp_prev, mblk_t *ml_prev)
{
	uint32_t start, stuff, end, value, flags;
	mblk_t *new_mp;

	new_mp = copyb(mp);
	if (new_mp == NULL) {
		cmn_err(CE_PANIC, "replace_msg: cannot alloc new message "
		    "for %p, len %lu", (void *) mp, len);
	}

	mac_hcksum_get(mp, &start, &stuff, &end, &value, &flags);
	mac_hcksum_set(new_mp, start, stuff, end, value, flags);

	new_mp->b_next = mp->b_next;
	new_mp->b_prev = mp->b_prev;
	new_mp->b_cont = mp->b_cont;

	/* Make sure we only overwrite pointers to the mblk being replaced. */
	if (mp_prev != NULL && mp_prev->b_next == mp)
		mp_prev->b_next = new_mp;

	if (ml_prev != NULL && ml_prev->b_cont == mp)
		ml_prev->b_cont = new_mp;

	mp->b_next = mp->b_prev = mp->b_cont = NULL;
	freemsg(mp);

	return (new_mp);
}

/*
 * Set all the fields in a gnttab_copy_t.
 */
static void
setup_gop(xnb_t *xnbp, gnttab_copy_t *gp, uchar_t *rptr,
    size_t s_off, size_t d_off, size_t len, grant_ref_t d_ref)
{
	ASSERT(xnbp != NULL && gp != NULL);

	gp->source.offset = s_off;
	gp->source.u.gmfn = pfn_to_mfn(hat_getpfnum(kas.a_hat, (caddr_t)rptr));
	gp->source.domid = DOMID_SELF;

	gp->len = (uint16_t)len;
	gp->flags = GNTCOPY_dest_gref;
	gp->status = 0;

	gp->dest.u.ref = d_ref;
	gp->dest.offset = d_off;
	gp->dest.domid = xnbp->xnb_peer;
}

/*
 * Pass packets to the peer using hypervisor copy operations.
 */
mblk_t *
xnb_copy_to_peer(xnb_t *xnbp, mblk_t *mp)
{
	mblk_t *free = mp, *mp_prev = NULL, *saved_mp = mp;
	mblk_t *ml, *ml_prev;
	boolean_t notify;
	RING_IDX loop, prod;
	int i;

	/*
	 * If the peer does not pre-post buffers for received packets,
	 * use page flipping to pass packets to it.
	 */
	if (!xnbp->xnb_rx_hv_copy)
		return (xnb_to_peer(xnbp, mp));

	/*
	 * For each packet the sequence of operations is:
	 *
	 * 1. get a request slot from the ring.
	 * 2. set up data for hypercall (see NOTE below)
	 * 3. have the hypervisor copy the data
	 * 4. update the request slot.
	 * 5. kick the peer.
	 *
	 * NOTE ad 2.
	 * In order to reduce the number of hypercalls, we prepare
	 * several mblks (mp->b_cont != NULL) for the peer and
	 * perform a single hypercall to transfer them. We also have
	 * to set up a separate copy operation for every page.
	 *
	 * If we have more than one packet (mp->b_next != NULL), we do
	 * this whole dance repeatedly.
	 */

	mutex_enter(&xnbp->xnb_rx_lock);

	if (!(xnbp->xnb_connected && xnbp->xnb_hotplugged)) {
		mutex_exit(&xnbp->xnb_rx_lock);
		DTRACE_PROBE(copy_rx_too_early);
		xnbp->xnb_stat_rx_too_early++;
		return (mp);
	}

	loop = xnbp->xnb_rx_ring.req_cons;
	prod = xnbp->xnb_rx_ring.rsp_prod_pvt;

	while ((mp != NULL) &&
	    XNB_RING_HAS_UNCONSUMED_REQUESTS(&xnbp->xnb_rx_ring)) {
		netif_rx_request_t *rxreq;
		size_t d_offset, len;
		int item_count;
		gnttab_copy_t *gop_cp;
		netif_rx_response_t *rxresp;
		uint16_t cksum_flags;
		int16_t status = NETIF_RSP_OKAY;

		/* 1 */
		rxreq = RING_GET_REQUEST(&xnbp->xnb_rx_ring, loop);

#ifdef XNB_DEBUG
		if (!(rxreq->id < NET_RX_RING_SIZE))
			cmn_err(CE_PANIC, "xnb_copy_to_peer: "
			    "id %d out of range in request 0x%p",
			    rxreq->id, (void *)rxreq);
#endif /* XNB_DEBUG */

		/* 2 */
		d_offset = 0;
		len = 0;
		item_count = 0;

		gop_cp = xnbp->xnb_rx_cpop;

		/*
		 * We walk the b_cont pointers and set up a
		 * gnttab_copy_t for each sub-page chunk in each data
		 * block.
		 */
		/* 2a */
		for (ml = mp, ml_prev = NULL; ml != NULL; ml = ml->b_cont) {
			size_t chunk = ml->b_wptr - ml->b_rptr;
			uchar_t *r_tmp, *rpt_align;
			size_t r_offset;

			/*
			 * The hypervisor will not allow us to
			 * reference a foreign page (e.g. one
			 * belonging to another domain) by mfn in the
			 * copy operation. If the data in this mblk is
			 * on such a page we must copy the data into a
			 * local page before initiating the hypervisor
			 * copy operation.
			 */
			if (is_foreign(ml->b_rptr) || is_foreign(ml->b_wptr)) {
				mblk_t *ml_new = replace_msg(ml, chunk,
				    mp_prev, ml_prev);

				/* We can still use old ml, but not *ml! */
				if (free == ml)
					free = ml_new;
				if (mp == ml)
					mp = ml_new;
				ml = ml_new;

				xnbp->xnb_stat_rx_foreign_page++;
			}

			rpt_align = (uchar_t *)ALIGN2PAGE(ml->b_rptr);
			r_offset = (uint16_t)(ml->b_rptr - rpt_align);
			r_tmp = ml->b_rptr;

			if (d_offset + chunk > PAGESIZE)
				cmn_err(CE_PANIC, "xnb_copy_to_peer: mp %p "
				    "(svd: %p), ml %p,rpt_alg. %p, d_offset "
				    "(%lu) + chunk (%lu) > PAGESIZE %d!",
				    (void *)mp, (void *)saved_mp, (void *)ml,
				    (void *)rpt_align,
				    d_offset, chunk, (int)PAGESIZE);

			while (chunk > 0) {
				size_t part_len;

				if (item_count == xnbp->xnb_rx_cpop_count) {
					if (!grow_cpop_area(xnbp))
						goto failure;
					gop_cp = &xnbp->xnb_rx_cpop[item_count];
				}
				/*
				 * If our mblk crosses a page boundary, we need
				 * to do a separate copy for each page.
				 */
				if (r_offset + chunk > PAGESIZE) {
					part_len = PAGESIZE - r_offset;

					DTRACE_PROBE3(mblk_page_crossed,
					    (mblk_t *), ml, int, chunk, int,
					    (int)r_offset);

					xnbp->xnb_stat_rx_pagebndry_crossed++;
				} else {
					part_len = chunk;
				}

				setup_gop(xnbp, gop_cp, r_tmp, r_offset,
				    d_offset, part_len, rxreq->gref);

				chunk -= part_len;

				len += part_len;
				d_offset += part_len;
				r_tmp += part_len;
				/*
				 * The 2nd, 3rd ... last copies will always
				 * start at r_tmp, therefore r_offset is 0.
				 */
				r_offset = 0;
				gop_cp++;
				item_count++;
			}
			ml_prev = ml;

			DTRACE_PROBE4(mblk_loop_end, (mblk_t *), ml, int,
			    chunk, int, len, int, item_count);
		}
		/* 3 */
		if (HYPERVISOR_grant_table_op(GNTTABOP_copy, xnbp->xnb_rx_cpop,
		    item_count) != 0) {
			cmn_err(CE_WARN, "xnb_copy_to_peer: copy op. failed");
			DTRACE_PROBE(HV_granttableopfailed);
		}

		/* 4 */
		rxresp = RING_GET_RESPONSE(&xnbp->xnb_rx_ring, prod);
		rxresp->offset = 0;

		rxresp->flags = 0;

		DTRACE_PROBE4(got_RX_rsp, int, (int)rxresp->id, int,
		    (int)rxresp->offset, int, (int)rxresp->flags, int,
		    (int)rxresp->status);

		cksum_flags = xnbp->xnb_flavour->xf_cksum_to_peer(xnbp, mp);
		if (cksum_flags != 0)
			xnbp->xnb_stat_rx_cksum_deferred++;
		rxresp->flags |= cksum_flags;

		rxresp->id = RING_GET_REQUEST(&xnbp->xnb_rx_ring, prod)->id;
		rxresp->status = len;

		DTRACE_PROBE4(RX_rsp_set, int, (int)rxresp->id, int,
		    (int)rxresp->offset, int, (int)rxresp->flags, int,
		    (int)rxresp->status);

		for (i = 0; i < item_count; i++) {
			if (xnbp->xnb_rx_cpop[i].status != 0) {
				DTRACE_PROBE2(cpop_status_nonnull, int,
				    (int)xnbp->xnb_rx_cpop[i].status,
				    int, i);
				status = NETIF_RSP_ERROR;
			}
		}

		/* 5.2 */
		if (status != NETIF_RSP_OKAY) {
			RING_GET_RESPONSE(&xnbp->xnb_rx_ring, prod)->status =
			    status;
			xnbp->xnb_stat_rx_rsp_notok++;
		} else {
			xnbp->xnb_stat_ipackets++;
			xnbp->xnb_stat_rbytes += len;
		}

		loop++;
		prod++;
		mp_prev = mp;
		mp = mp->b_next;
	}
failure:
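	/*
	 * We arrive here directly if grow_cpop_area() fails; any
	 * responses already produced are still pushed to the peer
	 * below.
	 */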
	/*
	 * Did we actually do anything?
	 */
	if (loop == xnbp->xnb_rx_ring.req_cons) {
		mutex_exit(&xnbp->xnb_rx_lock);
		return (mp);
	}

	/*
	 * Unlink the end of the 'done' list from the remainder.
	 */
	ASSERT(mp_prev != NULL);
	mp_prev->b_next = NULL;

	xnbp->xnb_rx_ring.req_cons = loop;
	xnbp->xnb_rx_ring.rsp_prod_pvt = prod;

	/* 6 */
	/* LINTED: constant in conditional context */
	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xnbp->xnb_rx_ring, notify);
	if (notify) {
		ec_notify_via_evtchn(xnbp->xnb_evtchn);
		xnbp->xnb_stat_rx_notify_sent++;
	} else {
		xnbp->xnb_stat_rx_notify_deferred++;
	}

	if (mp != NULL)
		xnbp->xnb_stat_rx_defer++;

	mutex_exit(&xnbp->xnb_rx_lock);

	/* Free mblk_t structs we have consumed. */
	freemsgchain(free);

	return (mp);
}


static void
xnb_tx_notify_peer(xnb_t *xnbp, boolean_t force)
{
	boolean_t notify;

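	/*
	 * The force argument requests a notification even when
	 * RING_PUSH_RESPONSES_AND_CHECK_NOTIFY() determines that none
	 * is needed; callers use it after completing failed or
	 * control-only requests.
	 */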
	ASSERT(MUTEX_HELD(&xnbp->xnb_tx_lock));

	/* LINTED: constant in conditional context */
	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xnbp->xnb_tx_ring, notify);
	if (notify || force) {
		ec_notify_via_evtchn(xnbp->xnb_evtchn);
		xnbp->xnb_stat_tx_notify_sent++;
	} else {
		xnbp->xnb_stat_tx_notify_deferred++;
	}
}

static void
xnb_tx_mark_complete(xnb_t *xnbp, RING_IDX id, int16_t status)
{
	RING_IDX i;
	netif_tx_response_t *txresp;

	ASSERT(MUTEX_HELD(&xnbp->xnb_tx_lock));

	i = xnbp->xnb_tx_ring.rsp_prod_pvt;

	txresp = RING_GET_RESPONSE(&xnbp->xnb_tx_ring, i);
	txresp->id = id;
	txresp->status = status;

	xnbp->xnb_tx_ring.rsp_prod_pvt = i + 1;

	/*
	 * Note that we don't push the change to the peer here - that
	 * is the caller's responsibility.
	 */
}

static void
xnb_txbuf_recycle(xnb_txbuf_t *txp)
{
	xnb_t *xnbp = txp->xt_xnbp;

	kmem_cache_free(xnbp->xnb_tx_buf_cache, txp);

	xnbp->xnb_tx_buf_outstanding--;
}

static int
xnb_txbuf_constructor(void *buf, void *arg, int kmflag)
{
	_NOTE(ARGUNUSED(kmflag));
	xnb_txbuf_t *txp = buf;
	xnb_t *xnbp = arg;
	size_t len;
	ddi_dma_cookie_t dma_cookie;
	uint_t ncookies;

	txp->xt_free_rtn.free_func = xnb_txbuf_recycle;
	txp->xt_free_rtn.free_arg = (caddr_t)txp;
	txp->xt_xnbp = xnbp;
	txp->xt_next = NULL;

	if (ddi_dma_alloc_handle(xnbp->xnb_devinfo, &buf_dma_attr,
	    0, 0, &txp->xt_dma_handle) != DDI_SUCCESS)
		goto failure;

	if (ddi_dma_mem_alloc(txp->xt_dma_handle, PAGESIZE, &data_accattr,
	    DDI_DMA_STREAMING, 0, 0, &txp->xt_buf, &len,
	    &txp->xt_acc_handle) != DDI_SUCCESS)
		goto failure_1;

	if (ddi_dma_addr_bind_handle(txp->xt_dma_handle, NULL, txp->xt_buf,
	    len, DDI_DMA_RDWR | DDI_DMA_STREAMING, DDI_DMA_DONTWAIT, 0,
	    &dma_cookie, &ncookies)
	    != DDI_DMA_MAPPED)
		goto failure_2;
	ASSERT(ncookies == 1);
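	/* buf_dma_attr allows only a single segment, hence one cookie. */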

	txp->xt_mfn = xnb_btop(dma_cookie.dmac_laddress);
	txp->xt_buflen = dma_cookie.dmac_size;

	DTRACE_PROBE(txbuf_allocated);

	atomic_inc_32(&xnbp->xnb_tx_buf_count);
	xnbp->xnb_tx_buf_outstanding++;

	return (0);

failure_2:
	ddi_dma_mem_free(&txp->xt_acc_handle);

failure_1:
	ddi_dma_free_handle(&txp->xt_dma_handle);

failure:

	return (-1);
}

static void
xnb_txbuf_destructor(void *buf, void *arg)
{
	xnb_txbuf_t *txp = buf;
	xnb_t *xnbp = arg;

	(void) ddi_dma_unbind_handle(txp->xt_dma_handle);
	ddi_dma_mem_free(&txp->xt_acc_handle);
	ddi_dma_free_handle(&txp->xt_dma_handle);

	atomic_dec_32(&xnbp->xnb_tx_buf_count);
}

/*
 * Take packets from the peer and deliver them onward.
 */
static mblk_t *
xnb_from_peer(xnb_t *xnbp)
{
	RING_IDX start, end, loop;
	gnttab_copy_t *cop;
	xnb_txbuf_t **txpp;
	netif_tx_request_t *txreq;
	boolean_t work_to_do, need_notify = B_FALSE;
	mblk_t *head, *tail;
	int n_data_req, i;

	ASSERT(MUTEX_HELD(&xnbp->xnb_tx_lock));

	head = tail = NULL;
around:

	/* LINTED: constant in conditional context */
	RING_FINAL_CHECK_FOR_REQUESTS(&xnbp->xnb_tx_ring, work_to_do);
	if (!work_to_do) {
finished:
		xnb_tx_notify_peer(xnbp, need_notify);

		return (head);
	}

	start = xnbp->xnb_tx_ring.req_cons;
	end = xnbp->xnb_tx_ring.sring->req_prod;

	if ((end - start) > NET_TX_RING_SIZE) {
		/*
		 * This usually indicates that the frontend driver is
		 * misbehaving, as it's not possible to have more than
		 * NET_TX_RING_SIZE ring elements in play at any one
		 * time.
		 *
		 * We reset the ring pointers to the state declared by
		 * the frontend and try to carry on.
		 */
		cmn_err(CE_WARN, "xnb_from_peer: domain %d tried to give us %u "
		    "items in the ring, resetting and trying to recover.",
		    xnbp->xnb_peer, (end - start));

		/* LINTED: constant in conditional context */
		BACK_RING_ATTACH(&xnbp->xnb_tx_ring,
		    (netif_tx_sring_t *)xnbp->xnb_tx_ring_addr, PAGESIZE);

		goto around;
	}

	loop = start;
	cop = xnbp->xnb_tx_cop;
	txpp = xnbp->xnb_tx_bufp;
	n_data_req = 0;

	while (loop < end) {
		static const uint16_t acceptable_flags =
		    NETTXF_csum_blank |
		    NETTXF_data_validated |
		    NETTXF_extra_info;
		uint16_t unexpected_flags;

		txreq = RING_GET_REQUEST(&xnbp->xnb_tx_ring, loop);

		unexpected_flags = txreq->flags & ~acceptable_flags;
		if (unexpected_flags != 0) {
			/*
			 * The peer used flag bits that we do not
			 * recognize.
			 */
			cmn_err(CE_WARN, "xnb_from_peer: "
			    "unexpected flag bits (0x%x) from peer "
			    "in transmit request",
			    unexpected_flags);
			xnbp->xnb_stat_tx_unexpected_flags++;

			/* Mark this entry as failed. */
			xnb_tx_mark_complete(xnbp, txreq->id, NETIF_RSP_ERROR);
			need_notify = B_TRUE;

		} else if (txreq->flags & NETTXF_extra_info) {
			struct netif_extra_info *erp;
			boolean_t status;

			loop++; /* Consume another slot in the ring. */
			ASSERT(loop <= end);

			erp = (struct netif_extra_info *)
			    RING_GET_REQUEST(&xnbp->xnb_tx_ring, loop);

			switch (erp->type) {
			case XEN_NETIF_EXTRA_TYPE_MCAST_ADD:
				ASSERT(xnbp->xnb_multicast_control);
				status = xnbp->xnb_flavour->xf_mcast_add(xnbp,
				    &erp->u.mcast.addr);
				break;
			case XEN_NETIF_EXTRA_TYPE_MCAST_DEL:
				ASSERT(xnbp->xnb_multicast_control);
				status = xnbp->xnb_flavour->xf_mcast_del(xnbp,
				    &erp->u.mcast.addr);
				break;
			default:
				status = B_FALSE;
				cmn_err(CE_WARN, "xnb_from_peer: "
				    "unknown extra type %d", erp->type);
				break;
			}

			xnb_tx_mark_complete(xnbp, txreq->id,
			    status ? NETIF_RSP_OKAY : NETIF_RSP_ERROR);
			need_notify = B_TRUE;

		} else if ((txreq->offset > PAGESIZE) ||
		    (txreq->offset + txreq->size > PAGESIZE)) {
			/*
			 * Peer attempted to refer to data beyond the
			 * end of the granted page.
			 */
			cmn_err(CE_WARN, "xnb_from_peer: "
			    "attempt to refer beyond the end of granted "
			    "page in txreq (offset %d, size %d).",
			    txreq->offset, txreq->size);
			xnbp->xnb_stat_tx_overflow_page++;

			/* Mark this entry as failed. */
			xnb_tx_mark_complete(xnbp, txreq->id, NETIF_RSP_ERROR);
			need_notify = B_TRUE;

		} else {
			xnb_txbuf_t *txp;

			txp = kmem_cache_alloc(xnbp->xnb_tx_buf_cache,
			    KM_NOSLEEP);
			if (txp == NULL)
				break;

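			/*
			 * Wrap the DMA buffer in an mblk; when the
			 * mblk is freed, xnb_txbuf_recycle() returns
			 * the buffer to the cache.
			 */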
			txp->xt_mblk = desballoc((unsigned char *)txp->xt_buf,
			    txp->xt_buflen, 0, &txp->xt_free_rtn);
			if (txp->xt_mblk == NULL) {
				kmem_cache_free(xnbp->xnb_tx_buf_cache, txp);
				break;
			}

			txp->xt_idx = loop;
			txp->xt_id = txreq->id;

			cop->source.u.ref = txreq->gref;
			cop->source.domid = xnbp->xnb_peer;
			cop->source.offset = txreq->offset;

			cop->dest.u.gmfn = txp->xt_mfn;
			cop->dest.domid = DOMID_SELF;
			cop->dest.offset = 0;

			cop->len = txreq->size;
			cop->flags = GNTCOPY_source_gref;
			cop->status = 0;

			*txpp = txp;

			txpp++;
			cop++;
			n_data_req++;

			ASSERT(n_data_req <= NET_TX_RING_SIZE);
		}

		loop++;
	}

	xnbp->xnb_tx_ring.req_cons = loop;

	if (n_data_req == 0)
		goto around;

	if (HYPERVISOR_grant_table_op(GNTTABOP_copy,
	    xnbp->xnb_tx_cop, n_data_req) != 0) {

		cmn_err(CE_WARN, "xnb_from_peer: copy operation failed");

		txpp = xnbp->xnb_tx_bufp;
		i = n_data_req;
		while (i > 0) {
			kmem_cache_free(xnbp->xnb_tx_buf_cache, *txpp);
			txpp++;
			i--;
		}

		goto finished;
	}

	txpp = xnbp->xnb_tx_bufp;
	cop = xnbp->xnb_tx_cop;
	i = n_data_req;

	while (i > 0) {
		xnb_txbuf_t *txp = *txpp;

		txreq = RING_GET_REQUEST(&xnbp->xnb_tx_ring, txp->xt_idx);

		if (cop->status != 0) {
#ifdef XNB_DEBUG
			cmn_err(CE_WARN, "xnb_from_peer: "
			    "txpp 0x%p failed (%d)",
			    (void *)*txpp, cop->status);
#endif /* XNB_DEBUG */
			xnb_tx_mark_complete(xnbp, txp->xt_id, NETIF_RSP_ERROR);
			freemsg(txp->xt_mblk);
		} else {
			mblk_t *mp;

			mp = txp->xt_mblk;
			mp->b_rptr = mp->b_wptr = (unsigned char *)txp->xt_buf;
			mp->b_wptr += txreq->size;
			mp->b_next = NULL;

			/*
			 * If there are checksum flags, process them
			 * appropriately.
			 */
			if ((txreq->flags &
			    (NETTXF_csum_blank | NETTXF_data_validated))
			    != 0) {
				mp = xnbp->xnb_flavour->xf_cksum_from_peer(xnbp,
				    mp, txreq->flags);
				xnbp->xnb_stat_tx_cksum_no_need++;

				txp->xt_mblk = mp;
			}

			if (head == NULL) {
				ASSERT(tail == NULL);
				head = mp;
			} else {
				ASSERT(tail != NULL);
				tail->b_next = mp;
			}
			tail = mp;

			xnbp->xnb_stat_opackets++;
			xnbp->xnb_stat_obytes += txreq->size;

			xnb_tx_mark_complete(xnbp, txp->xt_id, NETIF_RSP_OKAY);
		}

		txpp++;
		cop++;
		i--;
	}

	goto around;
	/* NOTREACHED */
}

static uint_t
xnb_intr(caddr_t arg)
{
	xnb_t *xnbp = (xnb_t *)arg;
	mblk_t *mp;

	xnbp->xnb_stat_intr++;

	mutex_enter(&xnbp->xnb_tx_lock);

	ASSERT(xnbp->xnb_connected);

	mp = xnb_from_peer(xnbp);

	mutex_exit(&xnbp->xnb_tx_lock);

	if (!xnbp->xnb_hotplugged) {
		xnbp->xnb_stat_tx_too_early++;
		goto fail;
	}
	if (mp == NULL) {
		xnbp->xnb_stat_spurious_intr++;
		goto fail;
	}

	xnbp->xnb_flavour->xf_from_peer(xnbp, mp);

	return (DDI_INTR_CLAIMED);

fail:
	freemsgchain(mp);
	return (DDI_INTR_CLAIMED);
}

/*
 * Read our configuration from xenstore.
 */
boolean_t
xnb_read_xs_config(xnb_t *xnbp)
{
	char *xsname;
	char mac[ETHERADDRL * 3];

	xsname = xvdi_get_xsname(xnbp->xnb_devinfo);

	if (xenbus_scanf(XBT_NULL, xsname,
	    "mac", "%s", mac) != 0) {
		cmn_err(CE_WARN, "xnb_attach: "
		    "cannot read mac address from %s",
		    xsname);
		return (B_FALSE);
	}

	if (ether_aton(mac, xnbp->xnb_mac_addr) != ETHERADDRL) {
		cmn_err(CE_WARN,
		    "xnb_attach: cannot parse mac address %s",
		    mac);
		return (B_FALSE);
	}

	return (B_TRUE);
}

/*
 * Read the configuration of the peer from xenstore.
 */
boolean_t
xnb_read_oe_config(xnb_t *xnbp)
{
	char *oename;
	int i;

	oename = xvdi_get_oename(xnbp->xnb_devinfo);

	if (xenbus_gather(XBT_NULL, oename,
	    "event-channel", "%u", &xnbp->xnb_fe_evtchn,
	    "tx-ring-ref", "%lu", &xnbp->xnb_tx_ring_ref,
	    "rx-ring-ref", "%lu", &xnbp->xnb_rx_ring_ref,
	    NULL) != 0) {
		cmn_err(CE_WARN, "xnb_read_oe_config: "
		    "cannot read other-end details from %s",
		    oename);
		return (B_FALSE);
	}

	/*
	 * Check whether our peer requests receive side hypervisor
	 * copy.
	 */
	if (xenbus_scanf(XBT_NULL, oename,
	    "request-rx-copy", "%d", &i) != 0)
		i = 0;
	if (i != 0)
		xnbp->xnb_rx_hv_copy = B_TRUE;

	/*
	 * Check whether our peer requests multicast_control.
	 */
	if (xenbus_scanf(XBT_NULL, oename,
	    "request-multicast-control", "%d", &i) != 0)
		i = 0;
	if (i != 0)
		xnbp->xnb_multicast_control = B_TRUE;

	/*
	 * The Linux backend driver here checks to see if the peer has
	 * set 'feature-no-csum-offload'. This is used to indicate
	 * that the guest cannot handle receiving packets without a
	 * valid checksum. We don't check here, because packets passed
	 * to the peer _always_ have a valid checksum.
	 *
	 * There are three cases:
	 *
	 * - the NIC is dedicated: packets from the wire should always
	 *   have a valid checksum. If the hardware validates the
	 *   checksum then the relevant bit will be set in the packet
	 *   attributes and we will inform the peer. It can choose to
	 *   ignore the hardware verification.
	 *
	 * - the NIC is shared (VNIC) and a packet originates from the
	 *   wire: this is the same as the case above - the packets
	 *   will have a valid checksum.
	 *
	 * - the NIC is shared (VNIC) and a packet originates from the
	 *   host: the MAC layer ensures that all such packets have a
	 *   valid checksum by calculating one if the stack did not.
	 */

	return (B_TRUE);
}

void
xnb_start_connect(xnb_t *xnbp)
{
	dev_info_t *dip = xnbp->xnb_devinfo;

	if (!xnb_connect_rings(dip)) {
		cmn_err(CE_WARN, "xnb_start_connect: "
		    "cannot connect rings");
		goto failed;
	}

	if (!xnbp->xnb_flavour->xf_start_connect(xnbp)) {
		cmn_err(CE_WARN, "xnb_start_connect: "
		    "flavour failed to connect");
		goto failed;
	}

	(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateConnected);
	return;

failed:
	xnbp->xnb_flavour->xf_peer_disconnected(xnbp);
	xnb_disconnect_rings(dip);
	(void) xvdi_switch_state(dip, XBT_NULL,
	    XenbusStateClosed);
	(void) xvdi_post_event(dip, XEN_HP_REMOVE);
}

static boolean_t
xnb_connect_rings(dev_info_t *dip)
{
	xnb_t *xnbp = ddi_get_driver_private(dip);
	struct gnttab_map_grant_ref map_op;

	/*
	 * Cannot attempt to connect the rings if already connected.
	 */
	ASSERT(!xnbp->xnb_connected);

	/*
	 * 1. allocate a vaddr for the tx page, one for the rx page.
	 * 2. call GNTTABOP_map_grant_ref to map the relevant pages
	 *    into the allocated vaddr (one for tx, one for rx).
	 * 3. call EVTCHNOP_bind_interdomain to have the event channel
	 *    bound to this domain.
	 * 4. associate the event channel with an interrupt.
	 * 5. enable the interrupt.
	 */

	/* 1.tx */
	xnbp->xnb_tx_ring_addr = vmem_xalloc(heap_arena, PAGESIZE, PAGESIZE,
	    0, 0, 0, 0, VM_SLEEP);
	ASSERT(xnbp->xnb_tx_ring_addr != NULL);

	/* 2.tx */
	map_op.host_addr = (uint64_t)((long)xnbp->xnb_tx_ring_addr);
	map_op.flags = GNTMAP_host_map;
	map_op.ref = xnbp->xnb_tx_ring_ref;
	map_op.dom = xnbp->xnb_peer;
	hat_prepare_mapping(kas.a_hat, xnbp->xnb_tx_ring_addr, NULL);
	if (xen_map_gref(GNTTABOP_map_grant_ref, &map_op, 1, B_FALSE) != 0 ||
	    map_op.status != 0) {
		cmn_err(CE_WARN, "xnb_connect_rings: cannot map tx-ring page.");
		goto fail;
	}
	xnbp->xnb_tx_ring_handle = map_op.handle;

	/* LINTED: constant in conditional context */
	BACK_RING_INIT(&xnbp->xnb_tx_ring,
	    (netif_tx_sring_t *)xnbp->xnb_tx_ring_addr, PAGESIZE);

	/* 1.rx */
	xnbp->xnb_rx_ring_addr = vmem_xalloc(heap_arena, PAGESIZE, PAGESIZE,
	    0, 0, 0, 0, VM_SLEEP);
	ASSERT(xnbp->xnb_rx_ring_addr != NULL);

	/* 2.rx */
	map_op.host_addr = (uint64_t)((long)xnbp->xnb_rx_ring_addr);
	map_op.flags = GNTMAP_host_map;
	map_op.ref = xnbp->xnb_rx_ring_ref;
	map_op.dom = xnbp->xnb_peer;
	hat_prepare_mapping(kas.a_hat, xnbp->xnb_rx_ring_addr, NULL);
	if (xen_map_gref(GNTTABOP_map_grant_ref, &map_op, 1, B_FALSE) != 0 ||
	    map_op.status != 0) {
		cmn_err(CE_WARN, "xnb_connect_rings: cannot map rx-ring page.");
		goto fail;
	}
	xnbp->xnb_rx_ring_handle = map_op.handle;

	/* LINTED: constant in conditional context */
	BACK_RING_INIT(&xnbp->xnb_rx_ring,
	    (netif_rx_sring_t *)xnbp->xnb_rx_ring_addr, PAGESIZE);

	/* 3 */
	if (xvdi_bind_evtchn(dip, xnbp->xnb_fe_evtchn) != DDI_SUCCESS) {
		cmn_err(CE_WARN, "xnb_connect_rings: "
		    "cannot bind event channel %d", xnbp->xnb_fe_evtchn);
		xnbp->xnb_evtchn = INVALID_EVTCHN;
		goto fail;
	}
	xnbp->xnb_evtchn = xvdi_get_evtchn(dip);

	/*
	 * It would be good to set the state to XenbusStateConnected
	 * here as well, but then what if ddi_add_intr() failed?
	 * Changing the state in the store will be noticed by the peer
	 * and cannot be "taken back".
	 */
	mutex_enter(&xnbp->xnb_tx_lock);
	mutex_enter(&xnbp->xnb_rx_lock);

	xnbp->xnb_connected = B_TRUE;

	mutex_exit(&xnbp->xnb_rx_lock);
	mutex_exit(&xnbp->xnb_tx_lock);

	/* 4, 5 */
	if (ddi_add_intr(dip, 0, NULL, NULL, xnb_intr, (caddr_t)xnbp)
	    != DDI_SUCCESS) {
		cmn_err(CE_WARN, "xnb_connect_rings: cannot add interrupt");
		goto fail;
	}
	xnbp->xnb_irq = B_TRUE;

	return (B_TRUE);

fail:
	mutex_enter(&xnbp->xnb_tx_lock);
	mutex_enter(&xnbp->xnb_rx_lock);

	xnbp->xnb_connected = B_FALSE;

	mutex_exit(&xnbp->xnb_rx_lock);
	mutex_exit(&xnbp->xnb_tx_lock);

	return (B_FALSE);
}

static void
xnb_disconnect_rings(dev_info_t *dip)
{
	xnb_t *xnbp = ddi_get_driver_private(dip);

	if (xnbp->xnb_irq) {
		ddi_remove_intr(dip, 0, NULL);
		xnbp->xnb_irq = B_FALSE;
	}

	if (xnbp->xnb_evtchn != INVALID_EVTCHN) {
		xvdi_free_evtchn(dip);
		xnbp->xnb_evtchn = INVALID_EVTCHN;
	}

	if (xnbp->xnb_rx_ring_handle != INVALID_GRANT_HANDLE) {
		struct gnttab_unmap_grant_ref unmap_op;

		unmap_op.host_addr = (uint64_t)(uintptr_t)
		    xnbp->xnb_rx_ring_addr;
		unmap_op.dev_bus_addr = 0;
		unmap_op.handle = xnbp->xnb_rx_ring_handle;
		if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
		    &unmap_op, 1) != 0)
			cmn_err(CE_WARN, "xnb_disconnect_rings: "
			    "cannot unmap rx-ring page (%d)",
			    unmap_op.status);

		xnbp->xnb_rx_ring_handle = INVALID_GRANT_HANDLE;
	}

	if (xnbp->xnb_rx_ring_addr != NULL) {
		hat_release_mapping(kas.a_hat, xnbp->xnb_rx_ring_addr);
		vmem_free(heap_arena, xnbp->xnb_rx_ring_addr, PAGESIZE);
		xnbp->xnb_rx_ring_addr = NULL;
	}

	if (xnbp->xnb_tx_ring_handle != INVALID_GRANT_HANDLE) {
		struct gnttab_unmap_grant_ref unmap_op;

		unmap_op.host_addr = (uint64_t)(uintptr_t)
		    xnbp->xnb_tx_ring_addr;
		unmap_op.dev_bus_addr = 0;
		unmap_op.handle = xnbp->xnb_tx_ring_handle;
		if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
		    &unmap_op, 1) != 0)
			cmn_err(CE_WARN, "xnb_disconnect_rings: "
			    "cannot unmap tx-ring page (%d)",
			    unmap_op.status);

		xnbp->xnb_tx_ring_handle = INVALID_GRANT_HANDLE;
	}

	if (xnbp->xnb_tx_ring_addr != NULL) {
		hat_release_mapping(kas.a_hat, xnbp->xnb_tx_ring_addr);
		vmem_free(heap_arena, xnbp->xnb_tx_ring_addr, PAGESIZE);
		xnbp->xnb_tx_ring_addr = NULL;
	}
}

static void
xnb_oe_state_change(dev_info_t *dip, ddi_eventcookie_t id,
    void *arg, void *impl_data)
{
	_NOTE(ARGUNUSED(id, arg));
	xnb_t *xnbp = ddi_get_driver_private(dip);
	XenbusState new_state = *(XenbusState *)impl_data;

	ASSERT(xnbp != NULL);

	switch (new_state) {
	case XenbusStateConnected:
		/* spurious state change */
		if (xnbp->xnb_connected)
			return;

		if (!xnb_read_oe_config(xnbp) ||
		    !xnbp->xnb_flavour->xf_peer_connected(xnbp)) {
			cmn_err(CE_WARN, "xnb_oe_state_change: "
			    "cannot read other-end configuration");
			(void) xvdi_switch_state(dip, XBT_NULL,
			    XenbusStateClosed);
			(void) xvdi_post_event(dip, XEN_HP_REMOVE);

			break;
		}

		mutex_enter(&xnbp->xnb_state_lock);
		xnbp->xnb_fe_status = XNB_STATE_READY;
		if (xnbp->xnb_be_status == XNB_STATE_READY)
			xnb_start_connect(xnbp);
		mutex_exit(&xnbp->xnb_state_lock);

		/*
		 * Now that we've attempted to connect it's reasonable
		 * to allow an attempt to detach.
		 */
		xnbp->xnb_detachable = B_TRUE;

		break;

	case XenbusStateClosing:
		(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosing);

		break;

	case XenbusStateClosed:
		xnbp->xnb_flavour->xf_peer_disconnected(xnbp);

		mutex_enter(&xnbp->xnb_tx_lock);
		mutex_enter(&xnbp->xnb_rx_lock);

		xnb_disconnect_rings(dip);
		xnbp->xnb_connected = B_FALSE;

		mutex_exit(&xnbp->xnb_rx_lock);
		mutex_exit(&xnbp->xnb_tx_lock);

		(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosed);
		(void) xvdi_post_event(dip, XEN_HP_REMOVE);
		/*
		 * In all likelihood this is already set (in the above
		 * case), but if the peer never attempted to connect
		 * and the domain is destroyed we get here without
		 * having been through the case above, so we set it to
		 * be sure.
		 */
		xnbp->xnb_detachable = B_TRUE;

		break;

	default:
		break;
	}
}

static void
xnb_hp_state_change(dev_info_t *dip, ddi_eventcookie_t id,
    void *arg, void *impl_data)
{
	_NOTE(ARGUNUSED(id, arg));
	xnb_t *xnbp = ddi_get_driver_private(dip);
	xendev_hotplug_state_t state = *(xendev_hotplug_state_t *)impl_data;

	ASSERT(xnbp != NULL);

	switch (state) {
	case Connected:
		/* spurious hotplug event */
		if (xnbp->xnb_hotplugged)
			break;

		if (!xnb_read_xs_config(xnbp))
			break;

		if (!xnbp->xnb_flavour->xf_hotplug_connected(xnbp))
			break;

		mutex_enter(&xnbp->xnb_tx_lock);
		mutex_enter(&xnbp->xnb_rx_lock);

		xnbp->xnb_hotplugged = B_TRUE;

		mutex_exit(&xnbp->xnb_rx_lock);
		mutex_exit(&xnbp->xnb_tx_lock);

		mutex_enter(&xnbp->xnb_state_lock);
		xnbp->xnb_be_status = XNB_STATE_READY;
		if (xnbp->xnb_fe_status == XNB_STATE_READY)
			xnb_start_connect(xnbp);
		mutex_exit(&xnbp->xnb_state_lock);

		break;

	default:
		break;
	}
}

static struct modldrv modldrv = {
	&mod_miscops, "xnb",
};

static struct modlinkage modlinkage = {
	MODREV_1, &modldrv, NULL
};

int
_init(void)
{
	int i;

	mutex_init(&xnb_alloc_page_lock, NULL, MUTEX_DRIVER, NULL);

	i = mod_install(&modlinkage);
	if (i != DDI_SUCCESS)
		mutex_destroy(&xnb_alloc_page_lock);

	return (i);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

int
_fini(void)
{
	int i;

	i = mod_remove(&modlinkage);
	if (i == DDI_SUCCESS)
		mutex_destroy(&xnb_alloc_page_lock);

	return (i);
}