1 /* 2 * Copyright (C) 2014 Giuseppe Lettieri. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23 * SUCH DAMAGE. 24 */ 25 26 /* 27 * $FreeBSD$ 28 * 29 * Monitors 30 * 31 * netmap monitors can be used to do zero-copy monitoring of network traffic 32 * on another adapter, when the latter adapter is working in netmap mode. 33 * 34 * Monitors offer to userspace the same interface as any other netmap port, 35 * with as many pairs of netmap rings as the monitored adapter. 36 * However, only the rx rings are actually used. Each monitor rx ring receives 37 * the traffic transiting on both the tx and rx corresponding rings in the 38 * monitored adapter. During registration, the user can choose if she wants 39 * to intercept tx only, rx only, or both tx and rx traffic. 40 * 41 * The monitor only sees the frames after they have been consumed in the 42 * monitored adapter: 43 * 44 * - For tx traffic, this is after the slots containing the frames have been 45 * marked as free. Note that this may happen at a considerably delay after 46 * frame transmission, since freeing of slots is often done lazily. 47 * 48 * - For rx traffic, this is after the consumer on the monitored adapter 49 * has released them. In most cases, the consumer is a userspace 50 * application which may have modified the frame contents. 51 * 52 * If the monitor is not able to cope with the stream of frames, excess traffic 53 * will be dropped. 54 * 55 * Each ring can be monitored by at most one monitor. This may change in the 56 * future, if we implement monitor chaining. 57 * 58 */ 59 60 61 #if defined(__FreeBSD__) 62 #include <sys/cdefs.h> /* prerequisite */ 63 64 #include <sys/types.h> 65 #include <sys/errno.h> 66 #include <sys/param.h> /* defines used in kernel.h */ 67 #include <sys/kernel.h> /* types used in module initialization */ 68 #include <sys/malloc.h> 69 #include <sys/poll.h> 70 #include <sys/lock.h> 71 #include <sys/rwlock.h> 72 #include <sys/selinfo.h> 73 #include <sys/sysctl.h> 74 #include <sys/socket.h> /* sockaddrs */ 75 #include <net/if.h> 76 #include <net/if_var.h> 77 #include <machine/bus.h> /* bus_dmamap_* */ 78 #include <sys/refcount.h> 79 80 81 #elif defined(linux) 82 83 #include "bsd_glue.h" 84 85 #elif defined(__APPLE__) 86 87 #warning OSX support is only partial 88 #include "osx_glue.h" 89 90 #else 91 92 #error Unsupported platform 93 94 #endif /* unsupported */ 95 96 /* 97 * common headers 98 */ 99 100 #include <net/netmap.h> 101 #include <dev/netmap/netmap_kern.h> 102 #include <dev/netmap/netmap_mem2.h> 103 104 #ifdef WITH_MONITOR 105 106 #define NM_MONITOR_MAXSLOTS 4096 107 108 /* monitor works by replacing the nm_sync callbacks in the monitored rings. 109 * The actions to be performed are the same on both tx and rx rings, so we 110 * have collected them here 111 */ 112 static int 113 netmap_monitor_parent_sync(struct netmap_kring *kring, int flags, u_int* ringptr) 114 { 115 struct netmap_monitor_adapter *mna = kring->monitor; 116 struct netmap_kring *mkring = &mna->up.rx_rings[kring->ring_id]; 117 struct netmap_ring *ring = kring->ring, *mring = mkring->ring; 118 int error; 119 int rel_slots, free_slots, busy; 120 u_int beg, end, i; 121 u_int lim = kring->nkr_num_slots - 1, 122 mlim = mkring->nkr_num_slots - 1; 123 124 /* get the relased slots (rel_slots) */ 125 beg = *ringptr; 126 error = kring->save_sync(kring, flags); 127 if (error) 128 return error; 129 end = *ringptr; 130 rel_slots = end - beg; 131 if (rel_slots < 0) 132 rel_slots += kring->nkr_num_slots; 133 134 if (!rel_slots) { 135 return 0; 136 } 137 138 /* we need to lock the monitor receive ring, since it 139 * is the target of bot tx and rx traffic from the monitored 140 * adapter 141 */ 142 mtx_lock(&mkring->q_lock); 143 /* get the free slots available on the monitor ring */ 144 i = mkring->nr_hwtail; 145 busy = i - mkring->nr_hwcur; 146 if (busy < 0) 147 busy += mkring->nkr_num_slots; 148 free_slots = mlim - busy; 149 150 if (!free_slots) { 151 mtx_unlock(&mkring->q_lock); 152 return 0; 153 } 154 155 /* swap min(free_slots, rel_slots) slots */ 156 if (free_slots < rel_slots) { 157 beg += (rel_slots - free_slots); 158 if (beg > lim) 159 beg = 0; 160 rel_slots = free_slots; 161 } 162 163 for ( ; rel_slots; rel_slots--) { 164 struct netmap_slot *s = &ring->slot[beg]; 165 struct netmap_slot *ms = &mring->slot[i]; 166 uint32_t tmp; 167 168 tmp = ms->buf_idx; 169 ms->buf_idx = s->buf_idx; 170 s->buf_idx = tmp; 171 172 tmp = ms->len; 173 ms->len = s->len; 174 s->len = tmp; 175 176 s->flags |= NS_BUF_CHANGED; 177 178 beg = nm_next(beg, lim); 179 i = nm_next(i, mlim); 180 181 } 182 mb(); 183 mkring->nr_hwtail = i; 184 185 mtx_unlock(&mkring->q_lock); 186 /* notify the new frames to the monitor */ 187 mna->up.nm_notify(&mna->up, mkring->ring_id, NR_RX, 0); 188 return 0; 189 } 190 191 /* callback used to replace the nm_sync callback in the monitored tx rings */ 192 static int 193 netmap_monitor_parent_txsync(struct netmap_kring *kring, int flags) 194 { 195 ND("%s %x", kring->name, flags); 196 return netmap_monitor_parent_sync(kring, flags, &kring->nr_hwtail); 197 } 198 199 /* callback used to replace the nm_sync callback in the monitored rx rings */ 200 static int 201 netmap_monitor_parent_rxsync(struct netmap_kring *kring, int flags) 202 { 203 ND("%s %x", kring->name, flags); 204 return netmap_monitor_parent_sync(kring, flags, &kring->rcur); 205 } 206 207 /* nm_sync callback for the monitor's own tx rings. 208 * This makes no sense and always returns error 209 */ 210 static int 211 netmap_monitor_txsync(struct netmap_kring *kring, int flags) 212 { 213 D("%s %x", kring->name, flags); 214 return EIO; 215 } 216 217 /* nm_sync callback for the monitor's own rx rings. 218 * Note that the lock in netmap_monitor_parent_sync only protects 219 * writers among themselves. Synchronization between writers 220 * (i.e., netmap_monitor_parent_txsync and netmap_monitor_parent_rxsync) 221 * and readers (i.e., netmap_monitor_rxsync) relies on memory barriers. 222 */ 223 static int 224 netmap_monitor_rxsync(struct netmap_kring *kring, int flags) 225 { 226 ND("%s %x", kring->name, flags); 227 kring->nr_hwcur = kring->rcur; 228 mb(); 229 nm_rxsync_finalize(kring); 230 return 0; 231 } 232 233 /* nm_krings_create callbacks for monitors. 234 * We could use the default netmap_hw_krings_monitor, but 235 * we don't need the mbq. 236 */ 237 static int 238 netmap_monitor_krings_create(struct netmap_adapter *na) 239 { 240 return netmap_krings_create(na, 0); 241 } 242 243 244 /* nm_register callback for monitors. 245 * 246 * On registration, replace the nm_sync callbacks in the monitored 247 * rings with our own, saving the previous ones in the monitored 248 * rings themselves, where they are used by netmap_monitor_parent_sync. 249 * 250 * On de-registration, restore the original callbacks. We need to 251 * stop traffic while we are doing this, since the monitored adapter may 252 * have already started executing a netmap_monitor_parent_sync 253 * and may not like the kring->save_sync pointer to become NULL. 254 */ 255 static int 256 netmap_monitor_reg(struct netmap_adapter *na, int onoff) 257 { 258 struct netmap_monitor_adapter *mna = 259 (struct netmap_monitor_adapter *)na; 260 struct netmap_priv_d *priv = &mna->priv; 261 struct netmap_adapter *pna = priv->np_na; 262 struct netmap_kring *kring; 263 int i; 264 265 ND("%p: onoff %d", na, onoff); 266 if (onoff) { 267 if (!nm_netmap_on(pna)) { 268 /* parent left netmap mode, fatal */ 269 return ENXIO; 270 } 271 if (mna->flags & NR_MONITOR_TX) { 272 for (i = priv->np_txqfirst; i < priv->np_txqlast; i++) { 273 kring = &pna->tx_rings[i]; 274 kring->save_sync = kring->nm_sync; 275 kring->nm_sync = netmap_monitor_parent_txsync; 276 } 277 } 278 if (mna->flags & NR_MONITOR_RX) { 279 for (i = priv->np_rxqfirst; i < priv->np_rxqlast; i++) { 280 kring = &pna->rx_rings[i]; 281 kring->save_sync = kring->nm_sync; 282 kring->nm_sync = netmap_monitor_parent_rxsync; 283 } 284 } 285 na->na_flags |= NAF_NETMAP_ON; 286 } else { 287 if (!nm_netmap_on(pna)) { 288 /* parent left netmap mode, nothing to restore */ 289 return 0; 290 } 291 na->na_flags &= ~NAF_NETMAP_ON; 292 if (mna->flags & NR_MONITOR_TX) { 293 for (i = priv->np_txqfirst; i < priv->np_txqlast; i++) { 294 netmap_set_txring(pna, i, 1 /* stopped */); 295 kring = &pna->tx_rings[i]; 296 kring->nm_sync = kring->save_sync; 297 kring->save_sync = NULL; 298 netmap_set_txring(pna, i, 0 /* enabled */); 299 } 300 } 301 if (mna->flags & NR_MONITOR_RX) { 302 for (i = priv->np_rxqfirst; i < priv->np_rxqlast; i++) { 303 netmap_set_rxring(pna, i, 1 /* stopped */); 304 kring = &pna->rx_rings[i]; 305 kring->nm_sync = kring->save_sync; 306 kring->save_sync = NULL; 307 netmap_set_rxring(pna, i, 0 /* enabled */); 308 } 309 } 310 } 311 return 0; 312 } 313 /* nm_krings_delete callback for monitors */ 314 static void 315 netmap_monitor_krings_delete(struct netmap_adapter *na) 316 { 317 netmap_krings_delete(na); 318 } 319 320 321 /* nm_dtor callback for monitors */ 322 static void 323 netmap_monitor_dtor(struct netmap_adapter *na) 324 { 325 struct netmap_monitor_adapter *mna = 326 (struct netmap_monitor_adapter *)na; 327 struct netmap_priv_d *priv = &mna->priv; 328 struct netmap_adapter *pna = priv->np_na; 329 int i; 330 331 ND("%p", na); 332 if (nm_netmap_on(pna)) { 333 /* parent still in netmap mode, mark its krings as free */ 334 if (mna->flags & NR_MONITOR_TX) { 335 for (i = priv->np_txqfirst; i < priv->np_txqlast; i++) { 336 pna->tx_rings[i].monitor = NULL; 337 } 338 } 339 if (mna->flags & NR_MONITOR_RX) { 340 for (i = priv->np_rxqfirst; i < priv->np_rxqlast; i++) { 341 pna->rx_rings[i].monitor = NULL; 342 } 343 } 344 } 345 netmap_adapter_put(pna); 346 } 347 348 349 /* check if nmr is a request for a monitor adapter that we can satisfy */ 350 int 351 netmap_get_monitor_na(struct nmreq *nmr, struct netmap_adapter **na, int create) 352 { 353 struct nmreq pnmr; 354 struct netmap_adapter *pna; /* parent adapter */ 355 struct netmap_monitor_adapter *mna; 356 int i, error; 357 358 if ((nmr->nr_flags & (NR_MONITOR_TX | NR_MONITOR_RX)) == 0) { 359 ND("not a monitor"); 360 return 0; 361 } 362 /* this is a request for a monitor adapter */ 363 364 D("flags %x", nmr->nr_flags); 365 366 mna = malloc(sizeof(*mna), M_DEVBUF, M_NOWAIT | M_ZERO); 367 if (mna == NULL) { 368 D("memory error"); 369 return ENOMEM; 370 } 371 372 /* first, try to find the adapter that we want to monitor 373 * We use the same nmr, after we have turned off the monitor flags. 374 * In this way we can potentially monitor everything netmap understands, 375 * except other monitors. 376 */ 377 memcpy(&pnmr, nmr, sizeof(pnmr)); 378 pnmr.nr_flags &= ~(NR_MONITOR_TX | NR_MONITOR_RX); 379 error = netmap_get_na(&pnmr, &pna, create); 380 if (error) { 381 D("parent lookup failed: %d", error); 382 return error; 383 } 384 D("found parent: %s", pna->name); 385 386 if (!nm_netmap_on(pna)) { 387 /* parent not in netmap mode */ 388 /* XXX we can wait for the parent to enter netmap mode, 389 * by intercepting its nm_register callback (2014-03-16) 390 */ 391 D("%s not in netmap mode", pna->name); 392 error = EINVAL; 393 goto put_out; 394 } 395 396 /* grab all the rings we need in the parent */ 397 mna->priv.np_na = pna; 398 error = netmap_interp_ringid(&mna->priv, nmr->nr_ringid, nmr->nr_flags); 399 if (error) { 400 D("ringid error"); 401 goto put_out; 402 } 403 if (nmr->nr_flags & NR_MONITOR_TX) { 404 for (i = mna->priv.np_txqfirst; i < mna->priv.np_txqlast; i++) { 405 struct netmap_kring *kring = &pna->tx_rings[i]; 406 if (kring->monitor) { 407 error = EBUSY; 408 D("ring busy"); 409 goto release_out; 410 } 411 kring->monitor = mna; 412 } 413 } 414 if (nmr->nr_flags & NR_MONITOR_RX) { 415 for (i = mna->priv.np_rxqfirst; i < mna->priv.np_rxqlast; i++) { 416 struct netmap_kring *kring = &pna->rx_rings[i]; 417 if (kring->monitor) { 418 error = EBUSY; 419 D("ring busy"); 420 goto release_out; 421 } 422 kring->monitor = mna; 423 } 424 } 425 426 snprintf(mna->up.name, sizeof(mna->up.name), "mon:%s", pna->name); 427 428 /* the monitor supports the host rings iff the parent does */ 429 mna->up.na_flags = (pna->na_flags & NAF_HOST_RINGS); 430 mna->up.nm_txsync = netmap_monitor_txsync; 431 mna->up.nm_rxsync = netmap_monitor_rxsync; 432 mna->up.nm_register = netmap_monitor_reg; 433 mna->up.nm_dtor = netmap_monitor_dtor; 434 mna->up.nm_krings_create = netmap_monitor_krings_create; 435 mna->up.nm_krings_delete = netmap_monitor_krings_delete; 436 mna->up.nm_mem = pna->nm_mem; 437 mna->up.na_lut = pna->na_lut; 438 mna->up.na_lut_objtotal = pna->na_lut_objtotal; 439 mna->up.na_lut_objsize = pna->na_lut_objsize; 440 441 mna->up.num_tx_rings = 1; // XXX we don't need it, but field can't be zero 442 /* we set the number of our rx_rings to be max(num_rx_rings, num_rx_rings) 443 * in the parent 444 */ 445 mna->up.num_rx_rings = pna->num_rx_rings; 446 if (pna->num_tx_rings > pna->num_rx_rings) 447 mna->up.num_rx_rings = pna->num_tx_rings; 448 /* by default, the number of slots is the same as in 449 * the parent rings, but the user may ask for a different 450 * number 451 */ 452 mna->up.num_tx_desc = nmr->nr_tx_slots; 453 nm_bound_var(&mna->up.num_tx_desc, pna->num_tx_desc, 454 1, NM_MONITOR_MAXSLOTS, NULL); 455 mna->up.num_rx_desc = nmr->nr_rx_slots; 456 nm_bound_var(&mna->up.num_rx_desc, pna->num_rx_desc, 457 1, NM_MONITOR_MAXSLOTS, NULL); 458 error = netmap_attach_common(&mna->up); 459 if (error) { 460 D("attach_common error"); 461 goto release_out; 462 } 463 464 /* remember the traffic directions we have to monitor */ 465 mna->flags = (nmr->nr_flags & (NR_MONITOR_TX | NR_MONITOR_RX)); 466 467 *na = &mna->up; 468 netmap_adapter_get(*na); 469 470 /* write the configuration back */ 471 nmr->nr_tx_rings = mna->up.num_tx_rings; 472 nmr->nr_rx_rings = mna->up.num_rx_rings; 473 nmr->nr_tx_slots = mna->up.num_tx_desc; 474 nmr->nr_rx_slots = mna->up.num_rx_desc; 475 476 /* keep the reference to the parent */ 477 D("monitor ok"); 478 479 return 0; 480 481 release_out: 482 D("monitor error"); 483 for (i = mna->priv.np_txqfirst; i < mna->priv.np_txqlast; i++) { 484 if (pna->tx_rings[i].monitor == mna) 485 pna->tx_rings[i].monitor = NULL; 486 } 487 for (i = mna->priv.np_rxqfirst; i < mna->priv.np_rxqlast; i++) { 488 if (pna->rx_rings[i].monitor == mna) 489 pna->rx_rings[i].monitor = NULL; 490 } 491 put_out: 492 netmap_adapter_put(pna); 493 free(mna, M_DEVBUF); 494 return error; 495 } 496 497 498 #endif /* WITH_MONITOR */ 499