/*-
 * Copyright (C) 2012 Intel Corporation
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/conf.h>
#include <sys/ioccom.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/rman.h>
#include <sys/sysctl.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <machine/bus.h>
#include <machine/resource.h>
#include <machine/stdarg.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>

#include "ioat.h"
#include "ioat_hw.h"
#include "ioat_internal.h"
#include "ioat_test.h"

#ifndef time_after
#define	time_after(a,b)		((long)(b) - (long)(a) < 0)
#endif

MALLOC_DEFINE(M_IOAT_TEST, "ioat_test", "ioat test allocations");

#define	IOAT_MAX_BUFS	256

struct test_transaction {
	void			*buf[IOAT_MAX_BUFS];
	uint32_t		length;
	uint32_t		depth;
	struct ioat_test	*test;
	TAILQ_ENTRY(test_transaction)	entry;
};

#define	IT_LOCK()	mtx_lock(&ioat_test_lk)
#define	IT_UNLOCK()	mtx_unlock(&ioat_test_lk)
#define	IT_ASSERT()	mtx_assert(&ioat_test_lk, MA_OWNED)
static struct mtx ioat_test_lk;
MTX_SYSINIT(ioat_test_lk, &ioat_test_lk, "test coordination mtx", MTX_DEF);

static int g_thread_index = 1;
static struct cdev *g_ioat_cdev = NULL;

#define	ioat_test_log(v, ...)	_ioat_test_log((v), "ioat_test: " __VA_ARGS__)
static inline void _ioat_test_log(int verbosity, const char *fmt, ...);

static void
ioat_test_transaction_destroy(struct test_transaction *tx)
{
	int i;

	for (i = 0; i < IOAT_MAX_BUFS; i++) {
		if (tx->buf[i] != NULL) {
			contigfree(tx->buf[i], tx->length, M_IOAT_TEST);
			tx->buf[i] = NULL;
		}
	}

	free(tx, M_IOAT_TEST);
}

static struct test_transaction *
ioat_test_transaction_create(unsigned num_buffers, uint32_t buffer_size)
{
	struct test_transaction *tx;
	unsigned i;

	tx = malloc(sizeof(*tx), M_IOAT_TEST, M_NOWAIT | M_ZERO);
	if (tx == NULL)
		return (NULL);

	tx->length = buffer_size;

	for (i = 0; i < num_buffers; i++) {
		tx->buf[i] = contigmalloc(buffer_size, M_IOAT_TEST, M_NOWAIT,
		    0, BUS_SPACE_MAXADDR, PAGE_SIZE, 0);

		if (tx->buf[i] == NULL) {
			ioat_test_transaction_destroy(tx);
			return (NULL);
		}
	}
	return (tx);
}

static void
dump_hex(void *p, size_t chunks)
{
	size_t i, j;

	for (i = 0; i < chunks; i++) {
		for (j = 0; j < 8; j++)
			printf("%08x ", ((uint32_t *)p)[i * 8 + j]);
		printf("\n");
	}
}

static bool
ioat_compare_ok(struct test_transaction *tx)
{
	struct ioat_test *test;
	char *dst, *src;
	uint32_t i, j;

	test = tx->test;

	for (i = 0; i < tx->depth; i++) {
		dst = tx->buf[2 * i + 1];
		src = tx->buf[2 * i];

		if (test->testkind == IOAT_TEST_FILL) {
			for (j = 0; j < tx->length; j += sizeof(uint64_t)) {
				if (memcmp(src, &dst[j],
				    MIN(sizeof(uint64_t), tx->length - j))
				    != 0)
					return (false);
			}
		} else if (test->testkind == IOAT_TEST_DMA) {
			if (memcmp(src, dst, tx->length) != 0)
				return (false);
		} else if (test->testkind == IOAT_TEST_RAW_DMA) {
			if (test->raw_write)
				dst = test->raw_vtarget;
			dump_hex(dst, tx->length / 32);
		}
	}
	return (true);
}

static void
ioat_dma_test_callback(void *arg, int error)
{
	struct test_transaction *tx;
	struct ioat_test *test;

	if (error != 0)
		ioat_test_log(0, "%s: Got error: %d\n", __func__, error);

	tx = arg;
	test = tx->test;

	if (test->verify && !ioat_compare_ok(tx)) {
		ioat_test_log(0, "miscompare found\n");
		atomic_add_32(&test->status[IOAT_TEST_MISCOMPARE], tx->depth);
	} else if (!test->too_late)
		atomic_add_32(&test->status[IOAT_TEST_OK], tx->depth);

	IT_LOCK();
	TAILQ_REMOVE(&test->pend_q, tx, entry);
	TAILQ_INSERT_TAIL(&test->free_q, tx, entry);
	wakeup(&test->free_q);
	IT_UNLOCK();
}

static int
ioat_test_prealloc_memory(struct ioat_test *test, int index)
{
	uint32_t i, j, k;
	struct test_transaction *tx;

	for (i = 0; i < test->transactions; i++) {
		tx = ioat_test_transaction_create(test->chain_depth * 2,
		    test->buffer_size);
		if (tx == NULL) {
			ioat_test_log(0, "tx == NULL - memory exhausted\n");
			test->status[IOAT_TEST_NO_MEMORY]++;
			return (ENOMEM);
		}

		TAILQ_INSERT_HEAD(&test->free_q, tx, entry);

		tx->test = test;
		tx->depth = test->chain_depth;

		/* fill in source buffers */
		for (j = 0; j < (tx->length / sizeof(uint32_t)); j++) {
			uint32_t val = j + (index << 28);

			for (k = 0; k < test->chain_depth; k++) {
				((uint32_t *)tx->buf[2*k])[j] = ~val;
				((uint32_t *)tx->buf[2*k+1])[j] = val;
			}
		}
	}
	return (0);
}

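/*
 * Release every transaction still sitting on the test's free and pending
 * queues once a run has finished or failed to start.
 */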
static void
ioat_test_release_memory(struct ioat_test *test)
{
	struct test_transaction *tx, *s;

	TAILQ_FOREACH_SAFE(tx, &test->free_q, entry, s)
		ioat_test_transaction_destroy(tx);
	TAILQ_INIT(&test->free_q);

	TAILQ_FOREACH_SAFE(tx, &test->pend_q, entry, s)
		ioat_test_transaction_destroy(tx);
	TAILQ_INIT(&test->pend_q);
}

/*
 * Grab one transaction off the free queue (sleeping until one is available)
 * and queue its chain_depth DMA operations on the engine.  Only the final
 * operation requests an interrupt, so ioat_dma_test_callback() runs once per
 * transaction.
 */
static void
ioat_test_submit_1_tx(struct ioat_test *test, bus_dmaengine_t dma)
{
	struct test_transaction *tx;
	struct bus_dmadesc *desc;
	bus_dmaengine_callback_t cb;
	bus_addr_t src, dest;
	uint64_t fillpattern;
	uint32_t i, flags;

	desc = NULL;

	IT_LOCK();
	while (TAILQ_EMPTY(&test->free_q))
		msleep(&test->free_q, &ioat_test_lk, 0, "test_submit", 0);

	tx = TAILQ_FIRST(&test->free_q);
	TAILQ_REMOVE(&test->free_q, tx, entry);
	TAILQ_INSERT_HEAD(&test->pend_q, tx, entry);
	IT_UNLOCK();

	ioat_acquire(dma);
	for (i = 0; i < tx->depth; i++) {
		src = vtophys((vm_offset_t)tx->buf[2*i]);
		dest = vtophys((vm_offset_t)tx->buf[2*i+1]);

		if (test->testkind == IOAT_TEST_RAW_DMA) {
			if (test->raw_write)
				dest = test->raw_target;
			else
				src = test->raw_target;
		}

		if (i == tx->depth - 1) {
			cb = ioat_dma_test_callback;
			flags = DMA_INT_EN;
		} else {
			cb = NULL;
			flags = 0;
		}

		if (test->testkind == IOAT_TEST_DMA ||
		    test->testkind == IOAT_TEST_RAW_DMA)
			desc = ioat_copy(dma, dest, src, tx->length, cb, tx,
			    flags);
		else if (test->testkind == IOAT_TEST_FILL) {
			fillpattern = *(uint64_t *)tx->buf[2*i];
			desc = ioat_blockfill(dma, dest, fillpattern,
			    tx->length, cb, tx, flags);
		}
		if (desc == NULL)
			break;
	}
	ioat_release(dma);

	/*
	 * We couldn't issue an I/O -- either the device is being detached or
	 * the HW was reset.  Essentially spin until the device comes back up
	 * or our timer expires.
	 */
	if (desc == NULL && tx->depth > 0) {
		atomic_add_32(&test->status[IOAT_TEST_NO_DMA_ENGINE], tx->depth);
		IT_LOCK();
		TAILQ_REMOVE(&test->pend_q, tx, entry);
		TAILQ_INSERT_HEAD(&test->free_q, tx, entry);
		IT_UNLOCK();
	}
}

static void
ioat_dma_test(void *arg)
{
	struct ioat_test *test;
	bus_dmaengine_t dmaengine;
	uint32_t loops;
	int index, rc, start, end;

	test = arg;
	memset(__DEVOLATILE(void *, test->status), 0, sizeof(test->status));

	if (test->buffer_size > 1024 * 1024) {
		ioat_test_log(0, "Buffer size too large >1MB\n");
		test->status[IOAT_TEST_NO_MEMORY]++;
		return;
	}

	if (test->chain_depth * 2 > IOAT_MAX_BUFS) {
		ioat_test_log(0, "Depth too large (> %u)\n",
		    (unsigned)IOAT_MAX_BUFS / 2);
		test->status[IOAT_TEST_NO_MEMORY]++;
		return;
	}

	if (btoc((uint64_t)test->buffer_size * test->chain_depth *
	    test->transactions) > (physmem / 4)) {
		ioat_test_log(0, "Sanity check failed -- test would "
		    "use more than 1/4 of phys mem.\n");
		test->status[IOAT_TEST_NO_MEMORY]++;
		return;
	}

	if ((uint64_t)test->transactions * test->chain_depth > (1<<16)) {
		ioat_test_log(0, "Sanity check failed -- test would "
		    "use more than available IOAT ring space.\n");
		test->status[IOAT_TEST_NO_MEMORY]++;
		return;
	}

	if (test->testkind >= IOAT_NUM_TESTKINDS) {
		ioat_test_log(0, "Invalid kind %u\n",
		    (unsigned)test->testkind);
		test->status[IOAT_TEST_INVALID_INPUT]++;
		return;
	}

	dmaengine = ioat_get_dmaengine(test->channel_index);
	if (dmaengine == NULL) {
		ioat_test_log(0, "Couldn't acquire dmaengine\n");
		test->status[IOAT_TEST_NO_DMA_ENGINE]++;
		return;
	}

	if (test->testkind == IOAT_TEST_FILL &&
	    (to_ioat_softc(dmaengine)->capabilities & IOAT_DMACAP_BFILL) == 0)
	{
		ioat_test_log(0,
		    "Hardware doesn't support block fill, aborting test\n");
		test->status[IOAT_TEST_INVALID_INPUT]++;
		goto out;
	}

	if (test->testkind == IOAT_TEST_RAW_DMA) {
		if (test->raw_is_virtual) {
			test->raw_vtarget = (void *)test->raw_target;
			test->raw_target = vtophys(test->raw_vtarget);
		} else {
			test->raw_vtarget = pmap_mapdev(test->raw_target,
			    test->buffer_size);
		}
	}

	index = g_thread_index++;
	TAILQ_INIT(&test->free_q);
	TAILQ_INIT(&test->pend_q);

	if (test->duration == 0)
		ioat_test_log(1, "Thread %d: num_loops remaining: 0x%08x\n",
		    index, test->transactions);
	else
		ioat_test_log(1, "Thread %d: starting\n", index);

	rc = ioat_test_prealloc_memory(test, index);
	if (rc != 0) {
		ioat_test_log(0, "prealloc_memory: %d\n", rc);
		goto out;
	}
	wmb();

	test->too_late = false;
	start = ticks;
	end = start + (((sbintime_t)test->duration * hz) / 1000);

	/*
	 * Submit transactions until the requested count has been reached
	 * (duration == 0) or the requested duration in milliseconds has
	 * elapsed.
	 */
	for (loops = 0;; loops++) {
		if (test->duration == 0 && loops >= test->transactions)
			break;
		else if (test->duration != 0 && time_after(ticks, end)) {
			test->too_late = true;
			break;
		}

		ioat_test_submit_1_tx(test, dmaengine);
	}

	ioat_test_log(1, "Test Elapsed: %d ticks (overrun %d), %d sec.\n",
	    ticks - start, ticks - end, (ticks - start) / hz);

	IT_LOCK();
	while (!TAILQ_EMPTY(&test->pend_q))
		msleep(&test->free_q, &ioat_test_lk, 0, "ioattestcompl", hz);
	IT_UNLOCK();

	ioat_test_log(1, "Test Elapsed2: %d ticks (overrun %d), %d sec.\n",
	    ticks - start, ticks - end, (ticks - start) / hz);
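
	/* Every pending transaction has completed; free the test buffers. */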
	ioat_test_release_memory(test);
out:
	if (test->testkind == IOAT_TEST_RAW_DMA && !test->raw_is_virtual)
		pmap_unmapdev((vm_offset_t)test->raw_vtarget,
		    test->buffer_size);
	ioat_put_dmaengine(dmaengine);
}

static int
ioat_test_open(struct cdev *dev, int flags, int fmt, struct thread *td)
{

	return (0);
}

static int
ioat_test_close(struct cdev *dev, int flags, int fmt, struct thread *td)
{

	return (0);
}

static int
ioat_test_ioctl(struct cdev *dev, unsigned long cmd, caddr_t arg, int flag,
    struct thread *td)
{

	switch (cmd) {
	case IOAT_DMATEST:
		ioat_dma_test(arg);
		break;
	default:
		return (EINVAL);
	}
	return (0);
}

static struct cdevsw ioat_cdevsw = {
	.d_version =	D_VERSION,
	.d_flags =	0,
	.d_open =	ioat_test_open,
	.d_close =	ioat_test_close,
	.d_ioctl =	ioat_test_ioctl,
	.d_name =	"ioat_test",
};

static int
enable_ioat_test(bool enable)
{

	mtx_assert(&Giant, MA_OWNED);

	if (enable && g_ioat_cdev == NULL) {
		g_ioat_cdev = make_dev(&ioat_cdevsw, 0, UID_ROOT, GID_WHEEL,
		    0600, "ioat_test");
	} else if (!enable && g_ioat_cdev != NULL) {
		destroy_dev(g_ioat_cdev);
		g_ioat_cdev = NULL;
	}
	return (0);
}

static int
sysctl_enable_ioat_test(SYSCTL_HANDLER_ARGS)
{
	int error, enabled;

	enabled = (g_ioat_cdev != NULL);
	error = sysctl_handle_int(oidp, &enabled, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);

	enable_ioat_test(enabled);
	return (0);
}
SYSCTL_PROC(_hw_ioat, OID_AUTO, enable_ioat_test, CTLTYPE_INT | CTLFLAG_RW,
    0, 0, sysctl_enable_ioat_test, "I",
    "Non-zero: Enable the /dev/ioat_test device");

void
ioat_test_attach(void)
{
	char *val;

	val = kern_getenv("hw.ioat.enable_ioat_test");
	if (val != NULL && strcmp(val, "0") != 0) {
		mtx_lock(&Giant);
		enable_ioat_test(true);
		mtx_unlock(&Giant);
	}
	freeenv(val);
}

void
ioat_test_detach(void)
{

	mtx_lock(&Giant);
	enable_ioat_test(false);
	mtx_unlock(&Giant);
}

static inline void
_ioat_test_log(int verbosity, const char *fmt, ...)
{
	va_list argp;

	if (verbosity > g_ioat_debug_level)
		return;

	va_start(argp, fmt);
	vprintf(fmt, argp);
	va_end(argp);
}