1 /* 2 * This file and its contents are supplied under the terms of the 3 * Common Development and Distribution License ("CDDL"), version 1.0. 4 * You may only use this file in accordance with the terms of version 5 * 1.0 of the CDDL. 6 * 7 * A full copy of the text of the CDDL should have accompanied this 8 * source. A copy of the CDDL is also available via the Internet at 9 * http://www.illumos.org/license/CDDL. 10 */ 11 12 /* 13 * Copyright 2016 Joyent, Inc. 14 */ 15 16 /* 17 * xHCI DMA Management Routines 18 * 19 * Please see the big theory statement in xhci.c for more information. 20 */ 21 22 #include <sys/usb/hcd/xhci/xhci.h> 23 24 int 25 xhci_check_dma_handle(xhci_t *xhcip, xhci_dma_buffer_t *xdb) 26 { 27 ddi_fm_error_t de; 28 29 if (!DDI_FM_DMA_ERR_CAP(xhcip->xhci_fm_caps)) 30 return (0); 31 32 ddi_fm_dma_err_get(xdb->xdb_dma_handle, &de, DDI_FME_VERSION); 33 return (de.fme_status); 34 } 35 36 void 37 xhci_dma_acc_attr(xhci_t *xhcip, ddi_device_acc_attr_t *accp) 38 { 39 accp->devacc_attr_version = DDI_DEVICE_ATTR_V0; 40 accp->devacc_attr_endian_flags = DDI_NEVERSWAP_ACC; 41 accp->devacc_attr_dataorder = DDI_STRICTORDER_ACC; 42 43 if (DDI_FM_DMA_ERR_CAP(xhcip->xhci_fm_caps)) { 44 accp->devacc_attr_access = DDI_FLAGERR_ACC; 45 } else { 46 accp->devacc_attr_access = DDI_DEFAULT_ACC; 47 } 48 } 49 50 /* 51 * These are DMA attributes that we assign when making a transfer. The SGL is 52 * variable and based on the caller, which varies based on the type of transfer 53 * we're doing. 54 */ 55 void 56 xhci_dma_transfer_attr(xhci_t *xhcip, ddi_dma_attr_t *attrp, uint_t sgl) 57 { 58 VERIFY3U(sgl, >, 0); 59 VERIFY3U(sgl, <=, XHCI_TRANSFER_DMA_SGL); 60 attrp->dma_attr_version = DMA_ATTR_V0; 61 62 /* 63 * The range of data that we can use is based on what hardware supports. 
64 */ 65 attrp->dma_attr_addr_lo = 0x0; 66 if (xhcip->xhci_caps.xcap_flags & XCAP_AC64) { 67 attrp->dma_attr_addr_hi = UINT64_MAX; 68 } else { 69 attrp->dma_attr_addr_hi = UINT32_MAX; 70 } 71 72 /* 73 * The count max indicates the total amount that will fit into one 74 * cookie, which is one TRB in our world. In other words 64k. 75 */ 76 attrp->dma_attr_count_max = XHCI_TRB_MAX_TRANSFER; 77 78 /* 79 * The alignment and segment are related. The alignment describes the 80 * alignment of the PA. The segment describes a boundary that the DMA 81 * allocation cannot cross. In other words, for a given chunk of memory 82 * it cannot cross a 64-byte boundary. However, the physical address 83 * only needs to be aligned to align bytes. 84 */ 85 attrp->dma_attr_align = XHCI_DMA_ALIGN; 86 attrp->dma_attr_seg = XHCI_TRB_MAX_TRANSFER - 1; 87 88 89 attrp->dma_attr_burstsizes = 0xfff; 90 91 /* 92 * This is the maximum we can send. Technically this is limited by the 93 * descriptors and not by hardware, hence why we use a large value for 94 * the max that'll be less than any memory allocation we ever throw at 95 * it. 96 */ 97 attrp->dma_attr_minxfer = 0x1; 98 attrp->dma_attr_maxxfer = UINT32_MAX; 99 100 /* 101 * This is determined by the caller. 102 */ 103 attrp->dma_attr_sgllen = sgl; 104 105 /* 106 * The granularity describes the addressing granularity. e.g. can things 107 * ask for chunks in units of this number of bytes. For PCI this should 108 * always be one. 109 */ 110 attrp->dma_attr_granular = 1; 111 112 if (DDI_FM_DMA_ERR_CAP(xhcip->xhci_fm_caps)) { 113 attrp->dma_attr_flags = DDI_DMA_FLAGERR; 114 } else { 115 attrp->dma_attr_flags = 0; 116 } 117 } 118 119 /* 120 * This routine tries to create DMA attributes for normal allocations for data 121 * structures and the like. By default we use the same values as the transfer 122 * attributes, but have explicit comments about how they're different. 
123 */ 124 void 125 xhci_dma_dma_attr(xhci_t *xhcip, ddi_dma_attr_t *attrp) 126 { 127 /* 128 * Note, we always use a single SGL for these DMA allocations as these 129 * are used for small data structures. 130 */ 131 xhci_dma_transfer_attr(xhcip, attrp, XHCI_DEF_DMA_SGL); 132 133 /* 134 * The maximum size of any of these structures is 4k as opposed to the 135 * 64K max described above. Similarly the boundary requirement is 136 * reduced to 4k. 137 */ 138 attrp->dma_attr_count_max = xhcip->xhci_caps.xcap_pagesize; 139 attrp->dma_attr_maxxfer = xhcip->xhci_caps.xcap_pagesize; 140 attrp->dma_attr_seg = xhcip->xhci_caps.xcap_pagesize - 1; 141 } 142 143 /* 144 * Fill in attributes for a scratchpad entry. The scratchpad entries are 145 * somewhat different in so far as they are closest to a normal DMA attribute, 146 * except they have stricter alignments, needing to be page sized. 147 * 148 * In addition, because we never access this memory ourselves, we can just mark 149 * it all as relaxed ordering. 150 */ 151 void 152 xhci_dma_scratchpad_attr(xhci_t *xhcip, ddi_dma_attr_t *attrp) 153 { 154 xhci_dma_dma_attr(xhcip, attrp); 155 attrp->dma_attr_align = xhcip->xhci_caps.xcap_pagesize; 156 attrp->dma_attr_flags |= DDI_DMA_RELAXED_ORDERING; 157 } 158 159 /* 160 * This should be used for the simple case of a single SGL entry, which is the 161 * vast majority of the non-transfer allocations. 
 */
uint64_t
xhci_dma_pa(xhci_dma_buffer_t *xdb)
{
	/* Callers must only use this on single-cookie allocations. */
	ASSERT(xdb->xdb_ncookies == 1);
	return (xdb->xdb_cookies[0].dmac_laddress);
}

/*
 * Tear down an xHCI DMA buffer: unbind the handle (if bound), free the
 * backing memory, and release the DMA handle. Safe to call on a buffer that
 * was only partially set up, which is why xhci_dma_alloc() uses this on its
 * error paths. The order here matters: unbind before mem free, mem free
 * before handle free.
 */
void
xhci_dma_free(xhci_dma_buffer_t *xdb)
{
	if (xdb->xdb_ncookies != 0) {
		VERIFY(xdb->xdb_dma_handle != NULL);
		(void) ddi_dma_unbind_handle(xdb->xdb_dma_handle);
		xdb->xdb_ncookies = 0;
		/* Clear the full cookie array, not just the used entries. */
		bzero(xdb->xdb_cookies, sizeof (ddi_dma_cookie_t) *
		    XHCI_TRANSFER_DMA_SGL);
		xdb->xdb_len = 0;
	}

	if (xdb->xdb_acc_handle != NULL) {
		ddi_dma_mem_free(&xdb->xdb_acc_handle);
		xdb->xdb_acc_handle = NULL;
		xdb->xdb_va = NULL;
	}

	if (xdb->xdb_dma_handle != NULL) {
		ddi_dma_free_handle(&xdb->xdb_dma_handle);
		xdb->xdb_dma_handle = NULL;
	}

	/* The buffer should now be entirely reset. */
	ASSERT(xdb->xdb_va == NULL);
	ASSERT(xdb->xdb_ncookies == 0);
	ASSERT(xdb->xdb_cookies[0].dmac_laddress == 0);
	ASSERT(xdb->xdb_len == 0);
}

/*
 * Allocate and bind a DMA buffer of 'size' bytes described by the given DMA
 * and access attributes. When 'zero' is set the memory is zeroed before
 * binding; when 'wait' is set the DDI allocation routines may sleep.
 * Returns B_TRUE on success; on failure everything is unwound via
 * xhci_dma_free() and B_FALSE is returned.
 */
boolean_t
xhci_dma_alloc(xhci_t *xhcip, xhci_dma_buffer_t *xdb,
    ddi_dma_attr_t *attrp, ddi_device_acc_attr_t *accp, boolean_t zero,
    size_t size, boolean_t wait)
{
	int ret, i;
	uint_t flags = DDI_DMA_CONSISTENT;
	size_t len;
	ddi_dma_cookie_t cookie;
	uint_t ncookies;
	int (*memcb)(caddr_t);

	/* Translate the caller's wait preference into a DDI callback. */
	if (wait == B_TRUE) {
		memcb = DDI_DMA_SLEEP;
	} else {
		memcb = DDI_DMA_DONTWAIT;
	}

	ret = ddi_dma_alloc_handle(xhcip->xhci_dip, attrp, memcb, NULL,
	    &xdb->xdb_dma_handle);
	if (ret != 0) {
		xhci_log(xhcip, "!failed to allocate DMA handle: %d", ret);
		/* Keep the handle NULL so xhci_dma_free() stays safe. */
		xdb->xdb_dma_handle = NULL;
		return (B_FALSE);
	}

	ret = ddi_dma_mem_alloc(xdb->xdb_dma_handle, size, accp, flags, memcb,
	    NULL, &xdb->xdb_va, &len, &xdb->xdb_acc_handle);
	if (ret != DDI_SUCCESS) {
		xhci_log(xhcip, "!failed to allocate DMA memory: %d", ret);
		/*
		 * Reset the outputs before unwinding so xhci_dma_free() only
		 * releases the handle we actually hold.
		 */
		xdb->xdb_va = NULL;
		xdb->xdb_acc_handle = NULL;
		xhci_dma_free(xdb);
		return (B_FALSE);
	}

	if (zero == B_TRUE)
		bzero(xdb->xdb_va, len);

	ret = ddi_dma_addr_bind_handle(xdb->xdb_dma_handle, NULL,
	    xdb->xdb_va, len, DDI_DMA_RDWR | flags, memcb, NULL, &cookie,
	    &ncookies);
	if (ret != 0) {
		xhci_log(xhcip, "!failed to bind DMA memory: %d", ret);
		xhci_dma_free(xdb);
		return (B_FALSE);
	}

	/*
	 * Note we explicitly store the logical length of this allocation. The
	 * physical length is available via the cookies.
	 */
	xdb->xdb_len = size;
	xdb->xdb_ncookies = ncookies;
	/* The first cookie comes from the bind; fetch the rest explicitly. */
	xdb->xdb_cookies[0] = cookie;
	for (i = 1; i < ncookies; i++) {
		ddi_dma_nextcookie(xdb->xdb_dma_handle, &xdb->xdb_cookies[i]);
	}

	return (B_TRUE);
}

/*
 * Free a transfer and everything hanging off of it: the DMA buffer, the
 * isochronous packet descriptors (if any), the TRB array, and finally the
 * transfer itself. NULL is tolerated.
 */
void
xhci_transfer_free(xhci_t *xhcip, xhci_transfer_t *xt)
{
	if (xt == NULL)
		return;

	VERIFY(xhcip != NULL);
	xhci_dma_free(&xt->xt_buffer);
	if (xt->xt_isoc != NULL) {
		ASSERT(xt->xt_ntrbs > 0);
		kmem_free(xt->xt_isoc, sizeof (usb_isoc_pkt_descr_t) *
		    xt->xt_ntrbs);
		xt->xt_isoc = NULL;
	}
	if (xt->xt_trbs != NULL) {
		ASSERT(xt->xt_ntrbs > 0);
		kmem_free(xt->xt_trbs, sizeof (xhci_trb_t) * xt->xt_ntrbs);
		xt->xt_trbs = NULL;
	}
	kmem_free(xt, sizeof (xhci_transfer_t));
}

/*
 * Allocate a transfer for endpoint 'xep' with a DMA data buffer of 'size'
 * bytes (zero means no data stage) and at least 'trbs' TRBs. usb_flags
 * controls whether allocations may sleep (USB_FLAGS_SLEEP). Returns NULL on
 * allocation failure; on success the caller owns the transfer and must
 * release it with xhci_transfer_free().
 */
xhci_transfer_t *
xhci_transfer_alloc(xhci_t *xhcip, xhci_endpoint_t *xep, size_t size, int trbs,
    int usb_flags)
{
	int kmflags;
	boolean_t dmawait;
	xhci_transfer_t *xt;
	ddi_device_acc_attr_t acc;
	ddi_dma_attr_t attr;

	/* Map the USBA wait semantics onto kmem and DMA wait flags. */
	if (usb_flags & USB_FLAGS_SLEEP) {
		kmflags = KM_SLEEP;
		dmawait = B_TRUE;
	} else {
		kmflags = KM_NOSLEEP;
		dmawait = B_FALSE;
	}

	xt = kmem_zalloc(sizeof (xhci_transfer_t), kmflags);
	if (xt == NULL)
		return (NULL);

	if (size != 0) {
		int sgl = XHCI_DEF_DMA_SGL;

		/*
		 * For BULK transfers, we always increase the number of SGL
		 * entries that we support to make things easier for the
		 * kernel. However, for control transfers, we currently opt to
		 * keep things a bit simpler and use our default of one SGL.
		 * There's no good technical reason for this, rather it just
		 * keeps things a bit easier.
		 *
		 * To simplify things, we don't use additional SGL entries for
		 * ISOC transfers. While this isn't the best, it isn't too far
		 * off from what ehci and co. have done before. If this
		 * becomes a technical issue, it's certainly possible to
		 * increase the SGL entry count.
		 */
		if (xep->xep_type == USB_EP_ATTR_BULK)
			sgl = XHCI_TRANSFER_DMA_SGL;

		xhci_dma_acc_attr(xhcip, &acc);
		xhci_dma_transfer_attr(xhcip, &attr, sgl);
		if (xhci_dma_alloc(xhcip, &xt->xt_buffer, &attr, &acc, B_FALSE,
		    size, dmawait) == B_FALSE) {
			kmem_free(xt, sizeof (xhci_transfer_t));
			return (NULL);
		}

		/*
		 * ISOC transfers are a bit special and don't need additional
		 * TRBs for data. Other transfer types need one TRB per DMA
		 * cookie.
		 */
		if (xep->xep_type != USB_EP_ATTR_ISOCH)
			trbs += xt->xt_buffer.xdb_ncookies;
	}

	xt->xt_trbs = kmem_zalloc(sizeof (xhci_trb_t) * trbs, kmflags);
	if (xt->xt_trbs == NULL) {
		xhci_dma_free(&xt->xt_buffer);
		kmem_free(xt, sizeof (xhci_transfer_t));
		return (NULL);
	}

	/*
	 * For ISOCH transfers, we need to also allocate the results data.
	 */
	if (xep->xep_type == USB_EP_ATTR_ISOCH) {
		xt->xt_isoc = kmem_zalloc(sizeof (usb_isoc_pkt_descr_t) * trbs,
		    kmflags);
		if (xt->xt_isoc == NULL) {
			kmem_free(xt->xt_trbs, sizeof (xhci_trb_t) * trbs);
			xhci_dma_free(&xt->xt_buffer);
			kmem_free(xt, sizeof (xhci_transfer_t));
			return (NULL);
		}
	}

	xt->xt_ntrbs = trbs;
	xt->xt_cr = USB_CR_OK;

	return (xt);
}

/*
 * Abstract the notion of copying out to handle the case of multiple DMA
 * cookies. If tobuf is true, we are copying to the kernel provided buffer,
 * otherwise we're copying into the DMA memory.
 */
void
xhci_transfer_copy(xhci_transfer_t *xt, void *buf, size_t len,
    boolean_t tobuf)
{
	/*
	 * The kernel virtual mapping of the DMA buffer is contiguous even
	 * when the physical allocation spans multiple cookies, so a single
	 * bcopy covers every case.
	 */
	void *dmabuf = xt->xt_buffer.xdb_va;
	if (tobuf == B_TRUE)
		bcopy(dmabuf, buf, len);
	else
		bcopy(buf, dmabuf, len);
}

/*
 * Synchronize the transfer's DMA buffer for the given direction ('type' is a
 * DDI_DMA_SYNC_* value) and then check the handle's FM status. Returns the
 * result of xhci_check_dma_handle().
 */
int
xhci_transfer_sync(xhci_t *xhcip, xhci_transfer_t *xt, uint_t type)
{
	XHCI_DMA_SYNC(xt->xt_buffer, type);
	return (xhci_check_dma_handle(xhcip, &xt->xt_buffer));
}

/*
 * We're required to try and inform the xHCI controller about the number of data
 * packets that are required. The algorithm to use is described in xHCI 1.1 /
 * 4.11.2.4. While it might be tempting to just try and calculate the number of
 * packets based on simple rounding of the remaining number of bytes, that
 * misses a critical problem -- DMA boundaries may cause us to need additional
 * packets that are missed initially. Consider a transfer made up of four
 * different DMA buffers sized in bytes: 4096, 4096, 256, 256, with a 512 byte
 * packet size.
 *
 * Remain	4608	512	256	0
 * Bytes	4096	4096	256	256
 * Naive TD	9	1	1	0
 * Act TD	10	2	1	0
 *
 * This means that the only safe way forward here is to work backwards and see
 * how many we need to work up to this point.
 *
 * Given the TRB (cookie) index 'off' and the max packet size 'mps', this
 * returns the number of packets still required by the TRBs after 'off',
 * capped at 31 (the largest value the TD Size field can hold).
 */
static int
xhci_transfer_get_tdsize(xhci_transfer_t *xt, uint_t off, uint_t mps)
{
	int i;
	uint_t npkt = 0;

	/*
	 * There are always zero packets for the last TRB.
	 */
	ASSERT(xt->xt_buffer.xdb_ncookies > 0);
	/* Walk backwards over the cookies strictly after index 'off'. */
	for (i = xt->xt_buffer.xdb_ncookies - 1; i > off; i--) {
		size_t len;

		/*
		 * The maximum value we can return is 31 packets. So, in that
		 * case we short-circuit and return.
		 */
		if (npkt >= 31)
			return (31);

		/* Each cookie's contribution is rounded up to whole packets. */
		len = roundup(xt->xt_buffer.xdb_cookies[i].dmac_size, mps);
		npkt += len / mps;
	}

	return (npkt);
}

/*
 * Fill in the data-stage TRBs for a transfer, one per DMA cookie, starting at
 * TRB index 'off' in xt->xt_trbs. 'in' indicates a device-to-host transfer.
 * The first TRB of a control transfer becomes a DATA TRB; all others are
 * NORMAL TRBs. All TRB fields are written in little-endian byte order as the
 * controller expects.
 */
void
xhci_transfer_trb_fill_data(xhci_endpoint_t *xep, xhci_transfer_t *xt, int off,
    boolean_t in)
{
	uint_t mps, tdsize, flags;
	int i;

	VERIFY(xt->xt_buffer.xdb_ncookies > 0);
	VERIFY(xep->xep_pipe != NULL);
	/* Make sure every cookie has a TRB slot available. */
	VERIFY(off + xt->xt_buffer.xdb_ncookies <= xt->xt_ntrbs);
	mps = xep->xep_pipe->p_ep.wMaxPacketSize;

	for (i = 0; i < xt->xt_buffer.xdb_ncookies; i++) {
		uint64_t pa, dmasz;

		pa = xt->xt_buffer.xdb_cookies[i].dmac_laddress;
		dmasz = xt->xt_buffer.xdb_cookies[i].dmac_size;

		/* Remaining-packet count for this TRB per xHCI 4.11.2.4. */
		tdsize = xhci_transfer_get_tdsize(xt, i, mps);

		flags = XHCI_TRB_TYPE_NORMAL;
		if (i == 0 && xep->xep_type == USB_EP_ATTR_CONTROL) {
			flags = XHCI_TRB_TYPE_DATA;
			if (in == B_TRUE)
				flags |= XHCI_TRB_DIR_IN;
		}

		/*
		 * When reading data in (from the device), we may get shorter
		 * transfers than the buffer allowed for. To make sure we get
		 * notified about that and handle that, we need to set the ISP
		 * flag.
		 */
		if (in == B_TRUE) {
			flags |= XHCI_TRB_ISP;
			xt->xt_data_tohost = B_TRUE;
		}

		/*
		 * When we have more than one cookie, we are technically
		 * chaining together things according to the controllers view,
		 * hence why we need to set the chain flag on all but the last
		 * TRB.
		 */
		if (xt->xt_buffer.xdb_ncookies > 1 &&
		    i != (xt->xt_buffer.xdb_ncookies - 1)) {
			flags |= XHCI_TRB_CHAIN;
		}

		/*
		 * If we have a non-control transfer, then we need to make sure
		 * that we set ourselves up to be interrupted, which we set for
		 * the last entry.
		 */
		if (i + 1 == xt->xt_buffer.xdb_ncookies &&
		    xep->xep_type != USB_EP_ATTR_CONTROL) {
			flags |= XHCI_TRB_IOC;
		}

		xt->xt_trbs[off + i].trb_addr = LE_64(pa);
		xt->xt_trbs[off + i].trb_status = LE_32(XHCI_TRB_LEN(dmasz) |
		    XHCI_TRB_TDREM(tdsize) | XHCI_TRB_INTR(0));
		xt->xt_trbs[off + i].trb_flags = LE_32(flags);
	}
}

/*
 * These are utility functions for isochronus transfers to help calculate the
 * transfer burst count (TBC) and transfer last burst packet count (TLPBC)
 * entries for an isochronus entry. See xHCI 1.1 / 4.11.2.3 for how to calculate
 * them.
 *
 * 'trb_len' is the byte length of the TRB's data; *tbc and *tlbpc receive the
 * computed field values.
 */
void
xhci_transfer_calculate_isoc(xhci_device_t *xd, xhci_endpoint_t *xep,
    uint_t trb_len, uint_t *tbc, uint_t *tlbpc)
{
	uint_t mps, tdpc, burst;

	/*
	 * Even if we're asked to send no data, that actually requires the
	 * equivalent of sending one byte of data.
	 */
	if (trb_len == 0)
		trb_len = 1;

	/* Pull MPS and max burst from the endpoint's output context. */
	mps = XHCI_EPCTX_GET_MPS(xd->xd_endout[xep->xep_num]->xec_info2);
	burst = XHCI_EPCTX_GET_MAXB(xd->xd_endout[xep->xep_num]->xec_info2);

	/*
	 * This is supposed to correspond to the Transfer Descriptor Packet
	 * Count from xHCI 1.1 / 4.14.1.
	 */
	tdpc = howmany(trb_len, mps);
	*tbc = howmany(tdpc, burst + 1) - 1;

	/* The last burst is either full (burst) or whatever remains, less 1. */
	if ((tdpc % (burst + 1)) == 0)
		*tlbpc = burst;
	else
		*tlbpc = (tdpc % (burst + 1)) - 1;
}