/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2016 Joyent, Inc.
 */

/*
 * -----------------------------
 * xHCI Ring Management Routines
 * -----------------------------
 *
 * There are three major types of rings for xHCI, these are:
 *
 * 1) Command Rings
 * 2) Event Rings
 * 3) Transfer Rings
 *
 * Command and Transfer rings function in similar ways while the event rings are
 * different. The difference comes in who is the consumer and who is the
 * producer. In the case of command and transfer rings, the driver is the
 * producer. For the event ring the driver is the consumer.
 *
 * Each ring in xhci has a synthetic head and tail register. Each entry in a
 * ring has a bit that's often referred to as the 'Cycle bit'. The cycle bit is
 * toggled as a means of saying that a given entry needs to be consumed.
 *
 * When a ring is created, all of the data in it is initialized to zero and the
 * producer and consumer agree that when the cycle bit is toggled, the ownership
 * of the entry is transferred from the producer to the consumer. For example,
 * the command ring defaults to saying that a cycle bit of one is what indicates
 * the command is owned by the hardware. So as the driver (the producer) fills
 * in entries, the driver toggles the cycle bit from 0->1 as part of writing out
 * the TRB. When the command ring's doorbell is rung, the hardware (the
 * consumer) begins processing commands.
 * It will process them until one of two
 * things happens:
 *
 * 1) The hardware encounters an entry with the old cycle bit (0 in this case)
 *
 * 2) The hardware hits the last entry in the ring which is a special kind of
 * entry called a LINK TRB.
 *
 * A LINK TRB has two purposes:
 *
 * 1) Indicate where processing should be redirected. This can potentially be to
 * another memory segment; however, this driver always programs LINK TRBs to
 * point back to the start of the ring.
 *
 * 2) Indicate whether or not the cycle bit should be changed. We always
 * indicate that the cycle bit should be toggled when a LINK TRB is processed.
 *
 * In this same example, once the LINK TRB has been processed the meaning of the
 * cycle bit is inverted: whereas the driver (the producer) had been setting the
 * cycle to 1 to indicate that an entry is to be processed, the driver would now
 * set it to 0. Similarly, the hardware (the consumer) would be looking for a
 * 0 to determine whether or not it should process the entry.
 *
 * Currently, when the driver allocates rings, it always allocates a single page
 * for the ring. The entire page is dedicated to ring use, which is determined
 * based on the device's PAGESIZE register. The last entry in a given page is
 * always configured as a LINK TRB. As each entry in a ring is 16 bytes, this
 * gives us 255 usable descriptors on x86 and 511 on SPARC, as PAGESIZE is 4k
 * and 8k respectively.
 *
 * The driver is always the producer for all rings except for the event ring,
 * where it is the consumer.
 *
 * ----------------------
 * Head and Tail Pointers
 * ----------------------
 *
 * Now, while we have the cycle bits for the ring explained, we still need to
 * keep track of what we consider the head and tail pointers, what the xHCI
 * specification calls enqueue (head) and dequeue (tail) pointers.
 * Now, in all
 * the cases here, the actual tracking of the head pointer is basically done by
 * the cycle bit; however, we maintain an actual offset in the xhci_ring_t
 * structure. The tail is usually less synthetic; however, it's up to different
 * parties to maintain it.
 *
 * We handle the command and transfer rings the same way. The head pointer
 * indicates where we should insert the next TRB to transfer. The tail pointer
 * indicates the last thing that hardware has told us it has processed. If the
 * head and tail point to the same index, then we know the ring is empty.
 *
 * We increment the head pointer whenever we insert an entry. Note that we do
 * not tell hardware about this in any way, it's just maintained by the cycle
 * bit. Then, we keep track of what hardware has processed in our tail pointer,
 * incrementing it only when we have an interrupt that indicates that it's been
 * processed.
 *
 * One oddity here is that we only get notified of this via the event ring. So
 * when the event ring encounters this information, it needs to go back and
 * increment our command and transfer ring tails after processing events.
 *
 * For the event ring, we handle things differently. We still initialize
 * everything to zero; however, we start processing things and looking at cycle
 * bits only when we get an interrupt from hardware. With the event ring, we do
 * *not* maintain a head pointer (it's still in the structure, but unused). We
 * always start processing at the tail pointer and use the cycle bit to indicate
 * what we should process. Once we're done incrementing things, we go and notify
 * the hardware of how far we got with this process by updating the tail for the
 * event ring via a memory mapped register.
110 */ 111 112 #include <sys/usb/hcd/xhci/xhci.h> 113 114 void 115 xhci_ring_free(xhci_ring_t *xrp) 116 { 117 if (xrp->xr_trb != NULL) { 118 xhci_dma_free(&xrp->xr_dma); 119 xrp->xr_trb = NULL; 120 } 121 xrp->xr_ntrb = 0; 122 xrp->xr_head = 0; 123 xrp->xr_tail = 0; 124 xrp->xr_cycle = 0; 125 } 126 127 /* 128 * Initialize a ring that hasn't been used and set up its link pointer back to 129 * it. 130 */ 131 int 132 xhci_ring_reset(xhci_t *xhcip, xhci_ring_t *xrp) 133 { 134 xhci_trb_t *ltrb; 135 136 ASSERT(xrp->xr_trb != NULL); 137 138 bzero(xrp->xr_trb, sizeof (xhci_trb_t) * xrp->xr_ntrb); 139 xrp->xr_head = 0; 140 xrp->xr_tail = 0; 141 xrp->xr_cycle = 1; 142 143 /* 144 * Set up the link TRB back to ourselves. 145 */ 146 ltrb = &xrp->xr_trb[xrp->xr_ntrb - 1]; 147 ltrb->trb_addr = LE_64(xhci_dma_pa(&xrp->xr_dma)); 148 ltrb->trb_flags = LE_32(XHCI_TRB_TYPE_LINK | XHCI_TRB_LINKSEG); 149 150 XHCI_DMA_SYNC(xrp->xr_dma, DDI_DMA_SYNC_FORDEV); 151 if (xhci_check_dma_handle(xhcip, &xrp->xr_dma) != DDI_FM_OK) { 152 ddi_fm_service_impact(xhcip->xhci_dip, DDI_SERVICE_LOST); 153 return (EIO); 154 } 155 156 return (0); 157 } 158 159 int 160 xhci_ring_alloc(xhci_t *xhcip, xhci_ring_t *xrp) 161 { 162 ddi_dma_attr_t attr; 163 ddi_device_acc_attr_t acc; 164 165 /* 166 * We use a transfer attribute for the rings as they require 64-byte 167 * boundaries. 168 */ 169 xhci_dma_acc_attr(xhcip, &acc); 170 xhci_dma_transfer_attr(xhcip, &attr, XHCI_DEF_DMA_SGL); 171 bzero(xrp, sizeof (xhci_ring_t)); 172 if (xhci_dma_alloc(xhcip, &xrp->xr_dma, &attr, &acc, B_FALSE, 173 xhcip->xhci_caps.xcap_pagesize, B_FALSE) == B_FALSE) 174 return (ENOMEM); 175 xrp->xr_trb = (xhci_trb_t *)xrp->xr_dma.xdb_va; 176 xrp->xr_ntrb = xhcip->xhci_caps.xcap_pagesize / sizeof (xhci_trb_t); 177 return (0); 178 } 179 180 /* 181 * Note, caller should have already synced our DMA memory. This should not be 182 * used for the command ring, as its cycle is maintained by the cycling of the 183 * head. 
This function is only used for managing the event ring. 184 */ 185 xhci_trb_t * 186 xhci_ring_event_advance(xhci_ring_t *xrp) 187 { 188 xhci_trb_t *trb = &xrp->xr_trb[xrp->xr_tail]; 189 VERIFY(xrp->xr_tail < xrp->xr_ntrb); 190 191 if (xrp->xr_cycle != (LE_32(trb->trb_flags) & XHCI_TRB_CYCLE)) 192 return (NULL); 193 194 /* 195 * The event ring does not use a link TRB. It instead always uses 196 * information based on the table to wrap. That means that the last 197 * entry is in fact going to contain data, so we shouldn't wrap and 198 * toggle the cycle until after we've processed that, in other words the 199 * tail equals the total number of entries. 200 */ 201 xrp->xr_tail++; 202 if (xrp->xr_tail == xrp->xr_ntrb) { 203 xrp->xr_cycle ^= 1; 204 xrp->xr_tail = 0; 205 } 206 207 return (trb); 208 } 209 210 /* 211 * When processing the command ring, we're going to get a single event for each 212 * entry in it. As we've submitted things in order, we need to make sure that 213 * this address matches the DMA address that we'd expect of the current tail. 214 */ 215 boolean_t 216 xhci_ring_trb_tail_valid(xhci_ring_t *xrp, uint64_t dma) 217 { 218 uint64_t tail; 219 220 tail = xhci_dma_pa(&xrp->xr_dma) + xrp->xr_tail * sizeof (xhci_trb_t); 221 return (dma == tail); 222 } 223 224 /* 225 * A variant on the above that checks for a given message within a range of 226 * entries and returns the offset to it from the tail. 227 */ 228 int 229 xhci_ring_trb_valid_range(xhci_ring_t *xrp, uint64_t dma, uint_t range) 230 { 231 uint_t i; 232 uint_t tail = xrp->xr_tail; 233 uint64_t taddr; 234 235 VERIFY(range < xrp->xr_ntrb); 236 for (i = 0; i < range; i++) { 237 taddr = xhci_dma_pa(&xrp->xr_dma) + tail * sizeof (xhci_trb_t); 238 if (taddr == dma) 239 return (i); 240 241 tail++; 242 if (tail == xrp->xr_ntrb - 1) 243 tail = 0; 244 } 245 246 return (-1); 247 } 248 249 /* 250 * Determine whether or not we have enough space for this request in a given 251 * ring for the given request. 
Note, we have to be a bit careful here and ensure 252 * that we properly handle cases where we cross the link TRB and that we don't 253 * count it. 254 * 255 * To determine if we have enough space for a given number of trbs, we need to 256 * logically advance the head pointer and make sure that we don't cross the tail 257 * pointer. In other words, if after advancement, head == tail, we're in 258 * trouble and don't have enough space. 259 */ 260 boolean_t 261 xhci_ring_trb_space(xhci_ring_t *xrp, uint_t ntrb) 262 { 263 uint_t i; 264 uint_t head = xrp->xr_head; 265 266 VERIFY(ntrb > 0); 267 /* We use < to ignore the link TRB */ 268 VERIFY(ntrb < xrp->xr_ntrb); 269 270 for (i = 0; i < ntrb; i++) { 271 head++; 272 if (head == xrp->xr_ntrb - 1) { 273 head = 0; 274 } 275 276 if (head == xrp->xr_tail) 277 return (B_FALSE); 278 } 279 280 return (B_TRUE); 281 } 282 283 /* 284 * Fill in a TRB in the ring at offset trboff. If cycle is currently set to 285 * B_TRUE, then we fill in the appropriate cycle bit to tell the system to 286 * advance, otherwise we leave the existing cycle bit untouched so the system 287 * doesn't accidentally advance until we have everything filled in. 288 */ 289 void 290 xhci_ring_trb_fill(xhci_ring_t *xrp, uint_t trboff, xhci_trb_t *host_trb, 291 boolean_t put_cycle) 292 { 293 uint_t i; 294 uint32_t flags; 295 uint_t ent = xrp->xr_head; 296 uint8_t cycle = xrp->xr_cycle; 297 xhci_trb_t *trb; 298 299 for (i = 0; i < trboff; i++) { 300 ent++; 301 if (ent == xrp->xr_ntrb - 1) { 302 ent = 0; 303 cycle ^= 1; 304 } 305 } 306 307 /* 308 * If we're being asked to not update the cycle for it to be valid to be 309 * produced, we need to xor this once again to get to the inappropriate 310 * value. 
311 */ 312 if (put_cycle == B_FALSE) 313 cycle ^= 1; 314 315 trb = &xrp->xr_trb[ent]; 316 317 trb->trb_addr = host_trb->trb_addr; 318 trb->trb_status = host_trb->trb_status; 319 flags = host_trb->trb_flags; 320 if (cycle == 0) { 321 flags &= ~LE_32(XHCI_TRB_CYCLE); 322 } else { 323 flags |= LE_32(XHCI_TRB_CYCLE); 324 } 325 326 trb->trb_flags = flags; 327 } 328 329 /* 330 * Update our metadata for the ring and verify the cycle bit is correctly set 331 * for the first trb. It is expected that it is incorrectly set. 332 */ 333 void 334 xhci_ring_trb_produce(xhci_ring_t *xrp, uint_t ntrb) 335 { 336 uint_t i, ohead; 337 xhci_trb_t *trb; 338 339 VERIFY(ntrb > 0); 340 341 ohead = xrp->xr_head; 342 343 /* 344 * As part of updating the head, we need to make sure we correctly 345 * update the cycle bit of the link TRB. So we always do this first 346 * before we update the old head, to try and get a consistent view of 347 * the cycle bit. 348 */ 349 for (i = 0; i < ntrb; i++) { 350 xrp->xr_head++; 351 /* 352 * If we're updating the link TRB, we also need to make sure 353 * that the Chain bit is set if we're in the middle of a TD 354 * comprised of multiple TRDs. Thankfully the algorithmn here is 355 * simple: set it to the value of the previous TRB. 356 */ 357 if (xrp->xr_head == xrp->xr_ntrb - 1) { 358 trb = &xrp->xr_trb[xrp->xr_ntrb - 1]; 359 if (xrp->xr_trb[xrp->xr_ntrb - 2].trb_flags & 360 XHCI_TRB_CHAIN) { 361 trb->trb_flags |= XHCI_TRB_CHAIN; 362 } else { 363 trb->trb_flags &= ~XHCI_TRB_CHAIN; 364 365 } 366 trb->trb_flags ^= LE_32(XHCI_TRB_CYCLE); 367 xrp->xr_cycle ^= 1; 368 xrp->xr_head = 0; 369 } 370 } 371 372 trb = &xrp->xr_trb[ohead]; 373 trb->trb_flags ^= LE_32(XHCI_TRB_CYCLE); 374 } 375 376 /* 377 * This is a convenience wrapper for the single TRB case to make callers less 378 * likely to mess up some of the required semantics. 
379 */ 380 void 381 xhci_ring_trb_put(xhci_ring_t *xrp, xhci_trb_t *trb) 382 { 383 xhci_ring_trb_fill(xrp, 0U, trb, B_FALSE); 384 xhci_ring_trb_produce(xrp, 1U); 385 } 386 387 /* 388 * Update the tail pointer for a ring based on the DMA address of a consumed 389 * entry. Note, this entry indicates what we just processed, therefore we should 390 * bump the tail entry to the next one. 391 */ 392 boolean_t 393 xhci_ring_trb_consumed(xhci_ring_t *xrp, uint64_t dma) 394 { 395 uint64_t pa = xhci_dma_pa(&xrp->xr_dma); 396 uint64_t high = pa + xrp->xr_ntrb * sizeof (xhci_trb_t); 397 398 if (dma < pa || dma >= high || 399 dma % sizeof (xhci_trb_t) != 0) 400 return (B_FALSE); 401 402 dma -= pa; 403 dma /= sizeof (xhci_trb_t); 404 405 VERIFY(dma < xrp->xr_ntrb); 406 407 xrp->xr_tail = dma + 1; 408 if (xrp->xr_tail == xrp->xr_ntrb - 1) 409 xrp->xr_tail = 0; 410 411 return (B_TRUE); 412 } 413 414 /* 415 * The ring represented here has been reset and we're being asked to basically 416 * skip all outstanding entries. Note, this shouldn't be used for the event 417 * ring. Because the cycle bit is toggled whenever the head moves past the link 418 * trb, the cycle bit is already correct. So in this case, it's really just a 419 * matter of setting the current tail equal to the head, at which point we 420 * consider things empty. 421 */ 422 void 423 xhci_ring_skip(xhci_ring_t *xrp) 424 { 425 xrp->xr_tail = xrp->xr_head; 426 } 427 428 /* 429 * A variant on the normal skip. This basically just tells us to make sure that 430 * that everything this transfer represents has been skipped. Callers need to 431 * make sure that this is actually the first transfer in the ring. Like above, 432 * we don't need to touch the cycle bit. 433 */ 434 void 435 xhci_ring_skip_transfer(xhci_ring_t *xrp, xhci_transfer_t *xt) 436 { 437 uint_t i; 438 439 for (i = 0; i < xt->xt_ntrbs; i++) { 440 xrp->xr_tail++; 441 if (xrp->xr_tail == xrp->xr_ntrb - 1) 442 xrp->xr_tail = 0; 443 } 444 } 445