1 /* 2 * CDDL HEADER START 3 * 4 * Copyright(c) 2007-2009 Intel Corporation. All rights reserved. 5 * The contents of this file are subject to the terms of the 6 * Common Development and Distribution License (the "License"). 7 * You may not use this file except in compliance with the License. 8 * 9 * You can obtain a copy of the license at: 10 * http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When using or redistributing this file, you may do so under the 15 * License only. No other modification of this header is permitted. 16 * 17 * If applicable, add the following below this CDDL HEADER, with the 18 * fields enclosed by brackets "[]" replaced with your own identifying 19 * information: Portions Copyright [yyyy] [name of copyright owner] 20 * 21 * CDDL HEADER END 22 */ 23 24 /* 25 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 26 * Use is subject to license terms of the CDDL. 27 */ 28 29 #include "igb_sw.h" 30 31 /* function prototypes */ 32 static mblk_t *igb_rx_bind(igb_rx_ring_t *, uint32_t, uint32_t); 33 static mblk_t *igb_rx_copy(igb_rx_ring_t *, uint32_t, uint32_t); 34 static void igb_rx_assoc_hcksum(mblk_t *, uint32_t); 35 36 #ifndef IGB_DEBUG 37 #pragma inline(igb_rx_assoc_hcksum) 38 #endif 39 40 41 /* 42 * igb_rx_recycle - the call-back function to reclaim rx buffer 43 * 44 * This function is called when an mp is freed by the user thru 45 * freeb call (Only for mp constructed through desballoc call). 46 * It returns back the freed buffer to the free list. 47 */ 48 void 49 igb_rx_recycle(caddr_t arg) 50 { 51 igb_rx_ring_t *rx_ring; 52 rx_control_block_t *recycle_rcb; 53 uint32_t free_index; 54 55 recycle_rcb = (rx_control_block_t *)(uintptr_t)arg; 56 rx_ring = recycle_rcb->rx_ring; 57 58 if (recycle_rcb->state == RCB_FREE) 59 return; 60 61 recycle_rcb->state = RCB_FREE; 62 63 ASSERT(recycle_rcb->mp == NULL); 64 65 /* 66 * Using the recycled data buffer to generate a new mblk 67 */ 68 recycle_rcb->mp = desballoc((unsigned char *) 69 recycle_rcb->rx_buf.address, 70 recycle_rcb->rx_buf.size, 71 0, &recycle_rcb->free_rtn); 72 73 /* 74 * Put the recycled rx control block into free list 75 */ 76 mutex_enter(&rx_ring->recycle_lock); 77 78 free_index = rx_ring->rcb_tail; 79 ASSERT(rx_ring->free_list[free_index] == NULL); 80 81 rx_ring->free_list[free_index] = recycle_rcb; 82 rx_ring->rcb_tail = NEXT_INDEX(free_index, 1, rx_ring->free_list_size); 83 84 mutex_exit(&rx_ring->recycle_lock); 85 86 /* 87 * The atomic operation on the number of the available rx control 88 * blocks in the free list is used to make the recycling mutual 89 * exclusive with the receiving. 90 */ 91 atomic_inc_32(&rx_ring->rcb_free); 92 ASSERT(rx_ring->rcb_free <= rx_ring->free_list_size); 93 } 94 95 /* 96 * igb_rx_copy - Use copy to process the received packet 97 * 98 * This function will use bcopy to process the packet 99 * and send the copied packet upstream 100 */ 101 static mblk_t * 102 igb_rx_copy(igb_rx_ring_t *rx_ring, uint32_t index, uint32_t pkt_len) 103 { 104 rx_control_block_t *current_rcb; 105 mblk_t *mp; 106 igb_t *igb = rx_ring->igb; 107 108 current_rcb = rx_ring->work_list[index]; 109 110 DMA_SYNC(¤t_rcb->rx_buf, DDI_DMA_SYNC_FORKERNEL); 111 112 if (igb_check_dma_handle( 113 current_rcb->rx_buf.dma_handle) != DDI_FM_OK) { 114 ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED); 115 atomic_or_32(&igb->igb_state, IGB_ERROR); 116 return (NULL); 117 } 118 119 /* 120 * Allocate buffer to receive this packet 121 */ 122 mp = allocb(pkt_len + IPHDR_ALIGN_ROOM, 0); 123 if (mp == NULL) { 124 igb_log(rx_ring->igb, "igb_rx_copy: allocate buffer failed"); 125 return (NULL); 126 } 127 128 /* 129 * Copy the data received into the new cluster 130 */ 131 mp->b_rptr += IPHDR_ALIGN_ROOM; 132 bcopy(current_rcb->rx_buf.address, mp->b_rptr, pkt_len); 133 mp->b_wptr = mp->b_rptr + pkt_len; 134 135 return (mp); 136 } 137 138 /* 139 * igb_rx_bind - Use existing DMA buffer to build mblk for receiving 140 * 141 * This function will use pre-bound DMA buffer to receive the packet 142 * and build mblk that will be sent upstream. 143 */ 144 static mblk_t * 145 igb_rx_bind(igb_rx_ring_t *rx_ring, uint32_t index, uint32_t pkt_len) 146 { 147 rx_control_block_t *current_rcb; 148 rx_control_block_t *free_rcb; 149 uint32_t free_index; 150 mblk_t *mp; 151 igb_t *igb = rx_ring->igb; 152 153 /* 154 * If the free list is empty, we cannot proceed to send 155 * the current DMA buffer upstream. We'll have to return 156 * and use bcopy to process the packet. 157 */ 158 if (igb_atomic_reserve(&rx_ring->rcb_free, 1) < 0) 159 return (NULL); 160 161 current_rcb = rx_ring->work_list[index]; 162 /* 163 * If the mp of the rx control block is NULL, try to do 164 * desballoc again. 165 */ 166 if (current_rcb->mp == NULL) { 167 current_rcb->mp = desballoc((unsigned char *) 168 current_rcb->rx_buf.address, 169 current_rcb->rx_buf.size, 170 0, ¤t_rcb->free_rtn); 171 /* 172 * If it is failed to built a mblk using the current 173 * DMA buffer, we have to return and use bcopy to 174 * process the packet. 175 */ 176 if (current_rcb->mp == NULL) { 177 atomic_inc_32(&rx_ring->rcb_free); 178 return (NULL); 179 } 180 } 181 /* 182 * Sync up the data received 183 */ 184 DMA_SYNC(¤t_rcb->rx_buf, DDI_DMA_SYNC_FORKERNEL); 185 186 if (igb_check_dma_handle( 187 current_rcb->rx_buf.dma_handle) != DDI_FM_OK) { 188 ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED); 189 atomic_or_32(&igb->igb_state, IGB_ERROR); 190 atomic_inc_32(&rx_ring->rcb_free); 191 return (NULL); 192 } 193 194 mp = current_rcb->mp; 195 current_rcb->mp = NULL; 196 current_rcb->state = RCB_SENDUP; 197 198 mp->b_wptr = mp->b_rptr + pkt_len; 199 mp->b_next = mp->b_cont = NULL; 200 201 /* 202 * Strip off one free rx control block from the free list 203 */ 204 free_index = rx_ring->rcb_head; 205 free_rcb = rx_ring->free_list[free_index]; 206 ASSERT(free_rcb != NULL); 207 rx_ring->free_list[free_index] = NULL; 208 rx_ring->rcb_head = NEXT_INDEX(free_index, 1, rx_ring->free_list_size); 209 210 /* 211 * Put the rx control block to the work list 212 */ 213 rx_ring->work_list[index] = free_rcb; 214 215 return (mp); 216 } 217 218 /* 219 * igb_rx_assoc_hcksum 220 * 221 * Check the rx hardware checksum status and associate the hcksum flags 222 */ 223 static void 224 igb_rx_assoc_hcksum(mblk_t *mp, uint32_t status_error) 225 { 226 uint32_t hcksum_flags = 0; 227 228 /* Ignore Checksum Indication */ 229 if (status_error & E1000_RXD_STAT_IXSM) 230 return; 231 232 /* 233 * Check TCP/UDP checksum 234 */ 235 if (((status_error & E1000_RXD_STAT_TCPCS) || 236 (status_error & E1000_RXD_STAT_UDPCS)) && 237 !(status_error & E1000_RXDEXT_STATERR_TCPE)) 238 hcksum_flags |= HCK_FULLCKSUM | HCK_FULLCKSUM_OK; 239 240 /* 241 * Check IP Checksum 242 */ 243 if ((status_error & E1000_RXD_STAT_IPCS) && 244 !(status_error & E1000_RXDEXT_STATERR_IPE)) 245 hcksum_flags |= HCK_IPV4_HDRCKSUM; 246 247 if (hcksum_flags != 0) { 248 (void) hcksum_assoc(mp, 249 NULL, NULL, 0, 0, 0, 0, hcksum_flags, 0); 250 } 251 } 252 253 mblk_t * 254 igb_rx_ring_poll(void *arg, int bytes) 255 { 256 igb_rx_ring_t *rx_ring = (igb_rx_ring_t *)arg; 257 mblk_t *mp = NULL; 258 259 ASSERT(bytes >= 0); 260 261 if ((bytes == 0) || (rx_ring->igb->igb_state & IGB_SUSPENDED) || 262 !(rx_ring->igb->igb_state & IGB_STARTED)) 263 return (NULL); 264 265 mutex_enter(&rx_ring->rx_lock); 266 mp = igb_rx(rx_ring, bytes); 267 mutex_exit(&rx_ring->rx_lock); 268 269 return (mp); 270 } 271 272 /* 273 * igb_rx - Receive the data of one ring 274 * 275 * This function goes throught h/w descriptor in one specified rx ring, 276 * receives the data if the descriptor status shows the data is ready. 277 * It returns a chain of mblks containing the received data, to be 278 * passed up to mac_rx(). 279 */ 280 mblk_t * 281 igb_rx(igb_rx_ring_t *rx_ring, int poll_bytes) 282 { 283 union e1000_adv_rx_desc *current_rbd; 284 rx_control_block_t *current_rcb; 285 mblk_t *mp; 286 mblk_t *mblk_head; 287 mblk_t **mblk_tail; 288 uint32_t rx_next; 289 uint32_t rx_tail; 290 uint32_t pkt_len; 291 uint32_t status_error; 292 uint32_t pkt_num; 293 uint32_t total_bytes; 294 igb_t *igb = rx_ring->igb; 295 296 mblk_head = NULL; 297 mblk_tail = &mblk_head; 298 299 if (igb->igb_state & IGB_ERROR) 300 return (NULL); 301 302 /* 303 * Sync the receive descriptors before 304 * accepting the packets 305 */ 306 DMA_SYNC(&rx_ring->rbd_area, DDI_DMA_SYNC_FORKERNEL); 307 308 if (igb_check_dma_handle( 309 rx_ring->rbd_area.dma_handle) != DDI_FM_OK) { 310 ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED); 311 atomic_or_32(&igb->igb_state, IGB_ERROR); 312 return (NULL); 313 } 314 315 /* 316 * Get the start point of rx bd ring which should be examined 317 * during this cycle. 318 */ 319 rx_next = rx_ring->rbd_next; 320 321 current_rbd = &rx_ring->rbd_ring[rx_next]; 322 pkt_num = 0; 323 total_bytes = 0; 324 status_error = current_rbd->wb.upper.status_error; 325 while (status_error & E1000_RXD_STAT_DD) { 326 /* 327 * If hardware has found the errors, but the error 328 * is hardware checksum error, here does not discard the 329 * packet, and let upper layer compute the checksum; 330 * Otherwise discard the packet. 331 */ 332 if ((status_error & E1000_RXDEXT_ERR_FRAME_ERR_MASK) || 333 !(status_error & E1000_RXD_STAT_EOP)) { 334 IGB_DEBUG_STAT(rx_ring->stat_frame_error); 335 goto rx_discard; 336 } 337 338 IGB_DEBUG_STAT_COND(rx_ring->stat_cksum_error, 339 (status_error & E1000_RXDEXT_STATERR_TCPE) || 340 (status_error & E1000_RXDEXT_STATERR_IPE)); 341 342 pkt_len = current_rbd->wb.upper.length; 343 344 if ((poll_bytes != IGB_NO_POLL) && 345 ((pkt_len + total_bytes) > poll_bytes)) 346 break; 347 348 IGB_DEBUG_STAT(rx_ring->stat_pkt_cnt); 349 total_bytes += pkt_len; 350 351 mp = NULL; 352 /* 353 * For packets with length more than the copy threshold, 354 * we'll firstly try to use the existed DMA buffer to built 355 * a mblk and send the mblk upstream. 356 * 357 * If the first method fails, or the packet length is less 358 * than the copy threshold, we'll allocate a new mblk and 359 * copy the packet data to the mblk. 360 */ 361 if (pkt_len > rx_ring->copy_thresh) 362 mp = igb_rx_bind(rx_ring, rx_next, pkt_len); 363 364 if (mp == NULL) 365 mp = igb_rx_copy(rx_ring, rx_next, pkt_len); 366 367 if (mp != NULL) { 368 /* 369 * Check h/w checksum offload status 370 */ 371 if (igb->rx_hcksum_enable) 372 igb_rx_assoc_hcksum(mp, status_error); 373 374 *mblk_tail = mp; 375 mblk_tail = &mp->b_next; 376 } 377 378 rx_discard: 379 /* 380 * Reset rx descriptor read bits 381 */ 382 current_rcb = rx_ring->work_list[rx_next]; 383 current_rbd->read.pkt_addr = current_rcb->rx_buf.dma_address; 384 current_rbd->read.hdr_addr = 0; 385 386 rx_next = NEXT_INDEX(rx_next, 1, rx_ring->ring_size); 387 388 /* 389 * The receive function is in interrupt context, so here 390 * limit_per_intr is used to avoid doing receiving too long 391 * per interrupt. 392 */ 393 if (++pkt_num > rx_ring->limit_per_intr) { 394 IGB_DEBUG_STAT(rx_ring->stat_exceed_pkt); 395 break; 396 } 397 398 current_rbd = &rx_ring->rbd_ring[rx_next]; 399 status_error = current_rbd->wb.upper.status_error; 400 } 401 402 DMA_SYNC(&rx_ring->rbd_area, DDI_DMA_SYNC_FORDEV); 403 404 rx_ring->rbd_next = rx_next; 405 406 /* 407 * Update the h/w tail accordingly 408 */ 409 rx_tail = PREV_INDEX(rx_next, 1, rx_ring->ring_size); 410 411 E1000_WRITE_REG(&igb->hw, E1000_RDT(rx_ring->index), rx_tail); 412 413 if (igb_check_acc_handle(igb->osdep.reg_handle) != DDI_FM_OK) { 414 ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED); 415 atomic_or_32(&igb->igb_state, IGB_ERROR); 416 } 417 418 return (mblk_head); 419 } 420