/*
 * CDDL HEADER START
 *
 * Copyright(c) 2007-2009 Intel Corporation. All rights reserved.
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#include "igb_sw.h"

/* function prototypes */
static mblk_t *igb_rx_bind(igb_rx_data_t *, uint32_t, uint32_t);
static mblk_t *igb_rx_copy(igb_rx_data_t *, uint32_t, uint32_t);
static void igb_rx_assoc_hcksum(mblk_t *, uint32_t);

/*
 * igb_rx_recycle - the callback function to reclaim an rx buffer
 *
 * This function is called when an mblk is freed by the upper layer
 * through a freeb() call (only for mblks constructed through desballoc()).
 * It returns the freed buffer to the free list.
 */
void
igb_rx_recycle(caddr_t arg)
{
	igb_t *igb;
	igb_rx_ring_t *rx_ring;
	igb_rx_data_t *rx_data;
	rx_control_block_t *recycle_rcb;
	uint32_t free_index;
	uint32_t ref_cnt;

	recycle_rcb = (rx_control_block_t *)(uintptr_t)arg;
	rx_data = recycle_rcb->rx_data;
	rx_ring = rx_data->rx_ring;
	igb = rx_ring->igb;

	if (recycle_rcb->ref_cnt == 0) {
		/*
		 * This case only happens when rx buffers are being freed
		 * in igb_stop() and freemsg() is called.
		 */
		return;
	}

	ASSERT(recycle_rcb->mp == NULL);

	/*
	 * Use the recycled data buffer to generate a new mblk
	 */
	recycle_rcb->mp = desballoc((unsigned char *)
	    recycle_rcb->rx_buf.address,
	    recycle_rcb->rx_buf.size,
	    0, &recycle_rcb->free_rtn);

	/*
	 * Put the recycled rx control block into the free list
	 */
	mutex_enter(&rx_data->recycle_lock);

	free_index = rx_data->rcb_tail;
	ASSERT(rx_data->free_list[free_index] == NULL);

	rx_data->free_list[free_index] = recycle_rcb;
	rx_data->rcb_tail = NEXT_INDEX(free_index, 1, rx_data->free_list_size);

	mutex_exit(&rx_data->recycle_lock);

	/*
	 * The atomic operation on the number of available rx control
	 * blocks in the free list is used to make the recycling mutually
	 * exclusive with the receive path.
	 */
	atomic_inc_32(&rx_data->rcb_free);
	ASSERT(rx_data->rcb_free <= rx_data->free_list_size);

	/*
	 * Handle the case where the interface has been unplumbed while
	 * some buffers are still held by the upper layer. When such a
	 * buffer is finally returned, it must be freed here.
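	 * The atomic decrement below drops the reference taken when the
	 * buffer was loaned upstream; a result of zero means this was the
	 * last reference and the buffer itself can now be released.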
	 */
	ref_cnt = atomic_dec_32_nv(&recycle_rcb->ref_cnt);
	if (ref_cnt == 0) {
		if (recycle_rcb->mp != NULL) {
			freemsg(recycle_rcb->mp);
			recycle_rcb->mp = NULL;
		}

		igb_free_dma_buffer(&recycle_rcb->rx_buf);

		mutex_enter(&igb->rx_pending_lock);
		atomic_dec_32(&rx_data->rcb_pending);
		atomic_dec_32(&igb->rcb_pending);

		/*
		 * When no buffer belonging to this rx_data is held by the
		 * upper layer any more, the rx_data can be freed.
		 */
		if ((rx_data->flag & IGB_RX_STOPPED) &&
		    (rx_data->rcb_pending == 0))
			igb_free_rx_ring_data(rx_data);

		mutex_exit(&igb->rx_pending_lock);
	}
}

/*
 * igb_rx_copy - Use copy to process the received packet
 *
 * This function uses bcopy() to copy the packet data into a newly
 * allocated mblk and sends the copied packet upstream.
 */
static mblk_t *
igb_rx_copy(igb_rx_data_t *rx_data, uint32_t index, uint32_t pkt_len)
{
	rx_control_block_t *current_rcb;
	mblk_t *mp;
	igb_t *igb = rx_data->rx_ring->igb;

	current_rcb = rx_data->work_list[index];

	DMA_SYNC(&current_rcb->rx_buf, DDI_DMA_SYNC_FORKERNEL);

	if (igb_check_dma_handle(
	    current_rcb->rx_buf.dma_handle) != DDI_FM_OK) {
		ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED);
		atomic_or_32(&igb->igb_state, IGB_ERROR);
		return (NULL);
	}

	/*
	 * Allocate a buffer to receive this packet
	 */
	mp = allocb(pkt_len + IPHDR_ALIGN_ROOM, 0);
	if (mp == NULL) {
		igb_log(igb, IGB_LOG_INFO,
		    "igb_rx_copy: allocate buffer failed");
		return (NULL);
	}

	/*
	 * Copy the received data into the new mblk
	 */
	mp->b_rptr += IPHDR_ALIGN_ROOM;
	bcopy(current_rcb->rx_buf.address, mp->b_rptr, pkt_len);
	mp->b_wptr = mp->b_rptr + pkt_len;

	return (mp);
}

/*
 * igb_rx_bind - Use an existing DMA buffer to build an mblk for receiving
 *
 * This function uses the pre-bound DMA buffer to receive the packet
 * and builds an mblk that will be sent upstream.
 */
static mblk_t *
igb_rx_bind(igb_rx_data_t *rx_data, uint32_t index, uint32_t pkt_len)
{
	rx_control_block_t *current_rcb;
	rx_control_block_t *free_rcb;
	uint32_t free_index;
	mblk_t *mp;
	igb_t *igb = rx_data->rx_ring->igb;

	/*
	 * If the free list is empty, we cannot proceed to send
	 * the current DMA buffer upstream. We'll have to return
	 * and use bcopy to process the packet.
	 */
	if (igb_atomic_reserve(&rx_data->rcb_free, 1) < 0)
		return (NULL);

	current_rcb = rx_data->work_list[index];
	/*
	 * If the mp of the rx control block is NULL, try to do
	 * desballoc again.
	 */
	if (current_rcb->mp == NULL) {
		current_rcb->mp = desballoc((unsigned char *)
		    current_rcb->rx_buf.address,
		    current_rcb->rx_buf.size,
		    0, &current_rcb->free_rtn);
		/*
		 * If building an mblk from the current DMA buffer fails,
		 * we have to return and use bcopy to process the packet.
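		 * The rcb_free increment below returns the reservation
		 * taken above, since no free control block is consumed.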
		 */
		if (current_rcb->mp == NULL) {
			atomic_inc_32(&rx_data->rcb_free);
			return (NULL);
		}
	}
	/*
	 * Sync up the data received
	 */
	DMA_SYNC(&current_rcb->rx_buf, DDI_DMA_SYNC_FORKERNEL);

	if (igb_check_dma_handle(
	    current_rcb->rx_buf.dma_handle) != DDI_FM_OK) {
		ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED);
		atomic_or_32(&igb->igb_state, IGB_ERROR);
		atomic_inc_32(&rx_data->rcb_free);
		return (NULL);
	}

	mp = current_rcb->mp;
	current_rcb->mp = NULL;
	atomic_inc_32(&current_rcb->ref_cnt);

	mp->b_wptr = mp->b_rptr + pkt_len;
	mp->b_next = mp->b_cont = NULL;

	/*
	 * Strip off one free rx control block from the free list
	 */
	free_index = rx_data->rcb_head;
	free_rcb = rx_data->free_list[free_index];
	ASSERT(free_rcb != NULL);
	rx_data->free_list[free_index] = NULL;
	rx_data->rcb_head = NEXT_INDEX(free_index, 1, rx_data->free_list_size);

	/*
	 * Put the free rx control block into the work list
	 */
	rx_data->work_list[index] = free_rcb;

	return (mp);
}

/*
 * igb_rx_assoc_hcksum
 *
 * Check the rx hardware checksum status and associate the hcksum flags
 */
static void
igb_rx_assoc_hcksum(mblk_t *mp, uint32_t status_error)
{
	uint32_t hcksum_flags = 0;

	/* Ignore Checksum Indication */
	if (status_error & E1000_RXD_STAT_IXSM)
		return;

	/*
	 * Check TCP/UDP checksum
	 */
	if (((status_error & E1000_RXD_STAT_TCPCS) ||
	    (status_error & E1000_RXD_STAT_UDPCS)) &&
	    !(status_error & E1000_RXDEXT_STATERR_TCPE))
		hcksum_flags |= HCK_FULLCKSUM_OK;

	/*
	 * Check IP Checksum
	 */
	if ((status_error & E1000_RXD_STAT_IPCS) &&
	    !(status_error & E1000_RXDEXT_STATERR_IPE))
		hcksum_flags |= HCK_IPV4_HDRCKSUM_OK;

	if (hcksum_flags != 0) {
		mac_hcksum_set(mp, 0, 0, 0, 0, hcksum_flags);
	}
}

mblk_t *
igb_rx_ring_poll(void *arg, int bytes)
{
	igb_rx_ring_t *rx_ring = (igb_rx_ring_t *)arg;
	mblk_t *mp = NULL;

	ASSERT(bytes >= 0);

	if ((bytes == 0) || (rx_ring->igb->igb_state & IGB_SUSPENDED) ||
	    !(rx_ring->igb->igb_state & IGB_STARTED))
		return (NULL);

	mutex_enter(&rx_ring->rx_lock);
	mp = igb_rx(rx_ring, bytes);
	mutex_exit(&rx_ring->rx_lock);

	return (mp);
}

/*
 * igb_rx - Receive the data of one ring
 *
 * This function walks through the h/w descriptors of the specified rx ring
 * and receives the data whenever the descriptor status shows it is ready.
 * It returns a chain of mblks containing the received data, to be
 * passed up to mac_rx().
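 *
 * The poll_bytes argument caps the number of packet bytes chained up in a
 * single call; passing IGB_NO_POLL disables that byte limit.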
 */
mblk_t *
igb_rx(igb_rx_ring_t *rx_ring, int poll_bytes)
{
	union e1000_adv_rx_desc *current_rbd;
	rx_control_block_t *current_rcb;
	mblk_t *mp;
	mblk_t *mblk_head;
	mblk_t **mblk_tail;
	uint32_t rx_next;
	uint32_t rx_tail;
	uint32_t pkt_len;
	uint32_t status_error;
	uint32_t pkt_num;
	uint32_t total_bytes;
	igb_t *igb = rx_ring->igb;
	igb_rx_data_t *rx_data = rx_ring->rx_data;

	mblk_head = NULL;
	mblk_tail = &mblk_head;

	if (igb->igb_state & IGB_ERROR)
		return (NULL);

	/*
	 * Sync the receive descriptors before
	 * accepting the packets
	 */
	DMA_SYNC(&rx_data->rbd_area, DDI_DMA_SYNC_FORKERNEL);

	if (igb_check_dma_handle(
	    rx_data->rbd_area.dma_handle) != DDI_FM_OK) {
		ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED);
		atomic_or_32(&igb->igb_state, IGB_ERROR);
		return (NULL);
	}

	/*
	 * Get the start point of the rx bd ring which should be examined
	 * during this cycle.
	 */
	rx_next = rx_data->rbd_next;

	current_rbd = &rx_data->rbd_ring[rx_next];
	pkt_num = 0;
	total_bytes = 0;
	status_error = current_rbd->wb.upper.status_error;
	while (status_error & E1000_RXD_STAT_DD) {
		/*
		 * If hardware has reported errors, but the only error is a
		 * hardware checksum error, do not discard the packet and
		 * let the upper layer compute the checksum;
		 * otherwise discard the packet.
		 */
		if ((status_error & E1000_RXDEXT_ERR_FRAME_ERR_MASK) ||
		    !(status_error & E1000_RXD_STAT_EOP)) {
			IGB_DEBUG_STAT(rx_ring->stat_frame_error);
			goto rx_discard;
		}

		IGB_DEBUG_STAT_COND(rx_ring->stat_cksum_error,
		    (status_error & E1000_RXDEXT_STATERR_TCPE) ||
		    (status_error & E1000_RXDEXT_STATERR_IPE));

		pkt_len = current_rbd->wb.upper.length;

		if ((poll_bytes != IGB_NO_POLL) &&
		    ((pkt_len + total_bytes) > poll_bytes))
			break;

		IGB_DEBUG_STAT(rx_ring->stat_pkt_cnt);
		total_bytes += pkt_len;

		mp = NULL;
		/*
		 * For packets longer than the copy threshold, we'll first
		 * try to use the existing DMA buffer to build an mblk and
		 * send the mblk upstream.
		 *
		 * If the first method fails, or the packet length is less
		 * than the copy threshold, we'll allocate a new mblk and
		 * copy the packet data into it.
		 */
		if (pkt_len > igb->rx_copy_thresh)
			mp = igb_rx_bind(rx_data, rx_next, pkt_len);

		if (mp == NULL)
			mp = igb_rx_copy(rx_data, rx_next, pkt_len);

		if (mp != NULL) {
			/*
			 * Check h/w checksum offload status
			 */
			if (igb->rx_hcksum_enable)
				igb_rx_assoc_hcksum(mp, status_error);

			*mblk_tail = mp;
			mblk_tail = &mp->b_next;
		}

		/* Update per-ring rx statistics */
		rx_ring->rx_pkts++;
		rx_ring->rx_bytes += pkt_len;

rx_discard:
		/*
		 * Reset rx descriptor read bits
		 */
		current_rcb = rx_data->work_list[rx_next];
		current_rbd->read.pkt_addr = current_rcb->rx_buf.dma_address;
		current_rbd->read.hdr_addr = 0;

		rx_next = NEXT_INDEX(rx_next, 1, rx_data->ring_size);

		/*
		 * The receive function is in interrupt context, so
		 * rx_limit_per_intr is used here to avoid spending too
		 * long receiving packets in a single interrupt.
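		 * Descriptors left unprocessed when the limit is hit still
		 * have DD set and will be handled on the next interrupt or
		 * poll cycle, starting from the saved rbd_next.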
		 */
		if (++pkt_num > igb->rx_limit_per_intr) {
			IGB_DEBUG_STAT(rx_ring->stat_exceed_pkt);
			break;
		}

		current_rbd = &rx_data->rbd_ring[rx_next];
		status_error = current_rbd->wb.upper.status_error;
	}

	DMA_SYNC(&rx_data->rbd_area, DDI_DMA_SYNC_FORDEV);

	rx_data->rbd_next = rx_next;

	/*
	 * Update the h/w tail accordingly
	 */
	rx_tail = PREV_INDEX(rx_next, 1, rx_data->ring_size);

	E1000_WRITE_REG(&igb->hw, E1000_RDT(rx_ring->index), rx_tail);

	if (igb_check_acc_handle(igb->osdep.reg_handle) != DDI_FM_OK) {
		ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED);
		atomic_or_32(&igb->igb_state, IGB_ERROR);
	}

	return (mblk_head);
}