1 /* 2 * CDDL HEADER START 3 * 4 * Copyright(c) 2007-2009 Intel Corporation. All rights reserved. 5 * The contents of this file are subject to the terms of the 6 * Common Development and Distribution License (the "License"). 7 * You may not use this file except in compliance with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 23 /* 24 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 25 * Use is subject to license terms. 26 */ 27 28 #include "igb_sw.h" 29 30 /* function prototypes */ 31 static mblk_t *igb_rx_bind(igb_rx_data_t *, uint32_t, uint32_t); 32 static mblk_t *igb_rx_copy(igb_rx_data_t *, uint32_t, uint32_t); 33 static void igb_rx_assoc_hcksum(mblk_t *, uint32_t); 34 35 #ifndef IGB_DEBUG 36 #pragma inline(igb_rx_assoc_hcksum) 37 #endif 38 39 40 /* 41 * igb_rx_recycle - the call-back function to reclaim rx buffer 42 * 43 * This function is called when an mp is freed by the user thru 44 * freeb call (Only for mp constructed through desballoc call). 45 * It returns back the freed buffer to the free list. 46 */ 47 void 48 igb_rx_recycle(caddr_t arg) 49 { 50 igb_t *igb; 51 igb_rx_ring_t *rx_ring; 52 igb_rx_data_t *rx_data; 53 rx_control_block_t *recycle_rcb; 54 uint32_t free_index; 55 uint32_t ref_cnt; 56 57 recycle_rcb = (rx_control_block_t *)(uintptr_t)arg; 58 rx_data = recycle_rcb->rx_data; 59 rx_ring = rx_data->rx_ring; 60 igb = rx_ring->igb; 61 62 if (recycle_rcb->ref_cnt == 0) { 63 /* 64 * This case only happens when rx buffers are being freed 65 * in igb_stop() and freemsg() is called. 66 */ 67 return; 68 } 69 70 ASSERT(recycle_rcb->mp == NULL); 71 72 /* 73 * Using the recycled data buffer to generate a new mblk 74 */ 75 recycle_rcb->mp = desballoc((unsigned char *) 76 recycle_rcb->rx_buf.address, 77 recycle_rcb->rx_buf.size, 78 0, &recycle_rcb->free_rtn); 79 80 /* 81 * Put the recycled rx control block into free list 82 */ 83 mutex_enter(&rx_data->recycle_lock); 84 85 free_index = rx_data->rcb_tail; 86 ASSERT(rx_data->free_list[free_index] == NULL); 87 88 rx_data->free_list[free_index] = recycle_rcb; 89 rx_data->rcb_tail = NEXT_INDEX(free_index, 1, rx_data->free_list_size); 90 91 mutex_exit(&rx_data->recycle_lock); 92 93 /* 94 * The atomic operation on the number of the available rx control 95 * blocks in the free list is used to make the recycling mutual 96 * exclusive with the receiving. 97 */ 98 atomic_inc_32(&rx_data->rcb_free); 99 ASSERT(rx_data->rcb_free <= rx_data->free_list_size); 100 101 /* 102 * Considering the case that the interface is unplumbed 103 * and there are still some buffers held by the upper layer. 104 * When the buffer is returned back, we need to free it. 105 */ 106 ref_cnt = atomic_dec_32_nv(&recycle_rcb->ref_cnt); 107 if (ref_cnt == 0) { 108 if (recycle_rcb->mp != NULL) { 109 freemsg(recycle_rcb->mp); 110 recycle_rcb->mp = NULL; 111 } 112 113 igb_free_dma_buffer(&recycle_rcb->rx_buf); 114 115 mutex_enter(&igb->rx_pending_lock); 116 atomic_dec_32(&rx_data->rcb_pending); 117 atomic_dec_32(&igb->rcb_pending); 118 119 /* 120 * When there is not any buffer belonging to this rx_data 121 * held by the upper layer, the rx_data can be freed. 122 */ 123 if ((rx_data->flag & IGB_RX_STOPPED) && 124 (rx_data->rcb_pending == 0)) 125 igb_free_rx_ring_data(rx_data); 126 127 mutex_exit(&igb->rx_pending_lock); 128 } 129 } 130 131 /* 132 * igb_rx_copy - Use copy to process the received packet 133 * 134 * This function will use bcopy to process the packet 135 * and send the copied packet upstream 136 */ 137 static mblk_t * 138 igb_rx_copy(igb_rx_data_t *rx_data, uint32_t index, uint32_t pkt_len) 139 { 140 rx_control_block_t *current_rcb; 141 mblk_t *mp; 142 igb_t *igb = rx_data->rx_ring->igb; 143 144 current_rcb = rx_data->work_list[index]; 145 146 DMA_SYNC(¤t_rcb->rx_buf, DDI_DMA_SYNC_FORKERNEL); 147 148 if (igb_check_dma_handle( 149 current_rcb->rx_buf.dma_handle) != DDI_FM_OK) { 150 ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED); 151 atomic_or_32(&igb->igb_state, IGB_ERROR); 152 return (NULL); 153 } 154 155 /* 156 * Allocate buffer to receive this packet 157 */ 158 mp = allocb(pkt_len + IPHDR_ALIGN_ROOM, 0); 159 if (mp == NULL) { 160 igb_log(igb, IGB_LOG_INFO, 161 "igb_rx_copy: allocate buffer failed"); 162 return (NULL); 163 } 164 165 /* 166 * Copy the data received into the new cluster 167 */ 168 mp->b_rptr += IPHDR_ALIGN_ROOM; 169 bcopy(current_rcb->rx_buf.address, mp->b_rptr, pkt_len); 170 mp->b_wptr = mp->b_rptr + pkt_len; 171 172 return (mp); 173 } 174 175 /* 176 * igb_rx_bind - Use existing DMA buffer to build mblk for receiving 177 * 178 * This function will use pre-bound DMA buffer to receive the packet 179 * and build mblk that will be sent upstream. 180 */ 181 static mblk_t * 182 igb_rx_bind(igb_rx_data_t *rx_data, uint32_t index, uint32_t pkt_len) 183 { 184 rx_control_block_t *current_rcb; 185 rx_control_block_t *free_rcb; 186 uint32_t free_index; 187 mblk_t *mp; 188 igb_t *igb = rx_data->rx_ring->igb; 189 190 /* 191 * If the free list is empty, we cannot proceed to send 192 * the current DMA buffer upstream. We'll have to return 193 * and use bcopy to process the packet. 194 */ 195 if (igb_atomic_reserve(&rx_data->rcb_free, 1) < 0) 196 return (NULL); 197 198 current_rcb = rx_data->work_list[index]; 199 /* 200 * If the mp of the rx control block is NULL, try to do 201 * desballoc again. 202 */ 203 if (current_rcb->mp == NULL) { 204 current_rcb->mp = desballoc((unsigned char *) 205 current_rcb->rx_buf.address, 206 current_rcb->rx_buf.size, 207 0, ¤t_rcb->free_rtn); 208 /* 209 * If it is failed to built a mblk using the current 210 * DMA buffer, we have to return and use bcopy to 211 * process the packet. 212 */ 213 if (current_rcb->mp == NULL) { 214 atomic_inc_32(&rx_data->rcb_free); 215 return (NULL); 216 } 217 } 218 /* 219 * Sync up the data received 220 */ 221 DMA_SYNC(¤t_rcb->rx_buf, DDI_DMA_SYNC_FORKERNEL); 222 223 if (igb_check_dma_handle( 224 current_rcb->rx_buf.dma_handle) != DDI_FM_OK) { 225 ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED); 226 atomic_or_32(&igb->igb_state, IGB_ERROR); 227 atomic_inc_32(&rx_data->rcb_free); 228 return (NULL); 229 } 230 231 mp = current_rcb->mp; 232 current_rcb->mp = NULL; 233 atomic_inc_32(¤t_rcb->ref_cnt); 234 235 mp->b_wptr = mp->b_rptr + pkt_len; 236 mp->b_next = mp->b_cont = NULL; 237 238 /* 239 * Strip off one free rx control block from the free list 240 */ 241 free_index = rx_data->rcb_head; 242 free_rcb = rx_data->free_list[free_index]; 243 ASSERT(free_rcb != NULL); 244 rx_data->free_list[free_index] = NULL; 245 rx_data->rcb_head = NEXT_INDEX(free_index, 1, rx_data->free_list_size); 246 247 /* 248 * Put the rx control block to the work list 249 */ 250 rx_data->work_list[index] = free_rcb; 251 252 return (mp); 253 } 254 255 /* 256 * igb_rx_assoc_hcksum 257 * 258 * Check the rx hardware checksum status and associate the hcksum flags 259 */ 260 static void 261 igb_rx_assoc_hcksum(mblk_t *mp, uint32_t status_error) 262 { 263 uint32_t hcksum_flags = 0; 264 265 /* Ignore Checksum Indication */ 266 if (status_error & E1000_RXD_STAT_IXSM) 267 return; 268 269 /* 270 * Check TCP/UDP checksum 271 */ 272 if (((status_error & E1000_RXD_STAT_TCPCS) || 273 (status_error & E1000_RXD_STAT_UDPCS)) && 274 !(status_error & E1000_RXDEXT_STATERR_TCPE)) 275 hcksum_flags |= HCK_FULLCKSUM_OK; 276 277 /* 278 * Check IP Checksum 279 */ 280 if ((status_error & E1000_RXD_STAT_IPCS) && 281 !(status_error & E1000_RXDEXT_STATERR_IPE)) 282 hcksum_flags |= HCK_IPV4_HDRCKSUM_OK; 283 284 if (hcksum_flags != 0) { 285 mac_hcksum_set(mp, 0, 0, 0, 0, hcksum_flags); 286 } 287 } 288 289 mblk_t * 290 igb_rx_ring_poll(void *arg, int bytes) 291 { 292 igb_rx_ring_t *rx_ring = (igb_rx_ring_t *)arg; 293 mblk_t *mp = NULL; 294 295 ASSERT(bytes >= 0); 296 297 if ((bytes == 0) || (rx_ring->igb->igb_state & IGB_SUSPENDED) || 298 !(rx_ring->igb->igb_state & IGB_STARTED)) 299 return (NULL); 300 301 mutex_enter(&rx_ring->rx_lock); 302 mp = igb_rx(rx_ring, bytes); 303 mutex_exit(&rx_ring->rx_lock); 304 305 return (mp); 306 } 307 308 /* 309 * igb_rx - Receive the data of one ring 310 * 311 * This function goes throught h/w descriptor in one specified rx ring, 312 * receives the data if the descriptor status shows the data is ready. 313 * It returns a chain of mblks containing the received data, to be 314 * passed up to mac_rx(). 315 */ 316 mblk_t * 317 igb_rx(igb_rx_ring_t *rx_ring, int poll_bytes) 318 { 319 union e1000_adv_rx_desc *current_rbd; 320 rx_control_block_t *current_rcb; 321 mblk_t *mp; 322 mblk_t *mblk_head; 323 mblk_t **mblk_tail; 324 uint32_t rx_next; 325 uint32_t rx_tail; 326 uint32_t pkt_len; 327 uint32_t status_error; 328 uint32_t pkt_num; 329 uint32_t total_bytes; 330 igb_t *igb = rx_ring->igb; 331 igb_rx_data_t *rx_data = rx_ring->rx_data; 332 333 mblk_head = NULL; 334 mblk_tail = &mblk_head; 335 336 if (igb->igb_state & IGB_ERROR) 337 return (NULL); 338 339 /* 340 * Sync the receive descriptors before 341 * accepting the packets 342 */ 343 DMA_SYNC(&rx_data->rbd_area, DDI_DMA_SYNC_FORKERNEL); 344 345 if (igb_check_dma_handle( 346 rx_data->rbd_area.dma_handle) != DDI_FM_OK) { 347 ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED); 348 atomic_or_32(&igb->igb_state, IGB_ERROR); 349 return (NULL); 350 } 351 352 /* 353 * Get the start point of rx bd ring which should be examined 354 * during this cycle. 355 */ 356 rx_next = rx_data->rbd_next; 357 358 current_rbd = &rx_data->rbd_ring[rx_next]; 359 pkt_num = 0; 360 total_bytes = 0; 361 status_error = current_rbd->wb.upper.status_error; 362 while (status_error & E1000_RXD_STAT_DD) { 363 /* 364 * If hardware has found the errors, but the error 365 * is hardware checksum error, here does not discard the 366 * packet, and let upper layer compute the checksum; 367 * Otherwise discard the packet. 368 */ 369 if ((status_error & E1000_RXDEXT_ERR_FRAME_ERR_MASK) || 370 !(status_error & E1000_RXD_STAT_EOP)) { 371 IGB_DEBUG_STAT(rx_ring->stat_frame_error); 372 goto rx_discard; 373 } 374 375 IGB_DEBUG_STAT_COND(rx_ring->stat_cksum_error, 376 (status_error & E1000_RXDEXT_STATERR_TCPE) || 377 (status_error & E1000_RXDEXT_STATERR_IPE)); 378 379 pkt_len = current_rbd->wb.upper.length; 380 381 if ((poll_bytes != IGB_NO_POLL) && 382 ((pkt_len + total_bytes) > poll_bytes)) 383 break; 384 385 IGB_DEBUG_STAT(rx_ring->stat_pkt_cnt); 386 total_bytes += pkt_len; 387 388 mp = NULL; 389 /* 390 * For packets with length more than the copy threshold, 391 * we'll firstly try to use the existed DMA buffer to built 392 * a mblk and send the mblk upstream. 393 * 394 * If the first method fails, or the packet length is less 395 * than the copy threshold, we'll allocate a new mblk and 396 * copy the packet data to the mblk. 397 */ 398 if (pkt_len > igb->rx_copy_thresh) 399 mp = igb_rx_bind(rx_data, rx_next, pkt_len); 400 401 if (mp == NULL) 402 mp = igb_rx_copy(rx_data, rx_next, pkt_len); 403 404 if (mp != NULL) { 405 /* 406 * Check h/w checksum offload status 407 */ 408 if (igb->rx_hcksum_enable) 409 igb_rx_assoc_hcksum(mp, status_error); 410 411 *mblk_tail = mp; 412 mblk_tail = &mp->b_next; 413 } 414 415 /* Update per-ring rx statistics */ 416 rx_ring->rx_pkts++; 417 rx_ring->rx_bytes += pkt_len; 418 419 rx_discard: 420 /* 421 * Reset rx descriptor read bits 422 */ 423 current_rcb = rx_data->work_list[rx_next]; 424 current_rbd->read.pkt_addr = current_rcb->rx_buf.dma_address; 425 current_rbd->read.hdr_addr = 0; 426 427 rx_next = NEXT_INDEX(rx_next, 1, rx_data->ring_size); 428 429 /* 430 * The receive function is in interrupt context, so here 431 * rx_limit_per_intr is used to avoid doing receiving too long 432 * per interrupt. 433 */ 434 if (++pkt_num > igb->rx_limit_per_intr) { 435 IGB_DEBUG_STAT(rx_ring->stat_exceed_pkt); 436 break; 437 } 438 439 current_rbd = &rx_data->rbd_ring[rx_next]; 440 status_error = current_rbd->wb.upper.status_error; 441 } 442 443 DMA_SYNC(&rx_data->rbd_area, DDI_DMA_SYNC_FORDEV); 444 445 rx_data->rbd_next = rx_next; 446 447 /* 448 * Update the h/w tail accordingly 449 */ 450 rx_tail = PREV_INDEX(rx_next, 1, rx_data->ring_size); 451 452 E1000_WRITE_REG(&igb->hw, E1000_RDT(rx_ring->index), rx_tail); 453 454 if (igb_check_acc_handle(igb->osdep.reg_handle) != DDI_FM_OK) { 455 ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED); 456 atomic_or_32(&igb->igb_state, IGB_ERROR); 457 } 458 459 return (mblk_head); 460 } 461