xref: /illumos-gate/usr/src/uts/common/io/qede/qede_fp.c (revision ed093b41a93e8563e6e1e5dae0768dda2a7bcc27)
1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, v.1,  (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://opensource.org/licenses/CDDL-1.0.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 
22 /*
23 * Copyright 2014-2017 Cavium, Inc.
24 * The contents of this file are subject to the terms of the Common Development
25 * and Distribution License, v.1,  (the "License").
26 
27 * You may not use this file except in compliance with the License.
28 
29 * You can obtain a copy of the License at available
30 * at http://opensource.org/licenses/CDDL-1.0
31 
32 * See the License for the specific language governing permissions and
33 * limitations under the License.
34 */
35 
36 #include "qede.h"
37 
38 static qede_dma_handle_entry_t *
39 qede_get_dmah_entry(qede_tx_ring_t *tx_ring)
40 {
41 	qede_dma_handles_list_t *list = &tx_ring->dmah_list;
42 	qede_dma_handle_entry_t *dmah;
43 
44 	mutex_enter(&list->lock);
45 	dmah = list->free_list[list->head];
46 	list->free_list[list->head] = NULL;
47 	list->head = (list->head + 1) & TX_RING_MASK;
48 	mutex_exit(&list->lock);
49 
50 	return (dmah);
51 }
52 
53 static void
54 qede_put_dmah_entries(qede_tx_ring_t *tx_ring, qede_dma_handle_entry_t *dmah)
55 {
56 	qede_dma_handles_list_t *list = &tx_ring->dmah_list;
57 	qede_dma_handle_entry_t *next;
58 	u16 index;
59 
60 	mutex_enter(&list->lock);
61 	index = list->tail;
62 
63 	while (dmah != NULL) {
64 		next = dmah->next;
65 		dmah->next = NULL;
66 		list->free_list[index] = dmah;
67 		index = (index + 1) & TX_RING_MASK;
68 		dmah = next;
69 	}
70 
71 	list->tail = index;
72 
73 	mutex_exit(&list->lock);
74 }
75 
76 static qede_tx_bcopy_pkt_t *
77 qede_get_bcopy_pkt(qede_tx_ring_t *tx_ring)
78 {
79 	qede_tx_bcopy_list_t *list = &tx_ring->bcopy_list;
80 	qede_tx_bcopy_pkt_t *pkt;
81 
82 	mutex_enter(&list->lock);
83 	pkt = list->free_list[list->head];
84 	list->free_list[list->head] = NULL;
85 	list->head = (list->head + 1) & TX_RING_MASK;
86 	mutex_exit(&list->lock);
87 
88 	return (pkt);
89 }
90 
91 static void
92 qede_put_bcopy_pkt(qede_tx_ring_t *tx_ring, qede_tx_bcopy_pkt_t *pkt)
93 {
94 	qede_tx_bcopy_list_t *list = &tx_ring->bcopy_list;
95 
96 	mutex_enter(&list->lock);
97 	list->free_list[list->tail] = pkt;
98 	list->tail = (list->tail + 1) & TX_RING_MASK;
99 	mutex_exit(&list->lock);
100 }
101 
102 void
103 qede_print_tx_indexes(qede_tx_ring_t *tx_ring)
104 {
105 	uint16_t hw_consumer = LE_16(*tx_ring->hw_cons_ptr);
106 	uint16_t chain_idx = ecore_chain_get_cons_idx(&tx_ring->tx_bd_ring);
107 	hw_consumer &= TX_RING_MASK;
108 	chain_idx &= TX_RING_MASK;
109 	qede_print_err("!indices: hw_cons %d, chain_cons = %d, sw_prod = %d",
110 	    hw_consumer, chain_idx, tx_ring->sw_tx_prod);
111 }
112 
113 void
114 qede_print_rx_indexes(qede_rx_ring_t *rx_ring)
115 {
116 	u16 hw_bd_cons = HOST_TO_LE_16(*rx_ring->hw_cons_ptr);
117 	u16 sw_bd_cons = ecore_chain_get_cons_idx(&rx_ring->rx_cqe_ring);
118 
119 	hw_bd_cons &= (rx_ring->qede->rx_ring_size - 1);
120 	sw_bd_cons &= (rx_ring->qede->rx_ring_size - 1);
121 	qede_print_err("!RX indices: hw_cons %d, chain_cons = %d",
122 	    hw_bd_cons, sw_bd_cons);
123 }
124 
125 
126 /*
127  * Called from tx_completion intr handler.
128  * NOTE: statu_block dma mem. must be sync'ed
129  * in the interrupt handler
130  */
131 int
132 qede_process_tx_completions(qede_tx_ring_t *tx_ring)
133 {
134 	int count = 0;
135 	u16 hw_consumer;
136 	struct eth_tx_bd *tx_bd;
137 	uint16_t chain_idx;
138 	u16 nbd, sw_consumer = tx_ring->sw_tx_cons;
139 	struct eth_tx_1st_bd *first_bd;
140 	u16 bd_consumed = 0;
141 	qede_tx_recycle_list_t *recycle_entry;
142 	qede_dma_handle_entry_t *dmah, *head = NULL, *tail = NULL;
143 	qede_tx_bcopy_pkt_t *bcopy_pkt;
144 
145 	hw_consumer = LE_16(*tx_ring->hw_cons_ptr);
146 	chain_idx = ecore_chain_get_cons_idx(&tx_ring->tx_bd_ring);
147 
148 	while (hw_consumer != chain_idx) {
149 		nbd = 0;
150 		bd_consumed = 0;
151 		first_bd = NULL;
152 
153 		recycle_entry = &tx_ring->tx_recycle_list[sw_consumer];
154 		if (recycle_entry->dmah_entry != NULL) {
155 			dmah = recycle_entry->dmah_entry;
156 
157 			head = dmah;
158 
159 			if (head->mp) {
160 				freemsg(head->mp);
161 			}
162 
163 			while (dmah != NULL) {
164 				(void) ddi_dma_unbind_handle(dmah->dma_handle);
165 				dmah = dmah->next;
166 			}
167 
168 
169 			qede_put_dmah_entries(tx_ring,
170 			    head);
171 			recycle_entry->dmah_entry = NULL;
172 		} else if (recycle_entry->bcopy_pkt != NULL) {
173 			bcopy_pkt = recycle_entry->bcopy_pkt;
174 
175 			qede_put_bcopy_pkt(tx_ring, bcopy_pkt);
176 			recycle_entry->bcopy_pkt = NULL;
177 		} else {
178 			qede_warn(tx_ring->qede,
179 			    "Invalid completion at index %d",
180 			    sw_consumer);
181 		}
182 
183 		sw_consumer = (sw_consumer + 1) & TX_RING_MASK;
184 
185 		first_bd =
186 		    (struct eth_tx_1st_bd *)ecore_chain_consume(
187 		    &tx_ring->tx_bd_ring);
188 		bd_consumed++;
189 
190 		nbd = first_bd->data.nbds;
191 
192 		while (bd_consumed++ < nbd) {
193 			ecore_chain_consume(&tx_ring->tx_bd_ring);
194 		}
195 
196 		chain_idx = ecore_chain_get_cons_idx(&tx_ring->tx_bd_ring);
197 		count++;
198 	}
199 
200 	tx_ring->sw_tx_cons = sw_consumer;
201 
202 	if (count && tx_ring->tx_q_sleeping) {
203 		tx_ring->tx_q_sleeping = 0;
204 #ifndef NO_CROSSBOW
205 		RESUME_TX(tx_ring);
206 #else
207 		mac_tx_update(tx_ring->qede->mac_handle);
208 #endif
209 	}
210 
211 	return (count);
212 }
213 
214 static int
215 qede_has_tx_work(qede_tx_ring_t *tx_ring)
216 {
217 	u16 hw_bd_cons = LE_16(*tx_ring->hw_cons_ptr);
218 	u16 sw_bd_cons = ecore_chain_get_cons_idx(&tx_ring->tx_bd_ring);
219 
220 	if (sw_bd_cons == (hw_bd_cons + 1)) {
221 		return (0);
222 	}
223 	return (hw_bd_cons != sw_bd_cons);
224 }
225 
226 static int
227 qede_has_rx_work(qede_rx_ring_t *rx_ring)
228 {
229 	u16 hw_bd_cons = HOST_TO_LE_16(*rx_ring->hw_cons_ptr);
230 	u16 sw_bd_cons = ecore_chain_get_cons_idx(&rx_ring->rx_cqe_ring);
231 	return (hw_bd_cons != sw_bd_cons);
232 }
233 
234 static void
235 qede_set_cksum_flags(mblk_t *mp,
236     uint16_t parse_flags)
237 {
238 	uint32_t cksum_flags = 0;
239 	int error = 0;
240 	bool l4_is_calc, l4_csum_err, iphdr_len_err;
241 
242 	l4_is_calc =
243 	    (parse_flags >> PARSING_AND_ERR_FLAGS_L4CHKSMWASCALCULATED_SHIFT)
244 	    & PARSING_AND_ERR_FLAGS_L4CHKSMWASCALCULATED_MASK;
245 	l4_csum_err = (parse_flags >> PARSING_AND_ERR_FLAGS_L4CHKSMERROR_SHIFT)
246 	    & PARSING_AND_ERR_FLAGS_L4CHKSMWASCALCULATED_MASK;
247 	iphdr_len_err = (parse_flags >> PARSING_AND_ERR_FLAGS_IPHDRERROR_SHIFT)
248 	    & PARSING_AND_ERR_FLAGS_IPHDRERROR_MASK;
249 
250 	if (l4_is_calc) {
251 		if (l4_csum_err) {
252 			error = 1;
253         	} else if (iphdr_len_err) {
254             		error = 2;
255         	} else {
256 			cksum_flags =  HCK_FULLCKSUM_OK | HCK_IPV4_HDRCKSUM_OK;
257 		}
258 	}
259 
260 	if (error == 1) {
261 		qede_print_err("!%s: got L4 csum error",__func__);
262 	} else if (error == 2) {
263 		qede_print_err("!%s: got IPHDER csum error" ,__func__);
264 	}
265 
266 	mac_hcksum_set(mp, 0, 0, 0, 0, cksum_flags);
267 }
268 
269 static qede_rx_buffer_t *
270 qede_get_next_rx_buffer(qede_rx_ring_t *rx_ring,
271     uint32_t *free_buffer_count)
272 {
273 	qede_rx_buffer_t *rx_buffer;
274 	uint32_t num_entries;
275 
276 	rx_buffer = qede_get_from_active_list(rx_ring, &num_entries);
277 	ASSERT(rx_buffer != NULL);
278 	ecore_chain_consume(&rx_ring->rx_bd_ring);
279 	*free_buffer_count = num_entries;
280 
281 	return (rx_buffer);
282 }
283 
284 static uint32_t
285 qede_get_next_lro_buffer(qede_rx_ring_t *rx_ring,
286     qede_lro_info_t *lro_info)
287 {
288 	lro_info->rx_buffer[lro_info->bd_count] =
289 	    qede_get_next_rx_buffer(rx_ring,
290 	    &lro_info->free_buffer_count);
291 	lro_info->bd_count++;
292 	return (DDI_SUCCESS);
293 }
294 #ifdef DEBUG_LRO
295 int agg_count = 0;
296 bool agg_print = B_TRUE;
297 #endif
298 static void
299 qede_lro_start(qede_rx_ring_t *rx_ring,
300     struct eth_fast_path_rx_tpa_start_cqe *cqe)
301 {
302 	qede_lro_info_t *lro_info;
303 	int i, len_on_first_bd, seg_len;
304 
305 	lro_info = &rx_ring->lro_info[cqe->tpa_agg_index];
306 
307 	/* ASSERT(lro_info->agg_state != QEDE_AGG_STATE_NONE); */
308 
309 #ifdef DEBUG_LRO
310 	if (agg_count++ < 30)  {
311 		qede_dump_start_lro_cqe(cqe);
312 	} else {
313 		agg_print = B_FALSE;
314 	}
315 #endif
316 
317 	memset(lro_info, 0, sizeof (qede_lro_info_t));
318 	lro_info->agg_state = QEDE_AGG_STATE_START;
319 	rx_ring->lro_active_count++;
320 
321 	/* Parsing and error flags from the parser */;
322 
323 	lro_info->pars_flags = LE_16(cqe->pars_flags.flags);
324 	lro_info->pad = LE_16(cqe->placement_offset);
325 	lro_info->header_len = (uint32_t)cqe->header_len;
326 	lro_info->vlan_tag = LE_16(cqe->vlan_tag);
327 	lro_info->rss_hash = LE_32(cqe->rss_hash);
328 
329 	seg_len = (int)LE_16(cqe->seg_len);
330 	len_on_first_bd = (int)LE_16(cqe->len_on_first_bd);
331 	/*
332 	 * Get the first bd
333 	 */
334 	qede_get_next_lro_buffer(rx_ring, lro_info);
335 
336 	if (len_on_first_bd < seg_len) {
337 		/*
338 		 * We end up here with jumbo frames
339 		 * since a TCP segment can span
340 		 * multiple buffer descriptors.
341 		 */
342 		for (i = 0; i < ETH_TPA_CQE_START_LEN_LIST_SIZE; i++) {
343 			if (cqe->ext_bd_len_list[i] == 0) {
344 			    break;
345 			}
346 			qede_get_next_lro_buffer(rx_ring, lro_info);
347 		}
348 	}
349 }
350 
351 static void
352 qede_lro_cont(qede_rx_ring_t *rx_ring,
353     struct eth_fast_path_rx_tpa_cont_cqe *cqe)
354 {
355 	qede_lro_info_t *lro_info;
356 	int i;
357 
358 	lro_info = &rx_ring->lro_info[cqe->tpa_agg_index];
359 
360 	/* ASSERT(lro_info->agg_state != QEDE_AGG_STATE_START); */
361 #ifdef DEBUG_LRO
362 	if (agg_print) {
363 		qede_dump_cont_lro_cqe(cqe);
364 	}
365 #endif
366 
367 	for (i = 0; i < ETH_TPA_CQE_CONT_LEN_LIST_SIZE; i++) {
368 		if (cqe->len_list[i] == 0) {
369 			break;
370 		}
371 		qede_get_next_lro_buffer(rx_ring, lro_info);
372 	}
373 }
374 
375 static mblk_t *
376 qede_lro_end(qede_rx_ring_t *rx_ring,
377     struct eth_fast_path_rx_tpa_end_cqe *cqe,
378     int *pkt_bytes)
379 {
380 	qede_lro_info_t *lro_info;
381 	mblk_t *head = NULL, *tail = NULL, *mp = NULL;
382 	qede_rx_buffer_t *rx_buffer;
383 	int i, bd_len;
384 	uint16_t work_length, total_packet_length;
385 	uint32_t rx_buf_size = rx_ring->rx_buf_size;
386 	qede_dma_info_t *dma_info;
387 
388 	lro_info = &rx_ring->lro_info[cqe->tpa_agg_index];
389 
390 	/* ASSERT(lro_info->agg_state != QEDE_AGG_STATE_START); */
391 
392 #ifdef DEBUG_LRO
393 	if (agg_print) {
394 		qede_dump_end_lro_cqe(cqe);
395 	}
396 #endif
397 
398 	work_length = total_packet_length = LE_16(cqe->total_packet_len);
399 
400 	/*
401 	 * Get any buffer descriptors for this cqe
402 	 */
403 	for (i=0; i<ETH_TPA_CQE_END_LEN_LIST_SIZE; i++) {
404 		if (cqe->len_list[i] == 0) {
405 		    break;
406 		}
407 		qede_get_next_lro_buffer(rx_ring, lro_info);
408 	}
409 
410 	/* ASSERT(lro_info->bd_count != cqe->num_of_bds); */
411 
412 	if (lro_info->free_buffer_count <
413 	    rx_ring->rx_low_buffer_threshold) {
414 		for (i = 0; i < lro_info->bd_count; i++) {
415 			qede_recycle_copied_rx_buffer(
416 			    lro_info->rx_buffer[i]);
417 			lro_info->rx_buffer[i] = NULL;
418 		}
419 		rx_ring->rx_low_water_cnt++;
420 		lro_info->agg_state = QEDE_AGG_STATE_NONE;
421 		return (NULL);
422 	}
423 	/*
424 	 * Loop through list of buffers for this
425 	 * aggregation.  For each one:
426 	 * 1. Calculate the buffer length
427 	 * 2. Adjust the mblk read/write pointers
428 	 * 3. Link the mblk to the local chain using
429 	 *    b_cont pointers.
430 	 * Note: each buffer will be rx_buf_size except
431 	 * the first (subtract the placement_offset)
432 	 * and the last which contains the remainder
433 	 * of cqe_end->total_packet_len minus length
434 	 * of all other buffers.
435 	 */
436 	for (i = 0; i < lro_info->bd_count; i++) {
437 
438 		rx_buffer = lro_info->rx_buffer[i];
439 
440 		bd_len =
441 		    (work_length > rx_buf_size) ? rx_buf_size : work_length;
442 		if (i == 0 &&
443 		    (cqe->num_of_bds > 1)) {
444 			bd_len -= lro_info->pad;
445 		}
446 
447 		dma_info = &rx_buffer->dma_info;
448 		ddi_dma_sync(dma_info->dma_handle,
449 		    dma_info->offset,
450 		    rx_buf_size,
451 		    DDI_DMA_SYNC_FORKERNEL);
452 
453 		mp = rx_buffer->mp;
454 		mp->b_next = mp->b_cont = NULL;
455 
456 		if (head == NULL) {
457 			head = tail = mp;
458 			mp->b_rptr += lro_info->pad;
459 		} else {
460 			tail->b_cont = mp;
461 			tail = mp;
462 		}
463 
464 		mp->b_wptr = (uchar_t *)((unsigned long)mp->b_rptr + bd_len);
465 		work_length -= bd_len;
466 	}
467 
468 	qede_set_cksum_flags(head, lro_info->pars_flags);
469 
470 	rx_ring->rx_lro_pkt_cnt++;
471 	rx_ring->lro_active_count--;
472 	lro_info->agg_state = QEDE_AGG_STATE_NONE;
473 
474 #ifdef DEBUG_LRO
475 	if (agg_print) {
476 		qede_dump_mblk_chain_bcont_ptr(rx_ring->qede, head);
477 	}
478 #endif
479 	*pkt_bytes = (int)total_packet_length;
480 	return (head);
481 }
482 
483 
484 
485 #ifdef DEBUG_JUMBO
486 int jumbo_count = 0;
487 bool jumbo_print = B_TRUE;
488 #endif
489 static mblk_t *
490 qede_reg_jumbo_cqe(qede_rx_ring_t *rx_ring,
491    struct eth_fast_path_rx_reg_cqe *cqe)
492 {
493 	int i;
494 	qede_rx_buffer_t *rx_buf, *rx_buffer[ETH_RX_MAX_BUFF_PER_PKT];
495 	mblk_t *mp = NULL, *head = NULL, *tail = NULL;
496 	uint32_t free_buffer_count = 0;
497 	uint16_t work_length;
498 	uint32_t rx_buf_size = rx_ring->rx_buf_size, bd_len;
499 	qede_dma_info_t *dma_info;
500 	u8 pad = cqe->placement_offset;
501 
502 #ifdef DEBUG_JUMBO
503 	if (jumbo_count++ < 8) {
504 		qede_dump_reg_cqe(cqe);
505 	} else {
506 		jumbo_print = B_FALSE;
507 	}
508 #endif
509 
510 	work_length = HOST_TO_LE_16(cqe->pkt_len);
511 
512 	/*
513 	 * Get the buffers/mps for this cqe
514 	 */
515 	for (i = 0; i < cqe->bd_num; i++) {
516 		rx_buffer[i] =
517 		    qede_get_next_rx_buffer(rx_ring, &free_buffer_count);
518 	}
519 
520 	/*
521 	 * If the buffer ring is running low, drop the
522 	 * packet and return these buffers.
523 	 */
524 	if (free_buffer_count <
525 	    rx_ring->rx_low_buffer_threshold) {
526 		for (i = 0; i < cqe->bd_num; i++) {
527 			qede_recycle_copied_rx_buffer(rx_buffer[i]);
528 		}
529 		rx_ring->rx_low_water_cnt++;
530 		return (NULL);
531 	}
532 
533 	for (i = 0; i < cqe->bd_num; i++) {
534 		rx_buf = rx_buffer[i];
535 
536 		bd_len =
537 		    (work_length > rx_buf_size) ? rx_buf_size : work_length;
538 
539 		/*
540 		 * Adjust for placement offset
541 		 * on first bufffer.
542 		 */
543 		if (i == 0) {
544 			bd_len -= pad;
545 		}
546 
547 		dma_info = &rx_buf->dma_info;
548 		ddi_dma_sync(dma_info->dma_handle,
549 		    dma_info->offset,
550 		    rx_buf_size,
551 		    DDI_DMA_SYNC_FORKERNEL);
552 
553 		mp = rx_buf->mp;
554 		mp->b_next = mp->b_cont = NULL;
555 		/*
556 		 * Adjust for placement offset
557 		 * on first bufffer.
558 		 */
559 		if (i == 0) {
560 			mp->b_rptr += pad;
561 		}
562 
563 		mp->b_wptr = (uchar_t *)((unsigned long)mp->b_rptr + bd_len);
564 
565 		if (head == NULL) {
566 			head = tail = mp;
567 		} else {
568 			tail->b_cont = mp;
569 			tail = mp;
570 		}
571 
572 		work_length -= bd_len;
573 	}
574 
575 	qede_set_cksum_flags(head,
576 		    HOST_TO_LE_16(cqe->pars_flags.flags));
577 #ifdef DEBUG_JUMBO
578 	if (jumbo_print) {
579 		qede_dump_mblk_chain_bcont_ptr(rx_ring->qede, head);
580 	}
581 #endif
582 	rx_ring->rx_jumbo_pkt_cnt++;
583 	return (head);
584 }
585 
586 static mblk_t *
587 qede_reg_cqe(qede_rx_ring_t *rx_ring,
588     struct eth_fast_path_rx_reg_cqe *cqe,
589     int *pkt_bytes)
590 {
591 	qede_t *qede = rx_ring->qede;
592 	qede_rx_buffer_t *rx_buffer;
593 	uint32_t free_buffer_count;
594 	mblk_t *mp;
595 	uint16_t pkt_len = HOST_TO_LE_16(cqe->pkt_len);
596 	u8 pad = cqe->placement_offset;
597 	qede_dma_info_t *dma_info;
598 	ddi_dma_handle_t dma_handle;
599 	char *virt_addr;
600 
601 	/*
602 	 * Update the byte count as it will
603 	 * be the same for normal and jumbo
604 	 */
605 	*pkt_bytes = (int)pkt_len;
606 
607 	if (cqe->bd_num > 1) {
608 		/*
609 		 * If this cqe uses more than one
610 		 * rx buffer then it must be
611 		 * jumbo.  Call another handler
612 		 * for this because the process is
613 		 * quite different.
614 		 */
615 		return (qede_reg_jumbo_cqe(rx_ring, cqe));
616 	}
617 
618 
619 	rx_buffer = qede_get_next_rx_buffer(rx_ring,
620             &free_buffer_count);
621 
622 	if (free_buffer_count <
623 	    rx_ring->rx_low_buffer_threshold) {
624 		qede_recycle_copied_rx_buffer(rx_buffer);
625 		rx_ring->rx_low_water_cnt++;
626 		*pkt_bytes = 0;
627 		return (NULL);
628 	}
629 
630 	dma_info = &rx_buffer->dma_info;
631 	virt_addr = dma_info->virt_addr;
632 	dma_handle = dma_info->dma_handle;
633 	ddi_dma_sync(dma_handle,
634 	    0, 0, DDI_DMA_SYNC_FORKERNEL);
635 
636 	if (pkt_len <= rx_ring->rx_copy_threshold) {
637 		mp = allocb(pkt_len + 2, 0); /* IP HDR_ALIGN */
638 		if (mp != NULL) {
639 			virt_addr += pad;
640 			bcopy(virt_addr, mp->b_rptr, pkt_len);
641 		} else {
642 			/*
643 			 * Post the buffer back to fw and
644 			 * drop packet
645 			 */
646 			qede_print_err("!%s(%d): allocb failed",
647 		    	    __func__,
648 			    rx_ring->qede->instance);
649 			qede->allocbFailures++;
650                         goto freebuf;
651 		}
652 		/*
653 		 * We've copied it (or not) and are done with it
654 		 * so put it back into the passive list.
655 		 */
656 		ddi_dma_sync(dma_handle,
657 	            0, 0, DDI_DMA_SYNC_FORDEV);
658 		qede_recycle_copied_rx_buffer(rx_buffer);
659 		rx_ring->rx_copy_cnt++;
660 	} else {
661 
662 		/*
663 		 * We are going to send this mp/buffer
664 		 * up to the mac layer.  Adjust the
665 		 * pointeres and link it to our chain.
666 		 * the rx_buffer is returned to us in
667 		 * the recycle function so we drop it
668 		 * here.
669 		 */
670 		mp = rx_buffer->mp;
671 		mp->b_rptr += pad;
672 	}
673 	mp->b_cont = mp->b_next = NULL;
674 	mp->b_wptr = (uchar_t *)((unsigned long)mp->b_rptr + pkt_len);
675 
676 	qede_set_cksum_flags(mp,
677 	    HOST_TO_LE_16(cqe->pars_flags.flags));
678 #ifdef DEBUG_JUMBO
679 	if (jumbo_print) {
680 	    qede_dump_mblk_chain_bnext_ptr(rx_ring->qede, mp);
681 	}
682 #endif
683 
684 	rx_ring->rx_reg_pkt_cnt++;
685 	return (mp);
686 
687 freebuf:
688         qede_recycle_copied_rx_buffer(rx_buffer);
689         return (NULL);
690 }
691 
692 /*
693  * Routine to process the rx packets on the
694  * passed rx_ring. Can be called for intr or
695  * poll context/routines
696  */
697 static mblk_t *
698 qede_process_rx_ring(qede_rx_ring_t *rx_ring, int nbytes, int npkts)
699 {
700 	union eth_rx_cqe *cqe;
701 	u16 last_cqe_consumer = rx_ring->last_cqe_consumer;
702 	enum eth_rx_cqe_type cqe_type;
703 	u16 sw_comp_cons, hw_comp_cons;
704 	mblk_t *mp = NULL, *first_mp = NULL, *last_mp = NULL;
705 	int pkt_bytes = 0, byte_cnt = 0, pkt_cnt = 0;
706 
707 	hw_comp_cons = HOST_TO_LE_16(*rx_ring->hw_cons_ptr);
708 
709 	/* Completion ring sw consumer */
710 	sw_comp_cons = ecore_chain_get_cons_idx(&rx_ring->rx_cqe_ring);
711 
712 	while (sw_comp_cons != hw_comp_cons) {
713 		if ((byte_cnt >= nbytes) ||
714 		    (pkt_cnt >= npkts)) {
715 			break;
716 		}
717 
718 		cqe = (union eth_rx_cqe *)
719 		    ecore_chain_consume(&rx_ring->rx_cqe_ring);
720 		/* Get next element and increment the cons_idx */
721 
722 		(void) ddi_dma_sync(rx_ring->rx_cqe_dmah,
723 		    last_cqe_consumer, sizeof (*cqe),
724 		    DDI_DMA_SYNC_FORKERNEL);
725 
726 		cqe_type = cqe->fast_path_regular.type;
727 
728 		switch (cqe_type) {
729 		case ETH_RX_CQE_TYPE_SLOW_PATH:
730 			ecore_eth_cqe_completion(&rx_ring->qede->edev.hwfns[0],
731 			    (struct eth_slow_path_rx_cqe *)cqe);
732 			goto next_cqe;
733 		case ETH_RX_CQE_TYPE_REGULAR:
734 			mp = qede_reg_cqe(rx_ring,
735 			    &cqe->fast_path_regular,
736 			    &pkt_bytes);
737 			break;
738 		case ETH_RX_CQE_TYPE_TPA_START:
739 			qede_lro_start(rx_ring,
740 			    &cqe->fast_path_tpa_start);
741 			goto next_cqe;
742 		case ETH_RX_CQE_TYPE_TPA_CONT:
743 			qede_lro_cont(rx_ring,
744 			    &cqe->fast_path_tpa_cont);
745 			goto next_cqe;
746 		case ETH_RX_CQE_TYPE_TPA_END:
747 			mp = qede_lro_end(rx_ring,
748 			    &cqe->fast_path_tpa_end,
749 			    &pkt_bytes);
750 			break;
751 		default:
752 			if (cqe_type != 0) {
753 				qede_print_err("!%s(%d): cqe_type %x not "
754 				    "supported", __func__,
755 				    rx_ring->qede->instance,
756 				    cqe_type);
757 			}
758 			goto exit_rx;
759 		}
760 
761 		/*
762 		 * If we arrive here with no mp,
763 		 * then we hit an RX buffer threshold
764 		 * where we had to drop the packet and
765 		 * give the buffers back to the device.
766 		 */
767 		if (mp == NULL) {
768 			rx_ring->rx_drop_cnt++;
769 			goto next_cqe;
770 		}
771 
772 		if (first_mp) {
773 			last_mp->b_next = mp;
774 		} else {
775 			first_mp = mp;
776 		}
777 		last_mp = mp;
778 		pkt_cnt++;
779 		byte_cnt += pkt_bytes;
780 next_cqe:
781 		ecore_chain_recycle_consumed(&rx_ring->rx_cqe_ring);
782 		last_cqe_consumer = sw_comp_cons;
783 		sw_comp_cons = ecore_chain_get_cons_idx(&rx_ring->rx_cqe_ring);
784 		if (!(qede_has_rx_work(rx_ring))) {
785 			ecore_sb_update_sb_idx(rx_ring->fp->sb_info);
786 		}
787 		hw_comp_cons = HOST_TO_LE_16(*rx_ring->hw_cons_ptr);
788 	}
789 	rx_ring->rx_pkt_cnt += pkt_cnt;
790 	rx_ring->rx_byte_cnt += byte_cnt;
791 
792 exit_rx:
793 	if (first_mp) {
794 		last_mp->b_next = NULL;
795 	}
796 
797 	/*
798 	 * Since prod update will result in
799 	 * reading of the bd's, do a dma_sync
800 	 */
801 	qede_replenish_rx_buffers(rx_ring);
802 	qede_update_rx_q_producer(rx_ring);
803 	rx_ring->last_cqe_consumer = last_cqe_consumer;
804 
805 	return (first_mp);
806 }
807 
808 mblk_t *
809 qede_process_fastpath(qede_fastpath_t *fp,
810     int nbytes, int npkts, int *work_done)
811 {
812 	int i = 0;
813 	qede_tx_ring_t *tx_ring;
814 	qede_rx_ring_t *rx_ring;
815 	mblk_t *mp = NULL;
816 
817 	rx_ring = fp->rx_ring;
818 
819 	for (i = 0; i < fp->qede->num_tc; i++) {
820 		tx_ring = fp->tx_ring[i];
821 		if (qede_has_tx_work(tx_ring)) {
822 		/* process tx completions */
823 			if (mutex_tryenter(&tx_ring->tx_lock) != 0) {
824 				*work_done +=
825 				    qede_process_tx_completions(tx_ring);
826 				mutex_exit(&tx_ring->tx_lock);
827 			}
828 		}
829 	}
830 
831 	if (!(qede_has_rx_work(rx_ring))) {
832 		ecore_sb_update_sb_idx(fp->sb_info);
833 	}
834 
835 	rx_ring = fp->rx_ring;
836 	if (qede_has_rx_work(rx_ring)) {
837 		mutex_enter(&rx_ring->rx_lock);
838 		mp = qede_process_rx_ring(rx_ring,
839 		    nbytes, npkts);
840 		if (mp) {
841 			*work_done += 1;
842 		}
843 		mutex_exit(&rx_ring->rx_lock);
844 	}
845 
846 	return (mp);
847 }
848 
849 /*
850  * Parse the mblk to extract information
851  * from the protocol headers.
852  * The routine assumes that the l4 header is tcp. Also
853  * it does not account for ipv6 headers since ipv6 lso is
854  * unsupported
855  */
856 static void
857 qede_pkt_parse_lso_headers(qede_tx_pktinfo_t *pktinfo, mblk_t *mp)
858 {
859 	struct ether_header *eth_hdr =
860 	    (struct ether_header *)(void *)mp->b_rptr;
861 	ipha_t *ip_hdr;
862 	struct tcphdr *tcp_hdr;
863 
864 	/* mac header type and len */
865 	if (ntohs(eth_hdr->ether_type) == ETHERTYPE_IP) {
866 		pktinfo->ether_type = ntohs(eth_hdr->ether_type);
867 		pktinfo->mac_hlen = sizeof (struct ether_header);
868 	} else if (ntohs(eth_hdr->ether_type) == ETHERTYPE_VLAN) {
869 		struct ether_vlan_header *vlan_hdr =
870 		    (struct ether_vlan_header *)(void *)mp->b_rptr;
871 		pktinfo->ether_type = ntohs(vlan_hdr->ether_type);
872 		pktinfo->mac_hlen = sizeof (struct ether_vlan_header);
873 	}
874 
875 	/* ip header type and len */
876 	ip_hdr = (ipha_t *)(void *)((u8 *)mp->b_rptr + pktinfo->mac_hlen);
877 	pktinfo->ip_hlen = IPH_HDR_LENGTH(ip_hdr);
878 
879 	/* Assume TCP protocol */
880 	pktinfo->l4_proto = 0x06;
881 
882 	tcp_hdr = (struct tcphdr *)(void *)
883 	    ((u8 *)mp->b_rptr + pktinfo->mac_hlen + pktinfo->ip_hlen);
884 	pktinfo->l4_hlen = TCP_HDR_LENGTH(tcp_hdr);
885 
886 
887 	pktinfo->total_hlen =
888 	    pktinfo->mac_hlen +
889 	    pktinfo->ip_hlen +
890 	    pktinfo->l4_hlen;
891 }
892 
893 static void
894 qede_get_pkt_offload_info(qede_t *qede, mblk_t *mp,
895     u32 *use_cksum, boolean_t *use_lso, uint16_t *mss)
896 {
897 	u32 pflags;
898 
899 	mac_hcksum_get(mp, NULL, NULL, NULL, NULL, &pflags);
900 
901 	*use_cksum = pflags;
902 	if (qede->lso_enable) {
903 		u32 pkt_mss = 0;
904 		u32 lso_flags = 0;
905 
906 		mac_lso_get(mp, &pkt_mss, &lso_flags);
907 		*use_lso = (lso_flags == HW_LSO);
908 		*mss = (u16)pkt_mss;
909 	}
910 }
911 
912 static void
913 /* LINTED E_FUNC_ARG_UNUSED */
914 qede_get_pkt_info(qede_t *qede, mblk_t *mp,
915     qede_tx_pktinfo_t *pktinfo)
916 {
917 	mblk_t *bp;
918 	size_t size;
919 	struct ether_header *eth_hdr =
920 	    (struct ether_header *)(void *)mp->b_rptr;
921 
922 	pktinfo->total_len = 0;
923 	pktinfo->mblk_no = 0;
924 
925 	/*
926 	 * Count the total length and the number of
927 	 * chained mblks in the packet
928 	 */
929 	for (bp = mp; bp != NULL; bp = bp->b_cont) {
930 		size = MBLKL(bp);
931 		if (size == 0) {
932 			continue;
933 		}
934 
935 		pktinfo->total_len += size;
936 		pktinfo->mblk_no++;
937 	}
938 	/* mac header type and len */
939 	if (ntohs(eth_hdr->ether_type) == ETHERTYPE_IP) {
940 		pktinfo->ether_type = ntohs(eth_hdr->ether_type);
941 		pktinfo->mac_hlen = sizeof (struct ether_header);
942 	} else if (ntohs(eth_hdr->ether_type) == ETHERTYPE_VLAN) {
943 		struct ether_vlan_header *vlan_hdr =
944 		    (struct ether_vlan_header *)(void *)mp->b_rptr;
945 		pktinfo->ether_type = ntohs(vlan_hdr->ether_type);
946 		pktinfo->mac_hlen = sizeof (struct ether_vlan_header);
947 	}
948 
949 }
950 
951 /*
952  * Routine to sync dma mem for multiple
953  * descriptors in a chain
954  */
955 void
956 qede_desc_dma_mem_sync(ddi_dma_handle_t *dma_handle,
957     uint_t start, uint_t count, uint_t range,
958     uint_t unit_size, uint_t direction)
959 {
960 	if ((start + count) < range) {
961 		(void) ddi_dma_sync(*dma_handle,
962 		    start * unit_size, count * unit_size, direction);
963 	} else {
964 		(void) ddi_dma_sync(*dma_handle, start * unit_size,
965 		    0, direction);
966 		(void) ddi_dma_sync(*dma_handle, 0,
967 		    (start + count - range) * unit_size,
968 		    direction);
969 	}
970 }
971 
972 /*
973  * Send tx pkt by copying incoming packet in a
974  * preallocated and mapped dma buffer
975  * Not designed to handle lso for now
976  */
977 static enum qede_xmit_status
978 qede_tx_bcopy(qede_tx_ring_t *tx_ring, mblk_t *mp, qede_tx_pktinfo_t *pktinfo)
979 {
980 	qede_tx_bcopy_pkt_t *bcopy_pkt = NULL;
981 	/* Only one bd will be needed for bcopy packets */
982 	struct eth_tx_1st_bd *first_bd;
983 	u16 last_producer = tx_ring->sw_tx_prod;
984 	uint8_t *txb;
985 	mblk_t *bp;
986 	u32 mblen;
987 
988 	bcopy_pkt = qede_get_bcopy_pkt(tx_ring);
989 	if (bcopy_pkt == NULL) {
990 		qede_print_err("!%s(%d): entry NULL at _tx_ bcopy_list head",
991 		    __func__, tx_ring->qede->instance);
992 		return (XMIT_FAILED);
993 	}
994 
995 	/*
996 	 * Copy the packet data to our copy
997 	 * buffer
998 	 */
999 	txb = bcopy_pkt->virt_addr;
1000 
1001 	for (bp = mp; bp != NULL; bp = bp->b_cont) {
1002 		mblen = MBLKL(bp);
1003 		if (mblen == 0) {
1004 			continue;
1005 		}
1006 		bcopy(bp->b_rptr, txb, mblen);
1007 		txb += mblen;
1008 	}
1009 
1010 	(void) ddi_dma_sync(bcopy_pkt->dma_handle,
1011 	    0, pktinfo->total_len,
1012 	    DDI_DMA_SYNC_FORDEV);
1013 
1014 
1015 	mutex_enter(&tx_ring->tx_lock);
1016 	if (ecore_chain_get_elem_left(&tx_ring->tx_bd_ring)<
1017 	    QEDE_TX_COPY_PATH_PAUSE_THRESHOLD) {
1018 		tx_ring->tx_q_sleeping = 1;
1019 		qede_put_bcopy_pkt(tx_ring, bcopy_pkt);
1020 		mutex_exit(&tx_ring->tx_lock);
1021 #ifdef	DEBUG_TX_RECYCLE
1022 		qede_print_err("!%s(%d): Pausing tx queue",
1023 		    __func__, tx_ring->qede->instance);
1024 #endif
1025 		return (XMIT_PAUSE_QUEUE);
1026 	}
1027 
1028 	first_bd = ecore_chain_produce(&tx_ring->tx_bd_ring);
1029 	bzero(first_bd, sizeof (*first_bd));
1030 	first_bd->data.nbds = 1;
1031 	first_bd->data.bd_flags.bitfields =
1032 	    (1 << ETH_TX_1ST_BD_FLAGS_START_BD_SHIFT);
1033 
1034 	if (pktinfo->cksum_flags & HCK_IPV4_HDRCKSUM) {
1035 		first_bd->data.bd_flags.bitfields |=
1036 		    (1 << ETH_TX_1ST_BD_FLAGS_IP_CSUM_SHIFT);
1037 	}
1038 
1039 	if (pktinfo->cksum_flags & HCK_FULLCKSUM) {
1040 		first_bd->data.bd_flags.bitfields |=
1041 		    (1 << ETH_TX_1ST_BD_FLAGS_L4_CSUM_SHIFT);
1042 	}
1043 
1044 	BD_SET_ADDR_LEN(first_bd,
1045 	    bcopy_pkt->phys_addr,
1046 	    pktinfo->total_len);
1047 
1048 	first_bd->data.bitfields |=
1049 		(pktinfo->total_len & ETH_TX_DATA_1ST_BD_PKT_LEN_MASK)
1050 		<< ETH_TX_DATA_1ST_BD_PKT_LEN_SHIFT;
1051 
1052 	tx_ring->tx_db.data.bd_prod =
1053 	    HOST_TO_LE_16(ecore_chain_get_prod_idx(&tx_ring->tx_bd_ring));
1054 
1055 	tx_ring->tx_recycle_list[tx_ring->sw_tx_prod].bcopy_pkt = bcopy_pkt;
1056 	tx_ring->tx_recycle_list[tx_ring->sw_tx_prod].dmah_entry =  NULL;
1057 
1058 	tx_ring->sw_tx_prod++;
1059 	tx_ring->sw_tx_prod &= TX_RING_MASK;
1060 
1061 	(void) ddi_dma_sync(tx_ring->tx_bd_dmah,
1062 	    last_producer, sizeof (*first_bd),
1063 	    DDI_DMA_SYNC_FORDEV);
1064 
1065 	QEDE_DOORBELL_WR(tx_ring, tx_ring->tx_db.raw);
1066 	mutex_exit(&tx_ring->tx_lock);
1067 
1068 	freemsg(mp);
1069 
1070 	return (XMIT_DONE);
1071 }
1072 
1073 /*
1074  * Send tx packet by mapping the mp(kernel addr)
1075  * to an existing dma_handle in the driver
1076  */
1077 static enum qede_xmit_status
1078 qede_tx_mapped(qede_tx_ring_t *tx_ring, mblk_t *mp, qede_tx_pktinfo_t *pktinfo)
1079 {
1080 	enum qede_xmit_status status = XMIT_FAILED;
1081 	int ret;
1082 	qede_dma_handle_entry_t *dmah_entry = NULL;
1083 	qede_dma_handle_entry_t *head = NULL, *tail = NULL, *hdl;
1084 	struct eth_tx_1st_bd *first_bd;
1085 	struct eth_tx_2nd_bd *second_bd = 0;
1086 	struct eth_tx_3rd_bd *third_bd = 0;
1087 	struct eth_tx_bd *tx_data_bd;
1088 	struct eth_tx_bd local_bd[64] = { 0 };
1089 	ddi_dma_cookie_t cookie[64];
1090 	u32 ncookies, total_cookies = 0, max_cookies = 0, index = 0;
1091 	ddi_dma_handle_t dma_handle;
1092 	mblk_t *bp;
1093 	u32 mblen;
1094 	bool is_premapped = B_FALSE;
1095 	u64 dma_premapped = 0, dma_bound = 0;
1096 	u32 hdl_reserved = 0;
1097 	u8 nbd = 0;
1098 	int i, bd_index;
1099 	u16 last_producer;
1100 	qede_tx_recycle_list_t *tx_recycle_list = tx_ring->tx_recycle_list;
1101 	u64 data_addr;
1102 	size_t data_size;
1103 
1104 	if (pktinfo->use_lso) {
1105 		/*
1106 		 * For tso pkt, we can use as many as 255 bds
1107 		 */
1108 		max_cookies = ETH_TX_MAX_BDS_PER_NON_LSO_PACKET - 1;
1109 		qede_pkt_parse_lso_headers(pktinfo, mp);
1110 	} else {
1111 		/*
1112 		 * For non-tso packet, only 18 bds can be used
1113 		 */
1114 		max_cookies = ETH_TX_MAX_BDS_PER_NON_LSO_PACKET - 1;
1115 	}
1116 
1117 	for (bp = mp; bp != NULL; bp = bp->b_cont) {
1118 		mblen = MBLKL(bp);
1119 		if (mblen == 0) {
1120 			continue;
1121 		}
1122 		is_premapped = B_FALSE;
1123 		/*
1124 		 * If the mblk is premapped then get the
1125 		 * dma_handle and sync the dma mem. otherwise
1126 		 * reserve an handle from the driver dma
1127 		 * handles list
1128 		 */
1129 #ifdef	DBLK_DMA_PREMAP
1130 		if (bp->b_datap->db_flags & DBLK_DMA_PREMAP) {
1131 #ifdef	DEBUG_PREMAP
1132 			qede_info(tx_ring->qede, "mp is premapped");
1133 #endif
1134 			tx_ring->tx_premap_count++;
1135 			ret = dblk_dma_info_get(tx_ring->pm_handle,
1136 			    bp->b_rptr, mblen,
1137 			    bp->b_datap, &cookie[index],
1138 			    &ncookies, &dma_handle);
1139 			if (ret == DDI_DMA_MAPPED) {
1140 				is_premapped = B_TRUE;
1141 				dma_premapped++;
1142 				(void) ddi_dma_sync(dma_handle, 0, 0,
1143 				    DDI_DMA_SYNC_FORDEV);
1144 			} else {
1145 				tx_ring->tx_premap_fail++;
1146 			}
1147 		}
1148 #endif	/* DBLK_DMA_PREMAP */
1149 
1150 		if (!is_premapped) {
1151 			dmah_entry = qede_get_dmah_entry(tx_ring);
1152 			if (dmah_entry == NULL) {
1153 				qede_info(tx_ring->qede, "dmah_entry NULL, "
1154 				    "Fallback to copy mode...");
1155 				status = XMIT_FAILED;
1156 				goto err_map;
1157 			}
1158 
1159 			if (ddi_dma_addr_bind_handle(dmah_entry->dma_handle,
1160 			    NULL, (caddr_t)bp->b_rptr, mblen,
1161 			    DDI_DMA_STREAMING | DDI_DMA_WRITE,
1162 			    DDI_DMA_DONTWAIT, NULL, &cookie[index], &ncookies)
1163 			    != DDI_DMA_MAPPED) {
1164 
1165 #ifdef DEBUG_PULLUP
1166 			qede_info(tx_ring->qede, "addr_bind() failed for "
1167 			    "handle %p, len %d mblk_no %d tot_len 0x%x"
1168 			    " use_lso %d",  dmah_entry->dma_handle,
1169 			    mblen, pktinfo->mblk_no, pktinfo->total_len,
1170 			    pktinfo->use_lso);
1171 
1172 			qede_info(tx_ring->qede, "Falling back to pullup");
1173 #endif
1174 				status = XMIT_FALLBACK_PULLUP;
1175 				tx_ring->tx_bind_fail++;
1176 				goto err_map;
1177 			}
1178 			tx_ring->tx_bind_count++;
1179 
1180 			if (index == 0) {
1181 				dmah_entry->mp = mp;
1182 			} else {
1183 				dmah_entry->mp = NULL;
1184 			}
1185 
1186 			/* queue into recycle list for tx completion routine */
1187 			if (tail == NULL) {
1188 				head = tail = dmah_entry;
1189 			} else {
1190 				tail->next = dmah_entry;
1191 				tail = dmah_entry;
1192 			}
1193 
1194 			hdl_reserved++;
1195 			dma_bound++;
1196 		}
1197 
1198 		total_cookies += ncookies;
1199 		if (total_cookies > max_cookies) {
1200 			tx_ring->tx_too_many_cookies++;
1201 #ifdef DEBUG_PULLUP
1202 			qede_info(tx_ring->qede,
1203 			    "total_cookies > max_cookies, "
1204 			    "pktlen %d, mb num %d",
1205 			    pktinfo->total_len, pktinfo->mblk_no);
1206 #endif
1207 			status = XMIT_TOO_MANY_COOKIES;
1208 			goto err_map_sec;
1209 		}
1210 
1211 		if (is_premapped) {
1212 			index += ncookies;
1213 		} else {
1214 			index++;
1215 			/*
1216 			 * Dec. ncookies since we already stored cookie[0]
1217 			 */
1218 			ncookies--;
1219 
1220 			for (i = 0; i < ncookies; i++, index++)
1221 				ddi_dma_nextcookie(dmah_entry->dma_handle,
1222 				    &cookie[index]);
1223 		}
1224 	}
1225 
1226 	/*
1227 	 * Guard against the case where we get a series of mblks that cause us
1228 	 * not to end up with any mapped data.
1229 	 */
1230 	if (total_cookies == 0) {
1231 		status = XMIT_FAILED;
1232 		goto err_map_sec;
1233 	}
1234 
1235 	if (total_cookies > max_cookies) {
1236 		tx_ring->tx_too_many_cookies++;
1237 		status = XMIT_TOO_MANY_COOKIES;
1238 		goto err_map_sec;
1239 	}
1240 	first_bd = (struct eth_tx_1st_bd *)&local_bd[0];
1241 
1242 	/*
1243 	 * Mark this bd as start bd
1244 	 */
1245 	first_bd->data.bd_flags.bitfields =
1246 	    (1 << ETH_TX_1ST_BD_FLAGS_START_BD_SHIFT);
1247 
1248 	if (pktinfo->cksum_flags & HCK_IPV4_HDRCKSUM) {
1249 		first_bd->data.bd_flags.bitfields |=
1250 		    (1 << ETH_TX_1ST_BD_FLAGS_IP_CSUM_SHIFT);
1251 	}
1252 
1253 	if (pktinfo->cksum_flags & HCK_FULLCKSUM) {
1254 		first_bd->data.bd_flags.bitfields |=
1255 		    (1 << ETH_TX_1ST_BD_FLAGS_L4_CSUM_SHIFT);
1256 	}
1257 
1258 
1259 	/* Fill-up local bds with the tx data and flags */
1260 	for (i = 0, bd_index = 0; i < total_cookies; i++, bd_index++) {
1261 		if (bd_index == 0) {
1262 			BD_SET_ADDR_LEN(first_bd,
1263 			    cookie[i].dmac_laddress,
1264 			    cookie[i].dmac_size);
1265 
1266 			if (pktinfo->use_lso) {
1267 			first_bd->data.bd_flags.bitfields |=
1268 			    1 << ETH_TX_1ST_BD_FLAGS_LSO_SHIFT;
1269 
1270 			second_bd = (struct eth_tx_2nd_bd *)&local_bd[1];
1271 
1272 			/*
1273 			 * If the fisrt bd contains
1274 			 * hdr + data (partial or full data), then spilt
1275 			 * the hdr and data between 1st and 2nd
1276 			 * bd respectively
1277 			 */
1278 			if (first_bd->nbytes > pktinfo->total_hlen) {
1279 				data_addr = cookie[0].dmac_laddress
1280 				    + pktinfo->total_hlen;
1281 				data_size = cookie[i].dmac_size
1282 				    - pktinfo->total_hlen;
1283 
1284 				BD_SET_ADDR_LEN(second_bd,
1285 				    data_addr,
1286 				    data_size);
1287 
1288 				/*
1289 				 * First bd already contains the addr to
1290 				 * to start of pkt, just adjust the dma
1291 				 * len of first_bd
1292 				 */
1293 				first_bd->nbytes = pktinfo->total_hlen;
1294 				bd_index++;
1295 			} else if (first_bd->nbytes < pktinfo->total_hlen) {
1296 #ifdef DEBUG_PULLUP
1297 				qede_info(tx_ring->qede,
1298 				    "Headers not in single bd");
1299 #endif
1300 				status = XMIT_FALLBACK_PULLUP;
1301 				goto err_map_sec;
1302 
1303 			}
1304 
1305 			/*
1306 			 * Third bd is used to indicates to fw
1307 			 * that tso needs to be performed. It should
1308 			 * be present even if only two cookies are
1309 			 * needed for the mblk
1310 			 */
1311 			third_bd = (struct eth_tx_3rd_bd *)&local_bd[2];
1312 			third_bd->data.lso_mss |=
1313 			    HOST_TO_LE_16(pktinfo->mss);
1314 			third_bd->data.bitfields |=
1315 			    1 << ETH_TX_DATA_3RD_BD_HDR_NBD_SHIFT;
1316 			}
1317 
1318 			continue;
1319 		}
1320 
1321 		tx_data_bd = &local_bd[bd_index];
1322 		BD_SET_ADDR_LEN(tx_data_bd,
1323 		    cookie[i].dmac_laddress,
1324 		    cookie[i].dmac_size);
1325 	}
1326 
1327 	if (pktinfo->use_lso) {
1328 		if (bd_index < 3) {
1329 			nbd = 3;
1330 		} else {
1331 			nbd = bd_index;
1332 		}
1333 	} else {
1334 		nbd = total_cookies;
1335 		first_bd->data.bitfields |=
1336 		    (pktinfo->total_len & ETH_TX_DATA_1ST_BD_PKT_LEN_MASK)
1337 		    << ETH_TX_DATA_1ST_BD_PKT_LEN_SHIFT;
1338 	}
1339 
1340 	first_bd->data.nbds = nbd;
1341 
1342 	mutex_enter(&tx_ring->tx_lock);
1343 
1344 	/*
1345 	 * Before copying the local bds into actual,
1346 	 * check if we have enough on the bd_chain
1347 	 */
1348 	if (ecore_chain_get_elem_left(&tx_ring->tx_bd_ring) <
1349 	    nbd) {
1350 		tx_ring->tx_q_sleeping = 1;
1351 		status = XMIT_PAUSE_QUEUE;
1352 #ifdef	DEBUG_TX_RECYCLE
1353 			qede_info(tx_ring->qede, "Pausing tx queue...");
1354 #endif
1355 		mutex_exit(&tx_ring->tx_lock);
1356 		goto err_map_sec ;
1357 	}
1358 
1359 	/* Copy the local_bd(s) into the actual bds */
1360 	for (i = 0; i < nbd; i++) {
1361 		tx_data_bd = ecore_chain_produce(&tx_ring->tx_bd_ring);
1362 		bcopy(&local_bd[i], tx_data_bd, sizeof (*tx_data_bd));
1363 	}
1364 
1365 	last_producer = tx_ring->sw_tx_prod;
1366 
1367 	tx_ring->tx_recycle_list[tx_ring->sw_tx_prod].dmah_entry = head;
1368 	tx_ring->tx_recycle_list[tx_ring->sw_tx_prod].bcopy_pkt = NULL;
1369 	tx_ring->sw_tx_prod = (tx_ring->sw_tx_prod + 1) & TX_RING_MASK;
1370 
1371 	tx_ring->tx_db.data.bd_prod =
1372 	    HOST_TO_LE_16(ecore_chain_get_prod_idx(&tx_ring->tx_bd_ring));
1373 
1374 	/* Sync the tx_bd dma mem */
1375 	qede_desc_dma_mem_sync(&tx_ring->tx_bd_dmah,
1376 	    last_producer, nbd,
1377 	    tx_ring->tx_ring_size,
1378 	    sizeof (struct eth_tx_bd),
1379 	    DDI_DMA_SYNC_FORDEV);
1380 
1381 	/*
1382 	 * Write to doorbell bar
1383 	 */
1384 	QEDE_DOORBELL_WR(tx_ring, tx_ring->tx_db.raw);
1385 
1386 	mutex_exit(&tx_ring->tx_lock);
1387 
1388 	return (XMIT_DONE);
1389 err_map:
1390 	if (dmah_entry != NULL) {
1391 		if (tail == NULL) {
1392 			head = tail = dmah_entry;
1393 		} else {
1394 			tail->next = dmah_entry;
1395 			tail = dmah_entry;
1396 		}
1397 		hdl_reserved++;
1398 	}
1399 
1400 err_map_sec:
1401 
1402 	hdl = head;
1403 
1404 	while (hdl != NULL) {
1405 		(void) ddi_dma_unbind_handle(hdl->dma_handle);
1406 		hdl = hdl->next;
1407 	}
1408 
1409 	if (head != NULL) {
1410 		qede_put_dmah_entries(tx_ring, head);
1411 	}
1412 
1413 	return (status);
1414 }
1415 
1416 static enum qede_xmit_status
1417 qede_send_tx_packet(qede_t *qede, qede_tx_ring_t *tx_ring, mblk_t *mp)
1418 {
1419 	boolean_t force_pullup = B_FALSE;
1420 	enum qede_xmit_status status = XMIT_FAILED;
1421 	enum qede_xmit_mode xmit_mode = USE_BCOPY;
1422 	qede_tx_pktinfo_t pktinfo;
1423 	mblk_t *original_mp = NULL, *pulled_up_mp = NULL;
1424 	struct ether_vlan_header *ethvhdr;
1425 
1426 	mutex_enter(&tx_ring->tx_lock);
1427 	if (ecore_chain_get_elem_left(&tx_ring->tx_bd_ring) <
1428 	    qede->tx_recycle_threshold) {
1429 #ifdef	DEBUG_TX_RECYCLE
1430 		qede_info(qede, "Recyclycling from tx routine");
1431 #endif
1432 		if (qede_process_tx_completions(tx_ring) <
1433 		    qede->tx_recycle_threshold) {
1434 #ifdef	DEBUG_TX_RECYCLE
1435 			qede_info(qede, "Still not enough bd after cleanup, "
1436 			    "pausing tx queue...");
1437 #endif
1438 			tx_ring->tx_q_sleeping = 1;
1439 			mutex_exit(&tx_ring->tx_lock);
1440 			return (XMIT_PAUSE_QUEUE);
1441 		}
1442 	}
1443 
1444 	mutex_exit(&tx_ring->tx_lock);
1445 
1446 	bzero(&pktinfo, sizeof (pktinfo));
1447 
1448 	/* Get the offload reqd. on the pkt */
1449 	qede_get_pkt_offload_info(qede, mp, &pktinfo.cksum_flags,
1450 	    &pktinfo.use_lso, &pktinfo.mss);
1451 
1452 do_pullup:
1453 	if (force_pullup) {
1454 		tx_ring->tx_pullup_count++;
1455 #ifdef	DEBUG_PULLUP
1456 		qede_info(qede, "Pulling up original mp %p", mp);
1457 #endif
1458 		/*
1459 		 * Try to accumulate all mblks of this pkt
1460 		 * into a single mblk
1461 		 */
1462 		original_mp = mp;
1463 		if ((pulled_up_mp = msgpullup(mp, -1)) != NULL) {
1464 #ifdef	DEBUG_PULLUP
1465 			qede_info(qede, "New mp %p, ori %p", pulled_up_mp, mp);
1466 #endif
1467 			/*
1468 			 * Proceed with the new single
1469 			 * mp
1470 			 */
1471 			mp = pulled_up_mp;
1472 			xmit_mode = XMIT_MODE_UNUSED;
1473 			pktinfo.pulled_up = B_TRUE;
1474 		} else {
1475 #ifdef	DEBUG_PULLUP
1476 			qede_info(tx_ring->qede, "Pullup failed");
1477 #endif
1478 			status = XMIT_FAILED;
1479 			goto exit;
1480 		}
1481 	}
1482 
1483 	qede_get_pkt_info(qede, mp, &pktinfo);
1484 
1485 
1486 	if ((!pktinfo.use_lso) &&
1487                  (pktinfo.total_len > (qede->mtu + pktinfo.mac_hlen))) {
1488   		qede_info(tx_ring->qede,
1489 		    "Packet drop as packet len 0x%x > 0x%x",
1490 		    pktinfo.total_len, (qede->mtu + QEDE_MAX_ETHER_HDR));
1491 		status = XMIT_FAILED;
1492 		goto exit;
1493 	}
1494 
1495 
1496 #ifdef	DEBUG_PULLUP
1497 	if (force_pullup) {
1498 	qede_print_err("!%s: mp %p, pktinfo : total_len %d,"
1499 	    " mblk_no %d, ether_type %d\n"
1500 	    "mac_hlen %d, ip_hlen %d, l4_hlen %d\n"
1501 	    "l4_proto %d, use_cksum:use_lso %d:%d mss %d", __func__, mp,
1502 	    pktinfo.total_len, pktinfo.mblk_no, pktinfo.ether_type,
1503 	    pktinfo.mac_hlen, pktinfo.ip_hlen, pktinfo.l4_hlen,
1504 	    pktinfo.l4_proto, pktinfo.cksum_flags, pktinfo.use_lso,
1505 	    pktinfo.mss);
1506 	}
1507 #endif
1508 
1509 #ifdef	DEBUG_PREMAP
1510 	if (DBLK_IS_PREMAPPED(mp->b_datap)) {
1511 		qede_print_err("!%s(%d): mp %p id PREMAPPMED",
1512 		    __func__, qede->instance);
1513 	}
1514 #endif
1515 
1516 #ifdef	DBLK_DMA_PREMAP
1517 	if (DBLK_IS_PREMAPPED(mp->b_datap) ||
1518 	    pktinfo.total_len > qede->tx_bcopy_threshold) {
1519 		xmit_mode = USE_DMA_BIND;
1520 	}
1521 #else
1522 	if (pktinfo.total_len > qede->tx_bcopy_threshold) {
1523 		xmit_mode = USE_DMA_BIND;
1524 	}
1525 #endif
1526 
1527 	if (pktinfo.total_len <= qede->tx_bcopy_threshold) {
1528 		xmit_mode = USE_BCOPY;
1529 	}
1530 
1531 	/*
1532 	 * if mac + ip hdr not in one contiguous block,
1533 	 * use copy mode
1534 	 */
1535 	if (MBLKL(mp) < (ETHER_HEADER_LEN + IP_HEADER_LEN)) {
1536 		/*qede_info(qede, "mblk too small, using copy mode, len = %d", MBLKL(mp));*/
1537 		xmit_mode = USE_BCOPY;
1538 	}
1539 
1540 	if ((uintptr_t)mp->b_rptr & 1) {
1541 		xmit_mode = USE_BCOPY;
1542 	}
1543 
1544 	/*
1545 	 * if too many mblks and hence the dma cookies, needed
1546 	 * for tx, then use bcopy or pullup on packet
1547 	 * currently, ETH_TX_MAX_BDS_PER_NON_LSO_PACKET = 18
1548 	 */
1549 	if (pktinfo.mblk_no > (ETH_TX_MAX_BDS_PER_NON_LSO_PACKET - 1)) {
1550 		if (force_pullup) {
1551 			tx_ring->tx_too_many_mblks++;
1552 			status = XMIT_FAILED;
1553 			goto exit;
1554 		} else {
1555 			xmit_mode = USE_PULLUP;
1556 		}
1557 	}
1558 
1559 #ifdef	TX_FORCE_COPY_MODE
1560 	xmit_mode = USE_BCOPY;
1561 #elif	TX_FORCE_MAPPED_MODE
1562 	xmit_mode = USE_DMA_BIND;
1563 #endif
1564 
1565 #ifdef	DEBUG_PULLUP
1566 	if (force_pullup) {
1567 		qede_info(qede, "using mode %d on pulled mp %p",
1568 		    xmit_mode, mp);
1569 	}
1570 #endif
1571 
1572 	/*
1573 	 * Use Mapped mode for the packet
1574 	 */
1575 	if (xmit_mode == USE_DMA_BIND) {
1576 		status = qede_tx_mapped(tx_ring, mp, &pktinfo);
1577 		if (status == XMIT_DONE) {
1578 			if (pktinfo.use_lso) {
1579 				tx_ring->tx_lso_pkt_count++;
1580 			} else if(pktinfo.total_len > 1518) {
1581 				tx_ring->tx_jumbo_pkt_count++;
1582 			}
1583 			tx_ring->tx_mapped_pkts++;
1584 			goto exit;
1585                 } else if ((status == XMIT_TOO_MANY_COOKIES ||
1586 		    (status == XMIT_FALLBACK_PULLUP)) && !force_pullup) {
1587 			xmit_mode = USE_PULLUP;
1588 		} else {
1589 			status = XMIT_FAILED;
1590 			goto exit;
1591 		}
1592 	}
1593 
1594 	if (xmit_mode == USE_BCOPY) {
1595 		status = qede_tx_bcopy(tx_ring, mp, &pktinfo);
1596 		if (status == XMIT_DONE) {
1597 			tx_ring->tx_copy_count++;
1598 			goto exit;
1599 		} else if ((status == XMIT_FALLBACK_PULLUP) &&
1600 		    !force_pullup) {
1601 			xmit_mode = USE_PULLUP;
1602 		} else {
1603 			goto exit;
1604 		}
1605 	}
1606 
1607 	if (xmit_mode == USE_PULLUP) {
1608 		force_pullup = B_TRUE;
1609 		tx_ring->tx_pullup_count++;
1610 		goto do_pullup;
1611 	}
1612 
1613 exit:
1614 	if (status != XMIT_DONE) {
1615 		/*
1616 		 * if msgpullup succeeded, but something else  failed,
1617 		 * free the pulled-up msg and return original mblk to
1618 		 * stack, indicating tx failure
1619 		 */
1620 		if (pulled_up_mp) {
1621 			qede_info(qede, "tx failed, free pullup pkt %p", mp);
1622 			freemsg(pulled_up_mp);
1623 			mp = original_mp;
1624 		}
1625 	} else {
1626 		tx_ring->tx_byte_count += pktinfo.total_len;
1627 		/*
1628 		 * If tx was successfull after a pullup, then free the
1629 		 * original mp. The pulled-up will be freed as part of
1630 		 * tx completions processing
1631 		 */
1632 		if (pulled_up_mp) {
1633 #ifdef	DEBUG_PULLUP
1634 			qede_info(qede,
1635 			    "success, free ori mp %p", original_mp);
1636 #endif
1637 			freemsg(original_mp);
1638 		}
1639 	}
1640 
1641 	return (status);
1642 }
1643 
typedef	uint32_t	ub4; /* unsigned 4-byte quantities */
typedef	uint8_t		ub1; /* unsigned 1-byte quantities */

#define	hashsize(n)	((ub4)1<<(n))
#define	hashmask(n)	(hashsize(n)-1)

/*
 * Reversibly mix three 32-bit values in place. Wrapped in do/while (0) so
 * the macro behaves as a single statement wherever it is used.
 */
#define	mix(a, b, c) \
do { \
	a -= b; a -= c; a ^= (c>>13); \
	b -= c; b -= a; b ^= (a<<8); \
	c -= a; c -= b; c ^= (b>>13); \
	a -= b; a -= c; a ^= (c>>12);  \
	b -= c; b -= a; b ^= (a<<16); \
	c -= a; c -= b; c ^= (b>>5); \
	a -= b; a -= c; a ^= (c>>3);  \
	b -= c; b -= a; b ^= (a<<10); \
	c -= a; c -= b; c ^= (b>>15); \
} while (0)

/*
 * Hash a variable-length byte key into a 32-bit value.
 *
 * k       - the key, read as bytes (little-endian assembly into words)
 * length  - the length of the key in bytes
 * initval - the previous hash, or an arbitrary seed value
 *
 * The key is consumed 12 bytes at a time; the remaining 0..11 bytes and the
 * total length are folded in before a final mix. Returns the resulting
 * 32-bit hash.
 */
ub4
hash(ub1 *k, ub4 length, ub4 initval)
{
	ub4 a, b, c, len;

	/* Set up the internal state */
	len = length;
	a = b = 0x9e3779b9;	/* the golden ratio; an arbitrary value */
	c = initval;		/* the previous hash value */

	/* handle most of the key */
	while (len >= 12) {
		a += (k[0] +((ub4)k[1]<<8) +((ub4)k[2]<<16) +((ub4)k[3]<<24));
		b += (k[4] +((ub4)k[5]<<8) +((ub4)k[6]<<16) +((ub4)k[7]<<24));
		c += (k[8] +((ub4)k[9]<<8) +((ub4)k[10]<<16)+((ub4)k[11]<<24));
		mix(a, b, c);
		k += 12;
		len -= 12;
	}

	/* handle the last 11 bytes */
	c += length;
	/* every case deliberately falls through to the ones below it */
	switch (len) {
	case 11:
		c += ((ub4)k[10]<<24);
		/* FALLTHRU */
	case 10:
		c += ((ub4)k[9]<<16);
		/* FALLTHRU */
	case 9:
		c += ((ub4)k[8]<<8);
		/* the first byte of c is reserved for the length */
		/* FALLTHRU */
	case 8:
		b += ((ub4)k[7]<<24);
		/* FALLTHRU */
	case 7:
		b += ((ub4)k[6]<<16);
		/* FALLTHRU */
	case 6:
		b += ((ub4)k[5]<<8);
		/* FALLTHRU */
	case 5:
		b += k[4];
		/* FALLTHRU */
	case 4:
		a += ((ub4)k[3]<<24);
		/* FALLTHRU */
	case 3:
		a += ((ub4)k[2]<<16);
		/* FALLTHRU */
	case 2:
		a += ((ub4)k[1]<<8);
		/* FALLTHRU */
	case 1:
		a += k[0];
		break;
	default:
		/* len == 0: nothing left to add */
		break;
	}
	mix(a, b, c);
	/* report the result */
	return (c);
}
1732 
#ifdef	NO_CROSSBOW
/*
 * Choose a tx ring index for the frame starting at bp.
 *
 * Ring 0 is returned when only one fastpath exists, when the frame is not an
 * IPv4 packet, or when its IP header is not 4-byte aligned. Otherwise the
 * IPv4 source/destination addresses and, for TCP/UDP, the source/destination
 * ports are packed into a 12-byte key, hashed, and masked with
 * (num_fp - 1).
 */
static uint8_t
qede_hash_get_txq(qede_t *qede, caddr_t bp)
{
	struct ether_vlan_header *vlan_hdr;
	struct ether_header *eth_hdr;
	struct ip *ip4 = NULL;
	struct tcphdr *tcp;
	struct udphdr *udp;
	uint8_t *l4_start;
	uint32_t ether_type;
	uint32_t src_addr, dst_addr;
	uint32_t hashval;
	uint16_t sport = 0;
	uint16_t dport = 0;
	uint8_t key[12];
	uint8_t ring = 0;
	int l2_len, ip_len, n;

	/* a single fastpath leaves nothing to choose from */
	if (qede->num_fp == 1) {
		return (ring);
	}

	eth_hdr = (struct ether_header *)((void *)bp);
	vlan_hdr = (struct ether_vlan_header *)((void *)bp);

	/* A vlan tag moves the ether type and lengthens the mac header */
	if (ntohs(vlan_hdr->ether_tpid) != ETHERTYPE_VLAN) {
		l2_len = sizeof (struct ether_header);
		ether_type = ntohs(eth_hdr->ether_type);
	} else {
		l2_len = sizeof (struct ether_vlan_header);
		ether_type = ntohs(vlan_hdr->ether_type);
	}

	/* Only IPv4 is hashed; everything else stays on ring 0 */
	if (ether_type != ETHERTYPE_IP) {
		return (ring);
	}

	if (IPH_HDR_VERSION((ipha_t *)(void *)(bp + l2_len)) ==
	    IPV4_VERSION) {
		ip4 = (struct ip *)(void *)(bp + l2_len);
	}

	/* IP hdr not 4-byte aligned */
	if ((((unsigned long)ip4) & 0x3) != 0) {
		return (ring);
	}

	if (ip4 == NULL) {
		return (ring);
	}

	ip_len = IPH_HDR_LENGTH(ip4);
	src_addr = ip4->ip_src.s_addr;
	dst_addr = ip4->ip_dst.s_addr;
	l4_start = (uint8_t *)ip4 + ip_len;

	/* ports are only taken from TCP and UDP headers */
	switch (ip4->ip_p) {
	case IPPROTO_TCP:
		tcp = (struct tcphdr *)(void *)l4_start;
		sport = tcp->th_sport;
		dport = tcp->th_dport;
		break;
	case IPPROTO_UDP:
		udp = (struct udphdr *)(void *)l4_start;
		sport = udp->uh_sport;
		dport = udp->uh_dport;
		break;
	default:
		break;
	}

	/* key layout: src addr, dst addr, src port, dst port; low byte first */
	for (n = 0; n < 4; n++) {
		key[n] = (uint8_t)((src_addr >> (8 * n)) & 0xFF);
		key[4 + n] = (uint8_t)((dst_addr >> (8 * n)) & 0xFF);
	}
	for (n = 0; n < 2; n++) {
		key[8 + n] = (uint8_t)((sport >> (8 * n)) & 0xFF);
		key[10 + n] = (uint8_t)((dport >> (8 * n)) & 0xFF);
	}

	hashval = hash(key, 12, 0); /* return 32 bit */
	ring = (hashval & (qede->num_fp - 1));
	if (ring >= qede->num_fp) {
		cmn_err(CE_WARN, "%s bad tx_ring_id %d\n",
		    __func__, ring);
		ring = 0;
	}

	return (ring);
}
#endif
1819 
1820 mblk_t *
1821 qede_ring_tx(void *arg, mblk_t *mp)
1822 {
1823 	qede_fastpath_t *fp = (qede_fastpath_t *)arg;
1824 	qede_t *qede = fp->qede;
1825 #ifndef	NO_CROSSBOW
1826 	qede_tx_ring_t *tx_ring = fp->tx_ring[0];
1827 #else
1828 	qede_tx_ring_t *tx_ring;
1829 #endif
1830 	uint32_t ring_id;
1831 	mblk_t *next = NULL;
1832 	enum qede_xmit_status status = XMIT_FAILED;
1833 	caddr_t bp;
1834 
1835 	ASSERT(mp->b_next == NULL);
1836 
1837 #ifndef	NO_CROSSBOW
1838 	if (!fp || !tx_ring) {
1839 		qede_print_err("!%s: error, fp %p, tx_ring %p",
1840 		    __func__, fp, tx_ring);
1841 		goto exit;
1842 	}
1843 #endif
1844 	if (qede->qede_state != QEDE_STATE_STARTED) {
1845 		qede_print_err("!%s(%d): qede_state %d invalid",
1846 		    __func__, qede->instance, qede->qede_state);
1847 		goto exit;
1848 	}
1849 
1850 	if (!qede->params.link_state) {
1851 		goto exit;
1852 	}
1853 
1854 	while (mp != NULL) {
1855 #ifdef	NO_CROSSBOW
1856 		/*
1857 		 * Figure out which tx ring to send this packet to.
1858 		 * Currently multiple rings are not exposed to mac layer
1859 		 * and fanout done by driver
1860 		 */
1861 		bp = (caddr_t)mp->b_rptr;
1862 		ring_id = qede_hash_get_txq(qede, bp);
1863 		fp = &qede->fp_array[ring_id];
1864 		tx_ring = fp->tx_ring[0];
1865 
1866 		if (qede->num_tc > 1) {
1867 			qede_info(qede,
1868 			    "Traffic classes(%d) > 1 not supported",
1869 			    qede->num_tc);
1870 			goto exit;
1871 		}
1872 #endif
1873 		next = mp->b_next;
1874 		mp->b_next = NULL;
1875 
1876 		status = qede_send_tx_packet(qede, tx_ring, mp);
1877 		if (status == XMIT_DONE) {
1878 			tx_ring->tx_pkt_count++;
1879 			mp = next;
1880 		} else if (status == XMIT_PAUSE_QUEUE) {
1881 			tx_ring->tx_ring_pause++;
1882 			mp->b_next = next;
1883 			break;
1884 		} else if (status == XMIT_FAILED) {
1885 			goto exit;
1886 		}
1887 	}
1888 
1889 	return (mp);
1890 exit:
1891 	tx_ring->tx_pkt_dropped++;
1892 	freemsgchain(mp);
1893 	mp = NULL;
1894 	return (mp);
1895 }
1896