xref: /illumos-gate/usr/src/uts/common/io/qede/qede_fp.c (revision 7f3d7c9289dee6488b3cd2848a68c0b8580d750c)
1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, v.1,  (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://opensource.org/licenses/CDDL-1.0.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 
22 /*
23 * Copyright 2014-2017 Cavium, Inc.
24 * Copyright 2025 Oxide Computer Company
25 */
26 
27 #include "qede.h"
28 
29 static qede_dma_handle_entry_t *
30 qede_get_dmah_entry(qede_tx_ring_t *tx_ring)
31 {
32 	qede_dma_handles_list_t *list = &tx_ring->dmah_list;
33 	qede_dma_handle_entry_t *dmah;
34 
35 	mutex_enter(&list->lock);
36 	dmah = list->free_list[list->head];
37 	list->free_list[list->head] = NULL;
38 	list->head = (list->head + 1) & TX_RING_MASK;
39 	mutex_exit(&list->lock);
40 
41 	return (dmah);
42 }
43 
44 static void
45 qede_put_dmah_entries(qede_tx_ring_t *tx_ring, qede_dma_handle_entry_t *dmah)
46 {
47 	qede_dma_handles_list_t *list = &tx_ring->dmah_list;
48 	qede_dma_handle_entry_t *next;
49 	u16 index;
50 
51 	mutex_enter(&list->lock);
52 	index = list->tail;
53 
54 	while (dmah != NULL) {
55 		next = dmah->next;
56 		dmah->next = NULL;
57 		list->free_list[index] = dmah;
58 		index = (index + 1) & TX_RING_MASK;
59 		dmah = next;
60 	}
61 
62 	list->tail = index;
63 
64 	mutex_exit(&list->lock);
65 }
66 
67 static qede_tx_bcopy_pkt_t *
68 qede_get_bcopy_pkt(qede_tx_ring_t *tx_ring)
69 {
70 	qede_tx_bcopy_list_t *list = &tx_ring->bcopy_list;
71 	qede_tx_bcopy_pkt_t *pkt;
72 
73 	mutex_enter(&list->lock);
74 	pkt = list->free_list[list->head];
75 	list->free_list[list->head] = NULL;
76 	list->head = (list->head + 1) & TX_RING_MASK;
77 	mutex_exit(&list->lock);
78 
79 	return (pkt);
80 }
81 
82 static void
83 qede_put_bcopy_pkt(qede_tx_ring_t *tx_ring, qede_tx_bcopy_pkt_t *pkt)
84 {
85 	qede_tx_bcopy_list_t *list = &tx_ring->bcopy_list;
86 
87 	mutex_enter(&list->lock);
88 	list->free_list[list->tail] = pkt;
89 	list->tail = (list->tail + 1) & TX_RING_MASK;
90 	mutex_exit(&list->lock);
91 }
92 
93 void
94 qede_print_tx_indexes(qede_tx_ring_t *tx_ring)
95 {
96 	uint16_t hw_consumer = LE_16(*tx_ring->hw_cons_ptr);
97 	uint16_t chain_idx = ecore_chain_get_cons_idx(&tx_ring->tx_bd_ring);
98 	hw_consumer &= TX_RING_MASK;
99 	chain_idx &= TX_RING_MASK;
100 	qede_print_err("!indices: hw_cons %d, chain_cons = %d, sw_prod = %d",
101 	    hw_consumer, chain_idx, tx_ring->sw_tx_prod);
102 }
103 
104 void
105 qede_print_rx_indexes(qede_rx_ring_t *rx_ring)
106 {
107 	u16 hw_bd_cons = HOST_TO_LE_16(*rx_ring->hw_cons_ptr);
108 	u16 sw_bd_cons = ecore_chain_get_cons_idx(&rx_ring->rx_cqe_ring);
109 
110 	hw_bd_cons &= (rx_ring->qede->rx_ring_size - 1);
111 	sw_bd_cons &= (rx_ring->qede->rx_ring_size - 1);
112 	qede_print_err("!RX indices: hw_cons %d, chain_cons = %d",
113 	    hw_bd_cons, sw_bd_cons);
114 }
115 
116 
117 /*
118  * Called from tx_completion intr handler.
119  * NOTE: statu_block dma mem. must be sync'ed
120  * in the interrupt handler
121  */
122 int
123 qede_process_tx_completions(qede_tx_ring_t *tx_ring)
124 {
125 	int count = 0;
126 	u16 hw_consumer;
127 	struct eth_tx_bd *tx_bd;
128 	uint16_t chain_idx;
129 	u16 nbd, sw_consumer = tx_ring->sw_tx_cons;
130 	struct eth_tx_1st_bd *first_bd;
131 	u16 bd_consumed = 0;
132 	qede_tx_recycle_list_t *recycle_entry;
133 	qede_dma_handle_entry_t *dmah, *head = NULL, *tail = NULL;
134 	qede_tx_bcopy_pkt_t *bcopy_pkt;
135 
136 	hw_consumer = LE_16(*tx_ring->hw_cons_ptr);
137 	chain_idx = ecore_chain_get_cons_idx(&tx_ring->tx_bd_ring);
138 
139 	while (hw_consumer != chain_idx) {
140 		nbd = 0;
141 		bd_consumed = 0;
142 		first_bd = NULL;
143 
144 		recycle_entry = &tx_ring->tx_recycle_list[sw_consumer];
145 		if (recycle_entry->dmah_entry != NULL) {
146 			dmah = recycle_entry->dmah_entry;
147 
148 			head = dmah;
149 
150 			if (head->mp) {
151 				freemsg(head->mp);
152 			}
153 
154 			while (dmah != NULL) {
155 				(void) ddi_dma_unbind_handle(dmah->dma_handle);
156 				dmah = dmah->next;
157 			}
158 
159 
160 			qede_put_dmah_entries(tx_ring,
161 			    head);
162 			recycle_entry->dmah_entry = NULL;
163 		} else if (recycle_entry->bcopy_pkt != NULL) {
164 			bcopy_pkt = recycle_entry->bcopy_pkt;
165 
166 			qede_put_bcopy_pkt(tx_ring, bcopy_pkt);
167 			recycle_entry->bcopy_pkt = NULL;
168 		} else {
169 			qede_warn(tx_ring->qede,
170 			    "Invalid completion at index %d",
171 			    sw_consumer);
172 		}
173 
174 		sw_consumer = (sw_consumer + 1) & TX_RING_MASK;
175 
176 		first_bd =
177 		    (struct eth_tx_1st_bd *)ecore_chain_consume(
178 		    &tx_ring->tx_bd_ring);
179 		bd_consumed++;
180 
181 		nbd = first_bd->data.nbds;
182 
183 		while (bd_consumed++ < nbd) {
184 			ecore_chain_consume(&tx_ring->tx_bd_ring);
185 		}
186 
187 		chain_idx = ecore_chain_get_cons_idx(&tx_ring->tx_bd_ring);
188 		count++;
189 	}
190 
191 	tx_ring->sw_tx_cons = sw_consumer;
192 
193 	if (count && tx_ring->tx_q_sleeping) {
194 		tx_ring->tx_q_sleeping = 0;
195 #ifndef NO_CROSSBOW
196 		RESUME_TX(tx_ring);
197 #else
198 		mac_tx_update(tx_ring->qede->mac_handle);
199 #endif
200 	}
201 
202 	return (count);
203 }
204 
205 static int
206 qede_has_tx_work(qede_tx_ring_t *tx_ring)
207 {
208 	u16 hw_bd_cons = LE_16(*tx_ring->hw_cons_ptr);
209 	u16 sw_bd_cons = ecore_chain_get_cons_idx(&tx_ring->tx_bd_ring);
210 
211 	if (sw_bd_cons == (hw_bd_cons + 1)) {
212 		return (0);
213 	}
214 	return (hw_bd_cons != sw_bd_cons);
215 }
216 
217 static int
218 qede_has_rx_work(qede_rx_ring_t *rx_ring)
219 {
220 	u16 hw_bd_cons = HOST_TO_LE_16(*rx_ring->hw_cons_ptr);
221 	u16 sw_bd_cons = ecore_chain_get_cons_idx(&rx_ring->rx_cqe_ring);
222 	return (hw_bd_cons != sw_bd_cons);
223 }
224 
225 static void
226 qede_set_cksum_flags(mblk_t *mp,
227     uint16_t parse_flags)
228 {
229 	uint32_t cksum_flags = 0;
230 	int error = 0;
231 	bool l4_is_calc, l4_csum_err, iphdr_len_err;
232 
233 	l4_is_calc =
234 	    (parse_flags >> PARSING_AND_ERR_FLAGS_L4CHKSMWASCALCULATED_SHIFT)
235 	    & PARSING_AND_ERR_FLAGS_L4CHKSMWASCALCULATED_MASK;
236 	l4_csum_err = (parse_flags >> PARSING_AND_ERR_FLAGS_L4CHKSMERROR_SHIFT)
237 	    & PARSING_AND_ERR_FLAGS_L4CHKSMWASCALCULATED_MASK;
238 	iphdr_len_err = (parse_flags >> PARSING_AND_ERR_FLAGS_IPHDRERROR_SHIFT)
239 	    & PARSING_AND_ERR_FLAGS_IPHDRERROR_MASK;
240 
241 	if (l4_is_calc) {
242 		if (l4_csum_err) {
243 			error = 1;
244         	} else if (iphdr_len_err) {
245             		error = 2;
246         	} else {
247 			cksum_flags =  HCK_FULLCKSUM_OK | HCK_IPV4_HDRCKSUM_OK;
248 		}
249 	}
250 
251 	if (error == 1) {
252 		qede_print_err("!%s: got L4 csum error",__func__);
253 	} else if (error == 2) {
254 		qede_print_err("!%s: got IPHDER csum error" ,__func__);
255 	}
256 
257 	mac_hcksum_set(mp, 0, 0, 0, 0, cksum_flags);
258 }
259 
260 static qede_rx_buffer_t *
261 qede_get_next_rx_buffer(qede_rx_ring_t *rx_ring,
262     uint32_t *free_buffer_count)
263 {
264 	qede_rx_buffer_t *rx_buffer;
265 	uint32_t num_entries;
266 
267 	rx_buffer = qede_get_from_active_list(rx_ring, &num_entries);
268 	ASSERT(rx_buffer != NULL);
269 	ecore_chain_consume(&rx_ring->rx_bd_ring);
270 	*free_buffer_count = num_entries;
271 
272 	return (rx_buffer);
273 }
274 
275 static uint32_t
276 qede_get_next_lro_buffer(qede_rx_ring_t *rx_ring,
277     qede_lro_info_t *lro_info)
278 {
279 	lro_info->rx_buffer[lro_info->bd_count] =
280 	    qede_get_next_rx_buffer(rx_ring,
281 	    &lro_info->free_buffer_count);
282 	lro_info->bd_count++;
283 	return (DDI_SUCCESS);
284 }
285 #ifdef DEBUG_LRO
286 int agg_count = 0;
287 bool agg_print = true;
288 #endif
289 static void
290 qede_lro_start(qede_rx_ring_t *rx_ring,
291     struct eth_fast_path_rx_tpa_start_cqe *cqe)
292 {
293 	qede_lro_info_t *lro_info;
294 	int i, len_on_first_bd, seg_len;
295 
296 	lro_info = &rx_ring->lro_info[cqe->tpa_agg_index];
297 
298 	/* ASSERT(lro_info->agg_state != QEDE_AGG_STATE_NONE); */
299 
300 #ifdef DEBUG_LRO
301 	if (agg_count++ < 30)  {
302 		qede_dump_start_lro_cqe(cqe);
303 	} else {
304 		agg_print = B_FALSE;
305 	}
306 #endif
307 
308 	memset(lro_info, 0, sizeof (qede_lro_info_t));
309 	lro_info->agg_state = QEDE_AGG_STATE_START;
310 	rx_ring->lro_active_count++;
311 
312 	/* Parsing and error flags from the parser */;
313 
314 	lro_info->pars_flags = LE_16(cqe->pars_flags.flags);
315 	lro_info->pad = LE_16(cqe->placement_offset);
316 	lro_info->header_len = (uint32_t)cqe->header_len;
317 	lro_info->vlan_tag = LE_16(cqe->vlan_tag);
318 	lro_info->rss_hash = LE_32(cqe->rss_hash);
319 
320 	seg_len = (int)LE_16(cqe->seg_len);
321 	len_on_first_bd = (int)LE_16(cqe->len_on_first_bd);
322 	/*
323 	 * Get the first bd
324 	 */
325 	qede_get_next_lro_buffer(rx_ring, lro_info);
326 
327 	if (len_on_first_bd < seg_len) {
328 		/*
329 		 * We end up here with jumbo frames
330 		 * since a TCP segment can span
331 		 * multiple buffer descriptors.
332 		 */
333 		for (i = 0; i < ETH_TPA_CQE_START_LEN_LIST_SIZE; i++) {
334 			if (cqe->ext_bd_len_list[i] == 0) {
335 			    break;
336 			}
337 			qede_get_next_lro_buffer(rx_ring, lro_info);
338 		}
339 	}
340 }
341 
342 static void
343 qede_lro_cont(qede_rx_ring_t *rx_ring,
344     struct eth_fast_path_rx_tpa_cont_cqe *cqe)
345 {
346 	qede_lro_info_t *lro_info;
347 	int i;
348 
349 	lro_info = &rx_ring->lro_info[cqe->tpa_agg_index];
350 
351 	/* ASSERT(lro_info->agg_state != QEDE_AGG_STATE_START); */
352 #ifdef DEBUG_LRO
353 	if (agg_print) {
354 		qede_dump_cont_lro_cqe(cqe);
355 	}
356 #endif
357 
358 	for (i = 0; i < ETH_TPA_CQE_CONT_LEN_LIST_SIZE; i++) {
359 		if (cqe->len_list[i] == 0) {
360 			break;
361 		}
362 		qede_get_next_lro_buffer(rx_ring, lro_info);
363 	}
364 }
365 
366 static mblk_t *
367 qede_lro_end(qede_rx_ring_t *rx_ring,
368     struct eth_fast_path_rx_tpa_end_cqe *cqe,
369     int *pkt_bytes)
370 {
371 	qede_lro_info_t *lro_info;
372 	mblk_t *head = NULL, *tail = NULL, *mp = NULL;
373 	qede_rx_buffer_t *rx_buffer;
374 	int i, bd_len;
375 	uint16_t work_length, total_packet_length;
376 	uint32_t rx_buf_size = rx_ring->rx_buf_size;
377 	qede_dma_info_t *dma_info;
378 
379 	lro_info = &rx_ring->lro_info[cqe->tpa_agg_index];
380 
381 	/* ASSERT(lro_info->agg_state != QEDE_AGG_STATE_START); */
382 
383 #ifdef DEBUG_LRO
384 	if (agg_print) {
385 		qede_dump_end_lro_cqe(cqe);
386 	}
387 #endif
388 
389 	work_length = total_packet_length = LE_16(cqe->total_packet_len);
390 
391 	/*
392 	 * Get any buffer descriptors for this cqe
393 	 */
394 	for (i=0; i<ETH_TPA_CQE_END_LEN_LIST_SIZE; i++) {
395 		if (cqe->len_list[i] == 0) {
396 		    break;
397 		}
398 		qede_get_next_lro_buffer(rx_ring, lro_info);
399 	}
400 
401 	/* ASSERT(lro_info->bd_count != cqe->num_of_bds); */
402 
403 	if (lro_info->free_buffer_count <
404 	    rx_ring->rx_low_buffer_threshold) {
405 		for (i = 0; i < lro_info->bd_count; i++) {
406 			qede_recycle_copied_rx_buffer(
407 			    lro_info->rx_buffer[i]);
408 			lro_info->rx_buffer[i] = NULL;
409 		}
410 		rx_ring->rx_low_water_cnt++;
411 		lro_info->agg_state = QEDE_AGG_STATE_NONE;
412 		return (NULL);
413 	}
414 	/*
415 	 * Loop through list of buffers for this
416 	 * aggregation.  For each one:
417 	 * 1. Calculate the buffer length
418 	 * 2. Adjust the mblk read/write pointers
419 	 * 3. Link the mblk to the local chain using
420 	 *    b_cont pointers.
421 	 * Note: each buffer will be rx_buf_size except
422 	 * the first (subtract the placement_offset)
423 	 * and the last which contains the remainder
424 	 * of cqe_end->total_packet_len minus length
425 	 * of all other buffers.
426 	 */
427 	for (i = 0; i < lro_info->bd_count; i++) {
428 
429 		rx_buffer = lro_info->rx_buffer[i];
430 
431 		bd_len =
432 		    (work_length > rx_buf_size) ? rx_buf_size : work_length;
433 		if (i == 0 &&
434 		    (cqe->num_of_bds > 1)) {
435 			bd_len -= lro_info->pad;
436 		}
437 
438 		dma_info = &rx_buffer->dma_info;
439 		ddi_dma_sync(dma_info->dma_handle,
440 		    dma_info->offset,
441 		    rx_buf_size,
442 		    DDI_DMA_SYNC_FORKERNEL);
443 
444 		mp = rx_buffer->mp;
445 		mp->b_next = mp->b_cont = NULL;
446 
447 		if (head == NULL) {
448 			head = tail = mp;
449 			mp->b_rptr += lro_info->pad;
450 		} else {
451 			tail->b_cont = mp;
452 			tail = mp;
453 		}
454 
455 		mp->b_wptr = (uchar_t *)((unsigned long)mp->b_rptr + bd_len);
456 		work_length -= bd_len;
457 	}
458 
459 	qede_set_cksum_flags(head, lro_info->pars_flags);
460 
461 	rx_ring->rx_lro_pkt_cnt++;
462 	rx_ring->lro_active_count--;
463 	lro_info->agg_state = QEDE_AGG_STATE_NONE;
464 
465 #ifdef DEBUG_LRO
466 	if (agg_print) {
467 		qede_dump_mblk_chain_bcont_ptr(rx_ring->qede, head);
468 	}
469 #endif
470 	*pkt_bytes = (int)total_packet_length;
471 	return (head);
472 }
473 
474 
475 
476 #ifdef DEBUG_JUMBO
477 int jumbo_count = 0;
478 bool jumbo_print = true;
479 #endif
480 static mblk_t *
481 qede_reg_jumbo_cqe(qede_rx_ring_t *rx_ring,
482    struct eth_fast_path_rx_reg_cqe *cqe)
483 {
484 	int i;
485 	qede_rx_buffer_t *rx_buf, *rx_buffer[ETH_RX_MAX_BUFF_PER_PKT];
486 	mblk_t *mp = NULL, *head = NULL, *tail = NULL;
487 	uint32_t free_buffer_count = 0;
488 	uint16_t work_length;
489 	uint32_t rx_buf_size = rx_ring->rx_buf_size, bd_len;
490 	qede_dma_info_t *dma_info;
491 	u8 pad = cqe->placement_offset;
492 
493 #ifdef DEBUG_JUMBO
494 	if (jumbo_count++ < 8) {
495 		qede_dump_reg_cqe(cqe);
496 	} else {
497 		jumbo_print = B_FALSE;
498 	}
499 #endif
500 
501 	work_length = HOST_TO_LE_16(cqe->pkt_len);
502 
503 	/*
504 	 * Get the buffers/mps for this cqe
505 	 */
506 	for (i = 0; i < cqe->bd_num; i++) {
507 		rx_buffer[i] =
508 		    qede_get_next_rx_buffer(rx_ring, &free_buffer_count);
509 	}
510 
511 	/*
512 	 * If the buffer ring is running low, drop the
513 	 * packet and return these buffers.
514 	 */
515 	if (free_buffer_count <
516 	    rx_ring->rx_low_buffer_threshold) {
517 		for (i = 0; i < cqe->bd_num; i++) {
518 			qede_recycle_copied_rx_buffer(rx_buffer[i]);
519 		}
520 		rx_ring->rx_low_water_cnt++;
521 		return (NULL);
522 	}
523 
524 	for (i = 0; i < cqe->bd_num; i++) {
525 		rx_buf = rx_buffer[i];
526 
527 		bd_len =
528 		    (work_length > rx_buf_size) ? rx_buf_size : work_length;
529 
530 		/*
531 		 * Adjust for placement offset
532 		 * on first bufffer.
533 		 */
534 		if (i == 0) {
535 			bd_len -= pad;
536 		}
537 
538 		dma_info = &rx_buf->dma_info;
539 		ddi_dma_sync(dma_info->dma_handle,
540 		    dma_info->offset,
541 		    rx_buf_size,
542 		    DDI_DMA_SYNC_FORKERNEL);
543 
544 		mp = rx_buf->mp;
545 		mp->b_next = mp->b_cont = NULL;
546 		/*
547 		 * Adjust for placement offset
548 		 * on first bufffer.
549 		 */
550 		if (i == 0) {
551 			mp->b_rptr += pad;
552 		}
553 
554 		mp->b_wptr = (uchar_t *)((unsigned long)mp->b_rptr + bd_len);
555 
556 		if (head == NULL) {
557 			head = tail = mp;
558 		} else {
559 			tail->b_cont = mp;
560 			tail = mp;
561 		}
562 
563 		work_length -= bd_len;
564 	}
565 
566 	qede_set_cksum_flags(head,
567 		    HOST_TO_LE_16(cqe->pars_flags.flags));
568 #ifdef DEBUG_JUMBO
569 	if (jumbo_print) {
570 		qede_dump_mblk_chain_bcont_ptr(rx_ring->qede, head);
571 	}
572 #endif
573 	rx_ring->rx_jumbo_pkt_cnt++;
574 	return (head);
575 }
576 
577 static mblk_t *
578 qede_reg_cqe(qede_rx_ring_t *rx_ring,
579     struct eth_fast_path_rx_reg_cqe *cqe,
580     int *pkt_bytes)
581 {
582 	qede_t *qede = rx_ring->qede;
583 	qede_rx_buffer_t *rx_buffer;
584 	uint32_t free_buffer_count;
585 	mblk_t *mp;
586 	uint16_t pkt_len = HOST_TO_LE_16(cqe->pkt_len);
587 	u8 pad = cqe->placement_offset;
588 	qede_dma_info_t *dma_info;
589 	ddi_dma_handle_t dma_handle;
590 	char *virt_addr;
591 
592 	/*
593 	 * Update the byte count as it will
594 	 * be the same for normal and jumbo
595 	 */
596 	*pkt_bytes = (int)pkt_len;
597 
598 	if (cqe->bd_num > 1) {
599 		/*
600 		 * If this cqe uses more than one
601 		 * rx buffer then it must be
602 		 * jumbo.  Call another handler
603 		 * for this because the process is
604 		 * quite different.
605 		 */
606 		return (qede_reg_jumbo_cqe(rx_ring, cqe));
607 	}
608 
609 
610 	rx_buffer = qede_get_next_rx_buffer(rx_ring,
611             &free_buffer_count);
612 
613 	if (free_buffer_count <
614 	    rx_ring->rx_low_buffer_threshold) {
615 		qede_recycle_copied_rx_buffer(rx_buffer);
616 		rx_ring->rx_low_water_cnt++;
617 		*pkt_bytes = 0;
618 		return (NULL);
619 	}
620 
621 	dma_info = &rx_buffer->dma_info;
622 	virt_addr = dma_info->virt_addr;
623 	dma_handle = dma_info->dma_handle;
624 	ddi_dma_sync(dma_handle,
625 	    0, 0, DDI_DMA_SYNC_FORKERNEL);
626 
627 	if (pkt_len <= rx_ring->rx_copy_threshold) {
628 		mp = allocb(pkt_len + 2, 0); /* IP HDR_ALIGN */
629 		if (mp != NULL) {
630 			virt_addr += pad;
631 			bcopy(virt_addr, mp->b_rptr, pkt_len);
632 		} else {
633 			/*
634 			 * Post the buffer back to fw and
635 			 * drop packet
636 			 */
637 			qede_print_err("!%s(%d): allocb failed",
638 		    	    __func__,
639 			    rx_ring->qede->instance);
640 			qede->allocbFailures++;
641                         goto freebuf;
642 		}
643 		/*
644 		 * We've copied it (or not) and are done with it
645 		 * so put it back into the passive list.
646 		 */
647 		ddi_dma_sync(dma_handle,
648 	            0, 0, DDI_DMA_SYNC_FORDEV);
649 		qede_recycle_copied_rx_buffer(rx_buffer);
650 		rx_ring->rx_copy_cnt++;
651 	} else {
652 
653 		/*
654 		 * We are going to send this mp/buffer
655 		 * up to the mac layer.  Adjust the
656 		 * pointeres and link it to our chain.
657 		 * the rx_buffer is returned to us in
658 		 * the recycle function so we drop it
659 		 * here.
660 		 */
661 		mp = rx_buffer->mp;
662 		mp->b_rptr += pad;
663 	}
664 	mp->b_cont = mp->b_next = NULL;
665 	mp->b_wptr = (uchar_t *)((unsigned long)mp->b_rptr + pkt_len);
666 
667 	qede_set_cksum_flags(mp,
668 	    HOST_TO_LE_16(cqe->pars_flags.flags));
669 #ifdef DEBUG_JUMBO
670 	if (jumbo_print) {
671 	    qede_dump_mblk_chain_bnext_ptr(rx_ring->qede, mp);
672 	}
673 #endif
674 
675 	rx_ring->rx_reg_pkt_cnt++;
676 	return (mp);
677 
678 freebuf:
679         qede_recycle_copied_rx_buffer(rx_buffer);
680         return (NULL);
681 }
682 
683 /*
684  * Routine to process the rx packets on the
685  * passed rx_ring. Can be called for intr or
686  * poll context/routines
687  */
688 static mblk_t *
689 qede_process_rx_ring(qede_rx_ring_t *rx_ring, int nbytes, int npkts)
690 {
691 	union eth_rx_cqe *cqe;
692 	u16 last_cqe_consumer = rx_ring->last_cqe_consumer;
693 	enum eth_rx_cqe_type cqe_type;
694 	u16 sw_comp_cons, hw_comp_cons;
695 	mblk_t *mp = NULL, *first_mp = NULL, *last_mp = NULL;
696 	int pkt_bytes = 0, byte_cnt = 0, pkt_cnt = 0;
697 
698 	hw_comp_cons = HOST_TO_LE_16(*rx_ring->hw_cons_ptr);
699 
700 	/* Completion ring sw consumer */
701 	sw_comp_cons = ecore_chain_get_cons_idx(&rx_ring->rx_cqe_ring);
702 
703 	while (sw_comp_cons != hw_comp_cons) {
704 		if ((byte_cnt >= nbytes) ||
705 		    (pkt_cnt >= npkts)) {
706 			break;
707 		}
708 
709 		cqe = (union eth_rx_cqe *)
710 		    ecore_chain_consume(&rx_ring->rx_cqe_ring);
711 		/* Get next element and increment the cons_idx */
712 
713 		(void) ddi_dma_sync(rx_ring->rx_cqe_dmah,
714 		    last_cqe_consumer, sizeof (*cqe),
715 		    DDI_DMA_SYNC_FORKERNEL);
716 
717 		cqe_type = cqe->fast_path_regular.type;
718 
719 		switch (cqe_type) {
720 		case ETH_RX_CQE_TYPE_SLOW_PATH:
721 			ecore_eth_cqe_completion(&rx_ring->qede->edev.hwfns[0],
722 			    (struct eth_slow_path_rx_cqe *)cqe);
723 			goto next_cqe;
724 		case ETH_RX_CQE_TYPE_REGULAR:
725 			mp = qede_reg_cqe(rx_ring,
726 			    &cqe->fast_path_regular,
727 			    &pkt_bytes);
728 			break;
729 		case ETH_RX_CQE_TYPE_TPA_START:
730 			qede_lro_start(rx_ring,
731 			    &cqe->fast_path_tpa_start);
732 			goto next_cqe;
733 		case ETH_RX_CQE_TYPE_TPA_CONT:
734 			qede_lro_cont(rx_ring,
735 			    &cqe->fast_path_tpa_cont);
736 			goto next_cqe;
737 		case ETH_RX_CQE_TYPE_TPA_END:
738 			mp = qede_lro_end(rx_ring,
739 			    &cqe->fast_path_tpa_end,
740 			    &pkt_bytes);
741 			break;
742 		default:
743 			if (cqe_type != 0) {
744 				qede_print_err("!%s(%d): cqe_type %x not "
745 				    "supported", __func__,
746 				    rx_ring->qede->instance,
747 				    cqe_type);
748 			}
749 			goto exit_rx;
750 		}
751 
752 		/*
753 		 * If we arrive here with no mp,
754 		 * then we hit an RX buffer threshold
755 		 * where we had to drop the packet and
756 		 * give the buffers back to the device.
757 		 */
758 		if (mp == NULL) {
759 			rx_ring->rx_drop_cnt++;
760 			goto next_cqe;
761 		}
762 
763 		if (first_mp) {
764 			last_mp->b_next = mp;
765 		} else {
766 			first_mp = mp;
767 		}
768 		last_mp = mp;
769 		pkt_cnt++;
770 		byte_cnt += pkt_bytes;
771 next_cqe:
772 		ecore_chain_recycle_consumed(&rx_ring->rx_cqe_ring);
773 		last_cqe_consumer = sw_comp_cons;
774 		sw_comp_cons = ecore_chain_get_cons_idx(&rx_ring->rx_cqe_ring);
775 		if (!(qede_has_rx_work(rx_ring))) {
776 			ecore_sb_update_sb_idx(rx_ring->fp->sb_info);
777 		}
778 		hw_comp_cons = HOST_TO_LE_16(*rx_ring->hw_cons_ptr);
779 	}
780 	rx_ring->rx_pkt_cnt += pkt_cnt;
781 	rx_ring->rx_byte_cnt += byte_cnt;
782 
783 exit_rx:
784 	if (first_mp) {
785 		last_mp->b_next = NULL;
786 	}
787 
788 	/*
789 	 * Since prod update will result in
790 	 * reading of the bd's, do a dma_sync
791 	 */
792 	qede_replenish_rx_buffers(rx_ring);
793 	qede_update_rx_q_producer(rx_ring);
794 	rx_ring->last_cqe_consumer = last_cqe_consumer;
795 
796 	return (first_mp);
797 }
798 
799 mblk_t *
800 qede_process_fastpath(qede_fastpath_t *fp,
801     int nbytes, int npkts, int *work_done)
802 {
803 	int i = 0;
804 	qede_tx_ring_t *tx_ring;
805 	qede_rx_ring_t *rx_ring;
806 	mblk_t *mp = NULL;
807 
808 	rx_ring = fp->rx_ring;
809 
810 	for (i = 0; i < fp->qede->num_tc; i++) {
811 		tx_ring = fp->tx_ring[i];
812 		if (qede_has_tx_work(tx_ring)) {
813 		/* process tx completions */
814 			if (mutex_tryenter(&tx_ring->tx_lock) != 0) {
815 				*work_done +=
816 				    qede_process_tx_completions(tx_ring);
817 				mutex_exit(&tx_ring->tx_lock);
818 			}
819 		}
820 	}
821 
822 	if (!(qede_has_rx_work(rx_ring))) {
823 		ecore_sb_update_sb_idx(fp->sb_info);
824 	}
825 
826 	rx_ring = fp->rx_ring;
827 	if (qede_has_rx_work(rx_ring)) {
828 		mutex_enter(&rx_ring->rx_lock);
829 		mp = qede_process_rx_ring(rx_ring,
830 		    nbytes, npkts);
831 		if (mp) {
832 			*work_done += 1;
833 		}
834 		mutex_exit(&rx_ring->rx_lock);
835 	}
836 
837 	return (mp);
838 }
839 
840 /*
841  * Parse the mblk to extract information
842  * from the protocol headers.
843  * The routine assumes that the l4 header is tcp. Also
844  * it does not account for ipv6 headers since ipv6 lso is
845  * unsupported
846  */
847 static void
848 qede_pkt_parse_lso_headers(qede_tx_pktinfo_t *pktinfo, mblk_t *mp)
849 {
850 	struct ether_header *eth_hdr =
851 	    (struct ether_header *)(void *)mp->b_rptr;
852 	ipha_t *ip_hdr;
853 	struct tcphdr *tcp_hdr;
854 
855 	/* mac header type and len */
856 	if (ntohs(eth_hdr->ether_type) == ETHERTYPE_IP) {
857 		pktinfo->ether_type = ntohs(eth_hdr->ether_type);
858 		pktinfo->mac_hlen = sizeof (struct ether_header);
859 	} else if (ntohs(eth_hdr->ether_type) == ETHERTYPE_VLAN) {
860 		struct ether_vlan_header *vlan_hdr =
861 		    (struct ether_vlan_header *)(void *)mp->b_rptr;
862 		pktinfo->ether_type = ntohs(vlan_hdr->ether_type);
863 		pktinfo->mac_hlen = sizeof (struct ether_vlan_header);
864 	}
865 
866 	/* ip header type and len */
867 	ip_hdr = (ipha_t *)(void *)((u8 *)mp->b_rptr + pktinfo->mac_hlen);
868 	pktinfo->ip_hlen = IPH_HDR_LENGTH(ip_hdr);
869 
870 	/* Assume TCP protocol */
871 	pktinfo->l4_proto = 0x06;
872 
873 	tcp_hdr = (struct tcphdr *)(void *)
874 	    ((u8 *)mp->b_rptr + pktinfo->mac_hlen + pktinfo->ip_hlen);
875 	pktinfo->l4_hlen = TCP_HDR_LENGTH(tcp_hdr);
876 
877 
878 	pktinfo->total_hlen =
879 	    pktinfo->mac_hlen +
880 	    pktinfo->ip_hlen +
881 	    pktinfo->l4_hlen;
882 }
883 
884 static void
885 qede_get_pkt_offload_info(qede_t *qede, mblk_t *mp,
886     u32 *use_cksum, bool *use_lso, uint16_t *mss)
887 {
888 	u32 pflags;
889 
890 	mac_hcksum_get(mp, NULL, NULL, NULL, NULL, &pflags);
891 
892 	*use_cksum = pflags;
893 	if (qede->lso_enable) {
894 		u32 pkt_mss = 0;
895 		u32 lso_flags = 0;
896 
897 		mac_lso_get(mp, &pkt_mss, &lso_flags);
898 		*use_lso = (lso_flags == HW_LSO);
899 		*mss = (u16)pkt_mss;
900 	}
901 }
902 
903 static void
904 /* LINTED E_FUNC_ARG_UNUSED */
905 qede_get_pkt_info(qede_t *qede, mblk_t *mp,
906     qede_tx_pktinfo_t *pktinfo)
907 {
908 	mblk_t *bp;
909 	size_t size;
910 	struct ether_header *eth_hdr =
911 	    (struct ether_header *)(void *)mp->b_rptr;
912 
913 	pktinfo->total_len = 0;
914 	pktinfo->mblk_no = 0;
915 
916 	/*
917 	 * Count the total length and the number of
918 	 * chained mblks in the packet
919 	 */
920 	for (bp = mp; bp != NULL; bp = bp->b_cont) {
921 		size = MBLKL(bp);
922 		if (size == 0) {
923 			continue;
924 		}
925 
926 		pktinfo->total_len += size;
927 		pktinfo->mblk_no++;
928 	}
929 	/* mac header type and len */
930 	if (ntohs(eth_hdr->ether_type) == ETHERTYPE_IP) {
931 		pktinfo->ether_type = ntohs(eth_hdr->ether_type);
932 		pktinfo->mac_hlen = sizeof (struct ether_header);
933 	} else if (ntohs(eth_hdr->ether_type) == ETHERTYPE_VLAN) {
934 		struct ether_vlan_header *vlan_hdr =
935 		    (struct ether_vlan_header *)(void *)mp->b_rptr;
936 		pktinfo->ether_type = ntohs(vlan_hdr->ether_type);
937 		pktinfo->mac_hlen = sizeof (struct ether_vlan_header);
938 	}
939 
940 }
941 
942 /*
943  * Routine to sync dma mem for multiple
944  * descriptors in a chain
945  */
946 void
947 qede_desc_dma_mem_sync(ddi_dma_handle_t *dma_handle,
948     uint_t start, uint_t count, uint_t range,
949     uint_t unit_size, uint_t direction)
950 {
951 	if ((start + count) < range) {
952 		(void) ddi_dma_sync(*dma_handle,
953 		    start * unit_size, count * unit_size, direction);
954 	} else {
955 		(void) ddi_dma_sync(*dma_handle, start * unit_size,
956 		    0, direction);
957 		(void) ddi_dma_sync(*dma_handle, 0,
958 		    (start + count - range) * unit_size,
959 		    direction);
960 	}
961 }
962 
963 /*
964  * Send tx pkt by copying incoming packet in a
965  * preallocated and mapped dma buffer
966  * Not designed to handle lso for now
967  */
968 static enum qede_xmit_status
969 qede_tx_bcopy(qede_tx_ring_t *tx_ring, mblk_t *mp, qede_tx_pktinfo_t *pktinfo)
970 {
971 	qede_tx_bcopy_pkt_t *bcopy_pkt = NULL;
972 	/* Only one bd will be needed for bcopy packets */
973 	struct eth_tx_1st_bd *first_bd;
974 	u16 last_producer = tx_ring->sw_tx_prod;
975 	uint8_t *txb;
976 	mblk_t *bp;
977 	u32 mblen;
978 
979 	bcopy_pkt = qede_get_bcopy_pkt(tx_ring);
980 	if (bcopy_pkt == NULL) {
981 		qede_print_err("!%s(%d): entry NULL at _tx_ bcopy_list head",
982 		    __func__, tx_ring->qede->instance);
983 		return (XMIT_FAILED);
984 	}
985 
986 	/*
987 	 * Copy the packet data to our copy
988 	 * buffer
989 	 */
990 	txb = bcopy_pkt->virt_addr;
991 
992 	for (bp = mp; bp != NULL; bp = bp->b_cont) {
993 		mblen = MBLKL(bp);
994 		if (mblen == 0) {
995 			continue;
996 		}
997 		bcopy(bp->b_rptr, txb, mblen);
998 		txb += mblen;
999 	}
1000 
1001 	(void) ddi_dma_sync(bcopy_pkt->dma_handle,
1002 	    0, pktinfo->total_len,
1003 	    DDI_DMA_SYNC_FORDEV);
1004 
1005 
1006 	mutex_enter(&tx_ring->tx_lock);
1007 	if (ecore_chain_get_elem_left(&tx_ring->tx_bd_ring)<
1008 	    QEDE_TX_COPY_PATH_PAUSE_THRESHOLD) {
1009 		tx_ring->tx_q_sleeping = 1;
1010 		qede_put_bcopy_pkt(tx_ring, bcopy_pkt);
1011 		mutex_exit(&tx_ring->tx_lock);
1012 #ifdef	DEBUG_TX_RECYCLE
1013 		qede_print_err("!%s(%d): Pausing tx queue",
1014 		    __func__, tx_ring->qede->instance);
1015 #endif
1016 		return (XMIT_PAUSE_QUEUE);
1017 	}
1018 
1019 	first_bd = ecore_chain_produce(&tx_ring->tx_bd_ring);
1020 	bzero(first_bd, sizeof (*first_bd));
1021 	first_bd->data.nbds = 1;
1022 	first_bd->data.bd_flags.bitfields =
1023 	    (1 << ETH_TX_1ST_BD_FLAGS_START_BD_SHIFT);
1024 
1025 	if (pktinfo->cksum_flags & HCK_IPV4_HDRCKSUM) {
1026 		first_bd->data.bd_flags.bitfields |=
1027 		    (1 << ETH_TX_1ST_BD_FLAGS_IP_CSUM_SHIFT);
1028 	}
1029 
1030 	if (pktinfo->cksum_flags & HCK_FULLCKSUM) {
1031 		first_bd->data.bd_flags.bitfields |=
1032 		    (1 << ETH_TX_1ST_BD_FLAGS_L4_CSUM_SHIFT);
1033 	}
1034 
1035 	BD_SET_ADDR_LEN(first_bd,
1036 	    bcopy_pkt->phys_addr,
1037 	    pktinfo->total_len);
1038 
1039 	first_bd->data.bitfields |=
1040 		(pktinfo->total_len & ETH_TX_DATA_1ST_BD_PKT_LEN_MASK)
1041 		<< ETH_TX_DATA_1ST_BD_PKT_LEN_SHIFT;
1042 
1043 	tx_ring->tx_db.data.bd_prod =
1044 	    HOST_TO_LE_16(ecore_chain_get_prod_idx(&tx_ring->tx_bd_ring));
1045 
1046 	tx_ring->tx_recycle_list[tx_ring->sw_tx_prod].bcopy_pkt = bcopy_pkt;
1047 	tx_ring->tx_recycle_list[tx_ring->sw_tx_prod].dmah_entry =  NULL;
1048 
1049 	tx_ring->sw_tx_prod++;
1050 	tx_ring->sw_tx_prod &= TX_RING_MASK;
1051 
1052 	(void) ddi_dma_sync(tx_ring->tx_bd_dmah,
1053 	    last_producer, sizeof (*first_bd),
1054 	    DDI_DMA_SYNC_FORDEV);
1055 
1056 	QEDE_DOORBELL_WR(tx_ring, tx_ring->tx_db.raw);
1057 	mutex_exit(&tx_ring->tx_lock);
1058 
1059 	freemsg(mp);
1060 
1061 	return (XMIT_DONE);
1062 }
1063 
1064 /*
1065  * Send tx packet by mapping the mp(kernel addr)
1066  * to an existing dma_handle in the driver
1067  */
1068 static enum qede_xmit_status
1069 qede_tx_mapped(qede_tx_ring_t *tx_ring, mblk_t *mp, qede_tx_pktinfo_t *pktinfo)
1070 {
1071 	enum qede_xmit_status status = XMIT_FAILED;
1072 	int ret;
1073 	qede_dma_handle_entry_t *dmah_entry = NULL;
1074 	qede_dma_handle_entry_t *head = NULL, *tail = NULL, *hdl;
1075 	struct eth_tx_1st_bd *first_bd;
1076 	struct eth_tx_2nd_bd *second_bd = 0;
1077 	struct eth_tx_3rd_bd *third_bd = 0;
1078 	struct eth_tx_bd *tx_data_bd;
1079 	struct eth_tx_bd local_bd[64] = { 0 };
1080 	ddi_dma_cookie_t cookie[64];
1081 	u32 ncookies, total_cookies = 0, max_cookies = 0, index = 0;
1082 	ddi_dma_handle_t dma_handle;
1083 	mblk_t *bp;
1084 	u32 mblen;
1085 	bool is_premapped = false;
1086 	u64 dma_premapped = 0, dma_bound = 0;
1087 	u32 hdl_reserved = 0;
1088 	u8 nbd = 0;
1089 	int i, bd_index;
1090 	u16 last_producer;
1091 	qede_tx_recycle_list_t *tx_recycle_list = tx_ring->tx_recycle_list;
1092 	u64 data_addr;
1093 	size_t data_size;
1094 
1095 	if (pktinfo->use_lso) {
1096 		/*
1097 		 * For tso pkt, we can use as many as 255 bds
1098 		 */
1099 		max_cookies = ETH_TX_MAX_BDS_PER_NON_LSO_PACKET - 1;
1100 		qede_pkt_parse_lso_headers(pktinfo, mp);
1101 	} else {
1102 		/*
1103 		 * For non-tso packet, only 18 bds can be used
1104 		 */
1105 		max_cookies = ETH_TX_MAX_BDS_PER_NON_LSO_PACKET - 1;
1106 	}
1107 
1108 	for (bp = mp; bp != NULL; bp = bp->b_cont) {
1109 		mblen = MBLKL(bp);
1110 		if (mblen == 0) {
1111 			continue;
1112 		}
1113 		is_premapped = false;
1114 		/*
1115 		 * If the mblk is premapped then get the
1116 		 * dma_handle and sync the dma mem. otherwise
1117 		 * reserve an handle from the driver dma
1118 		 * handles list
1119 		 */
1120 #ifdef	DBLK_DMA_PREMAP
1121 		if (bp->b_datap->db_flags & DBLK_DMA_PREMAP) {
1122 #ifdef	DEBUG_PREMAP
1123 			qede_info(tx_ring->qede, "mp is premapped");
1124 #endif
1125 			tx_ring->tx_premap_count++;
1126 			ret = dblk_dma_info_get(tx_ring->pm_handle,
1127 			    bp->b_rptr, mblen,
1128 			    bp->b_datap, &cookie[index],
1129 			    &ncookies, &dma_handle);
1130 			if (ret == DDI_DMA_MAPPED) {
1131 				is_premapped = true;
1132 				dma_premapped++;
1133 				(void) ddi_dma_sync(dma_handle, 0, 0,
1134 				    DDI_DMA_SYNC_FORDEV);
1135 			} else {
1136 				tx_ring->tx_premap_fail++;
1137 			}
1138 		}
1139 #endif	/* DBLK_DMA_PREMAP */
1140 
1141 		if (!is_premapped) {
1142 			dmah_entry = qede_get_dmah_entry(tx_ring);
1143 			if (dmah_entry == NULL) {
1144 				qede_info(tx_ring->qede, "dmah_entry NULL, "
1145 				    "Fallback to copy mode...");
1146 				status = XMIT_FAILED;
1147 				goto err_map;
1148 			}
1149 
1150 			if (ddi_dma_addr_bind_handle(dmah_entry->dma_handle,
1151 			    NULL, (caddr_t)bp->b_rptr, mblen,
1152 			    DDI_DMA_STREAMING | DDI_DMA_WRITE,
1153 			    DDI_DMA_DONTWAIT, NULL, &cookie[index], &ncookies)
1154 			    != DDI_DMA_MAPPED) {
1155 
1156 #ifdef DEBUG_PULLUP
1157 			qede_info(tx_ring->qede, "addr_bind() failed for "
1158 			    "handle %p, len %d mblk_no %d tot_len 0x%x"
1159 			    " use_lso %d",  dmah_entry->dma_handle,
1160 			    mblen, pktinfo->mblk_no, pktinfo->total_len,
1161 			    pktinfo->use_lso);
1162 
1163 			qede_info(tx_ring->qede, "Falling back to pullup");
1164 #endif
1165 				status = XMIT_FALLBACK_PULLUP;
1166 				tx_ring->tx_bind_fail++;
1167 				goto err_map;
1168 			}
1169 			tx_ring->tx_bind_count++;
1170 
1171 			if (index == 0) {
1172 				dmah_entry->mp = mp;
1173 			} else {
1174 				dmah_entry->mp = NULL;
1175 			}
1176 
1177 			/* queue into recycle list for tx completion routine */
1178 			if (tail == NULL) {
1179 				head = tail = dmah_entry;
1180 			} else {
1181 				tail->next = dmah_entry;
1182 				tail = dmah_entry;
1183 			}
1184 
1185 			hdl_reserved++;
1186 			dma_bound++;
1187 		}
1188 
1189 		total_cookies += ncookies;
1190 		if (total_cookies > max_cookies) {
1191 			tx_ring->tx_too_many_cookies++;
1192 #ifdef DEBUG_PULLUP
1193 			qede_info(tx_ring->qede,
1194 			    "total_cookies > max_cookies, "
1195 			    "pktlen %d, mb num %d",
1196 			    pktinfo->total_len, pktinfo->mblk_no);
1197 #endif
1198 			status = XMIT_TOO_MANY_COOKIES;
1199 			goto err_map_sec;
1200 		}
1201 
1202 		if (is_premapped) {
1203 			index += ncookies;
1204 		} else {
1205 			index++;
1206 			/*
1207 			 * Dec. ncookies since we already stored cookie[0]
1208 			 */
1209 			ncookies--;
1210 
1211 			for (i = 0; i < ncookies; i++, index++)
1212 				ddi_dma_nextcookie(dmah_entry->dma_handle,
1213 				    &cookie[index]);
1214 		}
1215 	}
1216 
1217 	/*
1218 	 * Guard against the case where we get a series of mblks that cause us
1219 	 * not to end up with any mapped data.
1220 	 */
1221 	if (total_cookies == 0) {
1222 		status = XMIT_FAILED;
1223 		goto err_map_sec;
1224 	}
1225 
1226 	if (total_cookies > max_cookies) {
1227 		tx_ring->tx_too_many_cookies++;
1228 		status = XMIT_TOO_MANY_COOKIES;
1229 		goto err_map_sec;
1230 	}
1231 	first_bd = (struct eth_tx_1st_bd *)&local_bd[0];
1232 
1233 	/*
1234 	 * Mark this bd as start bd
1235 	 */
1236 	first_bd->data.bd_flags.bitfields =
1237 	    (1 << ETH_TX_1ST_BD_FLAGS_START_BD_SHIFT);
1238 
1239 	if (pktinfo->cksum_flags & HCK_IPV4_HDRCKSUM) {
1240 		first_bd->data.bd_flags.bitfields |=
1241 		    (1 << ETH_TX_1ST_BD_FLAGS_IP_CSUM_SHIFT);
1242 	}
1243 
1244 	if (pktinfo->cksum_flags & HCK_FULLCKSUM) {
1245 		first_bd->data.bd_flags.bitfields |=
1246 		    (1 << ETH_TX_1ST_BD_FLAGS_L4_CSUM_SHIFT);
1247 	}
1248 
1249 
1250 	/* Fill-up local bds with the tx data and flags */
1251 	for (i = 0, bd_index = 0; i < total_cookies; i++, bd_index++) {
1252 		if (bd_index == 0) {
1253 			BD_SET_ADDR_LEN(first_bd,
1254 			    cookie[i].dmac_laddress,
1255 			    cookie[i].dmac_size);
1256 
1257 			if (pktinfo->use_lso) {
1258 			first_bd->data.bd_flags.bitfields |=
1259 			    1 << ETH_TX_1ST_BD_FLAGS_LSO_SHIFT;
1260 
1261 			second_bd = (struct eth_tx_2nd_bd *)&local_bd[1];
1262 
1263 			/*
1264 			 * If the fisrt bd contains
1265 			 * hdr + data (partial or full data), then spilt
1266 			 * the hdr and data between 1st and 2nd
1267 			 * bd respectively
1268 			 */
1269 			if (first_bd->nbytes > pktinfo->total_hlen) {
1270 				data_addr = cookie[0].dmac_laddress
1271 				    + pktinfo->total_hlen;
1272 				data_size = cookie[i].dmac_size
1273 				    - pktinfo->total_hlen;
1274 
1275 				BD_SET_ADDR_LEN(second_bd,
1276 				    data_addr,
1277 				    data_size);
1278 
1279 				/*
1280 				 * First bd already contains the addr to
1281 				 * to start of pkt, just adjust the dma
1282 				 * len of first_bd
1283 				 */
1284 				first_bd->nbytes = pktinfo->total_hlen;
1285 				bd_index++;
1286 			} else if (first_bd->nbytes < pktinfo->total_hlen) {
1287 #ifdef DEBUG_PULLUP
1288 				qede_info(tx_ring->qede,
1289 				    "Headers not in single bd");
1290 #endif
1291 				status = XMIT_FALLBACK_PULLUP;
1292 				goto err_map_sec;
1293 
1294 			}
1295 
1296 			/*
1297 			 * Third bd is used to indicates to fw
1298 			 * that tso needs to be performed. It should
1299 			 * be present even if only two cookies are
1300 			 * needed for the mblk
1301 			 */
1302 			third_bd = (struct eth_tx_3rd_bd *)&local_bd[2];
1303 			third_bd->data.lso_mss |=
1304 			    HOST_TO_LE_16(pktinfo->mss);
1305 			third_bd->data.bitfields |=
1306 			    1 << ETH_TX_DATA_3RD_BD_HDR_NBD_SHIFT;
1307 			}
1308 
1309 			continue;
1310 		}
1311 
1312 		tx_data_bd = &local_bd[bd_index];
1313 		BD_SET_ADDR_LEN(tx_data_bd,
1314 		    cookie[i].dmac_laddress,
1315 		    cookie[i].dmac_size);
1316 	}
1317 
1318 	if (pktinfo->use_lso) {
1319 		if (bd_index < 3) {
1320 			nbd = 3;
1321 		} else {
1322 			nbd = bd_index;
1323 		}
1324 	} else {
1325 		nbd = total_cookies;
1326 		first_bd->data.bitfields |=
1327 		    (pktinfo->total_len & ETH_TX_DATA_1ST_BD_PKT_LEN_MASK)
1328 		    << ETH_TX_DATA_1ST_BD_PKT_LEN_SHIFT;
1329 	}
1330 
1331 	first_bd->data.nbds = nbd;
1332 
1333 	mutex_enter(&tx_ring->tx_lock);
1334 
1335 	/*
1336 	 * Before copying the local bds into actual,
1337 	 * check if we have enough on the bd_chain
1338 	 */
1339 	if (ecore_chain_get_elem_left(&tx_ring->tx_bd_ring) <
1340 	    nbd) {
1341 		tx_ring->tx_q_sleeping = 1;
1342 		status = XMIT_PAUSE_QUEUE;
1343 #ifdef	DEBUG_TX_RECYCLE
1344 			qede_info(tx_ring->qede, "Pausing tx queue...");
1345 #endif
1346 		mutex_exit(&tx_ring->tx_lock);
1347 		goto err_map_sec ;
1348 	}
1349 
1350 	/* Copy the local_bd(s) into the actual bds */
1351 	for (i = 0; i < nbd; i++) {
1352 		tx_data_bd = ecore_chain_produce(&tx_ring->tx_bd_ring);
1353 		bcopy(&local_bd[i], tx_data_bd, sizeof (*tx_data_bd));
1354 	}
1355 
1356 	last_producer = tx_ring->sw_tx_prod;
1357 
1358 	tx_ring->tx_recycle_list[tx_ring->sw_tx_prod].dmah_entry = head;
1359 	tx_ring->tx_recycle_list[tx_ring->sw_tx_prod].bcopy_pkt = NULL;
1360 	tx_ring->sw_tx_prod = (tx_ring->sw_tx_prod + 1) & TX_RING_MASK;
1361 
1362 	tx_ring->tx_db.data.bd_prod =
1363 	    HOST_TO_LE_16(ecore_chain_get_prod_idx(&tx_ring->tx_bd_ring));
1364 
1365 	/* Sync the tx_bd dma mem */
1366 	qede_desc_dma_mem_sync(&tx_ring->tx_bd_dmah,
1367 	    last_producer, nbd,
1368 	    tx_ring->tx_ring_size,
1369 	    sizeof (struct eth_tx_bd),
1370 	    DDI_DMA_SYNC_FORDEV);
1371 
1372 	/*
1373 	 * Write to doorbell bar
1374 	 */
1375 	QEDE_DOORBELL_WR(tx_ring, tx_ring->tx_db.raw);
1376 
1377 	mutex_exit(&tx_ring->tx_lock);
1378 
1379 	return (XMIT_DONE);
1380 err_map:
1381 	if (dmah_entry != NULL) {
1382 		if (tail == NULL) {
1383 			head = tail = dmah_entry;
1384 		} else {
1385 			tail->next = dmah_entry;
1386 			tail = dmah_entry;
1387 		}
1388 		hdl_reserved++;
1389 	}
1390 
1391 err_map_sec:
1392 
1393 	hdl = head;
1394 
1395 	while (hdl != NULL) {
1396 		(void) ddi_dma_unbind_handle(hdl->dma_handle);
1397 		hdl = hdl->next;
1398 	}
1399 
1400 	if (head != NULL) {
1401 		qede_put_dmah_entries(tx_ring, head);
1402 	}
1403 
1404 	return (status);
1405 }
1406 
1407 static enum qede_xmit_status
1408 qede_send_tx_packet(qede_t *qede, qede_tx_ring_t *tx_ring, mblk_t *mp)
1409 {
1410 	bool force_pullup = false;
1411 	enum qede_xmit_status status = XMIT_FAILED;
1412 	enum qede_xmit_mode xmit_mode = USE_BCOPY;
1413 	qede_tx_pktinfo_t pktinfo;
1414 	mblk_t *original_mp = NULL, *pulled_up_mp = NULL;
1415 	struct ether_vlan_header *ethvhdr;
1416 
1417 	mutex_enter(&tx_ring->tx_lock);
1418 	if (ecore_chain_get_elem_left(&tx_ring->tx_bd_ring) <
1419 	    qede->tx_recycle_threshold) {
1420 #ifdef	DEBUG_TX_RECYCLE
1421 		qede_info(qede, "Recyclycling from tx routine");
1422 #endif
1423 		if (qede_process_tx_completions(tx_ring) <
1424 		    qede->tx_recycle_threshold) {
1425 #ifdef	DEBUG_TX_RECYCLE
1426 			qede_info(qede, "Still not enough bd after cleanup, "
1427 			    "pausing tx queue...");
1428 #endif
1429 			tx_ring->tx_q_sleeping = 1;
1430 			mutex_exit(&tx_ring->tx_lock);
1431 			return (XMIT_PAUSE_QUEUE);
1432 		}
1433 	}
1434 
1435 	mutex_exit(&tx_ring->tx_lock);
1436 
1437 	bzero(&pktinfo, sizeof (pktinfo));
1438 
1439 	/* Get the offload reqd. on the pkt */
1440 	qede_get_pkt_offload_info(qede, mp, &pktinfo.cksum_flags,
1441 	    &pktinfo.use_lso, &pktinfo.mss);
1442 
1443 do_pullup:
1444 	if (force_pullup) {
1445 		tx_ring->tx_pullup_count++;
1446 #ifdef	DEBUG_PULLUP
1447 		qede_info(qede, "Pulling up original mp %p", mp);
1448 #endif
1449 		/*
1450 		 * Try to accumulate all mblks of this pkt
1451 		 * into a single mblk
1452 		 */
1453 		original_mp = mp;
1454 		if ((pulled_up_mp = msgpullup(mp, -1)) != NULL) {
1455 #ifdef	DEBUG_PULLUP
1456 			qede_info(qede, "New mp %p, ori %p", pulled_up_mp, mp);
1457 #endif
1458 			/*
1459 			 * Proceed with the new single
1460 			 * mp
1461 			 */
1462 			mp = pulled_up_mp;
1463 			xmit_mode = XMIT_MODE_UNUSED;
1464 			pktinfo.pulled_up = B_TRUE;
1465 		} else {
1466 #ifdef	DEBUG_PULLUP
1467 			qede_info(tx_ring->qede, "Pullup failed");
1468 #endif
1469 			status = XMIT_FAILED;
1470 			goto exit;
1471 		}
1472 	}
1473 
1474 	qede_get_pkt_info(qede, mp, &pktinfo);
1475 
1476 
1477 	if ((!pktinfo.use_lso) &&
1478                  (pktinfo.total_len > (qede->mtu + pktinfo.mac_hlen))) {
1479   		qede_info(tx_ring->qede,
1480 		    "Packet drop as packet len 0x%x > 0x%x",
1481 		    pktinfo.total_len, (qede->mtu + QEDE_MAX_ETHER_HDR));
1482 		status = XMIT_FAILED;
1483 		goto exit;
1484 	}
1485 
1486 
1487 #ifdef	DEBUG_PULLUP
1488 	if (force_pullup) {
1489 	qede_print_err("!%s: mp %p, pktinfo : total_len %d,"
1490 	    " mblk_no %d, ether_type %d\n"
1491 	    "mac_hlen %d, ip_hlen %d, l4_hlen %d\n"
1492 	    "l4_proto %d, use_cksum:use_lso %d:%d mss %d", __func__, mp,
1493 	    pktinfo.total_len, pktinfo.mblk_no, pktinfo.ether_type,
1494 	    pktinfo.mac_hlen, pktinfo.ip_hlen, pktinfo.l4_hlen,
1495 	    pktinfo.l4_proto, pktinfo.cksum_flags, pktinfo.use_lso,
1496 	    pktinfo.mss);
1497 	}
1498 #endif
1499 
1500 #ifdef	DEBUG_PREMAP
1501 	if (DBLK_IS_PREMAPPED(mp->b_datap)) {
1502 		qede_print_err("!%s(%d): mp %p id PREMAPPMED",
1503 		    __func__, qede->instance);
1504 	}
1505 #endif
1506 
1507 #ifdef	DBLK_DMA_PREMAP
1508 	if (DBLK_IS_PREMAPPED(mp->b_datap) ||
1509 	    pktinfo.total_len > qede->tx_bcopy_threshold) {
1510 		xmit_mode = USE_DMA_BIND;
1511 	}
1512 #else
1513 	if (pktinfo.total_len > qede->tx_bcopy_threshold) {
1514 		xmit_mode = USE_DMA_BIND;
1515 	}
1516 #endif
1517 
1518 	if (pktinfo.total_len <= qede->tx_bcopy_threshold) {
1519 		xmit_mode = USE_BCOPY;
1520 	}
1521 
1522 	/*
1523 	 * if mac + ip hdr not in one contiguous block,
1524 	 * use copy mode
1525 	 */
1526 	if (MBLKL(mp) < (ETHER_HEADER_LEN + IP_HEADER_LEN)) {
1527 		/*qede_info(qede, "mblk too small, using copy mode, len = %d", MBLKL(mp));*/
1528 		xmit_mode = USE_BCOPY;
1529 	}
1530 
1531 	if ((uintptr_t)mp->b_rptr & 1) {
1532 		xmit_mode = USE_BCOPY;
1533 	}
1534 
1535 	/*
1536 	 * if too many mblks and hence the dma cookies, needed
1537 	 * for tx, then use bcopy or pullup on packet
1538 	 * currently, ETH_TX_MAX_BDS_PER_NON_LSO_PACKET = 18
1539 	 */
1540 	if (pktinfo.mblk_no > (ETH_TX_MAX_BDS_PER_NON_LSO_PACKET - 1)) {
1541 		if (force_pullup) {
1542 			tx_ring->tx_too_many_mblks++;
1543 			status = XMIT_FAILED;
1544 			goto exit;
1545 		} else {
1546 			xmit_mode = USE_PULLUP;
1547 		}
1548 	}
1549 
1550 #ifdef	TX_FORCE_COPY_MODE
1551 	xmit_mode = USE_BCOPY;
1552 #elif	TX_FORCE_MAPPED_MODE
1553 	xmit_mode = USE_DMA_BIND;
1554 #endif
1555 
1556 #ifdef	DEBUG_PULLUP
1557 	if (force_pullup) {
1558 		qede_info(qede, "using mode %d on pulled mp %p",
1559 		    xmit_mode, mp);
1560 	}
1561 #endif
1562 
1563 	/*
1564 	 * Use Mapped mode for the packet
1565 	 */
1566 	if (xmit_mode == USE_DMA_BIND) {
1567 		status = qede_tx_mapped(tx_ring, mp, &pktinfo);
1568 		if (status == XMIT_DONE) {
1569 			if (pktinfo.use_lso) {
1570 				tx_ring->tx_lso_pkt_count++;
1571 			} else if(pktinfo.total_len > 1518) {
1572 				tx_ring->tx_jumbo_pkt_count++;
1573 			}
1574 			tx_ring->tx_mapped_pkts++;
1575 			goto exit;
1576                 } else if ((status == XMIT_TOO_MANY_COOKIES ||
1577 		    (status == XMIT_FALLBACK_PULLUP)) && !force_pullup) {
1578 			xmit_mode = USE_PULLUP;
1579 		} else {
1580 			status = XMIT_FAILED;
1581 			goto exit;
1582 		}
1583 	}
1584 
1585 	if (xmit_mode == USE_BCOPY) {
1586 		status = qede_tx_bcopy(tx_ring, mp, &pktinfo);
1587 		if (status == XMIT_DONE) {
1588 			tx_ring->tx_copy_count++;
1589 			goto exit;
1590 		} else if ((status == XMIT_FALLBACK_PULLUP) &&
1591 		    !force_pullup) {
1592 			xmit_mode = USE_PULLUP;
1593 		} else {
1594 			goto exit;
1595 		}
1596 	}
1597 
1598 	if (xmit_mode == USE_PULLUP) {
1599 		force_pullup = true;
1600 		tx_ring->tx_pullup_count++;
1601 		goto do_pullup;
1602 	}
1603 
1604 exit:
1605 	if (status != XMIT_DONE) {
1606 		/*
1607 		 * if msgpullup succeeded, but something else  failed,
1608 		 * free the pulled-up msg and return original mblk to
1609 		 * stack, indicating tx failure
1610 		 */
1611 		if (pulled_up_mp) {
1612 			qede_info(qede, "tx failed, free pullup pkt %p", mp);
1613 			freemsg(pulled_up_mp);
1614 			mp = original_mp;
1615 		}
1616 	} else {
1617 		tx_ring->tx_byte_count += pktinfo.total_len;
1618 		/*
1619 		 * If tx was successfull after a pullup, then free the
1620 		 * original mp. The pulled-up will be freed as part of
1621 		 * tx completions processing
1622 		 */
1623 		if (pulled_up_mp) {
1624 #ifdef	DEBUG_PULLUP
1625 			qede_info(qede,
1626 			    "success, free ori mp %p", original_mp);
1627 #endif
1628 			freemsg(original_mp);
1629 		}
1630 	}
1631 
1632 	return (status);
1633 }
1634 
typedef	uint32_t	ub4; /* unsigned 4-byte quantities */
typedef	uint8_t		ub1; /* unsigned 1-byte quantities */

#define	hashsize(n)	((ub4)1<<(n))
#define	hashmask(n)	(hashsize(n)-1)

/*
 * Scramble three 32-bit accumulators in place.  Each argument is evaluated
 * many times, so only plain variables may be passed.  All arithmetic is
 * unsigned and wraps modulo 2^32.
 */
#define	mix(a, b, c) \
do { \
	a -= b; a -= c; a ^= (c>>13); \
	b -= c; b -= a; b ^= (a<<8); \
	c -= a; c -= b; c ^= (b>>13); \
	a -= b; a -= c; a ^= (c>>12);  \
	b -= c; b -= a; b ^= (a<<16); \
	c -= a; c -= b; c ^= (b>>5); \
	a -= b; a -= c; a ^= (c>>3);  \
	b -= c; b -= a; b ^= (a<<10); \
	c -= a; c -= b; c ^= (b>>15); \
} while (0)

/*
 * hash() - hash a byte string into a 32-bit value.
 *
 * k:       the key; exactly `length` bytes are read, one byte at a time, so
 *          no alignment is required.
 * length:  the length of the key in bytes (may be 0).
 * initval: the previous hash, or an arbitrary value.
 *
 * The key is consumed in 12-byte groups, each read as three little-endian
 * 32-bit words regardless of host byte order.  The remaining 0-11 bytes and
 * the total length are folded in before a final mix.  Returns the resulting
 * 32-bit hash.
 */
ub4
hash(ub1 *k, ub4 length, ub4 initval)
{
	ub4 a, b, c, len;

	/* Set up the internal state */
	len = length;
	a = b = 0x9e3779b9;	/* the golden ratio; an arbitrary value */
	c = initval;		/* the previous hash value */

	/* handle most of the key */
	while (len >= 12) {
		a += (k[0] +((ub4)k[1]<<8) +((ub4)k[2]<<16) +((ub4)k[3]<<24));
		b += (k[4] +((ub4)k[5]<<8) +((ub4)k[6]<<16) +((ub4)k[7]<<24));
		c += (k[8] +((ub4)k[9]<<8) +((ub4)k[10]<<16)+((ub4)k[11]<<24));
		mix(a, b, c);
		k += 12;
		len -= 12;
	}

	/* handle the last 11 bytes */
	c += length;
	/* all the case statements fall through */
	switch (len) {
	case 11:
		c += ((ub4)k[10]<<24);
	/* FALLTHRU */
	case 10:
		c += ((ub4)k[9]<<16);
	/* FALLTHRU */
	case 9:
		c += ((ub4)k[8]<<8);
	/* the first byte of c is reserved for the length */
	/* FALLTHRU */
	case 8:
		b += ((ub4)k[7]<<24);
	/* FALLTHRU */
	case 7:
		b += ((ub4)k[6]<<16);
	/* FALLTHRU */
	case 6:
		b += ((ub4)k[5]<<8);
	/* FALLTHRU */
	case 5:
		b += k[4];
	/* FALLTHRU */
	case 4:
		a += ((ub4)k[3]<<24);
	/* FALLTHRU */
	case 3:
		a += ((ub4)k[2]<<16);
	/* FALLTHRU */
	case 2:
		a += ((ub4)k[1]<<8);
	/* FALLTHRU */
	case 1:
		a += k[0];
		break;
	default:
		/* case 0: nothing left to add */
		break;
	}
	mix(a, b, c);
	/* report the result */
	return (c);
}
1723 
#ifdef	NO_CROSSBOW
/*
 * qede_hash_get_txq() - choose a tx ring for a frame.
 *
 * qede: driver instance; num_fp is the number of rings to spread over.
 * bp:   start of the frame (ethernet header).
 *
 * Ring 0 is returned when there is only one fastpath, when the frame is not
 * IPv4, or when the IP header is not 4-byte aligned.  Otherwise the source
 * and destination addresses, plus the TCP/UDP ports when present (zero for
 * other protocols), are hashed and the result is masked with num_fp - 1.
 */
static uint8_t
qede_hash_get_txq(qede_t *qede, caddr_t bp)
{
	struct ether_vlan_header *vlan_hdr;
	struct ether_header *eth_hdr;
	struct ip *ip4;
	struct tcphdr *tcp;
	struct udphdr *udp;
	caddr_t l3;
	uint8_t *l4;
	uint32_t ether_type;
	uint32_t addr_src, addr_dst;
	uint32_t hashval;
	uint16_t port_src = 0;
	uint16_t port_dst = 0;
	uint8_t key[12];
	uint8_t ring;
	int l2_len;
	int n;

	/* Nothing to choose between with a single fastpath */
	if (qede->num_fp == 1) {
		return (0);
	}

	/* Work out the mac header length and payload type, vlan or not */
	vlan_hdr = (struct ether_vlan_header *)((void *)bp);
	if (ntohs(vlan_hdr->ether_tpid) == ETHERTYPE_VLAN) {
		l2_len = sizeof (struct ether_vlan_header);
		ether_type = ntohs(vlan_hdr->ether_type);
	} else {
		eth_hdr = (struct ether_header *)((void *)bp);
		l2_len = sizeof (struct ether_header);
		ether_type = ntohs(eth_hdr->ether_type);
	}

	/* Only IPv4 frames are spread; everything else uses ring 0 */
	if (ether_type != ETHERTYPE_IP) {
		return (0);
	}

	l3 = bp + l2_len;
	if (IPH_HDR_VERSION((ipha_t *)(void *)l3) != IPV4_VERSION) {
		return (0);
	}

	ip4 = (struct ip *)(void *)l3;
	if (((unsigned long)ip4) & 0x3) {
		/*  IP hdr not 4-byte aligned */
		return (0);
	}

	l4 = (uint8_t *)ip4 + IPH_HDR_LENGTH(ip4);
	addr_src = ip4->ip_src.s_addr;
	addr_dst = ip4->ip_dst.s_addr;

	switch (ip4->ip_p) {
	case IPPROTO_TCP:
		tcp = (struct tcphdr *)(void *)l4;
		port_src = tcp->th_sport;
		port_dst = tcp->th_dport;
		break;
	case IPPROTO_UDP:
		udp = (struct udphdr *)(void *)l4;
		port_src = udp->uh_sport;
		port_dst = udp->uh_dport;
		break;
	default:
		/* other protocols hash on the addresses alone */
		break;
	}

	/*
	 * Key layout, least significant byte first:
	 * [0..3] source address, [4..7] destination address,
	 * [8..9] source port, [10..11] destination port.
	 */
	for (n = 0; n < 4; n++) {
		key[n] = (uint8_t)((addr_src >> (8 * n)) & 0xFF);
		key[4 + n] = (uint8_t)((addr_dst >> (8 * n)) & 0xFF);
	}
	for (n = 0; n < 2; n++) {
		key[8 + n] = (uint8_t)((port_src >> (8 * n)) & 0xFF);
		key[10 + n] = (uint8_t)((port_dst >> (8 * n)) & 0xFF);
	}

	hashval = hash(key, 12, 0); /* return 32 bit */
	ring = (hashval & (qede->num_fp - 1));
	if (ring >= qede->num_fp) {
		cmn_err(CE_WARN, "%s bad tx_ring_id %d\n",
		    __func__, ring);
		ring = 0;
	}

	return (ring);
}
#endif
1810 
1811 mblk_t *
1812 qede_ring_tx(void *arg, mblk_t *mp)
1813 {
1814 	qede_fastpath_t *fp = (qede_fastpath_t *)arg;
1815 	qede_t *qede = fp->qede;
1816 #ifndef	NO_CROSSBOW
1817 	qede_tx_ring_t *tx_ring = fp->tx_ring[0];
1818 #else
1819 	qede_tx_ring_t *tx_ring;
1820 #endif
1821 	uint32_t ring_id;
1822 	mblk_t *next = NULL;
1823 	enum qede_xmit_status status = XMIT_FAILED;
1824 	caddr_t bp;
1825 
1826 	ASSERT(mp->b_next == NULL);
1827 
1828 #ifndef	NO_CROSSBOW
1829 	if (!fp || !tx_ring) {
1830 		qede_print_err("!%s: error, fp %p, tx_ring %p",
1831 		    __func__, fp, tx_ring);
1832 		goto exit;
1833 	}
1834 #endif
1835 	if (qede->qede_state != QEDE_STATE_STARTED) {
1836 		qede_print_err("!%s(%d): qede_state %d invalid",
1837 		    __func__, qede->instance, qede->qede_state);
1838 		goto exit;
1839 	}
1840 
1841 	if (!qede->params.link_state) {
1842 		goto exit;
1843 	}
1844 
1845 	while (mp != NULL) {
1846 #ifdef	NO_CROSSBOW
1847 		/*
1848 		 * Figure out which tx ring to send this packet to.
1849 		 * Currently multiple rings are not exposed to mac layer
1850 		 * and fanout done by driver
1851 		 */
1852 		bp = (caddr_t)mp->b_rptr;
1853 		ring_id = qede_hash_get_txq(qede, bp);
1854 		fp = &qede->fp_array[ring_id];
1855 		tx_ring = fp->tx_ring[0];
1856 
1857 		if (qede->num_tc > 1) {
1858 			qede_info(qede,
1859 			    "Traffic classes(%d) > 1 not supported",
1860 			    qede->num_tc);
1861 			goto exit;
1862 		}
1863 #endif
1864 		next = mp->b_next;
1865 		mp->b_next = NULL;
1866 
1867 		status = qede_send_tx_packet(qede, tx_ring, mp);
1868 		if (status == XMIT_DONE) {
1869 			tx_ring->tx_pkt_count++;
1870 			mp = next;
1871 		} else if (status == XMIT_PAUSE_QUEUE) {
1872 			tx_ring->tx_ring_pause++;
1873 			mp->b_next = next;
1874 			break;
1875 		} else if (status == XMIT_FAILED) {
1876 			goto exit;
1877 		}
1878 	}
1879 
1880 	return (mp);
1881 exit:
1882 	tx_ring->tx_pkt_dropped++;
1883 	freemsgchain(mp);
1884 	mp = NULL;
1885 	return (mp);
1886 }
1887