xref: /linux/drivers/net/ethernet/ibm/ibmveth.c (revision fcab107abe1ab5be9dbe874baa722372da8f4f73)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * IBM Power Virtual Ethernet Device Driver
4  *
5  * Copyright (C) IBM Corporation, 2003, 2010
6  *
7  * Authors: Dave Larson <larson1@us.ibm.com>
8  *	    Santiago Leon <santil@linux.vnet.ibm.com>
9  *	    Brian King <brking@linux.vnet.ibm.com>
10  *	    Robert Jennings <rcj@linux.vnet.ibm.com>
11  *	    Anton Blanchard <anton@au.ibm.com>
12  */
13 
14 #include <linux/module.h>
15 #include <linux/types.h>
16 #include <linux/errno.h>
17 #include <linux/dma-mapping.h>
18 #include <linux/kernel.h>
19 #include <linux/netdevice.h>
20 #include <linux/etherdevice.h>
21 #include <linux/skbuff.h>
22 #include <linux/init.h>
23 #include <linux/interrupt.h>
24 #include <linux/mm.h>
25 #include <linux/pm.h>
26 #include <linux/ethtool.h>
27 #include <linux/in.h>
28 #include <linux/ip.h>
29 #include <linux/ipv6.h>
30 #include <linux/slab.h>
31 #include <asm/hvcall.h>
32 #include <linux/atomic.h>
33 #include <asm/vio.h>
34 #include <asm/iommu.h>
35 #include <asm/firmware.h>
36 #include <net/tcp.h>
37 #include <net/ip6_checksum.h>
38 
39 #include "ibmveth.h"
40 
41 static irqreturn_t ibmveth_interrupt(int irq, void *dev_instance);
42 static unsigned long ibmveth_get_desired_dma(struct vio_dev *vdev);
43 
44 static struct kobj_type ktype_veth_pool;
45 
46 
47 static const char ibmveth_driver_name[] = "ibmveth";
48 static const char ibmveth_driver_string[] = "IBM Power Virtual Ethernet Driver";
49 #define ibmveth_driver_version "1.06"
50 
51 MODULE_AUTHOR("Santiago Leon <santil@linux.vnet.ibm.com>");
52 MODULE_DESCRIPTION("IBM Power Virtual Ethernet Driver");
53 MODULE_LICENSE("GPL");
54 MODULE_VERSION(ibmveth_driver_version);
55 
56 static unsigned int tx_copybreak __read_mostly = 128;
57 module_param(tx_copybreak, uint, 0644);
58 MODULE_PARM_DESC(tx_copybreak,
59 	"Maximum size of packet that is copied to a new buffer on transmit");
60 
61 static unsigned int rx_copybreak __read_mostly = 128;
62 module_param(rx_copybreak, uint, 0644);
63 MODULE_PARM_DESC(rx_copybreak,
64 	"Maximum size of packet that is copied to a new buffer on receive");
65 
66 static unsigned int rx_flush __read_mostly = 0;
67 module_param(rx_flush, uint, 0644);
68 MODULE_PARM_DESC(rx_flush, "Flush receive buffers before use");
69 
70 static bool old_large_send __read_mostly;
71 module_param(old_large_send, bool, 0444);
72 MODULE_PARM_DESC(old_large_send,
73 	"Use old large send method on firmware that supports the new method");
74 
75 struct ibmveth_stat {
76 	char name[ETH_GSTRING_LEN];
77 	int offset;
78 };
79 
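/* Each statistic is a u64 field of struct ibmveth_adapter; the table below
 * records each field's byte offset so ethtool can read the counters
 * generically via IBMVETH_GET_STAT.
 */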
80 #define IBMVETH_STAT_OFF(stat) offsetof(struct ibmveth_adapter, stat)
81 #define IBMVETH_GET_STAT(a, off) *((u64 *)(((unsigned long)(a)) + off))
82 
83 static struct ibmveth_stat ibmveth_stats[] = {
84 	{ "replenish_task_cycles", IBMVETH_STAT_OFF(replenish_task_cycles) },
85 	{ "replenish_no_mem", IBMVETH_STAT_OFF(replenish_no_mem) },
86 	{ "replenish_add_buff_failure",
87 			IBMVETH_STAT_OFF(replenish_add_buff_failure) },
88 	{ "replenish_add_buff_success",
89 			IBMVETH_STAT_OFF(replenish_add_buff_success) },
90 	{ "rx_invalid_buffer", IBMVETH_STAT_OFF(rx_invalid_buffer) },
91 	{ "rx_no_buffer", IBMVETH_STAT_OFF(rx_no_buffer) },
92 	{ "tx_map_failed", IBMVETH_STAT_OFF(tx_map_failed) },
93 	{ "tx_send_failed", IBMVETH_STAT_OFF(tx_send_failed) },
94 	{ "fw_enabled_ipv4_csum", IBMVETH_STAT_OFF(fw_ipv4_csum_support) },
95 	{ "fw_enabled_ipv6_csum", IBMVETH_STAT_OFF(fw_ipv6_csum_support) },
96 	{ "tx_large_packets", IBMVETH_STAT_OFF(tx_large_packets) },
97 	{ "rx_large_packets", IBMVETH_STAT_OFF(rx_large_packets) },
98 	{ "fw_enabled_large_send", IBMVETH_STAT_OFF(fw_large_send_support) }
99 };
100 
101 /* simple methods of getting data from the current rxq entry */
102 static inline u32 ibmveth_rxq_flags(struct ibmveth_adapter *adapter)
103 {
104 	return be32_to_cpu(adapter->rx_queue.queue_addr[adapter->rx_queue.index].flags_off);
105 }
106 
107 static inline int ibmveth_rxq_toggle(struct ibmveth_adapter *adapter)
108 {
109 	return (ibmveth_rxq_flags(adapter) & IBMVETH_RXQ_TOGGLE) >>
110 			IBMVETH_RXQ_TOGGLE_SHIFT;
111 }
112 
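/* Receive queue entries carry a toggle bit. An entry is treated as pending
 * while its toggle matches the value the driver expects
 * (adapter->rx_queue.toggle, flipped each time the queue index wraps in
 * ibmveth_rxq_harvest_buffer).
 */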
113 static inline int ibmveth_rxq_pending_buffer(struct ibmveth_adapter *adapter)
114 {
115 	return ibmveth_rxq_toggle(adapter) == adapter->rx_queue.toggle;
116 }
117 
118 static inline int ibmveth_rxq_buffer_valid(struct ibmveth_adapter *adapter)
119 {
120 	return ibmveth_rxq_flags(adapter) & IBMVETH_RXQ_VALID;
121 }
122 
123 static inline int ibmveth_rxq_frame_offset(struct ibmveth_adapter *adapter)
124 {
125 	return ibmveth_rxq_flags(adapter) & IBMVETH_RXQ_OFF_MASK;
126 }
127 
128 static inline int ibmveth_rxq_large_packet(struct ibmveth_adapter *adapter)
129 {
130 	return ibmveth_rxq_flags(adapter) & IBMVETH_RXQ_LRG_PKT;
131 }
132 
133 static inline int ibmveth_rxq_frame_length(struct ibmveth_adapter *adapter)
134 {
135 	return be32_to_cpu(adapter->rx_queue.queue_addr[adapter->rx_queue.index].length);
136 }
137 
138 static inline int ibmveth_rxq_csum_good(struct ibmveth_adapter *adapter)
139 {
140 	return ibmveth_rxq_flags(adapter) & IBMVETH_RXQ_CSUM_GOOD;
141 }
142 
143 static unsigned int ibmveth_real_max_tx_queues(void)
144 {
145 	unsigned int n_cpu = num_online_cpus();
146 
147 	return min(n_cpu, IBMVETH_MAX_QUEUES);
148 }
149 
150 /* set up the initial settings for a buffer pool */
151 static void ibmveth_init_buffer_pool(struct ibmveth_buff_pool *pool,
152 				     u32 pool_index, u32 pool_size,
153 				     u32 buff_size, u32 pool_active)
154 {
155 	pool->size = pool_size;
156 	pool->index = pool_index;
157 	pool->buff_size = buff_size;
158 	pool->threshold = pool_size * 7 / 8;
159 	pool->active = pool_active;
160 }
161 
162 /* allocate and set up a buffer pool - called during open */
163 static int ibmveth_alloc_buffer_pool(struct ibmveth_buff_pool *pool)
164 {
165 	int i;
166 
167 	pool->free_map = kmalloc_array(pool->size, sizeof(u16), GFP_KERNEL);
168 
169 	if (!pool->free_map)
170 		return -1;
171 
172 	pool->dma_addr = kcalloc(pool->size, sizeof(dma_addr_t), GFP_KERNEL);
173 	if (!pool->dma_addr) {
174 		kfree(pool->free_map);
175 		pool->free_map = NULL;
176 		return -1;
177 	}
178 
179 	pool->skbuff = kcalloc(pool->size, sizeof(void *), GFP_KERNEL);
180 
181 	if (!pool->skbuff) {
182 		kfree(pool->dma_addr);
183 		pool->dma_addr = NULL;
184 
185 		kfree(pool->free_map);
186 		pool->free_map = NULL;
187 		return -1;
188 	}
189 
190 	for (i = 0; i < pool->size; ++i)
191 		pool->free_map[i] = i;
192 
193 	atomic_set(&pool->available, 0);
194 	pool->producer_index = 0;
195 	pool->consumer_index = 0;
196 
197 	return 0;
198 }
199 
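/* Flush a buffer out of the data cache with dcbf, one cache line at a time.
 * Only used when the rx_flush module parameter is set.
 */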
200 static inline void ibmveth_flush_buffer(void *addr, unsigned long length)
201 {
202 	unsigned long offset;
203 
204 	for (offset = 0; offset < length; offset += SMP_CACHE_BYTES)
205 		asm("dcbf %0,%1,1" :: "b" (addr), "r" (offset));
206 }
207 
208 /* replenish the buffers for a pool.  note that we don't need to
209  * skb_reserve these since they are used for incoming...
210  */
211 static void ibmveth_replenish_buffer_pool(struct ibmveth_adapter *adapter,
212 					  struct ibmveth_buff_pool *pool)
213 {
214 	u32 i;
215 	u32 count = pool->size - atomic_read(&pool->available);
216 	u32 buffers_added = 0;
217 	struct sk_buff *skb;
218 	unsigned int free_index, index;
219 	u64 correlator;
220 	unsigned long lpar_rc;
221 	dma_addr_t dma_addr;
222 
223 	mb();
224 
225 	for (i = 0; i < count; ++i) {
226 		union ibmveth_buf_desc desc;
227 
228 		free_index = pool->consumer_index;
229 		index = pool->free_map[free_index];
230 		skb = NULL;
231 
232 		if (WARN_ON(index == IBM_VETH_INVALID_MAP)) {
233 			schedule_work(&adapter->work);
234 			goto bad_index_failure;
235 		}
236 
237 		/* are we allocating a new buffer or recycling an old one */
238 		if (pool->skbuff[index])
239 			goto reuse;
240 
241 		skb = netdev_alloc_skb(adapter->netdev, pool->buff_size);
242 
243 		if (!skb) {
244 			netdev_dbg(adapter->netdev,
245 				   "replenish: unable to allocate skb\n");
246 			adapter->replenish_no_mem++;
247 			break;
248 		}
249 
250 		dma_addr = dma_map_single(&adapter->vdev->dev, skb->data,
251 				pool->buff_size, DMA_FROM_DEVICE);
252 
253 		if (dma_mapping_error(&adapter->vdev->dev, dma_addr))
254 			goto failure;
255 
256 		pool->dma_addr[index] = dma_addr;
257 		pool->skbuff[index] = skb;
258 
259 		if (rx_flush) {
260 			unsigned int len = min(pool->buff_size,
261 					       adapter->netdev->mtu +
262 					       IBMVETH_BUFF_OH);
263 			ibmveth_flush_buffer(skb->data, len);
264 		}
265 reuse:
266 		dma_addr = pool->dma_addr[index];
267 		desc.fields.flags_len = IBMVETH_BUF_VALID | pool->buff_size;
268 		desc.fields.address = dma_addr;
269 
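		/* correlator: pool index in the upper 32 bits, buffer index in
		 * the lower 32 bits, stashed at the start of the buffer so the
		 * receive path can find this pool/slot again
		 */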
270 		correlator = ((u64)pool->index << 32) | index;
271 		*(u64 *)pool->skbuff[index]->data = correlator;
272 
273 		lpar_rc = h_add_logical_lan_buffer(adapter->vdev->unit_address,
274 						   desc.desc);
275 
276 		if (lpar_rc != H_SUCCESS) {
277 			netdev_warn(adapter->netdev,
278 				    "%sadd_logical_lan failed %lu\n",
279 				    skb ? "" : "When recycling: ", lpar_rc);
280 			goto failure;
281 		}
282 
283 		pool->free_map[free_index] = IBM_VETH_INVALID_MAP;
284 		pool->consumer_index++;
285 		if (pool->consumer_index >= pool->size)
286 			pool->consumer_index = 0;
287 
288 		buffers_added++;
289 		adapter->replenish_add_buff_success++;
290 	}
291 
292 	mb();
293 	atomic_add(buffers_added, &(pool->available));
294 	return;
295 
296 failure:
297 
298 	if (dma_addr && !dma_mapping_error(&adapter->vdev->dev, dma_addr))
299 		dma_unmap_single(&adapter->vdev->dev,
300 		                 pool->dma_addr[index], pool->buff_size,
301 		                 DMA_FROM_DEVICE);
302 	dev_kfree_skb_any(pool->skbuff[index]);
303 	pool->skbuff[index] = NULL;
304 bad_index_failure:
305 	adapter->replenish_add_buff_failure++;
306 
307 	mb();
308 	atomic_add(buffers_added, &(pool->available));
309 }
310 
311 /*
312  * The final 8 bytes of the buffer list hold a count of frames dropped
313  * because no buffer in the buffer list was capable of holding
314  * the frame.
315  */
316 static void ibmveth_update_rx_no_buffer(struct ibmveth_adapter *adapter)
317 {
318 	__be64 *p = adapter->buffer_list_addr + 4096 - 8;
319 
320 	adapter->rx_no_buffer = be64_to_cpup(p);
321 }
322 
323 /* replenish routine */
324 static void ibmveth_replenish_task(struct ibmveth_adapter *adapter)
325 {
326 	int i;
327 
328 	adapter->replenish_task_cycles++;
329 
330 	for (i = (IBMVETH_NUM_BUFF_POOLS - 1); i >= 0; i--) {
331 		struct ibmveth_buff_pool *pool = &adapter->rx_buff_pool[i];
332 
333 		if (pool->active &&
334 		    (atomic_read(&pool->available) < pool->threshold))
335 			ibmveth_replenish_buffer_pool(adapter, pool);
336 	}
337 
338 	ibmveth_update_rx_no_buffer(adapter);
339 }
340 
341 /* empty and free a buffer pool - also used to do cleanup in error paths */
342 static void ibmveth_free_buffer_pool(struct ibmveth_adapter *adapter,
343 				     struct ibmveth_buff_pool *pool)
344 {
345 	int i;
346 
347 	kfree(pool->free_map);
348 	pool->free_map = NULL;
349 
350 	if (pool->skbuff && pool->dma_addr) {
351 		for (i = 0; i < pool->size; ++i) {
352 			struct sk_buff *skb = pool->skbuff[i];
353 			if (skb) {
354 				dma_unmap_single(&adapter->vdev->dev,
355 						 pool->dma_addr[i],
356 						 pool->buff_size,
357 						 DMA_FROM_DEVICE);
358 				dev_kfree_skb_any(skb);
359 				pool->skbuff[i] = NULL;
360 			}
361 		}
362 	}
363 
364 	if (pool->dma_addr) {
365 		kfree(pool->dma_addr);
366 		pool->dma_addr = NULL;
367 	}
368 
369 	if (pool->skbuff) {
370 		kfree(pool->skbuff);
371 		pool->skbuff = NULL;
372 	}
373 }
374 
375 /**
376  * ibmveth_remove_buffer_from_pool - remove a buffer from a pool
377  * @adapter: adapter instance
378  * @correlator: identifies pool and index
379  * @reuse: whether to reuse buffer
380  *
381  * Return:
382  * * %0       - success
383  * * %-EINVAL - correlator maps to a pool or index that is out of range
384  * * %-EFAULT - pool and index map to null skb
385  */
386 static int ibmveth_remove_buffer_from_pool(struct ibmveth_adapter *adapter,
387 					   u64 correlator, bool reuse)
388 {
389 	unsigned int pool  = correlator >> 32;
390 	unsigned int index = correlator & 0xffffffffUL;
391 	unsigned int free_index;
392 	struct sk_buff *skb;
393 
394 	if (WARN_ON(pool >= IBMVETH_NUM_BUFF_POOLS) ||
395 	    WARN_ON(index >= adapter->rx_buff_pool[pool].size)) {
396 		schedule_work(&adapter->work);
397 		return -EINVAL;
398 	}
399 
400 	skb = adapter->rx_buff_pool[pool].skbuff[index];
401 	if (WARN_ON(!skb)) {
402 		schedule_work(&adapter->work);
403 		return -EFAULT;
404 	}
405 
406 	/* if we are going to reuse the buffer then keep the pointers around
407 	 * but mark index as available. replenish will see the skb pointer and
408 	 * assume it is to be recycled.
409 	 */
410 	if (!reuse) {
411 		/* remove the skb pointer to mark free. actual freeing is done
412 	 * by upper-level networking after gro_receive
413 		 */
414 		adapter->rx_buff_pool[pool].skbuff[index] = NULL;
415 
416 		dma_unmap_single(&adapter->vdev->dev,
417 				 adapter->rx_buff_pool[pool].dma_addr[index],
418 				 adapter->rx_buff_pool[pool].buff_size,
419 				 DMA_FROM_DEVICE);
420 	}
421 
422 	free_index = adapter->rx_buff_pool[pool].producer_index;
423 	adapter->rx_buff_pool[pool].producer_index++;
424 	if (adapter->rx_buff_pool[pool].producer_index >=
425 	    adapter->rx_buff_pool[pool].size)
426 		adapter->rx_buff_pool[pool].producer_index = 0;
427 	adapter->rx_buff_pool[pool].free_map[free_index] = index;
428 
429 	mb();
430 
431 	atomic_dec(&(adapter->rx_buff_pool[pool].available));
432 
433 	return 0;
434 }
435 
436 /* get the current buffer on the rx queue */
437 static inline struct sk_buff *ibmveth_rxq_get_buffer(struct ibmveth_adapter *adapter)
438 {
439 	u64 correlator = adapter->rx_queue.queue_addr[adapter->rx_queue.index].correlator;
440 	unsigned int pool = correlator >> 32;
441 	unsigned int index = correlator & 0xffffffffUL;
442 
443 	if (WARN_ON(pool >= IBMVETH_NUM_BUFF_POOLS) ||
444 	    WARN_ON(index >= adapter->rx_buff_pool[pool].size)) {
445 		schedule_work(&adapter->work);
446 		return NULL;
447 	}
448 
449 	return adapter->rx_buff_pool[pool].skbuff[index];
450 }
451 
452 /**
453  * ibmveth_rxq_harvest_buffer - Harvest buffer from pool
454  *
455  * @adapter: pointer to adapter
456  * @reuse:   whether to reuse buffer
457  *
458  * Context: called from ibmveth_poll
459  *
460  * Return:
461  * * %0    - success
462  * * other - non-zero return from ibmveth_remove_buffer_from_pool
463  */
464 static int ibmveth_rxq_harvest_buffer(struct ibmveth_adapter *adapter,
465 				      bool reuse)
466 {
467 	u64 cor;
468 	int rc;
469 
470 	cor = adapter->rx_queue.queue_addr[adapter->rx_queue.index].correlator;
471 	rc = ibmveth_remove_buffer_from_pool(adapter, cor, reuse);
472 	if (unlikely(rc))
473 		return rc;
474 
475 	if (++adapter->rx_queue.index == adapter->rx_queue.num_slots) {
476 		adapter->rx_queue.index = 0;
477 		adapter->rx_queue.toggle = !adapter->rx_queue.toggle;
478 	}
479 
480 	return 0;
481 }
482 
483 static void ibmveth_free_tx_ltb(struct ibmveth_adapter *adapter, int idx)
484 {
485 	dma_unmap_single(&adapter->vdev->dev, adapter->tx_ltb_dma[idx],
486 			 adapter->tx_ltb_size, DMA_TO_DEVICE);
487 	kfree(adapter->tx_ltb_ptr[idx]);
488 	adapter->tx_ltb_ptr[idx] = NULL;
489 }
490 
491 static int ibmveth_allocate_tx_ltb(struct ibmveth_adapter *adapter, int idx)
492 {
493 	adapter->tx_ltb_ptr[idx] = kzalloc(adapter->tx_ltb_size,
494 					   GFP_KERNEL);
495 	if (!adapter->tx_ltb_ptr[idx]) {
496 		netdev_err(adapter->netdev,
497 			   "unable to allocate tx long term buffer\n");
498 		return -ENOMEM;
499 	}
500 	adapter->tx_ltb_dma[idx] = dma_map_single(&adapter->vdev->dev,
501 						  adapter->tx_ltb_ptr[idx],
502 						  adapter->tx_ltb_size,
503 						  DMA_TO_DEVICE);
504 	if (dma_mapping_error(&adapter->vdev->dev, adapter->tx_ltb_dma[idx])) {
505 		netdev_err(adapter->netdev,
506 			   "unable to DMA map tx long term buffer\n");
507 		kfree(adapter->tx_ltb_ptr[idx]);
508 		adapter->tx_ltb_ptr[idx] = NULL;
509 		return -ENOMEM;
510 	}
511 
512 	return 0;
513 }
514 
515 static int ibmveth_register_logical_lan(struct ibmveth_adapter *adapter,
516         union ibmveth_buf_desc rxq_desc, u64 mac_address)
517 {
518 	int rc, try_again = 1;
519 
520 	/*
521 	 * After a kexec the adapter will still be open, so our attempt to
522 	 * open it will fail. If we get a failure, we free the logical LAN and
523 	 * try again, but only once.
524 	 */
525 retry:
526 	rc = h_register_logical_lan(adapter->vdev->unit_address,
527 				    adapter->buffer_list_dma, rxq_desc.desc,
528 				    adapter->filter_list_dma, mac_address);
529 
530 	if (rc != H_SUCCESS && try_again) {
531 		do {
532 			rc = h_free_logical_lan(adapter->vdev->unit_address);
533 		} while (H_IS_LONG_BUSY(rc) || (rc == H_BUSY));
534 
535 		try_again = 0;
536 		goto retry;
537 	}
538 
539 	return rc;
540 }
541 
542 static int ibmveth_open(struct net_device *netdev)
543 {
544 	struct ibmveth_adapter *adapter = netdev_priv(netdev);
545 	u64 mac_address;
546 	int rxq_entries = 1;
547 	unsigned long lpar_rc;
548 	int rc;
549 	union ibmveth_buf_desc rxq_desc;
550 	int i;
551 	struct device *dev;
552 
553 	netdev_dbg(netdev, "open starting\n");
554 
555 	napi_enable(&adapter->napi);
556 
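	/* the receive queue needs one entry for every buffer in every pool,
	 * on top of the single entry it starts with
	 */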
557 	for(i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++)
558 		rxq_entries += adapter->rx_buff_pool[i].size;
559 
560 	rc = -ENOMEM;
561 	adapter->buffer_list_addr = (void*) get_zeroed_page(GFP_KERNEL);
562 	if (!adapter->buffer_list_addr) {
563 		netdev_err(netdev, "unable to allocate list pages\n");
564 		goto out;
565 	}
566 
567 	adapter->filter_list_addr = (void*) get_zeroed_page(GFP_KERNEL);
568 	if (!adapter->filter_list_addr) {
569 		netdev_err(netdev, "unable to allocate filter pages\n");
570 		goto out_free_buffer_list;
571 	}
572 
573 	dev = &adapter->vdev->dev;
574 
575 	adapter->rx_queue.queue_len = sizeof(struct ibmveth_rx_q_entry) *
576 						rxq_entries;
577 	adapter->rx_queue.queue_addr =
578 		dma_alloc_coherent(dev, adapter->rx_queue.queue_len,
579 				   &adapter->rx_queue.queue_dma, GFP_KERNEL);
580 	if (!adapter->rx_queue.queue_addr)
581 		goto out_free_filter_list;
582 
583 	adapter->buffer_list_dma = dma_map_single(dev,
584 			adapter->buffer_list_addr, 4096, DMA_BIDIRECTIONAL);
585 	if (dma_mapping_error(dev, adapter->buffer_list_dma)) {
586 		netdev_err(netdev, "unable to map buffer list pages\n");
587 		goto out_free_queue_mem;
588 	}
589 
590 	adapter->filter_list_dma = dma_map_single(dev,
591 			adapter->filter_list_addr, 4096, DMA_BIDIRECTIONAL);
592 	if (dma_mapping_error(dev, adapter->filter_list_dma)) {
593 		netdev_err(netdev, "unable to map filter list pages\n");
594 		goto out_unmap_buffer_list;
595 	}
596 
597 	for (i = 0; i < netdev->real_num_tx_queues; i++) {
598 		if (ibmveth_allocate_tx_ltb(adapter, i))
599 			goto out_free_tx_ltb;
600 	}
601 
602 	adapter->rx_queue.index = 0;
603 	adapter->rx_queue.num_slots = rxq_entries;
604 	adapter->rx_queue.toggle = 1;
605 
606 	mac_address = ether_addr_to_u64(netdev->dev_addr);
607 
608 	rxq_desc.fields.flags_len = IBMVETH_BUF_VALID |
609 					adapter->rx_queue.queue_len;
610 	rxq_desc.fields.address = adapter->rx_queue.queue_dma;
611 
612 	netdev_dbg(netdev, "buffer list @ 0x%p\n", adapter->buffer_list_addr);
613 	netdev_dbg(netdev, "filter list @ 0x%p\n", adapter->filter_list_addr);
614 	netdev_dbg(netdev, "receive q   @ 0x%p\n", adapter->rx_queue.queue_addr);
615 
616 	h_vio_signal(adapter->vdev->unit_address, VIO_IRQ_DISABLE);
617 
618 	lpar_rc = ibmveth_register_logical_lan(adapter, rxq_desc, mac_address);
619 
620 	if (lpar_rc != H_SUCCESS) {
621 		netdev_err(netdev, "h_register_logical_lan failed with %ld\n",
622 			   lpar_rc);
623 		netdev_err(netdev, "buffer TCE:0x%llx filter TCE:0x%llx rxq "
624 			   "desc:0x%llx MAC:0x%llx\n",
625 				     adapter->buffer_list_dma,
626 				     adapter->filter_list_dma,
627 				     rxq_desc.desc,
628 				     mac_address);
629 		rc = -ENONET;
630 		goto out_unmap_filter_list;
631 	}
632 
633 	for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) {
634 		if (!adapter->rx_buff_pool[i].active)
635 			continue;
636 		if (ibmveth_alloc_buffer_pool(&adapter->rx_buff_pool[i])) {
637 			netdev_err(netdev, "unable to alloc pool\n");
638 			adapter->rx_buff_pool[i].active = 0;
639 			rc = -ENOMEM;
640 			goto out_free_buffer_pools;
641 		}
642 	}
643 
644 	netdev_dbg(netdev, "registering irq 0x%x\n", netdev->irq);
645 	rc = request_irq(netdev->irq, ibmveth_interrupt, 0, netdev->name,
646 			 netdev);
647 	if (rc != 0) {
648 		netdev_err(netdev, "unable to request irq 0x%x, rc %d\n",
649 			   netdev->irq, rc);
650 		do {
651 			lpar_rc = h_free_logical_lan(adapter->vdev->unit_address);
652 		} while (H_IS_LONG_BUSY(lpar_rc) || (lpar_rc == H_BUSY));
653 
654 		goto out_free_buffer_pools;
655 	}
656 
657 	rc = -ENOMEM;
658 
659 	netdev_dbg(netdev, "initial replenish cycle\n");
660 	ibmveth_interrupt(netdev->irq, netdev);
661 
662 	netif_tx_start_all_queues(netdev);
663 
664 	netdev_dbg(netdev, "open complete\n");
665 
666 	return 0;
667 
668 out_free_buffer_pools:
669 	while (--i >= 0) {
670 		if (adapter->rx_buff_pool[i].active)
671 			ibmveth_free_buffer_pool(adapter,
672 						 &adapter->rx_buff_pool[i]);
673 	}
674 out_unmap_filter_list:
675 	dma_unmap_single(dev, adapter->filter_list_dma, 4096,
676 			 DMA_BIDIRECTIONAL);
677 
678 out_free_tx_ltb:
679 	while (--i >= 0) {
680 		ibmveth_free_tx_ltb(adapter, i);
681 	}
682 
683 out_unmap_buffer_list:
684 	dma_unmap_single(dev, adapter->buffer_list_dma, 4096,
685 			 DMA_BIDIRECTIONAL);
686 out_free_queue_mem:
687 	dma_free_coherent(dev, adapter->rx_queue.queue_len,
688 			  adapter->rx_queue.queue_addr,
689 			  adapter->rx_queue.queue_dma);
690 out_free_filter_list:
691 	free_page((unsigned long)adapter->filter_list_addr);
692 out_free_buffer_list:
693 	free_page((unsigned long)adapter->buffer_list_addr);
694 out:
695 	napi_disable(&adapter->napi);
696 	return rc;
697 }
698 
699 static int ibmveth_close(struct net_device *netdev)
700 {
701 	struct ibmveth_adapter *adapter = netdev_priv(netdev);
702 	struct device *dev = &adapter->vdev->dev;
703 	long lpar_rc;
704 	int i;
705 
706 	netdev_dbg(netdev, "close starting\n");
707 
708 	napi_disable(&adapter->napi);
709 
710 	netif_tx_stop_all_queues(netdev);
711 
712 	h_vio_signal(adapter->vdev->unit_address, VIO_IRQ_DISABLE);
713 
714 	do {
715 		lpar_rc = h_free_logical_lan(adapter->vdev->unit_address);
716 	} while (H_IS_LONG_BUSY(lpar_rc) || (lpar_rc == H_BUSY));
717 
718 	if (lpar_rc != H_SUCCESS) {
719 		netdev_err(netdev, "h_free_logical_lan failed with %lx, "
720 			   "continuing with close\n", lpar_rc);
721 	}
722 
723 	free_irq(netdev->irq, netdev);
724 
725 	ibmveth_update_rx_no_buffer(adapter);
726 
727 	dma_unmap_single(dev, adapter->buffer_list_dma, 4096,
728 			 DMA_BIDIRECTIONAL);
729 	free_page((unsigned long)adapter->buffer_list_addr);
730 
731 	dma_unmap_single(dev, adapter->filter_list_dma, 4096,
732 			 DMA_BIDIRECTIONAL);
733 	free_page((unsigned long)adapter->filter_list_addr);
734 
735 	dma_free_coherent(dev, adapter->rx_queue.queue_len,
736 			  adapter->rx_queue.queue_addr,
737 			  adapter->rx_queue.queue_dma);
738 
739 	for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++)
740 		if (adapter->rx_buff_pool[i].active)
741 			ibmveth_free_buffer_pool(adapter,
742 						 &adapter->rx_buff_pool[i]);
743 
744 	for (i = 0; i < netdev->real_num_tx_queues; i++)
745 		ibmveth_free_tx_ltb(adapter, i);
746 
747 	netdev_dbg(netdev, "close complete\n");
748 
749 	return 0;
750 }
751 
752 /**
753  * ibmveth_reset - Handle scheduled reset work
754  *
755  * @w: pointer to work_struct embedded in adapter structure
756  *
757  * Context: This routine acquires rtnl_mutex and disables its NAPI through
758  *          ibmveth_close. It can't be called directly in a context that has
759  *          already acquired rtnl_mutex or disabled its NAPI, or directly from
760  *          a poll routine.
761  *
762  * Return: void
763  */
764 static void ibmveth_reset(struct work_struct *w)
765 {
766 	struct ibmveth_adapter *adapter = container_of(w, struct ibmveth_adapter, work);
767 	struct net_device *netdev = adapter->netdev;
768 
769 	netdev_dbg(netdev, "reset starting\n");
770 
771 	rtnl_lock();
772 
773 	dev_close(adapter->netdev);
774 	dev_open(adapter->netdev, NULL);
775 
776 	rtnl_unlock();
777 
778 	netdev_dbg(netdev, "reset complete\n");
779 }
780 
781 static int ibmveth_set_link_ksettings(struct net_device *dev,
782 				      const struct ethtool_link_ksettings *cmd)
783 {
784 	struct ibmveth_adapter *adapter = netdev_priv(dev);
785 
786 	return ethtool_virtdev_set_link_ksettings(dev, cmd,
787 						  &adapter->speed,
788 						  &adapter->duplex);
789 }
790 
791 static int ibmveth_get_link_ksettings(struct net_device *dev,
792 				      struct ethtool_link_ksettings *cmd)
793 {
794 	struct ibmveth_adapter *adapter = netdev_priv(dev);
795 
796 	cmd->base.speed = adapter->speed;
797 	cmd->base.duplex = adapter->duplex;
798 	cmd->base.port = PORT_OTHER;
799 
800 	return 0;
801 }
802 
803 static void ibmveth_init_link_settings(struct net_device *dev)
804 {
805 	struct ibmveth_adapter *adapter = netdev_priv(dev);
806 
807 	adapter->speed = SPEED_1000;
808 	adapter->duplex = DUPLEX_FULL;
809 }
810 
811 static void netdev_get_drvinfo(struct net_device *dev,
812 			       struct ethtool_drvinfo *info)
813 {
814 	strscpy(info->driver, ibmveth_driver_name, sizeof(info->driver));
815 	strscpy(info->version, ibmveth_driver_version, sizeof(info->version));
816 }
817 
818 static netdev_features_t ibmveth_fix_features(struct net_device *dev,
819 	netdev_features_t features)
820 {
821 	/*
822 	 * Since the ibmveth firmware interface does not have the
823 	 * concept of separate tx/rx checksum offload enable, if rx
824 	 * checksum is disabled we also have to disable tx checksum
825 	 * offload. Once we disable rx checksum offload, we are no
826 	 * longer allowed to send tx buffers that are not properly
827 	 * checksummed.
828 	 */
829 
830 	if (!(features & NETIF_F_RXCSUM))
831 		features &= ~NETIF_F_CSUM_MASK;
832 
833 	return features;
834 }
835 
836 static int ibmveth_set_csum_offload(struct net_device *dev, u32 data)
837 {
838 	struct ibmveth_adapter *adapter = netdev_priv(dev);
839 	unsigned long set_attr, clr_attr, ret_attr;
840 	unsigned long set_attr6, clr_attr6;
841 	long ret, ret4, ret6;
842 	int rc1 = 0, rc2 = 0;
843 	int restart = 0;
844 
845 	if (netif_running(dev)) {
846 		restart = 1;
847 		ibmveth_close(dev);
848 	}
849 
850 	set_attr = 0;
851 	clr_attr = 0;
852 	set_attr6 = 0;
853 	clr_attr6 = 0;
854 
855 	if (data) {
856 		set_attr = IBMVETH_ILLAN_IPV4_TCP_CSUM;
857 		set_attr6 = IBMVETH_ILLAN_IPV6_TCP_CSUM;
858 	} else {
859 		clr_attr = IBMVETH_ILLAN_IPV4_TCP_CSUM;
860 		clr_attr6 = IBMVETH_ILLAN_IPV6_TCP_CSUM;
861 	}
862 
863 	ret = h_illan_attributes(adapter->vdev->unit_address, 0, 0, &ret_attr);
864 
865 	if (ret == H_SUCCESS &&
866 	    (ret_attr & IBMVETH_ILLAN_PADDED_PKT_CSUM)) {
867 		ret4 = h_illan_attributes(adapter->vdev->unit_address, clr_attr,
868 					 set_attr, &ret_attr);
869 
870 		if (ret4 != H_SUCCESS) {
871 			netdev_err(dev, "unable to change IPv4 checksum "
872 					"offload settings. %d rc=%ld\n",
873 					data, ret4);
874 
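			/* roll the attribute change back by swapping the
			 * set/clear masks
			 */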
875 			h_illan_attributes(adapter->vdev->unit_address,
876 					   set_attr, clr_attr, &ret_attr);
877 
878 			if (data == 1)
879 				dev->features &= ~NETIF_F_IP_CSUM;
880 
881 		} else {
882 			adapter->fw_ipv4_csum_support = data;
883 		}
884 
885 		ret6 = h_illan_attributes(adapter->vdev->unit_address,
886 					 clr_attr6, set_attr6, &ret_attr);
887 
888 		if (ret6 != H_SUCCESS) {
889 			netdev_err(dev, "unable to change IPv6 checksum "
890 					"offload settings. %d rc=%ld\n",
891 					data, ret6);
892 
893 			h_illan_attributes(adapter->vdev->unit_address,
894 					   set_attr6, clr_attr6, &ret_attr);
895 
896 			if (data == 1)
897 				dev->features &= ~NETIF_F_IPV6_CSUM;
898 
899 		} else
900 			adapter->fw_ipv6_csum_support = data;
901 
902 		if (ret4 == H_SUCCESS || ret6 == H_SUCCESS)
903 			adapter->rx_csum = data;
904 		else
905 			rc1 = -EIO;
906 	} else {
907 		rc1 = -EIO;
908 		netdev_err(dev, "unable to change checksum offload settings."
909 				     " %d rc=%ld ret_attr=%lx\n", data, ret,
910 				     ret_attr);
911 	}
912 
913 	if (restart)
914 		rc2 = ibmveth_open(dev);
915 
916 	return rc1 ? rc1 : rc2;
917 }
918 
919 static int ibmveth_set_tso(struct net_device *dev, u32 data)
920 {
921 	struct ibmveth_adapter *adapter = netdev_priv(dev);
922 	unsigned long set_attr, clr_attr, ret_attr;
923 	long ret1, ret2;
924 	int rc1 = 0, rc2 = 0;
925 	int restart = 0;
926 
927 	if (netif_running(dev)) {
928 		restart = 1;
929 		ibmveth_close(dev);
930 	}
931 
932 	set_attr = 0;
933 	clr_attr = 0;
934 
935 	if (data)
936 		set_attr = IBMVETH_ILLAN_LRG_SR_ENABLED;
937 	else
938 		clr_attr = IBMVETH_ILLAN_LRG_SR_ENABLED;
939 
940 	ret1 = h_illan_attributes(adapter->vdev->unit_address, 0, 0, &ret_attr);
941 
942 	if (ret1 == H_SUCCESS && (ret_attr & IBMVETH_ILLAN_LRG_SND_SUPPORT) &&
943 	    !old_large_send) {
944 		ret2 = h_illan_attributes(adapter->vdev->unit_address, clr_attr,
945 					  set_attr, &ret_attr);
946 
947 		if (ret2 != H_SUCCESS) {
948 			netdev_err(dev, "unable to change tso settings. %d rc=%ld\n",
949 				   data, ret2);
950 
951 			h_illan_attributes(adapter->vdev->unit_address,
952 					   set_attr, clr_attr, &ret_attr);
953 
954 			if (data == 1)
955 				dev->features &= ~(NETIF_F_TSO | NETIF_F_TSO6);
956 			rc1 = -EIO;
957 
958 		} else {
959 			adapter->fw_large_send_support = data;
960 			adapter->large_send = data;
961 		}
962 	} else {
963 		/* Older firmware versions of large send offload do not
964 		 * support tcp6/ipv6
965 		 */
966 		if (data == 1) {
967 			dev->features &= ~NETIF_F_TSO6;
968 			netdev_info(dev, "TSO feature requires all partitions to have updated driver");
969 		}
970 		adapter->large_send = data;
971 	}
972 
973 	if (restart)
974 		rc2 = ibmveth_open(dev);
975 
976 	return rc1 ? rc1 : rc2;
977 }
978 
979 static int ibmveth_set_features(struct net_device *dev,
980 	netdev_features_t features)
981 {
982 	struct ibmveth_adapter *adapter = netdev_priv(dev);
983 	int rx_csum = !!(features & NETIF_F_RXCSUM);
984 	int large_send = !!(features & (NETIF_F_TSO | NETIF_F_TSO6));
985 	int rc1 = 0, rc2 = 0;
986 
987 	if (rx_csum != adapter->rx_csum) {
988 		rc1 = ibmveth_set_csum_offload(dev, rx_csum);
989 		if (rc1 && !adapter->rx_csum)
990 			dev->features =
991 				features & ~(NETIF_F_CSUM_MASK |
992 					     NETIF_F_RXCSUM);
993 	}
994 
995 	if (large_send != adapter->large_send) {
996 		rc2 = ibmveth_set_tso(dev, large_send);
997 		if (rc2 && !adapter->large_send)
998 			dev->features =
999 				features & ~(NETIF_F_TSO | NETIF_F_TSO6);
1000 	}
1001 
1002 	return rc1 ? rc1 : rc2;
1003 }
1004 
1005 static void ibmveth_get_strings(struct net_device *dev, u32 stringset, u8 *data)
1006 {
1007 	int i;
1008 
1009 	if (stringset != ETH_SS_STATS)
1010 		return;
1011 
1012 	for (i = 0; i < ARRAY_SIZE(ibmveth_stats); i++, data += ETH_GSTRING_LEN)
1013 		memcpy(data, ibmveth_stats[i].name, ETH_GSTRING_LEN);
1014 }
1015 
1016 static int ibmveth_get_sset_count(struct net_device *dev, int sset)
1017 {
1018 	switch (sset) {
1019 	case ETH_SS_STATS:
1020 		return ARRAY_SIZE(ibmveth_stats);
1021 	default:
1022 		return -EOPNOTSUPP;
1023 	}
1024 }
1025 
1026 static void ibmveth_get_ethtool_stats(struct net_device *dev,
1027 				      struct ethtool_stats *stats, u64 *data)
1028 {
1029 	int i;
1030 	struct ibmveth_adapter *adapter = netdev_priv(dev);
1031 
1032 	for (i = 0; i < ARRAY_SIZE(ibmveth_stats); i++)
1033 		data[i] = IBMVETH_GET_STAT(adapter, ibmveth_stats[i].offset);
1034 }
1035 
1036 static void ibmveth_get_channels(struct net_device *netdev,
1037 				 struct ethtool_channels *channels)
1038 {
1039 	channels->max_tx = ibmveth_real_max_tx_queues();
1040 	channels->tx_count = netdev->real_num_tx_queues;
1041 
1042 	channels->max_rx = netdev->real_num_rx_queues;
1043 	channels->rx_count = netdev->real_num_rx_queues;
1044 }
1045 
1046 static int ibmveth_set_channels(struct net_device *netdev,
1047 				struct ethtool_channels *channels)
1048 {
1049 	struct ibmveth_adapter *adapter = netdev_priv(netdev);
1050 	unsigned int old = netdev->real_num_tx_queues,
1051 		     goal = channels->tx_count;
1052 	int rc, i;
1053 
1054 	/* If ndo_open has not been called yet then don't allocate, just set
1055 	 * desired netdev_queues and return
1056 	 */
1057 	if (!(netdev->flags & IFF_UP))
1058 		return netif_set_real_num_tx_queues(netdev, goal);
1059 
1060 	/* We have IBMVETH_MAX_QUEUES netdev_queues allocated,
1061 	 * but we may need to alloc/free the LTBs.
1062 	 */
1063 	netif_tx_stop_all_queues(netdev);
1064 
1065 	/* Allocate any queue that we need */
1066 	for (i = old; i < goal; i++) {
1067 		if (adapter->tx_ltb_ptr[i])
1068 			continue;
1069 
1070 		rc = ibmveth_allocate_tx_ltb(adapter, i);
1071 		if (!rc)
1072 			continue;
1073 
1074 		/* if something goes wrong, free everything we just allocated */
1075 		netdev_err(netdev, "Failed to allocate more tx queues, returning to %d queues\n",
1076 			   old);
1077 		goal = old;
1078 		old = i;
1079 		break;
1080 	}
1081 	rc = netif_set_real_num_tx_queues(netdev, goal);
1082 	if (rc) {
1083 		netdev_err(netdev, "Failed to set real tx queues, returning to %d queues\n",
1084 			   old);
1085 		goal = old;
1086 		old = i;
1087 	}
1088 	/* Free any that are no longer needed */
1089 	for (i = old; i > goal; i--) {
1090 		if (adapter->tx_ltb_ptr[i - 1])
1091 			ibmveth_free_tx_ltb(adapter, i - 1);
1092 	}
1093 
1094 	netif_tx_wake_all_queues(netdev);
1095 
1096 	return rc;
1097 }
1098 
1099 static const struct ethtool_ops netdev_ethtool_ops = {
1100 	.get_drvinfo		         = netdev_get_drvinfo,
1101 	.get_link		         = ethtool_op_get_link,
1102 	.get_strings		         = ibmveth_get_strings,
1103 	.get_sset_count		         = ibmveth_get_sset_count,
1104 	.get_ethtool_stats	         = ibmveth_get_ethtool_stats,
1105 	.get_link_ksettings	         = ibmveth_get_link_ksettings,
1106 	.set_link_ksettings              = ibmveth_set_link_ksettings,
1107 	.get_channels			 = ibmveth_get_channels,
1108 	.set_channels			 = ibmveth_set_channels
1109 };
1110 
1111 static int ibmveth_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
1112 {
1113 	return -EOPNOTSUPP;
1114 }
1115 
1116 static int ibmveth_send(struct ibmveth_adapter *adapter,
1117 			unsigned long desc, unsigned long mss)
1118 {
1119 	unsigned long correlator;
1120 	unsigned int retry_count;
1121 	unsigned long ret;
1122 
1123 	/*
1124 	 * The retry count sets a maximum for the number of broadcast and
1125 	 * multicast destinations within the system.
1126 	 */
1127 	retry_count = 1024;
1128 	correlator = 0;
1129 	do {
1130 		ret = h_send_logical_lan(adapter->vdev->unit_address, desc,
1131 					 correlator, &correlator, mss,
1132 					 adapter->fw_large_send_support);
1133 	} while ((ret == H_BUSY) && (retry_count--));
1134 
1135 	if (ret != H_SUCCESS && ret != H_DROPPED) {
1136 		netdev_err(adapter->netdev, "tx: h_send_logical_lan failed "
1137 			   "with rc=%ld\n", ret);
1138 		return 1;
1139 	}
1140 
1141 	return 0;
1142 }
1143 
1144 static int ibmveth_is_packet_unsupported(struct sk_buff *skb,
1145 					 struct net_device *netdev)
1146 {
1147 	struct ethhdr *ether_header;
1148 	int ret = 0;
1149 
1150 	ether_header = eth_hdr(skb);
1151 
1152 	if (ether_addr_equal(ether_header->h_dest, netdev->dev_addr)) {
1153 		netdev_dbg(netdev, "veth doesn't support loopback packets, dropping packet.\n");
1154 		netdev->stats.tx_dropped++;
1155 		ret = -EOPNOTSUPP;
1156 	}
1157 
1158 	return ret;
1159 }
1160 
1161 static netdev_tx_t ibmveth_start_xmit(struct sk_buff *skb,
1162 				      struct net_device *netdev)
1163 {
1164 	struct ibmveth_adapter *adapter = netdev_priv(netdev);
1165 	unsigned int desc_flags, total_bytes;
1166 	union ibmveth_buf_desc desc;
1167 	int i, queue_num = skb_get_queue_mapping(skb);
1168 	unsigned long mss = 0;
1169 
1170 	if (ibmveth_is_packet_unsupported(skb, netdev))
1171 		goto out;
1172 	/* veth can't checksum offload UDP */
1173 	if (skb->ip_summed == CHECKSUM_PARTIAL &&
1174 	    ((skb->protocol == htons(ETH_P_IP) &&
1175 	      ip_hdr(skb)->protocol != IPPROTO_TCP) ||
1176 	     (skb->protocol == htons(ETH_P_IPV6) &&
1177 	      ipv6_hdr(skb)->nexthdr != IPPROTO_TCP)) &&
1178 	    skb_checksum_help(skb)) {
1179 
1180 		netdev_err(netdev, "tx: failed to checksum packet\n");
1181 		netdev->stats.tx_dropped++;
1182 		goto out;
1183 	}
1184 
1185 	desc_flags = IBMVETH_BUF_VALID;
1186 
1187 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
1188 		unsigned char *buf = skb_transport_header(skb) +
1189 						skb->csum_offset;
1190 
1191 		desc_flags |= (IBMVETH_BUF_NO_CSUM | IBMVETH_BUF_CSUM_GOOD);
1192 
1193 		/* Need to zero out the checksum */
1194 		buf[0] = 0;
1195 		buf[1] = 0;
1196 
1197 		if (skb_is_gso(skb) && adapter->fw_large_send_support)
1198 			desc_flags |= IBMVETH_BUF_LRG_SND;
1199 	}
1200 
1201 	if (skb->ip_summed == CHECKSUM_PARTIAL && skb_is_gso(skb)) {
1202 		if (adapter->fw_large_send_support) {
1203 			mss = (unsigned long)skb_shinfo(skb)->gso_size;
1204 			adapter->tx_large_packets++;
1205 		} else if (!skb_is_gso_v6(skb)) {
1206 			/* Put -1 in the IP checksum to tell phyp it
1207 			 * is a largesend packet. Put the mss in
1208 			 * the TCP checksum.
1209 			 */
1210 			ip_hdr(skb)->check = 0xffff;
1211 			tcp_hdr(skb)->check =
1212 				cpu_to_be16(skb_shinfo(skb)->gso_size);
1213 			adapter->tx_large_packets++;
1214 		}
1215 	}
1216 
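	/* The whole frame is staged in this queue's pre-mapped long-term
	 * buffer (tx_ltb), so no per-skb DMA mapping is needed on the
	 * transmit hot path.
	 */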
1217 	/* Copy header into mapped buffer */
1218 	if (unlikely(skb->len > adapter->tx_ltb_size)) {
1219 		netdev_err(adapter->netdev, "tx: packet size (%u) exceeds ltb (%u)\n",
1220 			   skb->len, adapter->tx_ltb_size);
1221 		netdev->stats.tx_dropped++;
1222 		goto out;
1223 	}
1224 	memcpy(adapter->tx_ltb_ptr[queue_num], skb->data, skb_headlen(skb));
1225 	total_bytes = skb_headlen(skb);
1226 	/* Copy frags into mapped buffers */
1227 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1228 		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
1229 
1230 		memcpy(adapter->tx_ltb_ptr[queue_num] + total_bytes,
1231 		       skb_frag_address_safe(frag), skb_frag_size(frag));
1232 		total_bytes += skb_frag_size(frag);
1233 	}
1234 
1235 	if (unlikely(total_bytes != skb->len)) {
1236 		netdev_err(adapter->netdev, "tx: incorrect packet len copied into ltb (%u != %u)\n",
1237 			   skb->len, total_bytes);
1238 		netdev->stats.tx_dropped++;
1239 		goto out;
1240 	}
1241 	desc.fields.flags_len = desc_flags | skb->len;
1242 	desc.fields.address = adapter->tx_ltb_dma[queue_num];
1243 	/* finish writing to the tx long-term buffer before VIOS accesses it */
1244 	dma_wmb();
1245 
1246 	if (ibmveth_send(adapter, desc.desc, mss)) {
1247 		adapter->tx_send_failed++;
1248 		netdev->stats.tx_dropped++;
1249 	} else {
1250 		netdev->stats.tx_packets++;
1251 		netdev->stats.tx_bytes += skb->len;
1252 	}
1253 
1254 out:
1255 	dev_consume_skb_any(skb);
1256 	return NETDEV_TX_OK;
1257 
1258 
1259 }
1260 
1261 static void ibmveth_rx_mss_helper(struct sk_buff *skb, u16 mss, int lrg_pkt)
1262 {
1263 	struct tcphdr *tcph;
1264 	int offset = 0;
1265 	int hdr_len;
1266 
1267 	/* only TCP packets will be aggregated */
1268 	if (skb->protocol == htons(ETH_P_IP)) {
1269 		struct iphdr *iph = (struct iphdr *)skb->data;
1270 
1271 		if (iph->protocol == IPPROTO_TCP) {
1272 			offset = iph->ihl * 4;
1273 			skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
1274 		} else {
1275 			return;
1276 		}
1277 	} else if (skb->protocol == htons(ETH_P_IPV6)) {
1278 		struct ipv6hdr *iph6 = (struct ipv6hdr *)skb->data;
1279 
1280 		if (iph6->nexthdr == IPPROTO_TCP) {
1281 			offset = sizeof(struct ipv6hdr);
1282 			skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
1283 		} else {
1284 			return;
1285 		}
1286 	} else {
1287 		return;
1288 	}
1289 	/* if mss is not set through Large Packet bit/mss in rx buffer,
1290 	 * expect that the mss will be written to the tcp header checksum.
1291 	 */
1292 	tcph = (struct tcphdr *)(skb->data + offset);
1293 	if (lrg_pkt) {
1294 		skb_shinfo(skb)->gso_size = mss;
1295 	} else if (offset) {
1296 		skb_shinfo(skb)->gso_size = ntohs(tcph->check);
1297 		tcph->check = 0;
1298 	}
1299 
1300 	if (skb_shinfo(skb)->gso_size) {
1301 		hdr_len = offset + tcph->doff * 4;
1302 		skb_shinfo(skb)->gso_segs =
1303 				DIV_ROUND_UP(skb->len - hdr_len,
1304 					     skb_shinfo(skb)->gso_size);
1305 	}
1306 }
1307 
1308 static void ibmveth_rx_csum_helper(struct sk_buff *skb,
1309 				   struct ibmveth_adapter *adapter)
1310 {
1311 	struct iphdr *iph = NULL;
1312 	struct ipv6hdr *iph6 = NULL;
1313 	__be16 skb_proto = 0;
1314 	u16 iphlen = 0;
1315 	u16 iph_proto = 0;
1316 	u16 tcphdrlen = 0;
1317 
1318 	skb_proto = be16_to_cpu(skb->protocol);
1319 
1320 	if (skb_proto == ETH_P_IP) {
1321 		iph = (struct iphdr *)skb->data;
1322 
1323 		/* If the IP checksum is not offloaded and if the packet
1324 		 *  is large send, the checksum must be rebuilt.
1325 		 */
1326 		if (iph->check == 0xffff) {
1327 			iph->check = 0;
1328 			iph->check = ip_fast_csum((unsigned char *)iph,
1329 						  iph->ihl);
1330 		}
1331 
1332 		iphlen = iph->ihl * 4;
1333 		iph_proto = iph->protocol;
1334 	} else if (skb_proto == ETH_P_IPV6) {
1335 		iph6 = (struct ipv6hdr *)skb->data;
1336 		iphlen = sizeof(struct ipv6hdr);
1337 		iph_proto = iph6->nexthdr;
1338 	}
1339 
1340 	/* When CSO is enabled the TCP checksum may have been set to zero by
1341 	 * the sender, given that we zeroed out the TCP checksum field in the
1342 	 * transmit path (see ibmveth_start_xmit). In this case set
1343 	 * up CHECKSUM_PARTIAL. If the packet is forwarded, the checksum will
1344 	 * then be recalculated by the destination NIC (CSO must be enabled
1345 	 * on the destination NIC).
1346 	 *
1347 	 * In an OVS environment, when a flow is not cached, specifically for a
1348 	 * new TCP connection, the first packet information is passed up to
1349 	 * the user space for finding a flow. During this process, OVS computes
1350 	 * checksum on the first packet when CHECKSUM_PARTIAL flag is set.
1351 	 *
1352 	 * So, re-compute TCP pseudo header checksum.
1353 	 */
1354 
1355 	if (iph_proto == IPPROTO_TCP) {
1356 		struct tcphdr *tcph = (struct tcphdr *)(skb->data + iphlen);
1357 
1358 		if (tcph->check == 0x0000) {
1359 			/* Recompute TCP pseudo header checksum  */
1360 			tcphdrlen = skb->len - iphlen;
1361 			if (skb_proto == ETH_P_IP)
1362 				tcph->check =
1363 				 ~csum_tcpudp_magic(iph->saddr,
1364 				iph->daddr, tcphdrlen, iph_proto, 0);
1365 			else if (skb_proto == ETH_P_IPV6)
1366 				tcph->check =
1367 				 ~csum_ipv6_magic(&iph6->saddr,
1368 				&iph6->daddr, tcphdrlen, iph_proto, 0);
1369 			/* Setup SKB fields for checksum offload */
1370 			skb_partial_csum_set(skb, iphlen,
1371 					     offsetof(struct tcphdr, check));
1372 			skb_reset_network_header(skb);
1373 		}
1374 	}
1375 }
1376 
1377 static int ibmveth_poll(struct napi_struct *napi, int budget)
1378 {
1379 	struct ibmveth_adapter *adapter =
1380 			container_of(napi, struct ibmveth_adapter, napi);
1381 	struct net_device *netdev = adapter->netdev;
1382 	int frames_processed = 0;
1383 	unsigned long lpar_rc;
1384 	u16 mss = 0;
1385 
1386 restart_poll:
1387 	while (frames_processed < budget) {
1388 		if (!ibmveth_rxq_pending_buffer(adapter))
1389 			break;
1390 
1391 		smp_rmb();
1392 		if (!ibmveth_rxq_buffer_valid(adapter)) {
1393 			wmb(); /* suggested by larson1 */
1394 			adapter->rx_invalid_buffer++;
1395 			netdev_dbg(netdev, "recycling invalid buffer\n");
1396 			if (unlikely(ibmveth_rxq_harvest_buffer(adapter, true)))
1397 				break;
1398 		} else {
1399 			struct sk_buff *skb, *new_skb;
1400 			int length = ibmveth_rxq_frame_length(adapter);
1401 			int offset = ibmveth_rxq_frame_offset(adapter);
1402 			int csum_good = ibmveth_rxq_csum_good(adapter);
1403 			int lrg_pkt = ibmveth_rxq_large_packet(adapter);
1404 			__sum16 iph_check = 0;
1405 
1406 			skb = ibmveth_rxq_get_buffer(adapter);
1407 			if (unlikely(!skb))
1408 				break;
1409 
1410 			/* if the large packet bit is set in the rx queue
1411 			 * descriptor, the mss will be written by PHYP eight
1412 			 * bytes from the start of the rx buffer, which is
1413 			 * skb->data at this stage
1414 			 */
1415 			if (lrg_pkt) {
1416 				__be64 *rxmss = (__be64 *)(skb->data + 8);
1417 
1418 				mss = (u16)be64_to_cpu(*rxmss);
1419 			}
1420 
1421 			new_skb = NULL;
1422 			if (length < rx_copybreak)
1423 				new_skb = netdev_alloc_skb(netdev, length);
1424 
1425 			if (new_skb) {
1426 				skb_copy_to_linear_data(new_skb,
1427 							skb->data + offset,
1428 							length);
1429 				if (rx_flush)
1430 					ibmveth_flush_buffer(skb->data,
1431 						length + offset);
1432 				if (unlikely(ibmveth_rxq_harvest_buffer(adapter, true)))
1433 					break;
1434 				skb = new_skb;
1435 			} else {
1436 				if (unlikely(ibmveth_rxq_harvest_buffer(adapter, false)))
1437 					break;
1438 				skb_reserve(skb, offset);
1439 			}
1440 
1441 			skb_put(skb, length);
1442 			skb->protocol = eth_type_trans(skb, netdev);
1443 
1444 			/* PHYP without PLSO support places a -1 in the ip
1445 			 * checksum for large send frames.
1446 			 */
1447 			if (skb->protocol == cpu_to_be16(ETH_P_IP)) {
1448 				struct iphdr *iph = (struct iphdr *)skb->data;
1449 
1450 				iph_check = iph->check;
1451 			}
1452 
1453 			if ((length > netdev->mtu + ETH_HLEN) ||
1454 			    lrg_pkt || iph_check == 0xffff) {
1455 				ibmveth_rx_mss_helper(skb, mss, lrg_pkt);
1456 				adapter->rx_large_packets++;
1457 			}
1458 
1459 			if (csum_good) {
1460 				skb->ip_summed = CHECKSUM_UNNECESSARY;
1461 				ibmveth_rx_csum_helper(skb, adapter);
1462 			}
1463 
1464 			napi_gro_receive(napi, skb);	/* send it up */
1465 
1466 			netdev->stats.rx_packets++;
1467 			netdev->stats.rx_bytes += length;
1468 			frames_processed++;
1469 		}
1470 	}
1471 
1472 	ibmveth_replenish_task(adapter);
1473 
1474 	if (frames_processed == budget)
1475 		goto out;
1476 
1477 	if (!napi_complete_done(napi, frames_processed))
1478 		goto out;
1479 
1480 	/* We think we are done - reenable interrupts,
1481 	 * then check once more to make sure we are done.
1482 	 */
1483 	lpar_rc = h_vio_signal(adapter->vdev->unit_address, VIO_IRQ_ENABLE);
1484 	if (WARN_ON(lpar_rc != H_SUCCESS)) {
1485 		schedule_work(&adapter->work);
1486 		goto out;
1487 	}
1488 
1489 	if (ibmveth_rxq_pending_buffer(adapter) && napi_schedule(napi)) {
1490 		lpar_rc = h_vio_signal(adapter->vdev->unit_address,
1491 				       VIO_IRQ_DISABLE);
1492 		goto restart_poll;
1493 	}
1494 
1495 out:
1496 	return frames_processed;
1497 }
1498 
1499 static irqreturn_t ibmveth_interrupt(int irq, void *dev_instance)
1500 {
1501 	struct net_device *netdev = dev_instance;
1502 	struct ibmveth_adapter *adapter = netdev_priv(netdev);
1503 	unsigned long lpar_rc;
1504 
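	/* Mask further device interrupts and hand the work to NAPI; the
	 * interrupt is re-enabled from ibmveth_poll once the queue is drained.
	 */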
1505 	if (napi_schedule_prep(&adapter->napi)) {
1506 		lpar_rc = h_vio_signal(adapter->vdev->unit_address,
1507 				       VIO_IRQ_DISABLE);
1508 		WARN_ON(lpar_rc != H_SUCCESS);
1509 		__napi_schedule(&adapter->napi);
1510 	}
1511 	return IRQ_HANDLED;
1512 }
1513 
1514 static void ibmveth_set_multicast_list(struct net_device *netdev)
1515 {
1516 	struct ibmveth_adapter *adapter = netdev_priv(netdev);
1517 	unsigned long lpar_rc;
1518 
1519 	if ((netdev->flags & IFF_PROMISC) ||
1520 	    (netdev_mc_count(netdev) > adapter->mcastFilterSize)) {
1521 		lpar_rc = h_multicast_ctrl(adapter->vdev->unit_address,
1522 					   IbmVethMcastEnableRecv |
1523 					   IbmVethMcastDisableFiltering,
1524 					   0);
1525 		if (lpar_rc != H_SUCCESS) {
1526 			netdev_err(netdev, "h_multicast_ctrl rc=%ld when "
1527 				   "entering promisc mode\n", lpar_rc);
1528 		}
1529 	} else {
1530 		struct netdev_hw_addr *ha;
1531 		/* clear the filter table & disable filtering */
1532 		lpar_rc = h_multicast_ctrl(adapter->vdev->unit_address,
1533 					   IbmVethMcastEnableRecv |
1534 					   IbmVethMcastDisableFiltering |
1535 					   IbmVethMcastClearFilterTable,
1536 					   0);
1537 		if (lpar_rc != H_SUCCESS) {
1538 			netdev_err(netdev, "h_multicast_ctrl rc=%ld when "
1539 				   "attempting to clear filter table\n",
1540 				   lpar_rc);
1541 		}
1542 		/* add the addresses to the filter table */
1543 		netdev_for_each_mc_addr(ha, netdev) {
1544 			/* add the multicast address to the filter table */
1545 			u64 mcast_addr;
1546 			mcast_addr = ether_addr_to_u64(ha->addr);
1547 			lpar_rc = h_multicast_ctrl(adapter->vdev->unit_address,
1548 						   IbmVethMcastAddFilter,
1549 						   mcast_addr);
1550 			if (lpar_rc != H_SUCCESS) {
1551 				netdev_err(netdev, "h_multicast_ctrl rc=%ld "
1552 					   "when adding an entry to the filter "
1553 					   "table\n", lpar_rc);
1554 			}
1555 		}
1556 
1557 		/* re-enable filtering */
1558 		lpar_rc = h_multicast_ctrl(adapter->vdev->unit_address,
1559 					   IbmVethMcastEnableFiltering,
1560 					   0);
1561 		if (lpar_rc != H_SUCCESS) {
1562 			netdev_err(netdev, "h_multicast_ctrl rc=%ld when "
1563 				   "enabling filtering\n", lpar_rc);
1564 		}
1565 	}
1566 }
1567 
1568 static int ibmveth_change_mtu(struct net_device *dev, int new_mtu)
1569 {
1570 	struct ibmveth_adapter *adapter = netdev_priv(dev);
1571 	struct vio_dev *viodev = adapter->vdev;
1572 	int new_mtu_oh = new_mtu + IBMVETH_BUFF_OH;
1573 	int i, rc;
1574 	int need_restart = 0;
1575 
1576 	for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++)
1577 		if (new_mtu_oh <= adapter->rx_buff_pool[i].buff_size)
1578 			break;
1579 
1580 	if (i == IBMVETH_NUM_BUFF_POOLS)
1581 		return -EINVAL;
1582 
1583 	/* Deactivate all the buffer pools so that the next loop can activate
1584 	 * only the buffer pools necessary to hold the new MTU */
1585 	if (netif_running(adapter->netdev)) {
1586 		need_restart = 1;
1587 		ibmveth_close(adapter->netdev);
1588 	}
1589 
1590 	/* Activate buffer pools until one is large enough to hold the new MTU */
1591 	for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) {
1592 		adapter->rx_buff_pool[i].active = 1;
1593 
1594 		if (new_mtu_oh <= adapter->rx_buff_pool[i].buff_size) {
1595 			WRITE_ONCE(dev->mtu, new_mtu);
1596 			vio_cmo_set_dev_desired(viodev,
1597 						ibmveth_get_desired_dma
1598 						(viodev));
1599 			if (need_restart) {
1600 				return ibmveth_open(adapter->netdev);
1601 			}
1602 			return 0;
1603 		}
1604 	}
1605 
1606 	if (need_restart && (rc = ibmveth_open(adapter->netdev)))
1607 		return rc;
1608 
1609 	return -EINVAL;
1610 }
1611 
1612 #ifdef CONFIG_NET_POLL_CONTROLLER
1613 static void ibmveth_poll_controller(struct net_device *dev)
1614 {
1615 	ibmveth_replenish_task(netdev_priv(dev));
1616 	ibmveth_interrupt(dev->irq, dev);
1617 }
1618 #endif
1619 
1620 /**
1621  * ibmveth_get_desired_dma - Calculate IO memory desired by the driver
1622  *
1623  * @vdev: struct vio_dev for the device whose desired IO mem is to be returned
1624  *
1625  * Return value:
1626  *	Number of bytes of IO data the driver will need to perform well.
1627  */
1628 static unsigned long ibmveth_get_desired_dma(struct vio_dev *vdev)
1629 {
1630 	struct net_device *netdev = dev_get_drvdata(&vdev->dev);
1631 	struct ibmveth_adapter *adapter;
1632 	struct iommu_table *tbl;
1633 	unsigned long ret;
1634 	int i;
1635 	int rxqentries = 1;
1636 
1637 	tbl = get_iommu_table_base(&vdev->dev);
1638 
1639 	/* netdev inits at probe time along with the structures we need below */
1640 	if (netdev == NULL)
1641 		return IOMMU_PAGE_ALIGN(IBMVETH_IO_ENTITLEMENT_DEFAULT, tbl);
1642 
1643 	adapter = netdev_priv(netdev);
1644 
1645 	ret = IBMVETH_BUFF_LIST_SIZE + IBMVETH_FILT_LIST_SIZE;
1646 	ret += IOMMU_PAGE_ALIGN(netdev->mtu, tbl);
1647 	/* add size of mapped tx buffers */
1648 	ret += IOMMU_PAGE_ALIGN(IBMVETH_MAX_TX_BUF_SIZE, tbl);
1649 
1650 	for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) {
1651 		/* add the size of the active receive buffers */
1652 		if (adapter->rx_buff_pool[i].active)
1653 			ret +=
1654 			    adapter->rx_buff_pool[i].size *
1655 			    IOMMU_PAGE_ALIGN(adapter->rx_buff_pool[i].
1656 					     buff_size, tbl);
1657 		rxqentries += adapter->rx_buff_pool[i].size;
1658 	}
1659 	/* add the size of the receive queue entries */
1660 	ret += IOMMU_PAGE_ALIGN(
1661 		rxqentries * sizeof(struct ibmveth_rx_q_entry), tbl);
1662 
1663 	return ret;
1664 }
1665 
1666 static int ibmveth_set_mac_addr(struct net_device *dev, void *p)
1667 {
1668 	struct ibmveth_adapter *adapter = netdev_priv(dev);
1669 	struct sockaddr *addr = p;
1670 	u64 mac_address;
1671 	int rc;
1672 
1673 	if (!is_valid_ether_addr(addr->sa_data))
1674 		return -EADDRNOTAVAIL;
1675 
1676 	mac_address = ether_addr_to_u64(addr->sa_data);
1677 	rc = h_change_logical_lan_mac(adapter->vdev->unit_address, mac_address);
1678 	if (rc) {
1679 		netdev_err(adapter->netdev, "h_change_logical_lan_mac failed with rc=%d\n", rc);
1680 		return rc;
1681 	}
1682 
1683 	eth_hw_addr_set(dev, addr->sa_data);
1684 
1685 	return 0;
1686 }
1687 
1688 static const struct net_device_ops ibmveth_netdev_ops = {
1689 	.ndo_open		= ibmveth_open,
1690 	.ndo_stop		= ibmveth_close,
1691 	.ndo_start_xmit		= ibmveth_start_xmit,
1692 	.ndo_set_rx_mode	= ibmveth_set_multicast_list,
1693 	.ndo_eth_ioctl		= ibmveth_ioctl,
1694 	.ndo_change_mtu		= ibmveth_change_mtu,
1695 	.ndo_fix_features	= ibmveth_fix_features,
1696 	.ndo_set_features	= ibmveth_set_features,
1697 	.ndo_validate_addr	= eth_validate_addr,
1698 	.ndo_set_mac_address    = ibmveth_set_mac_addr,
1699 #ifdef CONFIG_NET_POLL_CONTROLLER
1700 	.ndo_poll_controller	= ibmveth_poll_controller,
1701 #endif
1702 };
1703 
1704 static int ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id)
1705 {
1706 	int rc, i, mac_len;
1707 	struct net_device *netdev;
1708 	struct ibmveth_adapter *adapter;
1709 	unsigned char *mac_addr_p;
1710 	__be32 *mcastFilterSize_p;
1711 	long ret;
1712 	unsigned long ret_attr;
1713 
1714 	dev_dbg(&dev->dev, "entering ibmveth_probe for UA 0x%x\n",
1715 		dev->unit_address);
1716 
1717 	mac_addr_p = (unsigned char *)vio_get_attribute(dev, VETH_MAC_ADDR,
1718 							&mac_len);
1719 	if (!mac_addr_p) {
1720 		dev_err(&dev->dev, "Can't find VETH_MAC_ADDR attribute\n");
1721 		return -EINVAL;
1722 	}
1723 	/* Workaround for old/broken pHyp */
1724 	if (mac_len == 8)
1725 		mac_addr_p += 2;
1726 	else if (mac_len != 6) {
1727 		dev_err(&dev->dev, "VETH_MAC_ADDR attribute wrong len %d\n",
1728 			mac_len);
1729 		return -EINVAL;
1730 	}
1731 
1732 	mcastFilterSize_p = (__be32 *)vio_get_attribute(dev,
1733 							VETH_MCAST_FILTER_SIZE,
1734 							NULL);
1735 	if (!mcastFilterSize_p) {
1736 		dev_err(&dev->dev, "Can't find VETH_MCAST_FILTER_SIZE "
1737 			"attribute\n");
1738 		return -EINVAL;
1739 	}
1740 
1741 	netdev = alloc_etherdev_mqs(sizeof(struct ibmveth_adapter), IBMVETH_MAX_QUEUES, 1);
1742 	if (!netdev)
1743 		return -ENOMEM;
1744 
1745 	adapter = netdev_priv(netdev);
1746 	dev_set_drvdata(&dev->dev, netdev);
1747 
1748 	adapter->vdev = dev;
1749 	adapter->netdev = netdev;
1750 	INIT_WORK(&adapter->work, ibmveth_reset);
1751 	adapter->mcastFilterSize = be32_to_cpu(*mcastFilterSize_p);
1752 	ibmveth_init_link_settings(netdev);
1753 
1754 	netif_napi_add_weight(netdev, &adapter->napi, ibmveth_poll, 16);
1755 
1756 	netdev->irq = dev->irq;
1757 	netdev->netdev_ops = &ibmveth_netdev_ops;
1758 	netdev->ethtool_ops = &netdev_ethtool_ops;
1759 	SET_NETDEV_DEV(netdev, &dev->dev);
1760 	netdev->hw_features = NETIF_F_SG;
1761 	if (vio_get_attribute(dev, "ibm,illan-options", NULL) != NULL) {
1762 		netdev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
1763 				       NETIF_F_RXCSUM;
1764 	}
1765 
1766 	netdev->features |= netdev->hw_features;
1767 
1768 	ret = h_illan_attributes(adapter->vdev->unit_address, 0, 0, &ret_attr);
1769 
1770 	/* If running older firmware, TSO should not be enabled by default */
1771 	if (ret == H_SUCCESS && (ret_attr & IBMVETH_ILLAN_LRG_SND_SUPPORT) &&
1772 	    !old_large_send) {
1773 		netdev->hw_features |= NETIF_F_TSO | NETIF_F_TSO6;
1774 		netdev->features |= netdev->hw_features;
1775 	} else {
1776 		netdev->hw_features |= NETIF_F_TSO;
1777 	}
1778 
1779 	adapter->is_active_trunk = false;
1780 	if (ret == H_SUCCESS && (ret_attr & IBMVETH_ILLAN_ACTIVE_TRUNK)) {
1781 		adapter->is_active_trunk = true;
1782 		netdev->hw_features |= NETIF_F_FRAGLIST;
1783 		netdev->features |= NETIF_F_FRAGLIST;
1784 	}
1785 
1786 	netdev->min_mtu = IBMVETH_MIN_MTU;
1787 	netdev->max_mtu = ETH_MAX_MTU - IBMVETH_BUFF_OH;
1788 
1789 	eth_hw_addr_set(netdev, mac_addr_p);
1790 
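	/* With Cooperative Memory Overcommit (CMO) firmware, switch to the
	 * CMO-specific default buffer pool counts.
	 */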
1791 	if (firmware_has_feature(FW_FEATURE_CMO))
1792 		memcpy(pool_count, pool_count_cmo, sizeof(pool_count));
1793 
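	/* Expose each RX buffer pool as a "poolN" kobject under the VIO
	 * device so its count, buffer size and active state can be tuned
	 * via sysfs (see veth_pool_show()/veth_pool_store() below).
	 */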
1794 	for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) {
1795 		struct kobject *kobj = &adapter->rx_buff_pool[i].kobj;
1796 		int error;
1797 
1798 		ibmveth_init_buffer_pool(&adapter->rx_buff_pool[i], i,
1799 					 pool_count[i], pool_size[i],
1800 					 pool_active[i]);
1801 		error = kobject_init_and_add(kobj, &ktype_veth_pool,
1802 					     &dev->dev.kobj, "pool%d", i);
1803 		if (!error)
1804 			kobject_uevent(kobj, KOBJ_ADD);
1805 	}
1806 
1807 	rc = netif_set_real_num_tx_queues(netdev, min(num_online_cpus(),
1808 						      IBMVETH_DEFAULT_QUEUES));
1809 	if (rc) {
1810 		netdev_dbg(netdev, "failed to set number of tx queues rc=%d\n",
1811 			   rc);
1812 		free_netdev(netdev);
1813 		return rc;
1814 	}
1815 	adapter->tx_ltb_size = PAGE_ALIGN(IBMVETH_MAX_TX_BUF_SIZE);
1816 	for (i = 0; i < IBMVETH_MAX_QUEUES; i++)
1817 		adapter->tx_ltb_ptr[i] = NULL;
1818 
1819 	netdev_dbg(netdev, "adapter @ 0x%p\n", adapter);
1820 	netdev_dbg(netdev, "registering netdev...\n");
1821 
1822 	ibmveth_set_features(netdev, netdev->features);
1823 
1824 	rc = register_netdev(netdev);
1825 
1826 	if (rc) {
1827 		netdev_dbg(netdev, "failed to register netdev rc=%d\n", rc);
1828 		free_netdev(netdev);
1829 		return rc;
1830 	}
1831 
1832 	netdev_dbg(netdev, "registered\n");
1833 
1834 	return 0;
1835 }
1836 
1837 static void ibmveth_remove(struct vio_dev *dev)
1838 {
1839 	struct net_device *netdev = dev_get_drvdata(&dev->dev);
1840 	struct ibmveth_adapter *adapter = netdev_priv(netdev);
1841 	int i;
1842 
1843 	cancel_work_sync(&adapter->work);
1844 
1845 	for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++)
1846 		kobject_put(&adapter->rx_buff_pool[i].kobj);
1847 
1848 	unregister_netdev(netdev);
1849 
1850 	free_netdev(netdev);
1851 	dev_set_drvdata(&dev->dev, NULL);
1852 }
1853 
1854 static struct attribute veth_active_attr;
1855 static struct attribute veth_num_attr;
1856 static struct attribute veth_size_attr;
1857 
1858 static ssize_t veth_pool_show(struct kobject *kobj,
1859 			      struct attribute *attr, char *buf)
1860 {
1861 	struct ibmveth_buff_pool *pool = container_of(kobj,
1862 						      struct ibmveth_buff_pool,
1863 						      kobj);
1864 
1865 	if (attr == &veth_active_attr)
1866 		return sprintf(buf, "%d\n", pool->active);
1867 	else if (attr == &veth_num_attr)
1868 		return sprintf(buf, "%d\n", pool->size);
1869 	else if (attr == &veth_size_attr)
1870 		return sprintf(buf, "%d\n", pool->buff_size);
1871 	return 0;
1872 }
1873 
1874 /**
1875  * veth_pool_store - sysfs store handler for pool attributes
1876  * @kobj: kobject embedded in pool
1877  * @attr: attribute being changed
1878  * @buf: value being stored
1879  * @count: length of @buf in bytes
1880  *
1881  * Stores new value in pool attribute. Verifies the range of the new value for
1882  * size and buff_size. Verifies that at least one pool remains available to
1883  * receive MTU-sized packets.
1884  *
1885  * Context: Process context.
1886  *          Takes and releases rtnl_mutex to ensure correct ordering of close
1887  *          and open calls.
1888  * Return:
1889  * * %-EPERM  - Not allowed to disable all MTU-sized buffer pools
1890  * * %-EINVAL - New pool size or buffer size is out of range
1891  * * count    - Return count for success
1892  * * other    - Return value from a failed ibmveth_open call
1893  */
1894 static ssize_t veth_pool_store(struct kobject *kobj, struct attribute *attr,
1895 			       const char *buf, size_t count)
1896 {
1897 	struct ibmveth_buff_pool *pool = container_of(kobj,
1898 						      struct ibmveth_buff_pool,
1899 						      kobj);
1900 	struct net_device *netdev = dev_get_drvdata(kobj_to_dev(kobj->parent));
1901 	struct ibmveth_adapter *adapter = netdev_priv(netdev);
1902 	long value = simple_strtol(buf, NULL, 10);
1903 	bool change = false;
1904 	u32 newbuff_size;
1905 	u32 oldbuff_size;
1906 	int newactive;
1907 	int oldactive;
1908 	u32 newsize;
1909 	u32 oldsize;
1910 	long rc;
1911 
1912 	rtnl_lock();
1913 
1914 	oldbuff_size = pool->buff_size;
1915 	oldactive = pool->active;
1916 	oldsize = pool->size;
1917 
1918 	newbuff_size = oldbuff_size;
1919 	newactive = oldactive;
1920 	newsize = oldsize;
1921 
1922 	if (attr == &veth_active_attr) {
1923 		if (value && !oldactive) {
1924 			newactive = 1;
1925 			change = true;
1926 		} else if (!value && oldactive) {
1927 			int mtu = netdev->mtu + IBMVETH_BUFF_OH;
1928 			int i;
1929 			/* Make sure there is a buffer pool with buffers that
1930 			 * can hold a packet of the size of the MTU */
1931 			for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) {
1932 				if (pool == &adapter->rx_buff_pool[i])
1933 					continue;
1934 				if (!adapter->rx_buff_pool[i].active)
1935 					continue;
1936 				if (mtu <= adapter->rx_buff_pool[i].buff_size)
1937 					break;
1938 			}
1939 
1940 			if (i == IBMVETH_NUM_BUFF_POOLS) {
1941 				netdev_err(netdev, "no active pool >= MTU\n");
1942 				rc = -EPERM;
1943 				goto unlock_err;
1944 			}
1945 
1946 			newactive = 0;
1947 			change = true;
1948 		}
1949 	} else if (attr == &veth_num_attr) {
1950 		if (value <= 0 || value > IBMVETH_MAX_POOL_COUNT) {
1951 			rc = -EINVAL;
1952 			goto unlock_err;
1953 		}
1954 		if (value != oldsize) {
1955 			newsize = value;
1956 			change = true;
1957 		}
1958 	} else if (attr == &veth_size_attr) {
1959 		if (value <= IBMVETH_BUFF_OH || value > IBMVETH_MAX_BUF_SIZE) {
1960 			rc = -EINVAL;
1961 			goto unlock_err;
1962 		}
1963 		if (value != oldbuff_size) {
1964 			newbuff_size = value;
1965 			change = true;
1966 		}
1967 	}
1968 
1969 	if (change) {
1970 		if (netif_running(netdev))
1971 			ibmveth_close(netdev);
1972 
1973 		pool->active = newactive;
1974 		pool->buff_size = newbuff_size;
1975 		pool->size = newsize;
1976 
1977 		if (netif_running(netdev)) {
1978 			rc = ibmveth_open(netdev);
1979 			if (rc) {
1980 				pool->active = oldactive;
1981 				pool->buff_size = oldbuff_size;
1982 				pool->size = oldsize;
1983 				goto unlock_err;
1984 			}
1985 		}
1986 	}
1987 	rtnl_unlock();
1988 
1989 	/* kick the interrupt handler to allocate/deallocate pools */
1990 	ibmveth_interrupt(netdev->irq, netdev);
1991 	return count;
1992 
1993 unlock_err:
1994 	rtnl_unlock();
1995 	return rc;
1996 }
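
/* Illustrative use of the pool attributes handled above. The VIO unit
 * address "30000002" is only an example (the sysfs path varies by system),
 * and values must stay within the ranges checked in veth_pool_store():
 *
 *   echo 768  > /sys/devices/vio/30000002/pool2/num
 *   echo 8192 > /sys/devices/vio/30000002/pool2/size
 *   echo 1    > /sys/devices/vio/30000002/pool2/active
 *
 * Changing a value while the interface is up briefly closes and reopens it.
 */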
1997 
1998 
1999 #define ATTR(_name, _mode)				\
2000 	struct attribute veth_##_name##_attr = {	\
2001 	.name = __stringify(_name), .mode = _mode,	\
2002 	};
2003 
2004 static ATTR(active, 0644);
2005 static ATTR(num, 0644);
2006 static ATTR(size, 0644);
2007 
2008 static struct attribute *veth_pool_attrs[] = {
2009 	&veth_active_attr,
2010 	&veth_num_attr,
2011 	&veth_size_attr,
2012 	NULL,
2013 };
2014 ATTRIBUTE_GROUPS(veth_pool);
2015 
2016 static const struct sysfs_ops veth_pool_ops = {
2017 	.show   = veth_pool_show,
2018 	.store  = veth_pool_store,
2019 };
2020 
2021 static struct kobj_type ktype_veth_pool = {
2022 	.release        = NULL,
2023 	.sysfs_ops      = &veth_pool_ops,
2024 	.default_groups = veth_pool_groups,
2025 };
2026 
2027 static int ibmveth_resume(struct device *dev)
2028 {
2029 	struct net_device *netdev = dev_get_drvdata(dev);
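
	/* Kick the interrupt handler so that anything that arrived while the
	 * partition was suspended gets processed.
	 */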
2030 	ibmveth_interrupt(netdev->irq, netdev);
2031 	return 0;
2032 }
2033 
2034 static const struct vio_device_id ibmveth_device_table[] = {
2035 	{ "network", "IBM,l-lan"},
2036 	{ "", "" }
2037 };
2038 MODULE_DEVICE_TABLE(vio, ibmveth_device_table);
2039 
2040 static const struct dev_pm_ops ibmveth_pm_ops = {
2041 	.resume = ibmveth_resume
2042 };
2043 
2044 static struct vio_driver ibmveth_driver = {
2045 	.id_table	= ibmveth_device_table,
2046 	.probe		= ibmveth_probe,
2047 	.remove		= ibmveth_remove,
2048 	.get_desired_dma = ibmveth_get_desired_dma,
2049 	.name		= ibmveth_driver_name,
2050 	.pm		= &ibmveth_pm_ops,
2051 };
2052 
2053 static int __init ibmveth_module_init(void)
2054 {
2055 	printk(KERN_DEBUG "%s: %s %s\n", ibmveth_driver_name,
2056 	       ibmveth_driver_string, ibmveth_driver_version);
2057 
2058 	return vio_register_driver(&ibmveth_driver);
2059 }
2060 
2061 static void __exit ibmveth_module_exit(void)
2062 {
2063 	vio_unregister_driver(&ibmveth_driver);
2064 }
2065 
2066 module_init(ibmveth_module_init);
2067 module_exit(ibmveth_module_exit);
2068 
2069 #ifdef CONFIG_IBMVETH_KUNIT_TEST
2070 #include <kunit/test.h>
2071 
2072 /**
2073  * ibmveth_reset_kunit - reset routine for running in KUnit environment
2074  *
2075  * @w: pointer to work_struct embedded in adapter structure
2076  *
2077  * Context: Called in the KUnit environment. Does nothing.
2078  *
2079  * Return: void
2080  */
2081 static void ibmveth_reset_kunit(struct work_struct *w)
2082 {
2083 	netdev_dbg(NULL, "reset_kunit starting\n");
2084 	netdev_dbg(NULL, "reset_kunit complete\n");
2085 }
2086 
2087 /**
2088  * ibmveth_remove_buffer_from_pool_test - unit test for some of
2089  * ibmveth_remove_buffer_from_pool_test - unit test for the error paths of
2090  *                                        ibmveth_remove_buffer_from_pool
2091  *
2092  * Tests the error returns from ibmveth_remove_buffer_from_pool.
2093  * ibmveth_remove_buffer_from_pool also calls WARN_ON, so dmesg should be
2094  * checked to see that these warnings happened.
2095  *
2096  * Return: void
2097  */
2098 static void ibmveth_remove_buffer_from_pool_test(struct kunit *test)
2099 {
2100 	struct ibmveth_adapter *adapter = kunit_kzalloc(test, sizeof(*adapter), GFP_KERNEL);
2101 	struct ibmveth_buff_pool *pool;
2102 	u64 correlator;
2103 
2104 	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, adapter);
2105 
2106 	INIT_WORK(&adapter->work, ibmveth_reset_kunit);
2107 
2108 	/* Set sane values for buffer pools */
2109 	for (int i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++)
2110 		ibmveth_init_buffer_pool(&adapter->rx_buff_pool[i], i,
2111 					 pool_count[i], pool_size[i],
2112 					 pool_active[i]);
2113 
2114 	pool = &adapter->rx_buff_pool[0];
2115 	pool->skbuff = kunit_kcalloc(test, pool->size, sizeof(void *), GFP_KERNEL);
2116 	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, pool->skbuff);
2117 
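	/* The buffer correlator packs the pool index in the upper 32 bits and
	 * the buffer index in the lower 32 bits; start with a pool index that
	 * is out of range.
	 */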
2118 	correlator = ((u64)IBMVETH_NUM_BUFF_POOLS << 32) | 0;
2119 	KUNIT_EXPECT_EQ(test, -EINVAL, ibmveth_remove_buffer_from_pool(adapter, correlator, false));
2120 	KUNIT_EXPECT_EQ(test, -EINVAL, ibmveth_remove_buffer_from_pool(adapter, correlator, true));
2121 
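	/* Valid pool, but a buffer index equal to the pool size is one past
	 * the end and must be rejected as well.
	 */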
2122 	correlator = ((u64)0 << 32) | adapter->rx_buff_pool[0].size;
2123 	KUNIT_EXPECT_EQ(test, -EINVAL, ibmveth_remove_buffer_from_pool(adapter, correlator, false));
2124 	KUNIT_EXPECT_EQ(test, -EINVAL, ibmveth_remove_buffer_from_pool(adapter, correlator, true));
2125 
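	/* Both indices are in range but no skb is mapped at that slot, so
	 * the call fails with -EFAULT.
	 */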
2126 	correlator = (u64)0 | 0;
2127 	pool->skbuff[0] = NULL;
2128 	KUNIT_EXPECT_EQ(test, -EFAULT, ibmveth_remove_buffer_from_pool(adapter, correlator, false));
2129 	KUNIT_EXPECT_EQ(test, -EFAULT, ibmveth_remove_buffer_from_pool(adapter, correlator, true));
2130 
2131 	flush_work(&adapter->work);
2132 }
2133 
2134 /**
2135  * ibmveth_rxq_get_buffer_test - unit test for ibmveth_rxq_get_buffer
2136  * @test: pointer to kunit structure
2137  *
2138  * Tests ibmveth_rxq_get_buffer. ibmveth_rxq_get_buffer also calls WARN_ON for
2139  * the NULL returns, so dmesg should be checked to see that these warnings
2140  * happened.
2141  *
2142  * Return: void
2143  */
2144 static void ibmveth_rxq_get_buffer_test(struct kunit *test)
2145 {
2146 	struct ibmveth_adapter *adapter = kunit_kzalloc(test, sizeof(*adapter), GFP_KERNEL);
2147 	struct sk_buff *skb = kunit_kzalloc(test, sizeof(*skb), GFP_KERNEL);
2148 	struct ibmveth_buff_pool *pool;
2149 
2150 	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, adapter);
2151 	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, skb);
2152 
2153 	INIT_WORK(&adapter->work, ibmveth_reset_kunit);
2154 
2155 	adapter->rx_queue.queue_len = 1;
2156 	adapter->rx_queue.index = 0;
2157 	adapter->rx_queue.queue_addr = kunit_kzalloc(test, sizeof(struct ibmveth_rx_q_entry),
2158 						     GFP_KERNEL);
2159 	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, adapter->rx_queue.queue_addr);
2160 
2161 	/* Set sane values for buffer pools */
2162 	for (int i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++)
2163 		ibmveth_init_buffer_pool(&adapter->rx_buff_pool[i], i,
2164 					 pool_count[i], pool_size[i],
2165 					 pool_active[i]);
2166 
2167 	pool = &adapter->rx_buff_pool[0];
2168 	pool->skbuff = kunit_kcalloc(test, pool->size, sizeof(void *), GFP_KERNEL);
2169 	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, pool->skbuff);
2170 
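	/* The correlator in the rx queue entry packs the pool index in the
	 * upper 32 bits and the buffer index in the lower 32 bits; an
	 * out-of-range pool index must make the lookup return NULL.
	 */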
2171 	adapter->rx_queue.queue_addr[0].correlator = (u64)IBMVETH_NUM_BUFF_POOLS << 32 | 0;
2172 	KUNIT_EXPECT_PTR_EQ(test, NULL, ibmveth_rxq_get_buffer(adapter));
2173 
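	/* A buffer index equal to the pool size is out of range and must
	 * also return NULL.
	 */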
2174 	adapter->rx_queue.queue_addr[0].correlator = (u64)0 << 32 | adapter->rx_buff_pool[0].size;
2175 	KUNIT_EXPECT_PTR_EQ(test, NULL, ibmveth_rxq_get_buffer(adapter));
2176 
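	/* With a real skb installed at pool 0, index 0, the lookup succeeds. */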
2177 	pool->skbuff[0] = skb;
2178 	adapter->rx_queue.queue_addr[0].correlator = (u64)0 << 32 | 0;
2179 	KUNIT_EXPECT_PTR_EQ(test, skb, ibmveth_rxq_get_buffer(adapter));
2180 
2181 	flush_work(&adapter->work);
2182 }
2183 
2184 static struct kunit_case ibmveth_test_cases[] = {
2185 	KUNIT_CASE(ibmveth_remove_buffer_from_pool_test),
2186 	KUNIT_CASE(ibmveth_rxq_get_buffer_test),
2187 	{}
2188 };
2189 
2190 static struct kunit_suite ibmveth_test_suite = {
2191 	.name = "ibmveth-kunit-test",
2192 	.test_cases = ibmveth_test_cases,
2193 };
2194 
2195 kunit_test_suite(ibmveth_test_suite);
2196 #endif
2197