xref: /linux/drivers/block/aoe/aoecmd.c (revision 273b281fa22c293963ee3e6eec418f5dda2dbc83)
1 /* Copyright (c) 2007 Coraid, Inc.  See COPYING for GPL terms. */
2 /*
3  * aoecmd.c
4  * Filesystem request handling methods
5  */
6 
7 #include <linux/ata.h>
8 #include <linux/hdreg.h>
9 #include <linux/blkdev.h>
10 #include <linux/skbuff.h>
11 #include <linux/netdevice.h>
12 #include <linux/genhd.h>
13 #include <linux/moduleparam.h>
14 #include <net/net_namespace.h>
15 #include <asm/unaligned.h>
16 #include "aoe.h"
17 
18 static int aoe_deadsecs = 60 * 3;
19 module_param(aoe_deadsecs, int, 0644);
20 MODULE_PARM_DESC(aoe_deadsecs, "After aoe_deadsecs seconds, give up and fail dev.");
21 
22 static int aoe_maxout = 16;
23 module_param(aoe_maxout, int, 0644);
24 MODULE_PARM_DESC(aoe_maxout,
25 	"Only aoe_maxout outstanding packets for every MAC on eX.Y.");
26 
27 static struct sk_buff *
28 new_skb(ulong len)
29 {
30 	struct sk_buff *skb;
31 
32 	skb = alloc_skb(len, GFP_ATOMIC);
33 	if (skb) {
34 		skb_reset_mac_header(skb);
35 		skb_reset_network_header(skb);
36 		skb->protocol = __constant_htons(ETH_P_AOE);
37 	}
38 	return skb;
39 }
40 
41 static struct frame *
42 getframe(struct aoetgt *t, int tag)
43 {
44 	struct frame *f, *e;
45 
46 	f = t->frames;
47 	e = f + t->nframes;
48 	for (; f<e; f++)
49 		if (f->tag == tag)
50 			return f;
51 	return NULL;
52 }
53 
54 /*
55  * Leave the top bit clear so we have tagspace for userland.
56  * The bottom 16 bits are the xmit tick for rexmit/rttavg processing.
57  * This driver reserves tag -1 to mean "unused frame."
58  */
59 static int
60 newtag(struct aoetgt *t)
61 {
62 	register ulong n;
63 
64 	n = jiffies & 0xffff;
65 	return n |= (++t->lasttag & 0x7fff) << 16;
66 }
67 
68 static int
69 aoehdr_atainit(struct aoedev *d, struct aoetgt *t, struct aoe_hdr *h)
70 {
71 	u32 host_tag = newtag(t);
72 
73 	memcpy(h->src, t->ifp->nd->dev_addr, sizeof h->src);
74 	memcpy(h->dst, t->addr, sizeof h->dst);
75 	h->type = __constant_cpu_to_be16(ETH_P_AOE);
76 	h->verfl = AOE_HVER;
77 	h->major = cpu_to_be16(d->aoemajor);
78 	h->minor = d->aoeminor;
79 	h->cmd = AOECMD_ATA;
80 	h->tag = cpu_to_be32(host_tag);
81 
82 	return host_tag;
83 }
84 
85 static inline void
86 put_lba(struct aoe_atahdr *ah, sector_t lba)
87 {
88 	ah->lba0 = lba;
89 	ah->lba1 = lba >>= 8;
90 	ah->lba2 = lba >>= 8;
91 	ah->lba3 = lba >>= 8;
92 	ah->lba4 = lba >>= 8;
93 	ah->lba5 = lba >>= 8;
94 }
95 
96 static void
97 ifrotate(struct aoetgt *t)
98 {
99 	t->ifp++;
100 	if (t->ifp >= &t->ifs[NAOEIFS] || t->ifp->nd == NULL)
101 		t->ifp = t->ifs;
102 	if (t->ifp->nd == NULL) {
103 		printk(KERN_INFO "aoe: no interface to rotate to\n");
104 		BUG();
105 	}
106 }
107 
108 static void
109 skb_pool_put(struct aoedev *d, struct sk_buff *skb)
110 {
111 	__skb_queue_tail(&d->skbpool, skb);
112 }
113 
114 static struct sk_buff *
115 skb_pool_get(struct aoedev *d)
116 {
117 	struct sk_buff *skb = skb_peek(&d->skbpool);
118 
119 	if (skb && atomic_read(&skb_shinfo(skb)->dataref) == 1) {
120 		__skb_unlink(skb, &d->skbpool);
121 		return skb;
122 	}
123 	if (skb_queue_len(&d->skbpool) < NSKBPOOLMAX &&
124 	    (skb = new_skb(ETH_ZLEN)))
125 		return skb;
126 
127 	return NULL;
128 }
129 
130 /* freeframe is where we do our load balancing so it's a little hairy. */
131 static struct frame *
132 freeframe(struct aoedev *d)
133 {
134 	struct frame *f, *e, *rf;
135 	struct aoetgt **t;
136 	struct sk_buff *skb;
137 
138 	if (d->targets[0] == NULL) {	/* shouldn't happen, but I'm paranoid */
139 		printk(KERN_ERR "aoe: NULL TARGETS!\n");
140 		return NULL;
141 	}
142 	t = d->tgt;
143 	t++;
144 	if (t >= &d->targets[NTARGETS] || !*t)
145 		t = d->targets;
146 	for (;;) {
147 		if ((*t)->nout < (*t)->maxout
148 		&& t != d->htgt
149 		&& (*t)->ifp->nd) {
150 			rf = NULL;
151 			f = (*t)->frames;
152 			e = f + (*t)->nframes;
153 			for (; f < e; f++) {
154 				if (f->tag != FREETAG)
155 					continue;
156 				skb = f->skb;
157 				if (!skb
158 				&& !(f->skb = skb = new_skb(ETH_ZLEN)))
159 					continue;
160 				if (atomic_read(&skb_shinfo(skb)->dataref)
161 					!= 1) {
162 					if (!rf)
163 						rf = f;
164 					continue;
165 				}
166 gotone:				skb_shinfo(skb)->nr_frags = skb->data_len = 0;
167 				skb_trim(skb, 0);
168 				d->tgt = t;
169 				ifrotate(*t);
170 				return f;
171 			}
172 			/* Work can be done, but the network layer is
173 			   holding our precious packets.  Try to grab
174 			   one from the pool. */
175 			f = rf;
176 			if (f == NULL) {	/* more paranoia */
177 				printk(KERN_ERR
178 					"aoe: freeframe: %s.\n",
179 					"unexpected null rf");
180 				d->flags |= DEVFL_KICKME;
181 				return NULL;
182 			}
183 			skb = skb_pool_get(d);
184 			if (skb) {
185 				skb_pool_put(d, f->skb);
186 				f->skb = skb;
187 				goto gotone;
188 			}
189 			(*t)->dataref++;
190 			if ((*t)->nout == 0)
191 				d->flags |= DEVFL_KICKME;
192 		}
193 		if (t == d->tgt)	/* we've looped and found nada */
194 			break;
195 		t++;
196 		if (t >= &d->targets[NTARGETS] || !*t)
197 			t = d->targets;
198 	}
199 	return NULL;
200 }
201 
202 static int
203 aoecmd_ata_rw(struct aoedev *d)
204 {
205 	struct frame *f;
206 	struct aoe_hdr *h;
207 	struct aoe_atahdr *ah;
208 	struct buf *buf;
209 	struct bio_vec *bv;
210 	struct aoetgt *t;
211 	struct sk_buff *skb;
212 	ulong bcnt;
213 	char writebit, extbit;
214 
215 	writebit = 0x10;
216 	extbit = 0x4;
217 
218 	f = freeframe(d);
219 	if (f == NULL)
220 		return 0;
221 	t = *d->tgt;
222 	buf = d->inprocess;
223 	bv = buf->bv;
224 	bcnt = t->ifp->maxbcnt;
225 	if (bcnt == 0)
226 		bcnt = DEFAULTBCNT;
227 	if (bcnt > buf->bv_resid)
228 		bcnt = buf->bv_resid;
229 	/* initialize the headers & frame */
230 	skb = f->skb;
231 	h = (struct aoe_hdr *) skb_mac_header(skb);
232 	ah = (struct aoe_atahdr *) (h+1);
233 	skb_put(skb, sizeof *h + sizeof *ah);
234 	memset(h, 0, skb->len);
235 	f->tag = aoehdr_atainit(d, t, h);
236 	t->nout++;
237 	f->waited = 0;
238 	f->buf = buf;
239 	f->bufaddr = page_address(bv->bv_page) + buf->bv_off;
240 	f->bcnt = bcnt;
241 	f->lba = buf->sector;
242 
243 	/* set up ata header */
244 	ah->scnt = bcnt >> 9;
245 	put_lba(ah, buf->sector);
246 	if (d->flags & DEVFL_EXT) {
247 		ah->aflags |= AOEAFL_EXT;
248 	} else {
249 		extbit = 0;
250 		ah->lba3 &= 0x0f;
251 		ah->lba3 |= 0xe0;	/* LBA bit + obsolete 0xa0 */
252 	}
253 	if (bio_data_dir(buf->bio) == WRITE) {
254 		skb_fill_page_desc(skb, 0, bv->bv_page, buf->bv_off, bcnt);
255 		ah->aflags |= AOEAFL_WRITE;
256 		skb->len += bcnt;
257 		skb->data_len = bcnt;
258 		t->wpkts++;
259 	} else {
260 		t->rpkts++;
261 		writebit = 0;
262 	}
263 
264 	ah->cmdstat = ATA_CMD_PIO_READ | writebit | extbit;
265 
266 	/* mark all tracking fields and load out */
267 	buf->nframesout += 1;
268 	buf->bv_off += bcnt;
269 	buf->bv_resid -= bcnt;
270 	buf->resid -= bcnt;
271 	buf->sector += bcnt >> 9;
272 	if (buf->resid == 0) {
273 		d->inprocess = NULL;
274 	} else if (buf->bv_resid == 0) {
275 		buf->bv = ++bv;
276 		buf->bv_resid = bv->bv_len;
277 		WARN_ON(buf->bv_resid == 0);
278 		buf->bv_off = bv->bv_offset;
279 	}
280 
281 	skb->dev = t->ifp->nd;
282 	skb = skb_clone(skb, GFP_ATOMIC);
283 	if (skb)
284 		__skb_queue_tail(&d->sendq, skb);
285 	return 1;
286 }
287 
288 /* some callers cannot sleep, and they can call this function,
289  * transmitting the packets later, when interrupts are on
290  */
291 static void
292 aoecmd_cfg_pkts(ushort aoemajor, unsigned char aoeminor, struct sk_buff_head *queue)
293 {
294 	struct aoe_hdr *h;
295 	struct aoe_cfghdr *ch;
296 	struct sk_buff *skb;
297 	struct net_device *ifp;
298 
299 	read_lock(&dev_base_lock);
300 	for_each_netdev(&init_net, ifp) {
301 		dev_hold(ifp);
302 		if (!is_aoe_netif(ifp))
303 			goto cont;
304 
305 		skb = new_skb(sizeof *h + sizeof *ch);
306 		if (skb == NULL) {
307 			printk(KERN_INFO "aoe: skb alloc failure\n");
308 			goto cont;
309 		}
310 		skb_put(skb, sizeof *h + sizeof *ch);
311 		skb->dev = ifp;
312 		__skb_queue_tail(queue, skb);
313 		h = (struct aoe_hdr *) skb_mac_header(skb);
314 		memset(h, 0, sizeof *h + sizeof *ch);
315 
316 		memset(h->dst, 0xff, sizeof h->dst);
317 		memcpy(h->src, ifp->dev_addr, sizeof h->src);
318 		h->type = __constant_cpu_to_be16(ETH_P_AOE);
319 		h->verfl = AOE_HVER;
320 		h->major = cpu_to_be16(aoemajor);
321 		h->minor = aoeminor;
322 		h->cmd = AOECMD_CFG;
323 
324 cont:
325 		dev_put(ifp);
326 	}
327 	read_unlock(&dev_base_lock);
328 }
329 
330 static void
331 resend(struct aoedev *d, struct aoetgt *t, struct frame *f)
332 {
333 	struct sk_buff *skb;
334 	struct aoe_hdr *h;
335 	struct aoe_atahdr *ah;
336 	char buf[128];
337 	u32 n;
338 
339 	ifrotate(t);
340 	n = newtag(t);
341 	skb = f->skb;
342 	h = (struct aoe_hdr *) skb_mac_header(skb);
343 	ah = (struct aoe_atahdr *) (h+1);
344 
345 	snprintf(buf, sizeof buf,
346 		"%15s e%ld.%d oldtag=%08x@%08lx newtag=%08x s=%pm d=%pm nout=%d\n",
347 		"retransmit", d->aoemajor, d->aoeminor, f->tag, jiffies, n,
348 		h->src, h->dst, t->nout);
349 	aoechr_error(buf);
350 
351 	f->tag = n;
352 	h->tag = cpu_to_be32(n);
353 	memcpy(h->dst, t->addr, sizeof h->dst);
354 	memcpy(h->src, t->ifp->nd->dev_addr, sizeof h->src);
355 
356 	switch (ah->cmdstat) {
357 	default:
358 		break;
359 	case ATA_CMD_PIO_READ:
360 	case ATA_CMD_PIO_READ_EXT:
361 	case ATA_CMD_PIO_WRITE:
362 	case ATA_CMD_PIO_WRITE_EXT:
363 		put_lba(ah, f->lba);
364 
365 		n = f->bcnt;
366 		if (n > DEFAULTBCNT)
367 			n = DEFAULTBCNT;
368 		ah->scnt = n >> 9;
369 		if (ah->aflags & AOEAFL_WRITE) {
370 			skb_fill_page_desc(skb, 0, virt_to_page(f->bufaddr),
371 				offset_in_page(f->bufaddr), n);
372 			skb->len = sizeof *h + sizeof *ah + n;
373 			skb->data_len = n;
374 		}
375 	}
376 	skb->dev = t->ifp->nd;
377 	skb = skb_clone(skb, GFP_ATOMIC);
378 	if (skb == NULL)
379 		return;
380 	__skb_queue_tail(&d->sendq, skb);
381 }
382 
383 static int
384 tsince(int tag)
385 {
386 	int n;
387 
388 	n = jiffies & 0xffff;
389 	n -= tag & 0xffff;
390 	if (n < 0)
391 		n += 1<<16;
392 	return n;
393 }
394 
395 static struct aoeif *
396 getif(struct aoetgt *t, struct net_device *nd)
397 {
398 	struct aoeif *p, *e;
399 
400 	p = t->ifs;
401 	e = p + NAOEIFS;
402 	for (; p < e; p++)
403 		if (p->nd == nd)
404 			return p;
405 	return NULL;
406 }
407 
408 static struct aoeif *
409 addif(struct aoetgt *t, struct net_device *nd)
410 {
411 	struct aoeif *p;
412 
413 	p = getif(t, NULL);
414 	if (!p)
415 		return NULL;
416 	p->nd = nd;
417 	p->maxbcnt = DEFAULTBCNT;
418 	p->lost = 0;
419 	p->lostjumbo = 0;
420 	return p;
421 }
422 
423 static void
424 ejectif(struct aoetgt *t, struct aoeif *ifp)
425 {
426 	struct aoeif *e;
427 	ulong n;
428 
429 	e = t->ifs + NAOEIFS - 1;
430 	n = (e - ifp) * sizeof *ifp;
431 	memmove(ifp, ifp+1, n);
432 	e->nd = NULL;
433 }
434 
435 static int
436 sthtith(struct aoedev *d)
437 {
438 	struct frame *f, *e, *nf;
439 	struct sk_buff *skb;
440 	struct aoetgt *ht = *d->htgt;
441 
442 	f = ht->frames;
443 	e = f + ht->nframes;
444 	for (; f < e; f++) {
445 		if (f->tag == FREETAG)
446 			continue;
447 		nf = freeframe(d);
448 		if (!nf)
449 			return 0;
450 		skb = nf->skb;
451 		*nf = *f;
452 		f->skb = skb;
453 		f->tag = FREETAG;
454 		nf->waited = 0;
455 		ht->nout--;
456 		(*d->tgt)->nout++;
457 		resend(d, *d->tgt, nf);
458 	}
459 	/* he's clean, he's useless.  take away his interfaces */
460 	memset(ht->ifs, 0, sizeof ht->ifs);
461 	d->htgt = NULL;
462 	return 1;
463 }
464 
465 static inline unsigned char
466 ata_scnt(unsigned char *packet) {
467 	struct aoe_hdr *h;
468 	struct aoe_atahdr *ah;
469 
470 	h = (struct aoe_hdr *) packet;
471 	ah = (struct aoe_atahdr *) (h+1);
472 	return ah->scnt;
473 }
474 
475 static void
476 rexmit_timer(ulong vp)
477 {
478 	struct sk_buff_head queue;
479 	struct aoedev *d;
480 	struct aoetgt *t, **tt, **te;
481 	struct aoeif *ifp;
482 	struct frame *f, *e;
483 	register long timeout;
484 	ulong flags, n;
485 
486 	d = (struct aoedev *) vp;
487 
488 	/* timeout is always ~150% of the moving average */
489 	timeout = d->rttavg;
490 	timeout += timeout >> 1;
491 
492 	spin_lock_irqsave(&d->lock, flags);
493 
494 	if (d->flags & DEVFL_TKILL) {
495 		spin_unlock_irqrestore(&d->lock, flags);
496 		return;
497 	}
498 	tt = d->targets;
499 	te = tt + NTARGETS;
500 	for (; tt < te && *tt; tt++) {
501 		t = *tt;
502 		f = t->frames;
503 		e = f + t->nframes;
504 		for (; f < e; f++) {
505 			if (f->tag == FREETAG
506 			|| tsince(f->tag) < timeout)
507 				continue;
508 			n = f->waited += timeout;
509 			n /= HZ;
510 			if (n > aoe_deadsecs) {
511 				/* waited too long.  device failure. */
512 				aoedev_downdev(d);
513 				break;
514 			}
515 
516 			if (n > HELPWAIT /* see if another target can help */
517 			&& (tt != d->targets || d->targets[1]))
518 				d->htgt = tt;
519 
520 			if (t->nout == t->maxout) {
521 				if (t->maxout > 1)
522 					t->maxout--;
523 				t->lastwadj = jiffies;
524 			}
525 
526 			ifp = getif(t, f->skb->dev);
527 			if (ifp && ++ifp->lost > (t->nframes << 1)
528 			&& (ifp != t->ifs || t->ifs[1].nd)) {
529 				ejectif(t, ifp);
530 				ifp = NULL;
531 			}
532 
533 			if (ata_scnt(skb_mac_header(f->skb)) > DEFAULTBCNT / 512
534 			&& ifp && ++ifp->lostjumbo > (t->nframes << 1)
535 			&& ifp->maxbcnt != DEFAULTBCNT) {
536 				printk(KERN_INFO
537 					"aoe: e%ld.%d: "
538 					"too many lost jumbo on "
539 					"%s:%pm - "
540 					"falling back to %d frames.\n",
541 					d->aoemajor, d->aoeminor,
542 					ifp->nd->name, t->addr,
543 					DEFAULTBCNT);
544 				ifp->maxbcnt = 0;
545 			}
546 			resend(d, t, f);
547 		}
548 
549 		/* window check */
550 		if (t->nout == t->maxout
551 		&& t->maxout < t->nframes
552 		&& (jiffies - t->lastwadj)/HZ > 10) {
553 			t->maxout++;
554 			t->lastwadj = jiffies;
555 		}
556 	}
557 
558 	if (!skb_queue_empty(&d->sendq)) {
559 		n = d->rttavg <<= 1;
560 		if (n > MAXTIMER)
561 			d->rttavg = MAXTIMER;
562 	}
563 
564 	if (d->flags & DEVFL_KICKME || d->htgt) {
565 		d->flags &= ~DEVFL_KICKME;
566 		aoecmd_work(d);
567 	}
568 
569 	__skb_queue_head_init(&queue);
570 	skb_queue_splice_init(&d->sendq, &queue);
571 
572 	d->timer.expires = jiffies + TIMERTICK;
573 	add_timer(&d->timer);
574 
575 	spin_unlock_irqrestore(&d->lock, flags);
576 
577 	aoenet_xmit(&queue);
578 }
579 
580 /* enters with d->lock held */
581 void
582 aoecmd_work(struct aoedev *d)
583 {
584 	struct buf *buf;
585 loop:
586 	if (d->htgt && !sthtith(d))
587 		return;
588 	if (d->inprocess == NULL) {
589 		if (list_empty(&d->bufq))
590 			return;
591 		buf = container_of(d->bufq.next, struct buf, bufs);
592 		list_del(d->bufq.next);
593 		d->inprocess = buf;
594 	}
595 	if (aoecmd_ata_rw(d))
596 		goto loop;
597 }
598 
599 /* this function performs work that has been deferred until sleeping is OK
600  */
601 void
602 aoecmd_sleepwork(struct work_struct *work)
603 {
604 	struct aoedev *d = container_of(work, struct aoedev, work);
605 
606 	if (d->flags & DEVFL_GDALLOC)
607 		aoeblk_gdalloc(d);
608 
609 	if (d->flags & DEVFL_NEWSIZE) {
610 		struct block_device *bd;
611 		unsigned long flags;
612 		u64 ssize;
613 
614 		ssize = get_capacity(d->gd);
615 		bd = bdget_disk(d->gd, 0);
616 
617 		if (bd) {
618 			mutex_lock(&bd->bd_inode->i_mutex);
619 			i_size_write(bd->bd_inode, (loff_t)ssize<<9);
620 			mutex_unlock(&bd->bd_inode->i_mutex);
621 			bdput(bd);
622 		}
623 		spin_lock_irqsave(&d->lock, flags);
624 		d->flags |= DEVFL_UP;
625 		d->flags &= ~DEVFL_NEWSIZE;
626 		spin_unlock_irqrestore(&d->lock, flags);
627 	}
628 }
629 
630 static void
631 ataid_complete(struct aoedev *d, struct aoetgt *t, unsigned char *id)
632 {
633 	u64 ssize;
634 	u16 n;
635 
636 	/* word 83: command set supported */
637 	n = get_unaligned_le16(&id[83 << 1]);
638 
639 	/* word 86: command set/feature enabled */
640 	n |= get_unaligned_le16(&id[86 << 1]);
641 
642 	if (n & (1<<10)) {	/* bit 10: LBA 48 */
643 		d->flags |= DEVFL_EXT;
644 
645 		/* word 100: number lba48 sectors */
646 		ssize = get_unaligned_le64(&id[100 << 1]);
647 
648 		/* set as in ide-disk.c:init_idedisk_capacity */
649 		d->geo.cylinders = ssize;
650 		d->geo.cylinders /= (255 * 63);
651 		d->geo.heads = 255;
652 		d->geo.sectors = 63;
653 	} else {
654 		d->flags &= ~DEVFL_EXT;
655 
656 		/* number lba28 sectors */
657 		ssize = get_unaligned_le32(&id[60 << 1]);
658 
659 		/* NOTE: obsolete in ATA 6 */
660 		d->geo.cylinders = get_unaligned_le16(&id[54 << 1]);
661 		d->geo.heads = get_unaligned_le16(&id[55 << 1]);
662 		d->geo.sectors = get_unaligned_le16(&id[56 << 1]);
663 	}
664 
665 	if (d->ssize != ssize)
666 		printk(KERN_INFO
667 			"aoe: %pm e%ld.%d v%04x has %llu sectors\n",
668 			t->addr,
669 			d->aoemajor, d->aoeminor,
670 			d->fw_ver, (long long)ssize);
671 	d->ssize = ssize;
672 	d->geo.start = 0;
673 	if (d->flags & (DEVFL_GDALLOC|DEVFL_NEWSIZE))
674 		return;
675 	if (d->gd != NULL) {
676 		set_capacity(d->gd, ssize);
677 		d->flags |= DEVFL_NEWSIZE;
678 	} else
679 		d->flags |= DEVFL_GDALLOC;
680 	schedule_work(&d->work);
681 }
682 
683 static void
684 calc_rttavg(struct aoedev *d, int rtt)
685 {
686 	register long n;
687 
688 	n = rtt;
689 	if (n < 0) {
690 		n = -rtt;
691 		if (n < MINTIMER)
692 			n = MINTIMER;
693 		else if (n > MAXTIMER)
694 			n = MAXTIMER;
695 		d->mintimer += (n - d->mintimer) >> 1;
696 	} else if (n < d->mintimer)
697 		n = d->mintimer;
698 	else if (n > MAXTIMER)
699 		n = MAXTIMER;
700 
701 	/* g == .25; cf. Congestion Avoidance and Control, Jacobson & Karels; 1988 */
702 	n -= d->rttavg;
703 	d->rttavg += n >> 2;
704 }
705 
706 static struct aoetgt *
707 gettgt(struct aoedev *d, char *addr)
708 {
709 	struct aoetgt **t, **e;
710 
711 	t = d->targets;
712 	e = t + NTARGETS;
713 	for (; t < e && *t; t++)
714 		if (memcmp((*t)->addr, addr, sizeof((*t)->addr)) == 0)
715 			return *t;
716 	return NULL;
717 }
718 
719 static inline void
720 diskstats(struct gendisk *disk, struct bio *bio, ulong duration, sector_t sector)
721 {
722 	unsigned long n_sect = bio->bi_size >> 9;
723 	const int rw = bio_data_dir(bio);
724 	struct hd_struct *part;
725 	int cpu;
726 
727 	cpu = part_stat_lock();
728 	part = disk_map_sector_rcu(disk, sector);
729 
730 	part_stat_inc(cpu, part, ios[rw]);
731 	part_stat_add(cpu, part, ticks[rw], duration);
732 	part_stat_add(cpu, part, sectors[rw], n_sect);
733 	part_stat_add(cpu, part, io_ticks, duration);
734 
735 	part_stat_unlock();
736 }
737 
738 /*
739  * Ensure we don't create aliases in VI caches
740  */
741 static inline void
742 killalias(struct bio *bio)
743 {
744 	struct bio_vec *bv;
745 	int i;
746 
747 	if (bio_data_dir(bio) == READ)
748 		__bio_for_each_segment(bv, bio, i, 0) {
749 			flush_dcache_page(bv->bv_page);
750 		}
751 }
752 
753 void
754 aoecmd_ata_rsp(struct sk_buff *skb)
755 {
756 	struct sk_buff_head queue;
757 	struct aoedev *d;
758 	struct aoe_hdr *hin, *hout;
759 	struct aoe_atahdr *ahin, *ahout;
760 	struct frame *f;
761 	struct buf *buf;
762 	struct aoetgt *t;
763 	struct aoeif *ifp;
764 	register long n;
765 	ulong flags;
766 	char ebuf[128];
767 	u16 aoemajor;
768 
769 	hin = (struct aoe_hdr *) skb_mac_header(skb);
770 	aoemajor = get_unaligned_be16(&hin->major);
771 	d = aoedev_by_aoeaddr(aoemajor, hin->minor);
772 	if (d == NULL) {
773 		snprintf(ebuf, sizeof ebuf, "aoecmd_ata_rsp: ata response "
774 			"for unknown device %d.%d\n",
775 			 aoemajor, hin->minor);
776 		aoechr_error(ebuf);
777 		return;
778 	}
779 
780 	spin_lock_irqsave(&d->lock, flags);
781 
782 	n = get_unaligned_be32(&hin->tag);
783 	t = gettgt(d, hin->src);
784 	if (t == NULL) {
785 		printk(KERN_INFO "aoe: can't find target e%ld.%d:%pm\n",
786 			d->aoemajor, d->aoeminor, hin->src);
787 		spin_unlock_irqrestore(&d->lock, flags);
788 		return;
789 	}
790 	f = getframe(t, n);
791 	if (f == NULL) {
792 		calc_rttavg(d, -tsince(n));
793 		spin_unlock_irqrestore(&d->lock, flags);
794 		snprintf(ebuf, sizeof ebuf,
795 			"%15s e%d.%d    tag=%08x@%08lx\n",
796 			"unexpected rsp",
797 			get_unaligned_be16(&hin->major),
798 			hin->minor,
799 			get_unaligned_be32(&hin->tag),
800 			jiffies);
801 		aoechr_error(ebuf);
802 		return;
803 	}
804 
805 	calc_rttavg(d, tsince(f->tag));
806 
807 	ahin = (struct aoe_atahdr *) (hin+1);
808 	hout = (struct aoe_hdr *) skb_mac_header(f->skb);
809 	ahout = (struct aoe_atahdr *) (hout+1);
810 	buf = f->buf;
811 
812 	if (ahin->cmdstat & 0xa9) {	/* these bits cleared on success */
813 		printk(KERN_ERR
814 			"aoe: ata error cmd=%2.2Xh stat=%2.2Xh from e%ld.%d\n",
815 			ahout->cmdstat, ahin->cmdstat,
816 			d->aoemajor, d->aoeminor);
817 		if (buf)
818 			buf->flags |= BUFFL_FAIL;
819 	} else {
820 		if (d->htgt && t == *d->htgt) /* I'll help myself, thank you. */
821 			d->htgt = NULL;
822 		n = ahout->scnt << 9;
823 		switch (ahout->cmdstat) {
824 		case ATA_CMD_PIO_READ:
825 		case ATA_CMD_PIO_READ_EXT:
826 			if (skb->len - sizeof *hin - sizeof *ahin < n) {
827 				printk(KERN_ERR
828 					"aoe: %s.  skb->len=%d need=%ld\n",
829 					"runt data size in read", skb->len, n);
830 				/* fail frame f?  just returning will rexmit. */
831 				spin_unlock_irqrestore(&d->lock, flags);
832 				return;
833 			}
834 			memcpy(f->bufaddr, ahin+1, n);
835 		case ATA_CMD_PIO_WRITE:
836 		case ATA_CMD_PIO_WRITE_EXT:
837 			ifp = getif(t, skb->dev);
838 			if (ifp) {
839 				ifp->lost = 0;
840 				if (n > DEFAULTBCNT)
841 					ifp->lostjumbo = 0;
842 			}
843 			if (f->bcnt -= n) {
844 				f->lba += n >> 9;
845 				f->bufaddr += n;
846 				resend(d, t, f);
847 				goto xmit;
848 			}
849 			break;
850 		case ATA_CMD_ID_ATA:
851 			if (skb->len - sizeof *hin - sizeof *ahin < 512) {
852 				printk(KERN_INFO
853 					"aoe: runt data size in ataid.  skb->len=%d\n",
854 					skb->len);
855 				spin_unlock_irqrestore(&d->lock, flags);
856 				return;
857 			}
858 			ataid_complete(d, t, (char *) (ahin+1));
859 			break;
860 		default:
861 			printk(KERN_INFO
862 				"aoe: unrecognized ata command %2.2Xh for %d.%d\n",
863 				ahout->cmdstat,
864 				get_unaligned_be16(&hin->major),
865 				hin->minor);
866 		}
867 	}
868 
869 	if (buf && --buf->nframesout == 0 && buf->resid == 0) {
870 		diskstats(d->gd, buf->bio, jiffies - buf->stime, buf->sector);
871 		if (buf->flags & BUFFL_FAIL)
872 			bio_endio(buf->bio, -EIO);
873 		else {
874 			killalias(buf->bio);
875 			bio_endio(buf->bio, 0);
876 		}
877 		mempool_free(buf, d->bufpool);
878 	}
879 
880 	f->buf = NULL;
881 	f->tag = FREETAG;
882 	t->nout--;
883 
884 	aoecmd_work(d);
885 xmit:
886 	__skb_queue_head_init(&queue);
887 	skb_queue_splice_init(&d->sendq, &queue);
888 
889 	spin_unlock_irqrestore(&d->lock, flags);
890 	aoenet_xmit(&queue);
891 }
892 
893 void
894 aoecmd_cfg(ushort aoemajor, unsigned char aoeminor)
895 {
896 	struct sk_buff_head queue;
897 
898 	__skb_queue_head_init(&queue);
899 	aoecmd_cfg_pkts(aoemajor, aoeminor, &queue);
900 	aoenet_xmit(&queue);
901 }
902 
903 struct sk_buff *
904 aoecmd_ata_id(struct aoedev *d)
905 {
906 	struct aoe_hdr *h;
907 	struct aoe_atahdr *ah;
908 	struct frame *f;
909 	struct sk_buff *skb;
910 	struct aoetgt *t;
911 
912 	f = freeframe(d);
913 	if (f == NULL)
914 		return NULL;
915 
916 	t = *d->tgt;
917 
918 	/* initialize the headers & frame */
919 	skb = f->skb;
920 	h = (struct aoe_hdr *) skb_mac_header(skb);
921 	ah = (struct aoe_atahdr *) (h+1);
922 	skb_put(skb, sizeof *h + sizeof *ah);
923 	memset(h, 0, skb->len);
924 	f->tag = aoehdr_atainit(d, t, h);
925 	t->nout++;
926 	f->waited = 0;
927 
928 	/* set up ata header */
929 	ah->scnt = 1;
930 	ah->cmdstat = ATA_CMD_ID_ATA;
931 	ah->lba3 = 0xa0;
932 
933 	skb->dev = t->ifp->nd;
934 
935 	d->rttavg = MAXTIMER;
936 	d->timer.function = rexmit_timer;
937 
938 	return skb_clone(skb, GFP_ATOMIC);
939 }
940 
941 static struct aoetgt *
942 addtgt(struct aoedev *d, char *addr, ulong nframes)
943 {
944 	struct aoetgt *t, **tt, **te;
945 	struct frame *f, *e;
946 
947 	tt = d->targets;
948 	te = tt + NTARGETS;
949 	for (; tt < te && *tt; tt++)
950 		;
951 
952 	if (tt == te) {
953 		printk(KERN_INFO
954 			"aoe: device addtgt failure; too many targets\n");
955 		return NULL;
956 	}
957 	t = kcalloc(1, sizeof *t, GFP_ATOMIC);
958 	f = kcalloc(nframes, sizeof *f, GFP_ATOMIC);
959 	if (!t || !f) {
960 		kfree(f);
961 		kfree(t);
962 		printk(KERN_INFO "aoe: cannot allocate memory to add target\n");
963 		return NULL;
964 	}
965 
966 	t->nframes = nframes;
967 	t->frames = f;
968 	e = f + nframes;
969 	for (; f < e; f++)
970 		f->tag = FREETAG;
971 	memcpy(t->addr, addr, sizeof t->addr);
972 	t->ifp = t->ifs;
973 	t->maxout = t->nframes;
974 	return *tt = t;
975 }
976 
977 void
978 aoecmd_cfg_rsp(struct sk_buff *skb)
979 {
980 	struct aoedev *d;
981 	struct aoe_hdr *h;
982 	struct aoe_cfghdr *ch;
983 	struct aoetgt *t;
984 	struct aoeif *ifp;
985 	ulong flags, sysminor, aoemajor;
986 	struct sk_buff *sl;
987 	u16 n;
988 
989 	h = (struct aoe_hdr *) skb_mac_header(skb);
990 	ch = (struct aoe_cfghdr *) (h+1);
991 
992 	/*
993 	 * Enough people have their dip switches set backwards to
994 	 * warrant a loud message for this special case.
995 	 */
996 	aoemajor = get_unaligned_be16(&h->major);
997 	if (aoemajor == 0xfff) {
998 		printk(KERN_ERR "aoe: Warning: shelf address is all ones.  "
999 			"Check shelf dip switches.\n");
1000 		return;
1001 	}
1002 
1003 	sysminor = SYSMINOR(aoemajor, h->minor);
1004 	if (sysminor * AOE_PARTITIONS + AOE_PARTITIONS > MINORMASK) {
1005 		printk(KERN_INFO "aoe: e%ld.%d: minor number too large\n",
1006 			aoemajor, (int) h->minor);
1007 		return;
1008 	}
1009 
1010 	n = be16_to_cpu(ch->bufcnt);
1011 	if (n > aoe_maxout)	/* keep it reasonable */
1012 		n = aoe_maxout;
1013 
1014 	d = aoedev_by_sysminor_m(sysminor);
1015 	if (d == NULL) {
1016 		printk(KERN_INFO "aoe: device sysminor_m failure\n");
1017 		return;
1018 	}
1019 
1020 	spin_lock_irqsave(&d->lock, flags);
1021 
1022 	t = gettgt(d, h->src);
1023 	if (!t) {
1024 		t = addtgt(d, h->src, n);
1025 		if (!t) {
1026 			spin_unlock_irqrestore(&d->lock, flags);
1027 			return;
1028 		}
1029 	}
1030 	ifp = getif(t, skb->dev);
1031 	if (!ifp) {
1032 		ifp = addif(t, skb->dev);
1033 		if (!ifp) {
1034 			printk(KERN_INFO
1035 				"aoe: device addif failure; "
1036 				"too many interfaces?\n");
1037 			spin_unlock_irqrestore(&d->lock, flags);
1038 			return;
1039 		}
1040 	}
1041 	if (ifp->maxbcnt) {
1042 		n = ifp->nd->mtu;
1043 		n -= sizeof (struct aoe_hdr) + sizeof (struct aoe_atahdr);
1044 		n /= 512;
1045 		if (n > ch->scnt)
1046 			n = ch->scnt;
1047 		n = n ? n * 512 : DEFAULTBCNT;
1048 		if (n != ifp->maxbcnt) {
1049 			printk(KERN_INFO
1050 				"aoe: e%ld.%d: setting %d%s%s:%pm\n",
1051 				d->aoemajor, d->aoeminor, n,
1052 				" byte data frames on ", ifp->nd->name,
1053 				t->addr);
1054 			ifp->maxbcnt = n;
1055 		}
1056 	}
1057 
1058 	/* don't change users' perspective */
1059 	if (d->nopen) {
1060 		spin_unlock_irqrestore(&d->lock, flags);
1061 		return;
1062 	}
1063 	d->fw_ver = be16_to_cpu(ch->fwver);
1064 
1065 	sl = aoecmd_ata_id(d);
1066 
1067 	spin_unlock_irqrestore(&d->lock, flags);
1068 
1069 	if (sl) {
1070 		struct sk_buff_head queue;
1071 		__skb_queue_head_init(&queue);
1072 		__skb_queue_tail(&queue, sl);
1073 		aoenet_xmit(&queue);
1074 	}
1075 }
1076 
1077 void
1078 aoecmd_cleanslate(struct aoedev *d)
1079 {
1080 	struct aoetgt **t, **te;
1081 	struct aoeif *p, *e;
1082 
1083 	d->mintimer = MINTIMER;
1084 
1085 	t = d->targets;
1086 	te = t + NTARGETS;
1087 	for (; t < te && *t; t++) {
1088 		(*t)->maxout = (*t)->nframes;
1089 		p = (*t)->ifs;
1090 		e = p + NAOEIFS;
1091 		for (; p < e; p++) {
1092 			p->lostjumbo = 0;
1093 			p->lost = 0;
1094 			p->maxbcnt = DEFAULTBCNT;
1095 		}
1096 	}
1097 }
1098