xref: /linux/drivers/block/aoe/aoecmd.c (revision 858259cf7d1c443c836a2022b78cb281f0a9b95e)
/* Copyright (c) 2004 Coraid, Inc.  See COPYING for GPL terms. */
/*
 * aoecmd.c
 * Filesystem request handling methods
 */

#include <linux/hdreg.h>
#include <linux/blkdev.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <asm/unaligned.h>
#include "aoe.h"

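/* The timer/timeout constants below are in jiffies, except MAXWAIT,
 * which is in seconds (rexmit_timer divides a frame's accumulated wait
 * by HZ before comparing against it).
 */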
#define TIMERTICK (HZ / 10)
#define MINTIMER (2 * TIMERTICK)
#define MAXTIMER (HZ << 1)
#define MAXWAIT (60 * 3)	/* After MAXWAIT seconds, give up and fail dev */

static struct sk_buff *
new_skb(struct net_device *if_dev, ulong len)
{
	struct sk_buff *skb;

	skb = alloc_skb(len, GFP_ATOMIC);
	if (skb) {
		skb->nh.raw = skb->mac.raw = skb->data;
		skb->dev = if_dev;
		skb->protocol = __constant_htons(ETH_P_AOE);
		skb->priority = 0;
		skb_put(skb, len);
		skb->next = skb->prev = NULL;

		/* tell the network layer not to perform IP checksums
		 * or to get the NIC to do it
		 */
		skb->ip_summed = CHECKSUM_NONE;
	}
	return skb;
}

static struct sk_buff *
skb_prepare(struct aoedev *d, struct frame *f)
{
	struct sk_buff *skb;
	char *p;

	skb = new_skb(d->ifp, f->ndata + f->writedatalen);
	if (!skb) {
		printk(KERN_INFO "aoe: skb_prepare: failure to allocate skb\n");
		return NULL;
	}

	p = skb->mac.raw;
	memcpy(p, f->data, f->ndata);

	if (f->writedatalen) {
		p += sizeof(struct aoe_hdr) + sizeof(struct aoe_atahdr);
		memcpy(p, f->bufaddr, f->writedatalen);
	}

	return skb;
}

static struct frame *
getframe(struct aoedev *d, int tag)
{
	struct frame *f, *e;

	f = d->frames;
	e = f + d->nframes;
	for (; f<e; f++)
		if (f->tag == tag)
			return f;
	return NULL;
}

/*
 * Leave the top bit clear so we have tagspace for userland.
 * The bottom 16 bits are the xmit tick for rexmit/rttavg processing.
 * This driver reserves tag -1 to mean "unused frame."
 */
static int
newtag(struct aoedev *d)
{
	register ulong n;

	n = jiffies & 0xffff;
	return n |= (++d->lasttag & 0x7fff) << 16;
}
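
/* For example, with d->lasttag == 0x1234 and jiffies == 0xdeadbeef,
 * newtag() yields 0x1235beef: the low 16 bits stamp the xmit tick that
 * tsince() later subtracts, bits 16-30 carry the incremented sequence
 * number, and bit 31 stays clear for userland tags.
 */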

static int
aoehdr_atainit(struct aoedev *d, struct aoe_hdr *h)
{
	u32 host_tag = newtag(d);

	memcpy(h->src, d->ifp->dev_addr, sizeof h->src);
	memcpy(h->dst, d->addr, sizeof h->dst);
	h->type = __constant_cpu_to_be16(ETH_P_AOE);
	h->verfl = AOE_HVER;
	h->major = cpu_to_be16(d->aoemajor);
	h->minor = d->aoeminor;
	h->cmd = AOECMD_ATA;
	h->tag = cpu_to_be32(host_tag);

	return host_tag;
}

static void
aoecmd_ata_rw(struct aoedev *d, struct frame *f)
{
	struct aoe_hdr *h;
	struct aoe_atahdr *ah;
	struct buf *buf;
	struct sk_buff *skb;
	ulong bcnt;
	register sector_t sector;
	char writebit, extbit;

	writebit = 0x10;
	extbit = 0x4;

	buf = d->inprocess;

	sector = buf->sector;
	bcnt = buf->bv_resid;
	if (bcnt > MAXATADATA)
		bcnt = MAXATADATA;

	/* initialize the headers & frame */
	h = (struct aoe_hdr *) f->data;
	ah = (struct aoe_atahdr *) (h+1);
	f->ndata = sizeof *h + sizeof *ah;
	memset(h, 0, f->ndata);
	f->tag = aoehdr_atainit(d, h);
	f->waited = 0;
	f->buf = buf;
	f->bufaddr = buf->bufaddr;

	/* set up ata header */
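	/* The sector number is packed a byte at a time into lba0..lba5.
	 * With LBA48 (DEVFL_EXT) all six bytes are used; otherwise only
	 * the low 28 bits are kept and the high nibble of lba3 is set to
	 * 0xe0 (the LBA bit plus the obsolete 0xa0 select bits).
	 */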
	ah->scnt = bcnt >> 9;
	ah->lba0 = sector;
	ah->lba1 = sector >>= 8;
	ah->lba2 = sector >>= 8;
	ah->lba3 = sector >>= 8;
	if (d->flags & DEVFL_EXT) {
		ah->aflags |= AOEAFL_EXT;
		ah->lba4 = sector >>= 8;
		ah->lba5 = sector >>= 8;
	} else {
		extbit = 0;
		ah->lba3 &= 0x0f;
		ah->lba3 |= 0xe0;	/* LBA bit + obsolete 0xa0 */
	}

	if (bio_data_dir(buf->bio) == WRITE) {
		ah->aflags |= AOEAFL_WRITE;
		f->writedatalen = bcnt;
	} else {
		writebit = 0;
		f->writedatalen = 0;
	}

	ah->cmdstat = WIN_READ | writebit | extbit;

	/* mark all tracking fields and load out */
	buf->nframesout += 1;
	buf->bufaddr += bcnt;
	buf->bv_resid -= bcnt;
/* printk(KERN_INFO "aoe: bv_resid=%ld\n", buf->bv_resid); */
	buf->resid -= bcnt;
	buf->sector += bcnt >> 9;
	if (buf->resid == 0) {
		d->inprocess = NULL;
	} else if (buf->bv_resid == 0) {
		buf->bv++;
		buf->bv_resid = buf->bv->bv_len;
		buf->bufaddr = page_address(buf->bv->bv_page) + buf->bv->bv_offset;
	}

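	/* Prepare the frame's skb and append it to the per-device send
	 * queue; callers drain d->sendq_hd and pass it to aoenet_xmit()
	 * after dropping d->lock.
	 */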
	skb = skb_prepare(d, f);
	if (skb) {
		skb->next = NULL;
		if (d->sendq_hd)
			d->sendq_tl->next = skb;
		else
			d->sendq_hd = skb;
		d->sendq_tl = skb;
	}
}

/* enters with d->lock held */
void
aoecmd_work(struct aoedev *d)
{
	struct frame *f;
	struct buf *buf;
loop:
	f = getframe(d, FREETAG);
	if (f == NULL)
		return;
	if (d->inprocess == NULL) {
		if (list_empty(&d->bufq))
			return;
		buf = container_of(d->bufq.next, struct buf, bufs);
		list_del(d->bufq.next);
/*printk(KERN_INFO "aoecmd_work: bi_size=%ld\n", buf->bio->bi_size); */
		d->inprocess = buf;
	}
	aoecmd_ata_rw(d, f);
	goto loop;
}

static void
rexmit(struct aoedev *d, struct frame *f)
{
	struct sk_buff *skb;
	struct aoe_hdr *h;
	char buf[128];
	u32 n;

	n = newtag(d);

	snprintf(buf, sizeof buf,
		"%15s e%ld.%ld oldtag=%08x@%08lx newtag=%08x\n",
		"retransmit",
		d->aoemajor, d->aoeminor, f->tag, jiffies, n);
	aoechr_error(buf);

	h = (struct aoe_hdr *) f->data;
	f->tag = n;
	h->tag = cpu_to_be32(n);

	skb = skb_prepare(d, f);
	if (skb) {
		skb->next = NULL;
		if (d->sendq_hd)
			d->sendq_tl->next = skb;
		else
			d->sendq_hd = skb;
		d->sendq_tl = skb;
	}
}

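/* Return the number of jiffies (mod 1<<16) since the tick recorded in
 * the low 16 bits of the given tag by newtag().
 */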
static int
tsince(int tag)
{
	int n;

	n = jiffies & 0xffff;
	n -= tag & 0xffff;
	if (n < 0)
		n += 1<<16;
	return n;
}

static void
rexmit_timer(ulong vp)
{
	struct aoedev *d;
	struct frame *f, *e;
	struct sk_buff *sl;
	register long timeout;
	ulong flags, n;

	d = (struct aoedev *) vp;
	sl = NULL;

	/* timeout is always ~150% of the moving average */
	timeout = d->rttavg;
	timeout += timeout >> 1;

	spin_lock_irqsave(&d->lock, flags);

	if (d->flags & DEVFL_TKILL) {
tdie:		spin_unlock_irqrestore(&d->lock, flags);
		return;
	}
	f = d->frames;
	e = f + d->nframes;
	for (; f<e; f++) {
		if (f->tag != FREETAG && tsince(f->tag) >= timeout) {
			n = f->waited += timeout;
			n /= HZ;
			if (n > MAXWAIT) { /* waited too long.  device failure. */
				aoedev_downdev(d);
				goto tdie;
			}
			rexmit(d, f);
		}
	}

	sl = d->sendq_hd;
	d->sendq_hd = d->sendq_tl = NULL;
	if (sl) {
		n = d->rttavg <<= 1;
		if (n > MAXTIMER)
			d->rttavg = MAXTIMER;
	}

	d->timer.expires = jiffies + TIMERTICK;
	add_timer(&d->timer);

	spin_unlock_irqrestore(&d->lock, flags);

	aoenet_xmit(sl);
}

static void
ataid_complete(struct aoedev *d, unsigned char *id)
{
	u64 ssize;
	u16 n;

	/* word 83: command set supported */
	n = le16_to_cpu(get_unaligned((__le16 *) &id[83<<1]));

	/* word 86: command set/feature enabled */
	n |= le16_to_cpu(get_unaligned((__le16 *) &id[86<<1]));

	if (n & (1<<10)) {	/* bit 10: LBA 48 */
		d->flags |= DEVFL_EXT;

		/* word 100: number lba48 sectors */
		ssize = le64_to_cpu(get_unaligned((__le64 *) &id[100<<1]));

		/* set as in ide-disk.c:init_idedisk_capacity */
		d->geo.cylinders = ssize;
		d->geo.cylinders /= (255 * 63);
		d->geo.heads = 255;
		d->geo.sectors = 63;
	} else {
		d->flags &= ~DEVFL_EXT;

		/* number lba28 sectors */
		ssize = le32_to_cpu(get_unaligned((__le32 *) &id[60<<1]));

		/* NOTE: obsolete in ATA 6 */
		d->geo.cylinders = le16_to_cpu(get_unaligned((__le16 *) &id[54<<1]));
		d->geo.heads = le16_to_cpu(get_unaligned((__le16 *) &id[55<<1]));
		d->geo.sectors = le16_to_cpu(get_unaligned((__le16 *) &id[56<<1]));
	}
	d->ssize = ssize;
	d->geo.start = 0;
	if (d->gd != NULL) {
		d->gd->capacity = ssize;
		d->flags |= DEVFL_UP;
		return;
	}
	if (d->flags & DEVFL_WORKON) {
		printk(KERN_INFO "aoe: ataid_complete: can't schedule work, it's already on!  "
			"(This really shouldn't happen).\n");
		return;
	}
	INIT_WORK(&d->work, aoeblk_gdalloc, d);
	schedule_work(&d->work);
	d->flags |= DEVFL_WORKON;
}

static void
calc_rttavg(struct aoedev *d, int rtt)
{
	register long n;

	n = rtt;
	if (n < MINTIMER)
		n = MINTIMER;
	else if (n > MAXTIMER)
		n = MAXTIMER;

	/* g == .25; cf. Congestion Avoidance and Control, Jacobson & Karels; 1988 */
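	/* i.e. rttavg += (rtt - rttavg) / 4: an exponentially weighted
	 * moving average of the clamped round-trip time with gain 1/4.
	 */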
	n -= d->rttavg;
	d->rttavg += n >> 2;
}

void
aoecmd_ata_rsp(struct sk_buff *skb)
{
	struct aoedev *d;
	struct aoe_hdr *hin;
	struct aoe_atahdr *ahin, *ahout;
	struct frame *f;
	struct buf *buf;
	struct sk_buff *sl;
	register long n;
	ulong flags;
	char ebuf[128];
	u16 aoemajor;

	hin = (struct aoe_hdr *) skb->mac.raw;
	aoemajor = be16_to_cpu(hin->major);
	d = aoedev_by_aoeaddr(aoemajor, hin->minor);
	if (d == NULL) {
		snprintf(ebuf, sizeof ebuf, "aoecmd_ata_rsp: ata response "
			"for unknown device %d.%d\n",
			 aoemajor, hin->minor);
		aoechr_error(ebuf);
		return;
	}

	spin_lock_irqsave(&d->lock, flags);

	f = getframe(d, be32_to_cpu(hin->tag));
	if (f == NULL) {
		spin_unlock_irqrestore(&d->lock, flags);
		snprintf(ebuf, sizeof ebuf,
			"%15s e%d.%d    tag=%08x@%08lx\n",
			"unexpected rsp",
			be16_to_cpu(hin->major),
			hin->minor,
			be32_to_cpu(hin->tag),
			jiffies);
		aoechr_error(ebuf);
		return;
	}

	calc_rttavg(d, tsince(f->tag));

	ahin = (struct aoe_atahdr *) (hin+1);
	ahout = (struct aoe_atahdr *) (f->data + sizeof(struct aoe_hdr));
	buf = f->buf;

	if (ahin->cmdstat & 0xa9) {	/* these bits cleared on success */
		printk(KERN_CRIT "aoe: aoecmd_ata_rsp: ata error cmd=%2.2Xh "
			"stat=%2.2Xh from e%ld.%ld\n",
			ahout->cmdstat, ahin->cmdstat,
			d->aoemajor, d->aoeminor);
		if (buf)
			buf->flags |= BUFFL_FAIL;
	} else {
		switch (ahout->cmdstat) {
		case WIN_READ:
		case WIN_READ_EXT:
			n = ahout->scnt << 9;
			if (skb->len - sizeof *hin - sizeof *ahin < n) {
				printk(KERN_CRIT "aoe: aoecmd_ata_rsp: runt "
					"ata data size in read.  skb->len=%d\n",
					skb->len);
				/* fail frame f?  just returning will rexmit. */
				spin_unlock_irqrestore(&d->lock, flags);
				return;
			}
			memcpy(f->bufaddr, ahin+1, n);
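			/* fall through: reads share the break below */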
		case WIN_WRITE:
		case WIN_WRITE_EXT:
			break;
		case WIN_IDENTIFY:
			if (skb->len - sizeof *hin - sizeof *ahin < 512) {
				printk(KERN_INFO "aoe: aoecmd_ata_rsp: runt data size "
					"in ataid.  skb->len=%d\n", skb->len);
				spin_unlock_irqrestore(&d->lock, flags);
				return;
			}
			ataid_complete(d, (char *) (ahin+1));
			/* d->flags |= DEVFL_WC_UPDATE; */
			break;
		default:
			printk(KERN_INFO "aoe: aoecmd_ata_rsp: unrecognized "
			       "outbound ata command %2.2Xh for %d.%d\n",
			       ahout->cmdstat,
			       be16_to_cpu(hin->major),
			       hin->minor);
		}
	}

	if (buf) {
		buf->nframesout -= 1;
		if (buf->nframesout == 0 && buf->resid == 0) {
			unsigned long duration = jiffies - buf->start_time;
			unsigned long n_sect = buf->bio->bi_size >> 9;
			struct gendisk *disk = d->gd;
			const int rw = bio_data_dir(buf->bio);

			disk_stat_inc(disk, ios[rw]);
			disk_stat_add(disk, ticks[rw], duration);
			disk_stat_add(disk, sectors[rw], n_sect);
			disk_stat_add(disk, io_ticks, duration);
			n = (buf->flags & BUFFL_FAIL) ? -EIO : 0;
			bio_endio(buf->bio, buf->bio->bi_size, n);
			mempool_free(buf, d->bufpool);
		}
	}

	f->buf = NULL;
	f->tag = FREETAG;

	aoecmd_work(d);

	sl = d->sendq_hd;
	d->sendq_hd = d->sendq_tl = NULL;

	spin_unlock_irqrestore(&d->lock, flags);

	aoenet_xmit(sl);
}

void
aoecmd_cfg(ushort aoemajor, unsigned char aoeminor)
{
	struct aoe_hdr *h;
	struct aoe_cfghdr *ch;
	struct sk_buff *skb, *sl;
	struct net_device *ifp;

	sl = NULL;

	read_lock(&dev_base_lock);
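	/* Walk every interface, holding a reference across the loop body;
	 * the reference is dropped in the loop's increment expression.
	 * Only interfaces accepted by is_aoe_netif() get a config query.
	 */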
	for (ifp = dev_base; ifp; dev_put(ifp), ifp = ifp->next) {
		dev_hold(ifp);
		if (!is_aoe_netif(ifp))
			continue;

		skb = new_skb(ifp, sizeof *h + sizeof *ch);
		if (skb == NULL) {
			printk(KERN_INFO "aoe: aoecmd_cfg: skb alloc failure\n");
			continue;
		}
		h = (struct aoe_hdr *) skb->mac.raw;
		memset(h, 0, sizeof *h + sizeof *ch);

		memset(h->dst, 0xff, sizeof h->dst);
		memcpy(h->src, ifp->dev_addr, sizeof h->src);
		h->type = __constant_cpu_to_be16(ETH_P_AOE);
		h->verfl = AOE_HVER;
		h->major = cpu_to_be16(aoemajor);
		h->minor = aoeminor;
		h->cmd = AOECMD_CFG;

		skb->next = sl;
		sl = skb;
	}
	read_unlock(&dev_base_lock);

	aoenet_xmit(sl);
}

/*
 * Since we only call this in one place (and it only prepares one frame)
 * we just return the skb.  Usually we'd chain it up to the aoedev sendq.
 */
static struct sk_buff *
aoecmd_ata_id(struct aoedev *d)
{
	struct aoe_hdr *h;
	struct aoe_atahdr *ah;
	struct frame *f;
	struct sk_buff *skb;

	f = getframe(d, FREETAG);
	if (f == NULL) {
		printk(KERN_CRIT "aoe: aoecmd_ata_id: can't get a frame.  "
			"This shouldn't happen.\n");
		return NULL;
	}

	/* initialize the headers & frame */
	h = (struct aoe_hdr *) f->data;
	ah = (struct aoe_atahdr *) (h+1);
	f->ndata = sizeof *h + sizeof *ah;
	memset(h, 0, f->ndata);
	f->tag = aoehdr_atainit(d, h);
	f->waited = 0;
	f->writedatalen = 0;

	/* this message initializes the device, so we reset the rttavg */
	d->rttavg = MAXTIMER;

	/* set up ata header */
	ah->scnt = 1;
	ah->cmdstat = WIN_IDENTIFY;
	ah->lba3 = 0xa0;

	skb = skb_prepare(d, f);

	/* we now want to start the rexmit tracking */
	d->flags &= ~DEVFL_TKILL;
	d->timer.data = (ulong) d;
	d->timer.function = rexmit_timer;
	d->timer.expires = jiffies + TIMERTICK;
	add_timer(&d->timer);

	return skb;
}

void
aoecmd_cfg_rsp(struct sk_buff *skb)
{
	struct aoedev *d;
	struct aoe_hdr *h;
	struct aoe_cfghdr *ch;
	ulong flags, sysminor, aoemajor;
	u16 bufcnt;
	struct sk_buff *sl;
	enum { MAXFRAMES = 8 };

	h = (struct aoe_hdr *) skb->mac.raw;
	ch = (struct aoe_cfghdr *) (h+1);

	/*
	 * Enough people have their dip switches set backwards to
	 * warrant a loud message for this special case.
	 */
	aoemajor = be16_to_cpu(h->major);
	if (aoemajor == 0xfff) {
		printk(KERN_CRIT "aoe: aoecmd_cfg_rsp: Warning: shelf "
			"address is all ones.  Check shelf dip switches\n");
		return;
	}

	sysminor = SYSMINOR(aoemajor, h->minor);
	if (sysminor * AOE_PARTITIONS + AOE_PARTITIONS > MINORMASK) {
		printk(KERN_INFO
			"aoe: e%ld.%d: minor number too large\n",
			aoemajor, (int) h->minor);
		return;
	}

	bufcnt = be16_to_cpu(ch->bufcnt);
	if (bufcnt > MAXFRAMES)	/* keep it reasonable */
		bufcnt = MAXFRAMES;

	d = aoedev_set(sysminor, h->src, skb->dev, bufcnt);
	if (d == NULL) {
		printk(KERN_INFO "aoe: aoecmd_cfg_rsp: device set failure\n");
		return;
	}

	spin_lock_irqsave(&d->lock, flags);

	if (d->flags & (DEVFL_UP | DEVFL_CLOSEWAIT)) {
		spin_unlock_irqrestore(&d->lock, flags);
		return;
	}

	d->fw_ver = be16_to_cpu(ch->fwver);

	/* we get here only if the device is new */
	sl = aoecmd_ata_id(d);

	spin_unlock_irqrestore(&d->lock, flags);

	aoenet_xmit(sl);
}