xref: /linux/drivers/block/aoe/aoecmd.c (revision 776cfebb430c7b22c208b1b17add97f354d97cab)
1 /* Copyright (c) 2004 Coraid, Inc.  See COPYING for GPL terms. */
2 /*
3  * aoecmd.c
4  * Filesystem request handling methods
5  */
6 
7 #include <linux/hdreg.h>
8 #include <linux/blkdev.h>
9 #include <linux/skbuff.h>
10 #include <linux/netdevice.h>
11 #include "aoe.h"
12 
13 #define TIMERTICK (HZ / 10)
14 #define MINTIMER (2 * TIMERTICK)
15 #define MAXTIMER (HZ << 1)
16 #define MAXWAIT (60 * 3)	/* After MAXWAIT seconds, give up and fail dev */
17 
18 static struct sk_buff *
19 new_skb(struct net_device *if_dev, ulong len)
20 {
21 	struct sk_buff *skb;
22 
23 	skb = alloc_skb(len, GFP_ATOMIC);
24 	if (skb) {
25 		skb->nh.raw = skb->mac.raw = skb->data;
26 		skb->dev = if_dev;
27 		skb->protocol = __constant_htons(ETH_P_AOE);
28 		skb->priority = 0;
29 		skb_put(skb, len);
30 		skb->next = skb->prev = NULL;
31 
32 		/* tell the network layer not to perform IP checksums
33 		 * or to get the NIC to do it
34 		 */
35 		skb->ip_summed = CHECKSUM_NONE;
36 	}
37 	return skb;
38 }
39 
40 static struct sk_buff *
41 skb_prepare(struct aoedev *d, struct frame *f)
42 {
43 	struct sk_buff *skb;
44 	char *p;
45 
46 	skb = new_skb(d->ifp, f->ndata + f->writedatalen);
47 	if (!skb) {
48 		printk(KERN_INFO "aoe: skb_prepare: failure to allocate skb\n");
49 		return NULL;
50 	}
51 
52 	p = skb->mac.raw;
53 	memcpy(p, f->data, f->ndata);
54 
55 	if (f->writedatalen) {
56 		p += sizeof(struct aoe_hdr) + sizeof(struct aoe_atahdr);
57 		memcpy(p, f->bufaddr, f->writedatalen);
58 	}
59 
60 	return skb;
61 }
62 
63 static struct frame *
64 getframe(struct aoedev *d, int tag)
65 {
66 	struct frame *f, *e;
67 
68 	f = d->frames;
69 	e = f + d->nframes;
70 	for (; f<e; f++)
71 		if (f->tag == tag)
72 			return f;
73 	return NULL;
74 }
75 
76 /*
77  * Leave the top bit clear so we have tagspace for userland.
78  * The bottom 16 bits are the xmit tick for rexmit/rttavg processing.
79  * This driver reserves tag -1 to mean "unused frame."
80  */
81 static int
82 newtag(struct aoedev *d)
83 {
84 	register ulong n;
85 
86 	n = jiffies & 0xffff;
87 	return n |= (++d->lasttag & 0x7fff) << 16;
88 }
89 
90 static int
91 aoehdr_atainit(struct aoedev *d, struct aoe_hdr *h)
92 {
93 	u32 host_tag = newtag(d);
94 
95 	memcpy(h->src, d->ifp->dev_addr, sizeof h->src);
96 	memcpy(h->dst, d->addr, sizeof h->dst);
97 	h->type = __constant_cpu_to_be16(ETH_P_AOE);
98 	h->verfl = AOE_HVER;
99 	h->major = cpu_to_be16(d->aoemajor);
100 	h->minor = d->aoeminor;
101 	h->cmd = AOECMD_ATA;
102 	h->tag = cpu_to_be32(host_tag);
103 
104 	return host_tag;
105 }
106 
/* Build and queue one ATA read/write frame for the buf currently in
 * process on d, advancing the buf's progress counters and appending the
 * resulting skb to the device send queue.  Caller holds d->lock and
 * ensures d->inprocess is non-NULL (see aoecmd_work).
 */
static void
aoecmd_ata_rw(struct aoedev *d, struct frame *f)
{
	struct aoe_hdr *h;
	struct aoe_atahdr *ah;
	struct buf *buf;
	struct sk_buff *skb;
	ulong bcnt;
	register sector_t sector;
	char writebit, extbit;

	writebit = 0x10;	/* WIN_READ | 0x10 == WIN_WRITE */
	extbit = 0x4;		/* WIN_READ | 0x04 == WIN_READ_EXT (lba48) */

	buf = d->inprocess;

	sector = buf->sector;
	bcnt = buf->bv_resid;
	if (bcnt > MAXATADATA)	/* at most one AoE data payload per frame */
		bcnt = MAXATADATA;

	/* initialize the headers & frame */
	h = (struct aoe_hdr *) f->data;
	ah = (struct aoe_atahdr *) (h+1);
	f->ndata = sizeof *h + sizeof *ah;
	memset(h, 0, f->ndata);
	f->tag = aoehdr_atainit(d, h);
	f->waited = 0;
	f->buf = buf;
	f->bufaddr = buf->bufaddr;

	/* set up ata header; bcnt is sector-aligned so scnt is exact */
	ah->scnt = bcnt >> 9;
	ah->lba0 = sector;
	ah->lba1 = sector >>= 8;
	ah->lba2 = sector >>= 8;
	ah->lba3 = sector >>= 8;
	if (d->flags & DEVFL_EXT) {
		ah->aflags |= AOEAFL_EXT;
		ah->lba4 = sector >>= 8;
		ah->lba5 = sector >>= 8;
	} else {
		extbit = 0;
		ah->lba3 &= 0x0f;
		ah->lba3 |= 0xe0;	/* LBA bit + obsolete 0xa0 */
	}

	if (bio_data_dir(buf->bio) == WRITE) {
		ah->aflags |= AOEAFL_WRITE;
		f->writedatalen = bcnt;
	} else {
		writebit = 0;
		f->writedatalen = 0;
	}

	/* the two bits above turn WIN_READ into the matching
	 * write and/or lba48 opcode
	 */
	ah->cmdstat = WIN_READ | writebit | extbit;

	/* mark all tracking fields and load out */
	buf->nframesout += 1;
	buf->bufaddr += bcnt;
	buf->bv_resid -= bcnt;
/* printk(KERN_INFO "aoe: bv_resid=%ld\n", buf->bv_resid); */
	buf->resid -= bcnt;
	buf->sector += bcnt >> 9;
	if (buf->resid == 0) {
		d->inprocess = NULL;
	} else if (buf->bv_resid == 0) {
		/* current bio_vec exhausted; step to the next segment */
		buf->bv++;
		buf->bv_resid = buf->bv->bv_len;
		buf->bufaddr = page_address(buf->bv->bv_page) + buf->bv->bv_offset;
	}

	skb = skb_prepare(d, f);
	if (skb) {
		/* append to the send queue; if allocation failed above,
		 * the rexmit timer will retry this frame later
		 */
		skb->next = NULL;
		if (d->sendq_hd)
			d->sendq_tl->next = skb;
		else
			d->sendq_hd = skb;
		d->sendq_tl = skb;
	}
}
189 
190 /* enters with d->lock held */
191 void
192 aoecmd_work(struct aoedev *d)
193 {
194 	struct frame *f;
195 	struct buf *buf;
196 loop:
197 	f = getframe(d, FREETAG);
198 	if (f == NULL)
199 		return;
200 	if (d->inprocess == NULL) {
201 		if (list_empty(&d->bufq))
202 			return;
203 		buf = container_of(d->bufq.next, struct buf, bufs);
204 		list_del(d->bufq.next);
205 /*printk(KERN_INFO "aoecmd_work: bi_size=%ld\n", buf->bio->bi_size); */
206 		d->inprocess = buf;
207 	}
208 	aoecmd_ata_rw(d, f);
209 	goto loop;
210 }
211 
212 static void
213 rexmit(struct aoedev *d, struct frame *f)
214 {
215 	struct sk_buff *skb;
216 	struct aoe_hdr *h;
217 	char buf[128];
218 	u32 n;
219 
220 	n = newtag(d);
221 
222 	snprintf(buf, sizeof buf,
223 		"%15s e%ld.%ld oldtag=%08x@%08lx newtag=%08x\n",
224 		"retransmit",
225 		d->aoemajor, d->aoeminor, f->tag, jiffies, n);
226 	aoechr_error(buf);
227 
228 	h = (struct aoe_hdr *) f->data;
229 	f->tag = n;
230 	h->tag = cpu_to_be32(n);
231 
232 	skb = skb_prepare(d, f);
233 	if (skb) {
234 		skb->next = NULL;
235 		if (d->sendq_hd)
236 			d->sendq_tl->next = skb;
237 		else
238 			d->sendq_hd = skb;
239 		d->sendq_tl = skb;
240 	}
241 }
242 
243 static int
244 tsince(int tag)
245 {
246 	int n;
247 
248 	n = jiffies & 0xffff;
249 	n -= tag & 0xffff;
250 	if (n < 0)
251 		n += 1<<16;
252 	return n;
253 }
254 
/* Per-device retransmit timer, re-armed every TIMERTICK.  Retransmits
 * any outstanding frame older than ~150% of the round-trip average,
 * fails the device after MAXWAIT seconds of waiting on one frame, and
 * doubles rttavg (capped at MAXTIMER) whenever a retransmit happened.
 */
static void
rexmit_timer(ulong vp)
{
	struct aoedev *d;
	struct frame *f, *e;
	struct sk_buff *sl;
	register long timeout;
	ulong flags, n;

	d = (struct aoedev *) vp;
	sl = NULL;

	/* timeout is always ~150% of the moving average */
	timeout = d->rttavg;
	timeout += timeout >> 1;

	spin_lock_irqsave(&d->lock, flags);

	if (d->flags & DEVFL_TKILL) {	/* device going away: don't re-arm */
tdie:		spin_unlock_irqrestore(&d->lock, flags);
		return;
	}
	f = d->frames;
	e = f + d->nframes;
	for (; f<e; f++) {
		if (f->tag != FREETAG && tsince(f->tag) >= timeout) {
			n = f->waited += timeout;
			n /= HZ;
			if (n > MAXWAIT) { /* waited too long.  device failure. */
				aoedev_downdev(d);
				goto tdie;
			}
			rexmit(d, f);
		}
	}

	/* take the queued retransmits; transmit after dropping the lock */
	sl = d->sendq_hd;
	d->sendq_hd = d->sendq_tl = NULL;
	if (sl) {
		/* we retransmitted: back off the timeout, capped */
		n = d->rttavg <<= 1;
		if (n > MAXTIMER)
			d->rttavg = MAXTIMER;
	}

	/* re-arm ourselves for the next tick */
	d->timer.expires = jiffies + TIMERTICK;
	add_timer(&d->timer);

	spin_unlock_irqrestore(&d->lock, flags);

	aoenet_xmit(sl);
}
306 
/* Parse a 512-byte ATA IDENTIFY DEVICE response in id, recording the
 * device's size and CHS geometry on d.  If a gendisk already exists the
 * device is simply marked up; otherwise gendisk allocation is scheduled
 * on keventd (it may sleep; we are called under d->lock).
 * NOTE(review): the (__leNN *) casts into id assume suitable alignment
 * of the identify buffer; get_unaligned() would be safer on
 * strict-alignment architectures -- confirm id's alignment.
 */
static void
ataid_complete(struct aoedev *d, unsigned char *id)
{
	u64 ssize;
	u16 n;

	/* word 83: command set supported */
	n = le16_to_cpup((__le16 *) &id[83<<1]);

	/* word 86: command set/feature enabled */
	n |= le16_to_cpup((__le16 *) &id[86<<1]);

	if (n & (1<<10)) {	/* bit 10: LBA 48 */
		d->flags |= DEVFL_EXT;

		/* word 100: number lba48 sectors */
		ssize = le64_to_cpup((__le64 *) &id[100<<1]);

		/* set as in ide-disk.c:init_idedisk_capacity */
		d->geo.cylinders = ssize;
		d->geo.cylinders /= (255 * 63);
		d->geo.heads = 255;
		d->geo.sectors = 63;
	} else {
		d->flags &= ~DEVFL_EXT;

		/* number lba28 sectors */
		ssize = le32_to_cpup((__le32 *) &id[60<<1]);

		/* NOTE: obsolete in ATA 6 */
		d->geo.cylinders = le16_to_cpup((__le16 *) &id[54<<1]);
		d->geo.heads = le16_to_cpup((__le16 *) &id[55<<1]);
		d->geo.sectors = le16_to_cpup((__le16 *) &id[56<<1]);
	}
	d->ssize = ssize;
	d->geo.start = 0;
	if (d->gd != NULL) {
		/* disk already allocated: update capacity, mark usable */
		d->gd->capacity = ssize;
		d->flags |= DEVFL_UP;
		return;
	}
	if (d->flags & DEVFL_WORKON) {
		printk(KERN_INFO "aoe: ataid_complete: can't schedule work, it's already on!  "
			"(This really shouldn't happen).\n");
		return;
	}
	/* gendisk allocation may sleep, so hand it off to a workqueue */
	INIT_WORK(&d->work, aoeblk_gdalloc, d);
	schedule_work(&d->work);
	d->flags |= DEVFL_WORKON;
}
357 
358 static void
359 calc_rttavg(struct aoedev *d, int rtt)
360 {
361 	register long n;
362 
363 	n = rtt;
364 	if (n < MINTIMER)
365 		n = MINTIMER;
366 	else if (n > MAXTIMER)
367 		n = MAXTIMER;
368 
369 	/* g == .25; cf. Congestion Avoidance and Control, Jacobson & Karels; 1988 */
370 	n -= d->rttavg;
371 	d->rttavg += n >> 2;
372 }
373 
/* Handle one inbound ATA response: look up the device and outstanding
 * frame by tag, update the round-trip average, copy read data in or
 * parse an identify response, and complete the bio once all of its
 * frames have returned.  Frees the frame and pumps out more work.
 */
void
aoecmd_ata_rsp(struct sk_buff *skb)
{
	struct aoedev *d;
	struct aoe_hdr *hin;
	struct aoe_atahdr *ahin, *ahout;
	struct frame *f;
	struct buf *buf;
	struct sk_buff *sl;
	register long n;
	ulong flags;
	char ebuf[128];
	u16 aoemajor;

	hin = (struct aoe_hdr *) skb->mac.raw;
	aoemajor = be16_to_cpu(hin->major);
	d = aoedev_by_aoeaddr(aoemajor, hin->minor);
	if (d == NULL) {
		snprintf(ebuf, sizeof ebuf, "aoecmd_ata_rsp: ata response "
			"for unknown device %d.%d\n",
			 aoemajor, hin->minor);
		aoechr_error(ebuf);
		return;
	}

	spin_lock_irqsave(&d->lock, flags);

	f = getframe(d, be32_to_cpu(hin->tag));
	if (f == NULL) {
		/* stale or duplicate response: log it and drop */
		spin_unlock_irqrestore(&d->lock, flags);
		snprintf(ebuf, sizeof ebuf,
			"%15s e%d.%d    tag=%08x@%08lx\n",
			"unexpected rsp",
			be16_to_cpu(hin->major),
			hin->minor,
			be32_to_cpu(hin->tag),
			jiffies);
		aoechr_error(ebuf);
		return;
	}

	calc_rttavg(d, tsince(f->tag));

	ahin = (struct aoe_atahdr *) (hin+1);
	ahout = (struct aoe_atahdr *) (f->data + sizeof(struct aoe_hdr));
	buf = f->buf;

	if (ahin->cmdstat & 0xa9) {	/* these bits cleared on success */
		printk(KERN_CRIT "aoe: aoecmd_ata_rsp: ata error cmd=%2.2Xh "
			"stat=%2.2Xh from e%ld.%ld\n",
			ahout->cmdstat, ahin->cmdstat,
			d->aoemajor, d->aoeminor);
		if (buf)
			buf->flags |= BUFFL_FAIL;
	} else {
		/* dispatch on the command we sent, not on the response */
		switch (ahout->cmdstat) {
		case WIN_READ:
		case WIN_READ_EXT:
			n = ahout->scnt << 9;
			if (skb->len - sizeof *hin - sizeof *ahin < n) {
				printk(KERN_CRIT "aoe: aoecmd_ata_rsp: runt "
					"ata data size in read.  skb->len=%d\n",
					skb->len);
				/* fail frame f?  just returning will rexmit. */
				spin_unlock_irqrestore(&d->lock, flags);
				return;
			}
			memcpy(f->bufaddr, ahin+1, n);
			/* fall thru: reads and writes complete identically */
		case WIN_WRITE:
		case WIN_WRITE_EXT:
			break;
		case WIN_IDENTIFY:
			if (skb->len - sizeof *hin - sizeof *ahin < 512) {
				printk(KERN_INFO "aoe: aoecmd_ata_rsp: runt data size "
					"in ataid.  skb->len=%d\n", skb->len);
				spin_unlock_irqrestore(&d->lock, flags);
				return;
			}
			ataid_complete(d, (char *) (ahin+1));
			/* d->flags |= DEVFL_WC_UPDATE; */
			break;
		default:
			printk(KERN_INFO "aoe: aoecmd_ata_rsp: unrecognized "
			       "outbound ata command %2.2Xh for %d.%d\n",
			       ahout->cmdstat,
			       be16_to_cpu(hin->major),
			       hin->minor);
		}
	}

	if (buf) {
		buf->nframesout -= 1;
		if (buf->nframesout == 0 && buf->resid == 0) {
			/* last frame of this buf: account I/O, end the bio */
			unsigned long duration = jiffies - buf->start_time;
			unsigned long n_sect = buf->bio->bi_size >> 9;
			struct gendisk *disk = d->gd;

			if (bio_data_dir(buf->bio) == WRITE) {
				disk_stat_inc(disk, writes);
				disk_stat_add(disk, write_ticks, duration);
				disk_stat_add(disk, write_sectors, n_sect);
			} else {
				disk_stat_inc(disk, reads);
				disk_stat_add(disk, read_ticks, duration);
				disk_stat_add(disk, read_sectors, n_sect);
			}
			disk_stat_add(disk, io_ticks, duration);
			n = (buf->flags & BUFFL_FAIL) ? -EIO : 0;
			bio_endio(buf->bio, buf->bio->bi_size, n);
			mempool_free(buf, d->bufpool);
		}
	}

	/* recycle the frame and send whatever this response unblocked */
	f->buf = NULL;
	f->tag = FREETAG;

	aoecmd_work(d);

	sl = d->sendq_hd;
	d->sendq_hd = d->sendq_tl = NULL;

	spin_unlock_irqrestore(&d->lock, flags);

	aoenet_xmit(sl);
}
499 
/* Broadcast an AoE config query for aoemajor.aoeminor on every network
 * interface enabled for AoE, then transmit the chained skbs.
 */
void
aoecmd_cfg(ushort aoemajor, unsigned char aoeminor)
{
	struct aoe_hdr *h;
	struct aoe_cfghdr *ch;
	struct sk_buff *skb, *sl;
	struct net_device *ifp;

	sl = NULL;	/* local chain of skbs to transmit */

	read_lock(&dev_base_lock);
	/* each iteration takes a reference at the top of the body and
	 * drops it in the loop increment, so `continue` never leaks one
	 */
	for (ifp = dev_base; ifp; dev_put(ifp), ifp = ifp->next) {
		dev_hold(ifp);
		if (!is_aoe_netif(ifp))
			continue;

		skb = new_skb(ifp, sizeof *h + sizeof *ch);
		if (skb == NULL) {
			printk(KERN_INFO "aoe: aoecmd_cfg: skb alloc failure\n");
			continue;
		}
		h = (struct aoe_hdr *) skb->mac.raw;
		memset(h, 0, sizeof *h + sizeof *ch);

		/* broadcast destination; matching shelves will answer */
		memset(h->dst, 0xff, sizeof h->dst);
		memcpy(h->src, ifp->dev_addr, sizeof h->src);
		h->type = __constant_cpu_to_be16(ETH_P_AOE);
		h->verfl = AOE_HVER;
		h->major = cpu_to_be16(aoemajor);
		h->minor = aoeminor;
		h->cmd = AOECMD_CFG;

		/* push onto the local chain */
		skb->next = sl;
		sl = skb;
	}
	read_unlock(&dev_base_lock);

	aoenet_xmit(sl);
}
539 
540 /*
541  * Since we only call this in one place (and it only prepares one frame)
542  * we just return the skb.  Usually we'd chain it up to the aoedev sendq.
543  */
544 static struct sk_buff *
545 aoecmd_ata_id(struct aoedev *d)
546 {
547 	struct aoe_hdr *h;
548 	struct aoe_atahdr *ah;
549 	struct frame *f;
550 	struct sk_buff *skb;
551 
552 	f = getframe(d, FREETAG);
553 	if (f == NULL) {
554 		printk(KERN_CRIT "aoe: aoecmd_ata_id: can't get a frame.  "
555 			"This shouldn't happen.\n");
556 		return NULL;
557 	}
558 
559 	/* initialize the headers & frame */
560 	h = (struct aoe_hdr *) f->data;
561 	ah = (struct aoe_atahdr *) (h+1);
562 	f->ndata = sizeof *h + sizeof *ah;
563 	memset(h, 0, f->ndata);
564 	f->tag = aoehdr_atainit(d, h);
565 	f->waited = 0;
566 	f->writedatalen = 0;
567 
568 	/* this message initializes the device, so we reset the rttavg */
569 	d->rttavg = MAXTIMER;
570 
571 	/* set up ata header */
572 	ah->scnt = 1;
573 	ah->cmdstat = WIN_IDENTIFY;
574 	ah->lba3 = 0xa0;
575 
576 	skb = skb_prepare(d, f);
577 
578 	/* we now want to start the rexmit tracking */
579 	d->flags &= ~DEVFL_TKILL;
580 	d->timer.data = (ulong) d;
581 	d->timer.function = rexmit_timer;
582 	d->timer.expires = jiffies + TIMERTICK;
583 	add_timer(&d->timer);
584 
585 	return skb;
586 }
587 
588 void
589 aoecmd_cfg_rsp(struct sk_buff *skb)
590 {
591 	struct aoedev *d;
592 	struct aoe_hdr *h;
593 	struct aoe_cfghdr *ch;
594 	ulong flags, sysminor, aoemajor;
595 	u16 bufcnt;
596 	struct sk_buff *sl;
597 	enum { MAXFRAMES = 8 };
598 
599 	h = (struct aoe_hdr *) skb->mac.raw;
600 	ch = (struct aoe_cfghdr *) (h+1);
601 
602 	/*
603 	 * Enough people have their dip switches set backwards to
604 	 * warrant a loud message for this special case.
605 	 */
606 	aoemajor = be16_to_cpu(h->major);
607 	if (aoemajor == 0xfff) {
608 		printk(KERN_CRIT "aoe: aoecmd_cfg_rsp: Warning: shelf "
609 			"address is all ones.  Check shelf dip switches\n");
610 		return;
611 	}
612 
613 	sysminor = SYSMINOR(aoemajor, h->minor);
614 	if (sysminor * AOE_PARTITIONS + AOE_PARTITIONS > MINORMASK) {
615 		printk(KERN_INFO
616 			"aoe: e%ld.%d: minor number too large\n",
617 			aoemajor, (int) h->minor);
618 		return;
619 	}
620 
621 	bufcnt = be16_to_cpu(ch->bufcnt);
622 	if (bufcnt > MAXFRAMES)	/* keep it reasonable */
623 		bufcnt = MAXFRAMES;
624 
625 	d = aoedev_set(sysminor, h->src, skb->dev, bufcnt);
626 	if (d == NULL) {
627 		printk(KERN_INFO "aoe: aoecmd_cfg_rsp: device set failure\n");
628 		return;
629 	}
630 
631 	spin_lock_irqsave(&d->lock, flags);
632 
633 	if (d->flags & (DEVFL_UP | DEVFL_CLOSEWAIT)) {
634 		spin_unlock_irqrestore(&d->lock, flags);
635 		return;
636 	}
637 
638 	d->fw_ver = be16_to_cpu(ch->fwver);
639 
640 	/* we get here only if the device is new */
641 	sl = aoecmd_ata_id(d);
642 
643 	spin_unlock_irqrestore(&d->lock, flags);
644 
645 	aoenet_xmit(sl);
646 }
647 
648