1 /* Copyright (c) 2004 Coraid, Inc. See COPYING for GPL terms. */
4 * Filesystem request handling methods
7 #include <linux/hdreg.h>
8 #include <linux/blkdev.h>
9 #include <linux/skbuff.h>
10 #include <linux/netdevice.h>
/* Retransmit-timer tuning.  The per-device timer fires every TIMERTICK
 * jiffies; MINTIMER/MAXTIMER bound the adaptive, RTT-derived timeout
 * (see rexmit_timer and calc_rttavg below). */
13 #define TIMERTICK (HZ / 10)
14 #define MINTIMER (2 * TIMERTICK)
15 #define MAXTIMER (HZ << 1)
16 #define MAXWAIT (60 * 3) /* After MAXWAIT seconds, give up and fail dev */
/*
 * Allocate and initialize a fresh sk_buff of `len` bytes for AoE use.
 * Uses GFP_ATOMIC, so it may be called from atomic context and may fail.
 * NOTE(review): the allocation-failure branch is elided from this
 * listing -- confirm skb is NULL-checked before the stores below.
 */
18 static struct sk_buff *
19 new_skb(struct net_device *if_dev, ulong len)
23 skb = alloc_skb(len, GFP_ATOMIC);
/* point the network- and link-layer header pointers at the data start */
25 skb->nh.raw = skb->mac.raw = skb->data;
/* all frames built here carry the AoE ethertype */
27 skb->protocol = __constant_htons(ETH_P_AOE);
30 skb->next = skb->prev = NULL;
32 /* tell the network layer not to perform IP checksums
33 * or to get the NIC to do it
35 skb->ip_summed = CHECKSUM_NONE;
/*
 * Build a transmit skb from frame f: allocate a buffer sized for the
 * prepared headers (f->ndata bytes) plus any write payload, copy the
 * headers in, and for writes append f->writedatalen bytes from
 * f->bufaddr after the AoE + ATA headers.
 * NOTE(review): the failure/return paths are elided from this listing;
 * the printk below suggests NULL is returned on allocation failure.
 */
40 static struct sk_buff *
41 skb_prepare(struct aoedev *d, struct frame *f)
46 skb = new_skb(d->ifp, f->ndata + f->writedatalen);
48 printk(KERN_INFO "aoe: skb_prepare: failure to allocate skb\n");
/* copy the pre-built AoE/ATA headers into the skb data area */
53 memcpy(p, f->data, f->ndata);
55 if (f->writedatalen) {
/* write payload goes immediately after the two headers */
56 p += sizeof(struct aoe_hdr) + sizeof(struct aoe_atahdr);
57 memcpy(p, f->bufaddr, f->writedatalen);
/* Look up the frame in d matching `tag`; callers pass FREETAG to claim
 * an unused frame (see aoecmd_work) or a received tag to match a
 * response (see aoecmd_ata_rsp).
 * NOTE(review): body elided from this listing -- confirm semantics. */
64 getframe(struct aoedev *d, int tag)
77 * Leave the top bit clear so we have tagspace for userland.
78 * The bottom 16 bits are the xmit tick for rexmit/rttavg processing.
79 * This driver reserves tag -1 to mean "unused frame."
82 newtag(struct aoedev *d)
/* pack the incremented per-device counter (15 bits, keeping the top
 * bit clear per the note above) over the 16-bit xmit tick held in n */
87 return n |= (++d->lasttag & 0x7fff) << 16;
/*
 * Fill in the AoE header h for an ATA command to device d: source and
 * destination MAC addresses, the AoE ethertype, shelf (major) and slot
 * (minor) addresses, and a freshly generated tag in network byte order.
 * NOTE(review): the return statement is elided, but the caller in
 * aoecmd_ata_rw stores the result in f->tag, so this presumably
 * returns the host-order tag (host_tag) -- confirm.
 */
91 aoehdr_atainit(struct aoedev *d, struct aoe_hdr *h)
93 u16 type = __constant_cpu_to_be16(ETH_P_AOE);
94 u16 aoemajor = __cpu_to_be16(d->aoemajor);
95 u32 host_tag = newtag(d);
96 u32 tag = __cpu_to_be32(host_tag);
98 memcpy(h->src, d->ifp->dev_addr, sizeof h->src);
99 memcpy(h->dst, d->addr, sizeof h->dst);
100 memcpy(h->type, &type, sizeof type);
/* shelf/slot addressing: major is 16 bits on the wire, minor one byte */
102 memcpy(h->major, &aoemajor, sizeof aoemajor);
103 h->minor = d->aoeminor;
105 memcpy(h->tag, &tag, sizeof tag);
/*
 * Build and queue one ATA read/write frame for device d's in-process
 * buffer: fill frame f with AoE + ATA headers covering up to MAXATADATA
 * bytes of the current bio_vec, advance the buffer bookkeeping, and
 * chain the prepared skb onto d->skblist for transmission.
 * NOTE(review): several lines are elided from this listing (bcnt clamp
 * body, lba0 store, writebit/extbit defaults, resid accounting).
 */
111 aoecmd_ata_rw(struct aoedev *d, struct frame *f)
114 struct aoe_atahdr *ah;
118 register sector_t sector;
119 char writebit, extbit;
126 sector = buf->sector;
127 bcnt = buf->bv_resid;
/* each frame carries at most MAXATADATA bytes of payload */
128 if (bcnt > MAXATADATA)
131 /* initialize the headers & frame */
132 h = (struct aoe_hdr *) f->data;
133 ah = (struct aoe_atahdr *) (h+1);
134 f->ndata = sizeof *h + sizeof *ah;
135 memset(h, 0, f->ndata);
136 f->tag = aoehdr_atainit(d, h);
139 f->bufaddr = buf->bufaddr;
141 /* set up ata header */
/* sector count: bcnt is a byte count, 512 bytes per sector */
142 ah->scnt = bcnt >> 9;
/* peel the LBA off one byte per field via successive >>= 8 shifts
 * (the lba0 store is elided from this listing) */
144 ah->lba1 = sector >>= 8;
145 ah->lba2 = sector >>= 8;
146 ah->lba3 = sector >>= 8;
147 if (d->flags & DEVFL_EXT) {
/* LBA48 device: two more address bytes plus the extended-command flag */
148 ah->aflags |= AOEAFL_EXT;
149 ah->lba4 = sector >>= 8;
150 ah->lba5 = sector >>= 8;
154 ah->lba3 |= 0xe0; /* LBA bit + obsolete 0xa0 */
157 if (bio_data_dir(buf->bio) == WRITE) {
/* writes carry their payload in this same frame; skb_prepare copies
 * writedatalen bytes from f->bufaddr after the headers */
158 ah->aflags |= AOEAFL_WRITE;
159 f->writedatalen = bcnt;
/* writebit/extbit (set above, partially elided) turn the base READ
 * opcode into the matching write and/or 48-bit variant */
165 ah->cmdstat = WIN_READ | writebit | extbit;
167 /* mark all tracking fields and load out */
168 buf->nframesout += 1;
169 buf->bufaddr += bcnt;
170 buf->bv_resid -= bcnt;
171 /* printk(KERN_INFO "aoe: bv_resid=%ld\n", buf->bv_resid); */
173 buf->sector += bcnt >> 9;
174 if (buf->resid == 0) {
/* current bio_vec consumed: step to the next one (advance elided)
 * and reset the residual count and kernel virtual address */
176 } else if (buf->bv_resid == 0) {
178 buf->bv_resid = buf->bv->bv_len;
179 buf->bufaddr = page_address(buf->bv->bv_page) + buf->bv->bv_offset;
/* hand the finished frame to the device's outbound skb chain */
182 skb = skb_prepare(d, f);
184 skb->next = d->skblist;
189 /* enters with d->lock held */
/*
 * Issue as much pending work for device d as frames allow: claim a
 * free frame and, when no buffer is currently in process, dequeue the
 * next buf from d->bufq to make it current.
 * NOTE(review): the surrounding loop and the transmit call are elided
 * from this listing.
 */
191 aoecmd_work(struct aoedev *d)
196 f = getframe(d, FREETAG);
199 if (d->inprocess == NULL) {
/* nothing queued and nothing in process: no work to do */
200 if (list_empty(&d->bufq))
202 buf = container_of(d->bufq.next, struct buf, bufs);
203 list_del(d->bufq.next);
204 /*printk(KERN_INFO "aoecmd_work: bi_size=%ld\n", buf->bio->bi_size); */
/*
 * Retransmit frame f: log the old tag -> new tag transition, rewrite
 * the tag in the frame's AoE header with a freshly generated one, and
 * chain a newly prepared skb onto d->skblist.
 */
212 rexmit(struct aoedev *d, struct frame *f)
222 snprintf(buf, sizeof buf,
223 "%15s e%ld.%ld oldtag=%08x@%08lx newtag=%08x\n",
225 d->aoemajor, d->aoeminor, f->tag, jiffies, n);
228 h = (struct aoe_hdr *) f->data;
/* the header carries the tag in network byte order */
230 net_tag = __cpu_to_be32(n);
231 memcpy(h->tag, &net_tag, sizeof net_tag);
233 skb = skb_prepare(d, f);
235 skb->next = d->skblist;
/* NOTE(review): fragment of an elided helper -- takes the low 16 bits
 * of jiffies, matching the xmit-tick encoding described above newtag();
 * likely part of tsince(), which rexmit_timer uses to age tags.
 * Confirm against the full source. */
245 n = jiffies & 0xffff;
/*
 * Per-device retransmit timer callback (vp is the aoedev pointer).
 * Runs every TIMERTICK: retransmits any outstanding frame older than
 * ~150% of the moving-average round trip, accumulates per-frame wait
 * time, and fails the device once a frame has waited past MAXWAIT.
 * Re-arms itself unless DEVFL_TKILL asks the timer to die.
 * NOTE(review): the frame-scan loop and failure path are partially
 * elided from this listing.
 */
253 rexmit_timer(ulong vp)
258 register long timeout;
261 d = (struct aoedev *) vp;
264 /* timeout is always ~150% of the moving average */
266 timeout += timeout >> 1;
268 spin_lock_irqsave(&d->lock, flags);
/* device teardown requested: drop the lock and let the timer die */
270 if (d->flags & DEVFL_TKILL) {
271 tdie: spin_unlock_irqrestore(&d->lock, flags);
/* frame is in flight (tag != FREETAG) and has aged past the timeout */
277 if (f->tag != FREETAG && tsince(f->tag) >= timeout) {
278 n = f->waited += timeout;
280 if (n > MAXWAIT) { /* waited too long. device failure. */
/* reset the RTT estimate to its maximum after this event */
293 d->rttavg = MAXTIMER;
/* re-arm for the next tick */
296 d->timer.expires = jiffies + TIMERTICK;
297 add_timer(&d->timer);
299 spin_unlock_irqrestore(&d->lock, flags);
/*
 * Parse an ATA IDENTIFY DEVICE response (little-endian 16-bit words at
 * id) for device d: detect LBA48 support, record capacity and CHS
 * geometry, mark the device up, and schedule gendisk allocation
 * (aoeblk_gdalloc) on the shared workqueue.
 * NOTE(review): several lines (heads/sectors for the LBA48 branch,
 * function tail) are elided from this listing.
 */
305 ataid_complete(struct aoedev *d, unsigned char *id)
310 /* word 83: command set supported */
311 n = __le16_to_cpu(*((u16 *) &id[83<<1]));
313 /* word 86: command set/feature enabled */
314 n |= __le16_to_cpu(*((u16 *) &id[86<<1]));
316 if (n & (1<<10)) { /* bit 10: LBA 48 */
317 d->flags |= DEVFL_EXT;
319 /* word 100: number lba48 sectors */
320 ssize = __le64_to_cpu(*((u64 *) &id[100<<1]));
322 /* set as in ide-disk.c:init_idedisk_capacity */
323 d->geo.cylinders = ssize;
324 d->geo.cylinders /= (255 * 63);
/* no LBA48: fall back to the 28-bit capacity and reported geometry */
328 d->flags &= ~DEVFL_EXT;
330 /* number lba28 sectors */
331 ssize = __le32_to_cpu(*((u32 *) &id[60<<1]));
333 /* NOTE: obsolete in ATA 6 */
334 d->geo.cylinders = __le16_to_cpu(*((u16 *) &id[54<<1]));
335 d->geo.heads = __le16_to_cpu(*((u16 *) &id[55<<1]));
336 d->geo.sectors = __le16_to_cpu(*((u16 *) &id[56<<1]));
341 d->gd->capacity = ssize;
342 d->flags |= DEVFL_UP;
/* gendisk allocation already scheduled -- should not happen */
345 if (d->flags & DEVFL_WORKON) {
346 printk(KERN_INFO "aoe: ataid_complete: can't schedule work, it's already on! "
347 "(This really shouldn't happen).\n");
/* defer gendisk setup to process context via the workqueue */
350 INIT_WORK(&d->work, aoeblk_gdalloc, d);
351 schedule_work(&d->work);
352 d->flags |= DEVFL_WORKON;
/*
 * Fold one round-trip-time sample into d->rttavg.  The sample is
 * clamped to at most MAXTIMER (the lower clamp, presumably MINTIMER,
 * is elided from this listing -- confirm) before being mixed in.
 */
356 calc_rttavg(struct aoedev *d, int rtt)
363 else if (n > MAXTIMER)
366 /* g == .25; cf. Congestion Avoidance and Control, Jacobson & Karels; 1988 */
/*
 * Handle an incoming ATA response frame: find the device by source
 * MAC and the outstanding frame by tag, update the RTT average, then
 * dispatch on the command we originally sent -- copy read data into
 * the buffer, complete an IDENTIFY, or log an unrecognized opcode.
 * Completes the bio (bio_endio) once all its frames have returned.
 * NOTE(review): several branches (unknown-device/-tag logging tails,
 * the switch labels, frame release) are elided from this listing.
 */
372 aoecmd_ata_rsp(struct sk_buff *skb)
376 struct aoe_atahdr *ahin, *ahout;
384 hin = (struct aoe_hdr *) skb->mac.raw;
/* responses are matched to devices by the sender's MAC address */
385 d = aoedev_bymac(hin->src);
387 snprintf(ebuf, sizeof ebuf, "aoecmd_ata_rsp: ata response "
388 "for unknown device %d.%d\n",
389 __be16_to_cpu(*((u16 *) hin->major)),
395 spin_lock_irqsave(&d->lock, flags);
/* match the response tag against an outstanding frame */
397 f = getframe(d, __be32_to_cpu(*((u32 *) hin->tag)));
399 spin_unlock_irqrestore(&d->lock, flags);
400 snprintf(ebuf, sizeof ebuf,
401 "%15s e%d.%d tag=%08x@%08lx\n",
403 __be16_to_cpu(*((u16 *) hin->major)),
405 __be32_to_cpu(*((u32 *) hin->tag)),
/* feed this frame's age into the adaptive timeout */
411 calc_rttavg(d, tsince(f->tag));
413 ahin = (struct aoe_atahdr *) (hin+1);
/* ahout is the ATA header we sent, kept in the frame's data buffer */
414 ahout = (struct aoe_atahdr *) (f->data + sizeof(struct aoe_hdr));
417 if (ahin->cmdstat & 0xa9) { /* these bits cleared on success */
418 printk(KERN_CRIT "aoe: aoecmd_ata_rsp: ata error cmd=%2.2Xh "
419 "stat=%2.2Xh from e%ld.%ld\n",
420 ahout->cmdstat, ahin->cmdstat,
421 d->aoemajor, d->aoeminor);
/* remember the failure; the bio is failed when it completes below */
423 buf->flags |= BUFFL_FAIL;
425 switch (ahout->cmdstat) {
/* read completion: expected payload is scnt 512-byte sectors */
428 n = ahout->scnt << 9;
429 if (skb->len - sizeof *hin - sizeof *ahin < n) {
430 printk(KERN_CRIT "aoe: aoecmd_ata_rsp: runt "
431 "ata data size in read. skb->len=%d\n",
433 /* fail frame f? just returning will rexmit. */
434 spin_unlock_irqrestore(&d->lock, flags);
/* copy the read data into the caller's buffer */
437 memcpy(f->bufaddr, ahin+1, n);
/* IDENTIFY completion: needs the full 512-byte identify page */
442 if (skb->len - sizeof *hin - sizeof *ahin < 512) {
443 printk(KERN_INFO "aoe: aoecmd_ata_rsp: runt data size "
444 "in ataid. skb->len=%d\n", skb->len);
445 spin_unlock_irqrestore(&d->lock, flags);
448 ataid_complete(d, (char *) (ahin+1));
449 /* d->flags |= DEVFL_WC_UPDATE; */
452 printk(KERN_INFO "aoe: aoecmd_ata_rsp: unrecognized "
453 "outbound ata command %2.2Xh for %d.%d\n",
455 __be16_to_cpu(*((u16 *) hin->major)),
/* last frame back and nothing left to send: finish the bio, failing
 * it with -EIO if any of its frames errored */
461 buf->nframesout -= 1;
462 if (buf->nframesout == 0 && buf->resid == 0) {
463 n = (buf->flags & BUFFL_FAIL) ? -EIO : 0;
464 bio_endio(buf->bio, buf->bio->bi_size, n);
465 mempool_free(buf, d->bufpool);
477 spin_unlock_irqrestore(&d->lock, flags);
/*
 * Broadcast an AoE config query for shelf `aoemajor`, slot `aoeminor`:
 * walk the system's network interfaces under dev_base_lock, build one
 * broadcast config frame per interface accepted by is_aoe_netif(),
 * and collect the skbs (transmission of the chain is elided from this
 * listing).
 */
483 aoecmd_cfg(ushort aoemajor, unsigned char aoeminor)
486 struct aoe_cfghdr *ch;
487 struct sk_buff *skb, *sl;
488 struct net_device *ifp;
489 u16 aoe_type = __constant_cpu_to_be16(ETH_P_AOE);
490 u16 net_aoemajor = __cpu_to_be16(aoemajor);
494 read_lock(&dev_base_lock);
/* NOTE(review): dev_put() in the loop increment implies a matching
 * dev_hold() in the elided loop body -- confirm */
495 for (ifp = dev_base; ifp; dev_put(ifp), ifp = ifp->next) {
497 if (!is_aoe_netif(ifp))
500 skb = new_skb(ifp, sizeof *h + sizeof *ch);
502 printk(KERN_INFO "aoe: aoecmd_cfg: skb alloc failure\n");
505 h = (struct aoe_hdr *) skb->mac.raw;
506 memset(h, 0, sizeof *h + sizeof *ch);
/* all-ones destination: link-layer broadcast */
508 memset(h->dst, 0xff, sizeof h->dst);
509 memcpy(h->src, ifp->dev_addr, sizeof h->src);
510 memcpy(h->type, &aoe_type, sizeof aoe_type);
512 memcpy(h->major, &net_aoemajor, sizeof net_aoemajor);
519 read_unlock(&dev_base_lock);
525 * Since we only call this in one place (and it only prepares one frame)
526 * we just return the skb. Usually we'd chain it up to the d->skblist.
/*
 * Prepare an ATA IDENTIFY DEVICE frame for device d and arm the
 * per-device retransmit timer.  Called with d->lock held (from
 * aoecmd_cfg_rsp).  Returns the prepared skb for the caller to send.
 */
528 static struct sk_buff *
529 aoecmd_ata_id(struct aoedev *d)
532 struct aoe_atahdr *ah;
536 f = getframe(d, FREETAG);
538 printk(KERN_CRIT "aoe: aoecmd_ata_id: can't get a frame. "
539 "This shouldn't happen.\n");
543 /* initialize the headers & frame */
544 h = (struct aoe_hdr *) f->data;
545 ah = (struct aoe_atahdr *) (h+1);
546 f->ndata = sizeof *h + sizeof *ah;
547 memset(h, 0, f->ndata);
548 f->tag = aoehdr_atainit(d, h);
552 /* this message initializes the device, so we reset the rttavg */
553 d->rttavg = MAXTIMER;
555 /* set up ata header */
557 ah->cmdstat = WIN_IDENTIFY;
560 skb = skb_prepare(d, f);
562 /* we now want to start the rexmit tracking */
563 d->flags &= ~DEVFL_TKILL;
564 d->timer.data = (ulong) d;
565 d->timer.function = rexmit_timer;
566 d->timer.expires = jiffies + TIMERTICK;
567 add_timer(&d->timer);
/*
 * Handle an AoE config response: validate the shelf address and the
 * derived system minor, clamp the target's advertised buffer count,
 * create or look up the aoedev, and -- for a device that is not
 * already up -- start discovery by sending an ATA IDENTIFY frame.
 * NOTE(review): the tail of the function (sending sl, unlock paths)
 * is partially elided from this listing.
 */
573 aoecmd_cfg_rsp(struct sk_buff *skb)
577 struct aoe_cfghdr *ch;
578 ulong flags, bufcnt, sysminor, aoemajor;
580 enum { MAXFRAMES = 8, MAXSYSMINOR = 255 };
582 h = (struct aoe_hdr *) skb->mac.raw;
583 ch = (struct aoe_cfghdr *) (h+1);
586 * Enough people have their dip switches set backwards to
587 * warrant a loud message for this special case.
589 aoemajor = __be16_to_cpu(*((u16 *) h->major));
590 if (aoemajor == 0xfff) {
591 printk(KERN_CRIT "aoe: aoecmd_cfg_rsp: Warning: shelf "
592 "address is all ones. Check shelf dip switches\n");
/* map shelf.slot to a system minor and bound it */
596 sysminor = SYSMINOR(aoemajor, h->minor);
597 if (sysminor > MAXSYSMINOR) {
598 printk(KERN_INFO "aoe: aoecmd_cfg_rsp: sysminor %ld too "
599 "large\n", sysminor);
/* the target advertises how many outstanding frames it can buffer */
603 bufcnt = __be16_to_cpu(*((u16 *) ch->bufcnt));
604 if (bufcnt > MAXFRAMES) /* keep it reasonable */
607 d = aoedev_set(sysminor, h->src, skb->dev, bufcnt);
609 printk(KERN_INFO "aoe: aoecmd_cfg_rsp: device set failure\n");
613 spin_lock_irqsave(&d->lock, flags);
/* already up (or closing): nothing more to discover */
615 if (d->flags & (DEVFL_UP | DEVFL_CLOSEWAIT)) {
616 spin_unlock_irqrestore(&d->lock, flags);
620 d->fw_ver = __be16_to_cpu(*((u16 *) ch->fwver));
622 /* we get here only if the device is new */
623 sl = aoecmd_ata_id(d);
625 spin_unlock_irqrestore(&d->lock, flags);