1 /* Copyright (c) 2004 Coraid, Inc. See COPYING for GPL terms. */
4 * Filesystem request handling methods
7 #include <linux/hdreg.h>
8 #include <linux/blkdev.h>
9 #include <linux/skbuff.h>
10 #include <linux/netdevice.h>
11 #include <asm/unaligned.h>
/*
 * Retransmit timing constants.
 * TIMERTICK is the interval, in jiffies, at which the per-device timer is
 * re-armed (timer.expires = jiffies + TIMERTICK).
 * MAXTIMER is the value d->rttavg is reset to and the upper limit tested in
 * calc_rttavg().
 * NOTE(review): MINTIMER is presumably the matching lower limit -- confirm.
 */
14 #define TIMERTICK (HZ / 10)
15 #define MINTIMER (2 * TIMERTICK)
16 #define MAXTIMER (HZ << 1)
17 #define MAXWAIT (60 * 3) /* After MAXWAIT seconds, give up and fail dev */
/*
 * new_skb - allocate and set up an skb for an AoE frame.
 * @if_dev: network interface the frame is intended for.
 * @len: byte count passed to alloc_skb() and zeroed starting at skb->head.
 *
 * The allocation uses GFP_ATOMIC.  The network and MAC header pointers are
 * both aimed at skb->data, the protocol is set to ETH_P_AOE, the skb is
 * detached from any list, and ip_summed is set to CHECKSUM_NONE.
 * NOTE(review): callers in this file log an allocation failure after calling
 * this, so a NULL result is presumably possible -- confirm the return path.
 */
19 static struct sk_buff *
20 new_skb(struct net_device *if_dev, ulong len)
24 skb = alloc_skb(len, GFP_ATOMIC);
26 skb->nh.raw = skb->mac.raw = skb->data;
28 skb->protocol = __constant_htons(ETH_P_AOE);
31 memset(skb->head, 0, len);
32 skb->next = skb->prev = NULL;
34 /* tell the network layer not to perform IP checksums
35 * or to get the NIC to do it
37 skb->ip_summed = CHECKSUM_NONE;
/*
 * skb_prepare - build the skb that carries frame f.
 * @d: device whose interface (d->ifp) the skb is created for.
 * @f: frame supplying the header bytes and, for writes, the payload.
 *
 * The skb is sized for f->ndata header bytes plus f->writedatalen payload
 * bytes.  The header bytes are copied from f->data.  When writedatalen is
 * non-zero, the destination pointer is advanced by the combined size of the
 * AoE and ATA headers and the payload is copied from f->bufaddr.
 * An allocation failure is reported with printk.
 */
42 static struct sk_buff *
43 skb_prepare(struct aoedev *d, struct frame *f)
48 skb = new_skb(d->ifp, f->ndata + f->writedatalen);
50 printk(KERN_INFO "aoe: skb_prepare: failure to allocate skb\n");
55 memcpy(p, f->data, f->ndata);
/* only writes carry payload after the headers */
57 if (f->writedatalen) {
58 p += sizeof(struct aoe_hdr) + sizeof(struct aoe_atahdr);
59 memcpy(p, f->bufaddr, f->writedatalen);
/*
 * getframe - look up one of device d's frames by tag.
 * Callers in this file pass FREETAG to obtain an unused frame, or the tag
 * taken from a received header to find the matching outstanding frame.
 * NOTE(review): callers test the result, so a "not found" value is
 * presumably returned when no frame matches -- confirm.
 */
66 getframe(struct aoedev *d, int tag)
79 * Leave the top bit clear so we have tagspace for userland.
80 * The bottom 16 bits are the xmit tick for rexmit/rttavg processing.
81 * This driver reserves tag -1 to mean "unused frame."
/*
 * newtag - produce the next tag value for device d.
 * d->lasttag is pre-incremented, masked to 15 bits and shifted into bits
 * 16..30, which leaves bit 31 clear; the result is OR'ed into n.
 * NOTE(review): n is presumably preloaded with the low 16 bits of the
 * current tick, as the tag-layout comment describes -- confirm.
 */
84 newtag(struct aoedev *d)
89 return n |= (++d->lasttag & 0x7fff) << 16;
/*
 * aoehdr_atainit - fill in the Ethernet/AoE header h for device d.
 * @d: target device; supplies the interface, destination address and
 *     shelf/slot numbers.
 * @h: header to fill.
 *
 * Source address is the interface's dev_addr, destination is d->addr, and
 * the ethertype is ETH_P_AOE.  major and tag are stored big-endian; minor is
 * copied as is.  A fresh tag is taken from newtag().
 * NOTE(review): callers store the result in f->tag, so this presumably
 * returns the host-order tag -- confirm.
 */
93 aoehdr_atainit(struct aoedev *d, struct aoe_hdr *h)
95 u32 host_tag = newtag(d);
97 memcpy(h->src, d->ifp->dev_addr, sizeof h->src);
98 memcpy(h->dst, d->addr, sizeof h->dst);
99 h->type = __constant_cpu_to_be16(ETH_P_AOE);
101 h->major = cpu_to_be16(d->aoemajor);
102 h->minor = d->aoeminor;
104 h->tag = cpu_to_be32(host_tag);
/*
 * aoecmd_ata_rw - build and queue one ATA read/write frame for the buf
 * currently being worked on.
 * @d: device the command is sent to.
 * @f: frame to fill in.
 *
 * The transfer starts at buf->sector and covers bcnt bytes, taken from the
 * bytes remaining in the current bio_vec (buf->bv_resid) and compared
 * against MAXATADATA.  The AoE and ATA headers are written at the start of
 * f->data, the buf's progress fields are advanced by bcnt, and the prepared
 * skb is appended to d's send queue.
 * NOTE(review): bcnt is presumably clamped to MAXATADATA -- confirm.
 */
110 aoecmd_ata_rw(struct aoedev *d, struct frame *f)
113 struct aoe_atahdr *ah;
117 register sector_t sector;
118 char writebit, extbit;
125 sector = buf->sector;
126 bcnt = buf->bv_resid;
127 if (bcnt > MAXATADATA)
130 /* initialize the headers & frame */
131 h = (struct aoe_hdr *) f->data;
/* the ATA header immediately follows the AoE header */
132 ah = (struct aoe_atahdr *) (h+1);
133 f->ndata = sizeof *h + sizeof *ah;
134 memset(h, 0, f->ndata);
135 f->tag = aoehdr_atainit(d, h);
138 f->bufaddr = buf->bufaddr;
140 /* set up ata header */
/* sector count: bcnt bytes divided by 512 */
141 ah->scnt = bcnt >> 9;
/* each assignment shifts sector right by 8 and stores the next LBA byte */
143 ah->lba1 = sector >>= 8;
144 ah->lba2 = sector >>= 8;
145 ah->lba3 = sector >>= 8;
146 if (d->flags & DEVFL_EXT) {
147 ah->aflags |= AOEAFL_EXT;
148 ah->lba4 = sector >>= 8;
149 ah->lba5 = sector >>= 8;
153 ah->lba3 |= 0xe0; /* LBA bit + obsolete 0xa0 */
156 if (bio_data_dir(buf->bio) == WRITE) {
157 ah->aflags |= AOEAFL_WRITE;
158 f->writedatalen = bcnt;
/* the command byte is WIN_READ with the write and ext bits OR'ed in */
164 ah->cmdstat = WIN_READ | writebit | extbit;
166 /* mark all tracking fields and load out */
167 buf->nframesout += 1;
168 buf->bufaddr += bcnt;
169 buf->bv_resid -= bcnt;
170 /* printk(KERN_INFO "aoe: bv_resid=%ld\n", buf->bv_resid); */
172 buf->sector += bcnt >> 9;
173 if (buf->resid == 0) {
175 } else if (buf->bv_resid == 0) {
/* current bio_vec exhausted: reload length and address from buf->bv */
177 buf->bv_resid = buf->bv->bv_len;
178 buf->bufaddr = page_address(buf->bv->bv_page) + buf->bv->bv_offset;
181 skb = skb_prepare(d, f);
185 d->sendq_tl->next = skb;
/*
 * aoecmd_work - start work on queued bufs for device d.
 * A free frame is requested with getframe(d, FREETAG).  When no buf is in
 * process (d->inprocess == NULL) and d->bufq is not empty, the first buf on
 * d->bufq is taken and unlinked from the queue.
 */
192 /* enters with d->lock held */
194 aoecmd_work(struct aoedev *d)
199 f = getframe(d, FREETAG);
202 if (d->inprocess == NULL) {
203 if (list_empty(&d->bufq))
/* take the buf at the head of the queue and remove it from the list */
205 buf = container_of(d->bufq.next, struct buf, bufs);
206 list_del(d->bufq.next);
207 /*printk(KERN_INFO "aoecmd_work: bi_size=%ld\n", buf->bio->bi_size); */
/*
 * rexmit - retransmit frame f on device d under a new tag.
 * A diagnostic line with the device address, the old tag, the current
 * jiffies and the new tag n is formatted into buf.  The new tag is written
 * big-endian into the AoE header at the start of f->data, a fresh skb is
 * built with skb_prepare(), and that skb is linked onto d's send queue.
 * NOTE(review): n presumably comes from newtag(d) -- confirm.
 */
215 rexmit(struct aoedev *d, struct frame *f)
224 snprintf(buf, sizeof buf,
225 "%15s e%ld.%ld oldtag=%08x@%08lx newtag=%08x\n",
227 d->aoemajor, d->aoeminor, f->tag, jiffies, n);
230 h = (struct aoe_hdr *) f->data;
232 h->tag = cpu_to_be32(n);
234 skb = skb_prepare(d, f);
238 d->sendq_tl->next = skb;
/*
 * Keep only the low 16 bits of jiffies, the same width as the tick stored
 * in the bottom 16 bits of a tag.
 * NOTE(review): presumably part of tsince() -- confirm.
 */
250 n = jiffies & 0xffff;
/*
 * rexmit_timer - per-device timer callback that checks for overdue frames.
 * @vp: the struct aoedev pointer, passed as an unsigned long.
 *
 * Under d->lock, a frame that is in use (tag != FREETAG) and whose age from
 * tsince() has reached the timeout has the timeout added to f->waited.
 * When the waited total exceeds MAXWAIT the device is treated as failed.
 * The send queue is emptied, d->rttavg is reset to MAXTIMER, and the timer
 * is re-armed TIMERTICK jiffies ahead.  If DEVFL_TKILL is set the lock is
 * released at the tdie label.
 * NOTE(review): the unit conversion between f->waited and MAXWAIT (seconds)
 * is not evident here -- confirm.
 */
258 rexmit_timer(ulong vp)
263 register long timeout;
266 d = (struct aoedev *) vp;
269 /* timeout is always ~150% of the moving average */
271 timeout += timeout >> 1;
273 spin_lock_irqsave(&d->lock, flags);
275 if (d->flags & DEVFL_TKILL) {
276 tdie: spin_unlock_irqrestore(&d->lock, flags);
282 if (f->tag != FREETAG && tsince(f->tag) >= timeout) {
283 n = f->waited += timeout;
285 if (n > MAXWAIT) { /* waited too long. device failure. */
294 d->sendq_hd = d->sendq_tl = NULL;
298 d->rttavg = MAXTIMER;
/* re-arm for the next tick */
301 d->timer.expires = jiffies + TIMERTICK;
302 add_timer(&d->timer);
304 spin_unlock_irqrestore(&d->lock, flags);
/*
 * ataid_complete - digest ATA identify data and mark the device up.
 * @d: device the identify data belongs to.
 * @id: identify data, indexed by 16-bit word (byte offset = word << 1);
 *      fields are read as little-endian with get_unaligned().
 *
 * Words 83 and 86 are OR'ed together and bit 10 selects LBA48.  With LBA48,
 * DEVFL_EXT is set, the sector count is the 64-bit value at word 100, and
 * cylinders is that count divided by 255 * 63.  The other path clears
 * DEVFL_EXT, reads a 32-bit sector count at word 60 and takes the geometry
 * from words 54-56.  The gendisk capacity is then set and DEVFL_UP raised.
 * If DEVFL_WORKON is already set a message is printed; the work item runs
 * aoeblk_gdalloc and DEVFL_WORKON is set once it is scheduled.
 */
310 ataid_complete(struct aoedev *d, unsigned char *id)
315 /* word 83: command set supported */
316 n = le16_to_cpu(get_unaligned((__le16 *) &id[83<<1]));
318 /* word 86: command set/feature enabled */
319 n |= le16_to_cpu(get_unaligned((__le16 *) &id[86<<1]));
321 if (n & (1<<10)) { /* bit 10: LBA 48 */
322 d->flags |= DEVFL_EXT;
324 /* word 100: number lba48 sectors */
325 ssize = le64_to_cpu(get_unaligned((__le64 *) &id[100<<1]));
327 /* set as in ide-disk.c:init_idedisk_capacity */
328 d->geo.cylinders = ssize;
329 d->geo.cylinders /= (255 * 63);
333 d->flags &= ~DEVFL_EXT;
335 /* number lba28 sectors */
336 ssize = le32_to_cpu(get_unaligned((__le32 *) &id[60<<1]));
338 /* NOTE: obsolete in ATA 6 */
339 d->geo.cylinders = le16_to_cpu(get_unaligned((__le16 *) &id[54<<1]));
340 d->geo.heads = le16_to_cpu(get_unaligned((__le16 *) &id[55<<1]));
341 d->geo.sectors = le16_to_cpu(get_unaligned((__le16 *) &id[56<<1]));
346 d->gd->capacity = ssize;
347 d->flags |= DEVFL_UP;
350 if (d->flags & DEVFL_WORKON) {
351 printk(KERN_INFO "aoe: ataid_complete: can't schedule work, it's already on! "
352 "(This really shouldn't happen).\n");
355 INIT_WORK(&d->work, aoeblk_gdalloc, d);
356 schedule_work(&d->work);
357 d->flags |= DEVFL_WORKON;
/*
 * calc_rttavg - fold a new round-trip sample into device d's average.
 * @d: device whose average is updated.
 * @rtt: new round-trip measurement; callers pass tsince(f->tag).
 *
 * The working value n is tested against MAXTIMER as an upper limit.
 * NOTE(review): a lower limit (presumably MINTIMER) and the actual update
 * of d->rttavg with gain .25 are expected here -- confirm.
 */
361 calc_rttavg(struct aoedev *d, int rtt)
368 else if (n > MAXTIMER)
371 /* g == .25; cf. Congestion Avoidance and Control, Jacobson & Karels; 1988 */
/*
 * aoecmd_ata_rsp - handle a received ATA response.
 * @skb: received frame; the AoE header is at skb->mac.raw and the ATA header
 *       follows it.
 *
 * The device is found from the major/minor in the header, and the
 * outstanding frame from the tag, under d->lock.  A response for an unknown
 * device or tag only produces a diagnostic string.  For a matched frame the
 * round-trip average is updated from the tag.
 *
 * If any of the status bits 0xa9 are set the error is logged and the buf is
 * flagged BUFFL_FAIL.  Otherwise the outbound command decides the handling:
 * read data is copied to f->bufaddr after a length check, identify data of
 * at least 512 bytes goes to ataid_complete(), and an unrecognized command
 * is logged.
 *
 * When the buf has no frames outstanding and no bytes remaining, disk
 * statistics are updated, the bio is completed with -EIO or 0, and the buf
 * is returned to d->bufpool.
 */
377 aoecmd_ata_rsp(struct sk_buff *skb)
381 struct aoe_atahdr *ahin, *ahout;
390 hin = (struct aoe_hdr *) skb->mac.raw;
391 aoemajor = be16_to_cpu(hin->major);
392 d = aoedev_by_aoeaddr(aoemajor, hin->minor);
394 snprintf(ebuf, sizeof ebuf, "aoecmd_ata_rsp: ata response "
395 "for unknown device %d.%d\n",
396 aoemajor, hin->minor);
401 spin_lock_irqsave(&d->lock, flags);
403 f = getframe(d, be32_to_cpu(hin->tag));
405 spin_unlock_irqrestore(&d->lock, flags);
406 snprintf(ebuf, sizeof ebuf,
407 "%15s e%d.%d tag=%08x@%08lx\n",
409 be16_to_cpu(hin->major),
411 be32_to_cpu(hin->tag),
417 calc_rttavg(d, tsince(f->tag));
/* ahin: ATA header of the response; ahout: ATA header of the request we sent */
419 ahin = (struct aoe_atahdr *) (hin+1);
420 ahout = (struct aoe_atahdr *) (f->data + sizeof(struct aoe_hdr));
423 if (ahin->cmdstat & 0xa9) { /* these bits cleared on success */
424 printk(KERN_CRIT "aoe: aoecmd_ata_rsp: ata error cmd=%2.2Xh "
425 "stat=%2.2Xh from e%ld.%ld\n",
426 ahout->cmdstat, ahin->cmdstat,
427 d->aoemajor, d->aoeminor);
429 buf->flags |= BUFFL_FAIL;
431 switch (ahout->cmdstat) {
/* expected payload: requested sector count times 512 bytes */
434 n = ahout->scnt << 9;
435 if (skb->len - sizeof *hin - sizeof *ahin < n) {
436 printk(KERN_CRIT "aoe: aoecmd_ata_rsp: runt "
437 "ata data size in read. skb->len=%d\n",
439 /* fail frame f? just returning will rexmit. */
440 spin_unlock_irqrestore(&d->lock, flags);
443 memcpy(f->bufaddr, ahin+1, n);
448 if (skb->len - sizeof *hin - sizeof *ahin < 512) {
449 printk(KERN_INFO "aoe: aoecmd_ata_rsp: runt data size "
450 "in ataid. skb->len=%d\n", skb->len);
451 spin_unlock_irqrestore(&d->lock, flags);
454 ataid_complete(d, (char *) (ahin+1));
455 /* d->flags |= DEVFL_WC_UPDATE; */
458 printk(KERN_INFO "aoe: aoecmd_ata_rsp: unrecognized "
459 "outbound ata command %2.2Xh for %d.%d\n",
461 be16_to_cpu(hin->major),
467 buf->nframesout -= 1;
/* last outstanding frame and nothing left to send: finish the bio */
468 if (buf->nframesout == 0 && buf->resid == 0) {
469 unsigned long duration = jiffies - buf->start_time;
470 unsigned long n_sect = buf->bio->bi_size >> 9;
471 struct gendisk *disk = d->gd;
472 const int rw = bio_data_dir(buf->bio);
474 disk_stat_inc(disk, ios[rw]);
475 disk_stat_add(disk, ticks[rw], duration);
476 disk_stat_add(disk, sectors[rw], n_sect);
477 disk_stat_add(disk, io_ticks, duration);
478 n = (buf->flags & BUFFL_FAIL) ? -EIO : 0;
479 bio_endio(buf->bio, buf->bio->bi_size, n);
480 mempool_free(buf, d->bufpool);
490 d->sendq_hd = d->sendq_tl = NULL;
492 spin_unlock_irqrestore(&d->lock, flags);
/*
 * aoecmd_cfg - build AoE config query frames for the given shelf address.
 * @aoemajor: major (shelf) address placed in the header, big-endian.
 * @aoeminor: minor (slot) address.
 *
 * The global device list is walked from dev_base with dev_base_lock held
 * for reading.  Interfaces rejected by is_aoe_netif() are not used.  For
 * the others an skb sized for the AoE and config headers is allocated and
 * the header is zeroed, with destination set to the all-ones address,
 * source set to the interface address and ethertype ETH_P_AOE.
 * NOTE(review): the loop step calls dev_put(ifp), so a matching dev_hold()
 * is expected inside the loop -- confirm.
 */
498 aoecmd_cfg(ushort aoemajor, unsigned char aoeminor)
501 struct aoe_cfghdr *ch;
502 struct sk_buff *skb, *sl;
503 struct net_device *ifp;
507 read_lock(&dev_base_lock);
508 for (ifp = dev_base; ifp; dev_put(ifp), ifp = ifp->next) {
510 if (!is_aoe_netif(ifp))
513 skb = new_skb(ifp, sizeof *h + sizeof *ch);
515 printk(KERN_INFO "aoe: aoecmd_cfg: skb alloc failure\n");
518 h = (struct aoe_hdr *) skb->mac.raw;
519 memset(h, 0, sizeof *h + sizeof *ch);
/* destination ff:ff:ff:ff:ff:ff */
521 memset(h->dst, 0xff, sizeof h->dst);
522 memcpy(h->src, ifp->dev_addr, sizeof h->src);
523 h->type = __constant_cpu_to_be16(ETH_P_AOE);
525 h->major = cpu_to_be16(aoemajor);
532 read_unlock(&dev_base_lock);
538 * Since we only call this in one place (and it only prepares one frame)
539 * we just return the skb. Usually we'd chain it up to the aoedev sendq.
/*
 * aoecmd_ata_id - prepare an ATA identify command for device d.
 * A free frame is taken with getframe(d, FREETAG); failure to get one is
 * logged.  The AoE and ATA headers are zeroed and filled in at the start of
 * f->data, d->rttavg is reset to MAXTIMER, and the command is WIN_IDENTIFY.
 * After the skb is built, DEVFL_TKILL is cleared and the device timer is
 * armed to call rexmit_timer TIMERTICK jiffies ahead.
 */
541 static struct sk_buff *
542 aoecmd_ata_id(struct aoedev *d)
545 struct aoe_atahdr *ah;
549 f = getframe(d, FREETAG);
551 printk(KERN_CRIT "aoe: aoecmd_ata_id: can't get a frame. "
552 "This shouldn't happen.\n");
556 /* initialize the headers & frame */
557 h = (struct aoe_hdr *) f->data;
558 ah = (struct aoe_atahdr *) (h+1);
559 f->ndata = sizeof *h + sizeof *ah;
560 memset(h, 0, f->ndata);
561 f->tag = aoehdr_atainit(d, h);
565 /* this message initializes the device, so we reset the rttavg */
566 d->rttavg = MAXTIMER;
568 /* set up ata header */
570 ah->cmdstat = WIN_IDENTIFY;
573 skb = skb_prepare(d, f);
575 /* we now want to start the rexmit tracking */
576 d->flags &= ~DEVFL_TKILL;
/* the timer callback receives d back as its unsigned long argument */
577 d->timer.data = (ulong) d;
578 d->timer.function = rexmit_timer;
579 d->timer.expires = jiffies + TIMERTICK;
580 add_timer(&d->timer);
/*
 * aoecmd_cfg_rsp - handle a received AoE config response.
 * @skb: received frame; the AoE header is at skb->mac.raw and the config
 *       header follows it.
 *
 * A major address of 0xfff triggers a warning about shelf dip switches.
 * The system minor is derived with SYSMINOR(); a value whose partition
 * range would exceed MINORMASK is reported as too large.  The advertised
 * buffer count is compared against MAXFRAMES (8).  aoedev_set() is called
 * with the sender's address and receiving interface; its failure is logged.
 * Under d->lock, a device already flagged DEVFL_UP or DEVFL_CLOSEWAIT is
 * left alone; otherwise the firmware version is recorded and an identify
 * command is prepared with aoecmd_ata_id().
 * NOTE(review): bufcnt is presumably clamped to MAXFRAMES, and the skb in
 * sl presumably transmitted after the unlock -- confirm.
 */
586 aoecmd_cfg_rsp(struct sk_buff *skb)
590 struct aoe_cfghdr *ch;
591 ulong flags, sysminor, aoemajor;
594 enum { MAXFRAMES = 8 };
596 h = (struct aoe_hdr *) skb->mac.raw;
597 ch = (struct aoe_cfghdr *) (h+1);
600 * Enough people have their dip switches set backwards to
601 * warrant a loud message for this special case.
603 aoemajor = be16_to_cpu(h->major);
604 if (aoemajor == 0xfff) {
605 printk(KERN_CRIT "aoe: aoecmd_cfg_rsp: Warning: shelf "
606 "address is all ones. Check shelf dip switches\n");
610 sysminor = SYSMINOR(aoemajor, h->minor);
611 if (sysminor * AOE_PARTITIONS + AOE_PARTITIONS > MINORMASK) {
613 "aoe: e%ld.%d: minor number too large\n",
614 aoemajor, (int) h->minor);
618 bufcnt = be16_to_cpu(ch->bufcnt);
619 if (bufcnt > MAXFRAMES) /* keep it reasonable */
622 d = aoedev_set(sysminor, h->src, skb->dev, bufcnt);
624 printk(KERN_INFO "aoe: aoecmd_cfg_rsp: device set failure\n");
628 spin_lock_irqsave(&d->lock, flags);
630 if (d->flags & (DEVFL_UP | DEVFL_CLOSEWAIT)) {
631 spin_unlock_irqrestore(&d->lock, flags);
635 d->fw_ver = be16_to_cpu(ch->fwver);
637 /* we get here only if the device is new */
638 sl = aoecmd_ata_id(d);
640 spin_unlock_irqrestore(&d->lock, flags);