1 /* Copyright (c) 2004 Coraid, Inc. See COPYING for GPL terms. */
4 * Filesystem request handling methods
7 #include <linux/hdreg.h>
8 #include <linux/blkdev.h>
9 #include <linux/skbuff.h>
10 #include <linux/netdevice.h>
/*
 * Timer constants, all derived from HZ except MAXWAIT.
 * TIMERTICK is the interval added to jiffies whenever d->timer is armed.
 * MAXTIMER is the upper bound tested in calc_rttavg and the value that
 * d->rttavg is reset to.
 * NOTE(review): MINTIMER is presumably the matching lower bound; its use
 * is not visible in this excerpt -- confirm.
 */
13 #define TIMERTICK (HZ / 10)
14 #define MINTIMER (2 * TIMERTICK)
15 #define MAXTIMER (HZ << 1)
16 #define MAXWAIT (60 * 3) /* After MAXWAIT seconds, give up and fail dev */
/*
 * new_skb - set up a fresh skb for an outgoing AoE frame.
 *
 * if_dev: interface the frame is meant for (its use is not visible here).
 * len:    number of bytes requested from alloc_skb.
 *
 * Visible steps: a GFP_ATOMIC allocation of len bytes, the network and
 * MAC header pointers both aimed at skb->data, the protocol set to
 * ETH_P_AOE, the list links cleared and ip_summed set to CHECKSUM_NONE.
 *
 * NOTE(review): the local declarations, the allocation-failure check and
 * the return statement are not visible in this excerpt.
 */
18 static struct sk_buff *
19 new_skb(struct net_device *if_dev, ulong len)
23 skb = alloc_skb(len, GFP_ATOMIC);
25 skb->nh.raw = skb->mac.raw = skb->data;
27 skb->protocol = __constant_htons(ETH_P_AOE);
30 skb->next = skb->prev = NULL;
32 /* tell the network layer not to perform IP checksums
33 * or to get the NIC to do it
35 skb->ip_summed = CHECKSUM_NONE;
/*
 * skb_prepare - build the skb that carries frame f for device d.
 *
 * The skb is requested through new_skb on d->ifp with room for the
 * frame bytes (f->ndata) plus any write payload (f->writedatalen).
 * f->ndata bytes are copied from f->data.  When f->writedatalen is
 * nonzero, p is advanced past the AoE and ATA headers and the payload is
 * copied from f->bufaddr.
 *
 * NOTE(review): the initialisation of p, the check guarding the
 * allocation-failure message and the return value are not visible in
 * this excerpt.
 */
40 static struct sk_buff *
41 skb_prepare(struct aoedev *d, struct frame *f)
46 skb = new_skb(d->ifp, f->ndata + f->writedatalen);
48 printk(KERN_INFO "aoe: skb_prepare: failure to allocate skb\n");
53 memcpy(p, f->data, f->ndata);
55 if (f->writedatalen) {
56 p += sizeof(struct aoe_hdr) + sizeof(struct aoe_atahdr);
57 memcpy(p, f->bufaddr, f->writedatalen);
/*
 * getframe - look up a frame of device d by tag.
 *
 * Callers in this file pass FREETAG (aoecmd_work, aoecmd_ata_id) or the
 * tag taken from a received AoE header (aoecmd_ata_rsp).
 *
 * NOTE(review): the body and return type are not visible in this
 * excerpt; callers treat the result as a struct frame pointer.
 */
64 getframe(struct aoedev *d, int tag)
/*
 * newtag - produce the next tag for device d.
 *
 * The return expression pre-increments d->lasttag, keeps its low 15 bits
 * (mask 0x7fff), shifts them left by 16 and ORs them into n.  The counter
 * therefore occupies bits 16..30 and bit 31 is left clear.
 *
 * NOTE(review): the initial value of n is not visible in this excerpt;
 * going by the notes that follow it is expected to hold the transmit
 * tick in the low 16 bits -- confirm.
 */
77 * Leave the top bit clear so we have tagspace for userland.
78 * The bottom 16 bits are the xmit tick for rexmit/rttavg processing.
79 * This driver reserves tag -1 to mean "unused frame."
82 newtag(struct aoedev *d)
87 return n |= (++d->lasttag & 0x7fff) << 16;
/*
 * aoehdr_atainit - fill in the addressing fields of AoE header h for
 * device d.
 *
 * Sets the source MAC from d->ifp->dev_addr, the destination MAC from
 * d->addr, the ethertype to ETH_P_AOE, the major (stored big-endian,
 * 16 bit) and minor address, and a fresh tag from newtag stored
 * big-endian, 32 bit.
 *
 * NOTE(review): the return statement is not visible in this excerpt.
 * Callers assign the result to f->tag, so presumably host_tag is
 * returned -- confirm.
 */
91 aoehdr_atainit(struct aoedev *d, struct aoe_hdr *h)
93 u32 host_tag = newtag(d);
95 memcpy(h->src, d->ifp->dev_addr, sizeof h->src);
96 memcpy(h->dst, d->addr, sizeof h->dst);
97 h->type = __constant_cpu_to_be16(ETH_P_AOE);
99 h->major = cpu_to_be16(d->aoemajor);
100 h->minor = d->aoeminor;
102 h->tag = cpu_to_be32(host_tag);
/*
 * aoecmd_ata_rw - build one ATA read or write command in frame f for
 * device d and queue the resulting skb.
 *
 * The transfer covers bcnt bytes starting at buf->sector, where bcnt
 * starts as the residue of the current bio_vec (buf->bv_resid) and is
 * compared against MAXATADATA.  The frame headers are zeroed and
 * initialised, the ATA header receives the sector count and LBA bytes,
 * and the bookkeeping in buf is advanced by bcnt.
 *
 * NOTE(review): where buf comes from, the action taken when bcnt exceeds
 * MAXATADATA, the lba0 assignment, the values of writebit and extbit,
 * the body of the resid == 0 branch and the final list-head update are
 * not visible in this excerpt.
 */
108 aoecmd_ata_rw(struct aoedev *d, struct frame *f)
111 struct aoe_atahdr *ah;
115 register sector_t sector;
116 char writebit, extbit;
123 sector = buf->sector;
124 bcnt = buf->bv_resid;
125 if (bcnt > MAXATADATA)
128 /* initialize the headers & frame */
129 h = (struct aoe_hdr *) f->data;
130 ah = (struct aoe_atahdr *) (h+1);
131 f->ndata = sizeof *h + sizeof *ah;
132 memset(h, 0, f->ndata);
133 f->tag = aoehdr_atainit(d, h);
136 f->bufaddr = buf->bufaddr;
138 /* set up ata header */
/* byte count converted to a count of 512-byte sectors */
139 ah->scnt = bcnt >> 9;
/* each shift drops the 8 bits just stored and exposes the next LBA byte */
141 ah->lba1 = sector >>= 8;
142 ah->lba2 = sector >>= 8;
143 ah->lba3 = sector >>= 8;
144 if (d->flags & DEVFL_EXT) {
145 ah->aflags |= AOEAFL_EXT;
146 ah->lba4 = sector >>= 8;
147 ah->lba5 = sector >>= 8;
151 ah->lba3 |= 0xe0; /* LBA bit + obsolete 0xa0 */
154 if (bio_data_dir(buf->bio) == WRITE) {
155 ah->aflags |= AOEAFL_WRITE;
/* a nonzero writedatalen makes skb_prepare append the payload */
156 f->writedatalen = bcnt;
162 ah->cmdstat = WIN_READ | writebit | extbit;
164 /* mark all tracking fields and load out */
165 buf->nframesout += 1;
166 buf->bufaddr += bcnt;
167 buf->bv_resid -= bcnt;
168 /* printk(KERN_INFO "aoe: bv_resid=%ld\n", buf->bv_resid); */
170 buf->sector += bcnt >> 9;
171 if (buf->resid == 0) {
173 } else if (buf->bv_resid == 0) {
/* current bio_vec is used up: reload the residue and address from buf->bv */
175 buf->bv_resid = buf->bv->bv_len;
176 buf->bufaddr = page_address(buf->bv->bv_page) + buf->bv->bv_offset;
179 skb = skb_prepare(d, f);
/* link the new skb in front of the current d->skblist */
181 skb->next = d->skblist;
/*
 * aoecmd_work - start work on queued buffers of device d.
 *
 * Visible steps: a free frame is requested with getframe(d, FREETAG);
 * when d->inprocess is NULL the queue d->bufq is checked and, if it is
 * not empty, its first struct buf is located with container_of and
 * unlinked from the list.
 *
 * NOTE(review): the loop structure, what happens when no frame is free
 * or the queue is empty, and the call that builds the command are not
 * visible in this excerpt.
 */
186 /* enters with d->lock held */
188 aoecmd_work(struct aoedev *d)
193 f = getframe(d, FREETAG);
196 if (d->inprocess == NULL) {
197 if (list_empty(&d->bufq))
199 buf = container_of(d->bufq.next, struct buf, bufs);
200 list_del(d->bufq.next);
201 /*printk(KERN_INFO "aoecmd_work: bi_size=%ld\n", buf->bio->bi_size); */
/*
 * rexmit - retransmit frame f of device d under a new tag.
 *
 * Formats a diagnostic line holding the device address, the old tag, the
 * current jiffies value and the new tag n, stores n big-endian in the
 * tag field of the header kept in f->data, rebuilds the skb with
 * skb_prepare and links it in front of d->skblist.
 *
 * NOTE(review): the origin of n (presumably newtag), the first format
 * argument, what is done with the formatted buffer, and any update of
 * f->tag are not visible in this excerpt.
 */
209 rexmit(struct aoedev *d, struct frame *f)
218 snprintf(buf, sizeof buf,
219 "%15s e%ld.%ld oldtag=%08x@%08lx newtag=%08x\n",
221 d->aoemajor, d->aoeminor, f->tag, jiffies, n);
224 h = (struct aoe_hdr *) f->data;
226 h->tag = cpu_to_be32(n);
228 skb = skb_prepare(d, f);
230 skb->next = d->skblist;
/*
 * NOTE(review): the enclosing function header is not visible in this
 * excerpt; by position and call sites this is presumably tsince.
 * The statement keeps only the low 16 bits of jiffies, the same width
 * as the transmit tick that the tag layout keeps in its bottom 16 bits.
 */
240 n = jiffies & 0xffff;
/*
 * rexmit_timer - timer callback for one device.
 *
 * vp is the struct aoedev pointer passed as an unsigned long.
 *
 * With d->lock held and interrupts saved: when DEVFL_TKILL is set the
 * lock is dropped at the tdie label.  A frame whose tag is not FREETAG
 * and whose age (tsince of its tag) has reached timeout gets timeout
 * added to f->waited, and the sum is compared against MAXWAIT.
 * d->rttavg is set to MAXTIMER on a path whose condition is not visible.
 * The timer is re-armed TIMERTICK jiffies ahead before the lock is
 * released.
 *
 * NOTE(review): the initial value of timeout, the frame loop, any
 * scaling of n before the MAXWAIT comparison and the device-failure
 * handling are not visible in this excerpt.
 */
248 rexmit_timer(ulong vp)
253 register long timeout;
256 d = (struct aoedev *) vp;
259 /* timeout is always ~150% of the moving average */
261 timeout += timeout >> 1;
263 spin_lock_irqsave(&d->lock, flags);
265 if (d->flags & DEVFL_TKILL) {
266 tdie: spin_unlock_irqrestore(&d->lock, flags);
272 if (f->tag != FREETAG && tsince(f->tag) >= timeout) {
273 n = f->waited += timeout;
275 if (n > MAXWAIT) { /* waited too long. device failure. */
288 d->rttavg = MAXTIMER;
291 d->timer.expires = jiffies + TIMERTICK;
292 add_timer(&d->timer);
294 spin_unlock_irqrestore(&d->lock, flags);
/*
 * ataid_complete - digest ATA identify data for device d.
 *
 * id is a byte buffer; 16-bit word N of the identify data is read at
 * byte offset N << 1, little-endian.
 *
 * Bit 10 of (word 83 | word 86) selects LBA48.  With LBA48 the flag
 * DEVFL_EXT is set, the sector count is the 64-bit value at word 100 and
 * the cylinder count is derived as ssize / (255 * 63).  Otherwise
 * DEVFL_EXT is cleared, the sector count is the 32-bit value at word 60
 * and cylinders, heads and sectors come from words 54, 55 and 56.
 *
 * The sector count is then stored in d->gd->capacity and DEVFL_UP is
 * set.  If DEVFL_WORKON is already set a message is logged; the visible
 * code also initialises d->work to run aoeblk_gdalloc with d, schedules
 * it and sets DEVFL_WORKON.
 *
 * NOTE(review): the heads and sectors assignments of the LBA48 branch,
 * the condition guarding the d->gd->capacity store and the early returns
 * are not visible in this excerpt.
 */
300 ataid_complete(struct aoedev *d, unsigned char *id)
305 /* word 83: command set supported */
306 n = le16_to_cpup((__le16 *) &id[83<<1]);
308 /* word 86: command set/feature enabled */
309 n |= le16_to_cpup((__le16 *) &id[86<<1]);
311 if (n & (1<<10)) { /* bit 10: LBA 48 */
312 d->flags |= DEVFL_EXT;
314 /* word 100: number lba48 sectors */
315 ssize = le64_to_cpup((__le64 *) &id[100<<1]);
317 /* set as in ide-disk.c:init_idedisk_capacity */
318 d->geo.cylinders = ssize;
319 d->geo.cylinders /= (255 * 63);
323 d->flags &= ~DEVFL_EXT;
325 /* number lba28 sectors */
326 ssize = le32_to_cpup((__le32 *) &id[60<<1]);
328 /* NOTE: obsolete in ATA 6 */
329 d->geo.cylinders = le16_to_cpup((__le16 *) &id[54<<1]);
330 d->geo.heads = le16_to_cpup((__le16 *) &id[55<<1]);
331 d->geo.sectors = le16_to_cpup((__le16 *) &id[56<<1]);
336 d->gd->capacity = ssize;
337 d->flags |= DEVFL_UP;
340 if (d->flags & DEVFL_WORKON) {
341 printk(KERN_INFO "aoe: ataid_complete: can't schedule work, it's already on! "
342 "(This really shouldn't happen).\n");
345 INIT_WORK(&d->work, aoeblk_gdalloc, d);
346 schedule_work(&d->work);
347 d->flags |= DEVFL_WORKON;
/*
 * calc_rttavg - fold the round-trip sample rtt into the running average
 * kept for device d.
 *
 * NOTE(review): only the upper-bound test against MAXTIMER and the gain
 * remark are visible in this excerpt; the lower-bound test and the
 * update of d->rttavg itself are not shown.
 */
351 calc_rttavg(struct aoedev *d, int rtt)
358 else if (n > MAXTIMER)
361 /* g == .25; cf. Congestion Avoidance and Control, Jacobson & Karels; 1988 */
/*
 * aoecmd_ata_rsp - handle a received ATA response carried in skb.
 *
 * The device is looked up from the major and minor address in the
 * received header; an unknown device only produces a diagnostic string.
 * With d->lock held the outstanding frame is found by the received tag,
 * the round-trip time since that tag was issued is fed to calc_rttavg,
 * and the response ATA header (ahin) is compared with the one that was
 * sent (ahout, kept in f->data).
 *
 * An error status marks the buffer with BUFFL_FAIL.  Otherwise the
 * action depends on the command that was sent: read data is copied into
 * f->bufaddr after a length check, and identify data is handed to
 * ataid_complete after a 512-byte length check.  When the last frame of
 * a buffer has come back and nothing remains to be sent, disk statistics
 * are updated, the bio is completed with 0 or -EIO and the buffer is
 * returned to d->bufpool.
 *
 * NOTE(review): the case labels of the switch, the handling of a missing
 * frame beyond the diagnostic, the frame release and the transmit of any
 * follow-up work are not visible in this excerpt.
 */
367 aoecmd_ata_rsp(struct sk_buff *skb)
371 struct aoe_atahdr *ahin, *ahout;
380 hin = (struct aoe_hdr *) skb->mac.raw;
381 aoemajor = be16_to_cpu(hin->major);
382 d = aoedev_by_aoeaddr(aoemajor, hin->minor);
384 snprintf(ebuf, sizeof ebuf, "aoecmd_ata_rsp: ata response "
385 "for unknown device %d.%d\n",
386 aoemajor, hin->minor);
391 spin_lock_irqsave(&d->lock, flags);
393 f = getframe(d, be32_to_cpu(hin->tag));
395 spin_unlock_irqrestore(&d->lock, flags);
396 snprintf(ebuf, sizeof ebuf,
397 "%15s e%d.%d tag=%08x@%08lx\n",
399 be16_to_cpu(hin->major),
401 be32_to_cpu(hin->tag),
407 calc_rttavg(d, tsince(f->tag));
/* ahin: ATA header of the response; ahout: ATA header of the request we sent */
409 ahin = (struct aoe_atahdr *) (hin+1);
410 ahout = (struct aoe_atahdr *) (f->data + sizeof(struct aoe_hdr));
413 if (ahin->cmdstat & 0xa9) { /* these bits cleared on success */
414 printk(KERN_CRIT "aoe: aoecmd_ata_rsp: ata error cmd=%2.2Xh "
415 "stat=%2.2Xh from e%ld.%ld\n",
416 ahout->cmdstat, ahin->cmdstat,
417 d->aoemajor, d->aoeminor);
419 buf->flags |= BUFFL_FAIL;
421 switch (ahout->cmdstat) {
/* n: bytes requested, i.e. the sector count we sent times 512 */
424 n = ahout->scnt << 9;
/* payload length is skb->len minus the AoE and ATA headers */
425 if (skb->len - sizeof *hin - sizeof *ahin < n) {
426 printk(KERN_CRIT "aoe: aoecmd_ata_rsp: runt "
427 "ata data size in read. skb->len=%d\n",
429 /* fail frame f? just returning will rexmit. */
430 spin_unlock_irqrestore(&d->lock, flags);
433 memcpy(f->bufaddr, ahin+1, n);
438 if (skb->len - sizeof *hin - sizeof *ahin < 512) {
439 printk(KERN_INFO "aoe: aoecmd_ata_rsp: runt data size "
440 "in ataid. skb->len=%d\n", skb->len);
441 spin_unlock_irqrestore(&d->lock, flags);
444 ataid_complete(d, (char *) (ahin+1));
445 /* d->flags |= DEVFL_WC_UPDATE; */
448 printk(KERN_INFO "aoe: aoecmd_ata_rsp: unrecognized "
449 "outbound ata command %2.2Xh for %d.%d\n",
451 be16_to_cpu(hin->major),
457 buf->nframesout -= 1;
/* last outstanding frame and nothing left to send: finish the bio */
458 if (buf->nframesout == 0 && buf->resid == 0) {
459 unsigned long duration = jiffies - buf->start_time;
460 unsigned long n_sect = buf->bio->bi_size >> 9;
461 struct gendisk *disk = d->gd;
463 if (bio_data_dir(buf->bio) == WRITE) {
464 disk_stat_inc(disk, writes);
465 disk_stat_add(disk, write_ticks, duration);
466 disk_stat_add(disk, write_sectors, n_sect);
468 disk_stat_inc(disk, reads);
469 disk_stat_add(disk, read_ticks, duration);
470 disk_stat_add(disk, read_sectors, n_sect);
472 disk_stat_add(disk, io_ticks, duration);
473 n = (buf->flags & BUFFL_FAIL) ? -EIO : 0;
474 bio_endio(buf->bio, buf->bio->bi_size, n);
475 mempool_free(buf, d->bufpool);
487 spin_unlock_irqrestore(&d->lock, flags);
/*
 * aoecmd_cfg - build a config query for the given shelf address on each
 * AoE-enabled network interface.
 *
 * The interface list starting at dev_base is walked under the read side
 * of dev_base_lock.  Interfaces rejected by is_aoe_netif are skipped.
 * For the others an skb sized for an AoE header plus a config header is
 * allocated and zeroed, the destination MAC is set to all 0xff bytes,
 * the source MAC to the interface address, the ethertype to ETH_P_AOE
 * and the major address to aoemajor in big-endian form.
 *
 * The loop step calls dev_put on the interface it is leaving.
 * NOTE(review): the matching reference grab, the minor address and
 * command fields, the chaining onto sl and the transmit are not visible
 * in this excerpt.
 */
493 aoecmd_cfg(ushort aoemajor, unsigned char aoeminor)
496 struct aoe_cfghdr *ch;
497 struct sk_buff *skb, *sl;
498 struct net_device *ifp;
502 read_lock(&dev_base_lock);
503 for (ifp = dev_base; ifp; dev_put(ifp), ifp = ifp->next) {
505 if (!is_aoe_netif(ifp))
508 skb = new_skb(ifp, sizeof *h + sizeof *ch);
510 printk(KERN_INFO "aoe: aoecmd_cfg: skb alloc failure\n");
513 h = (struct aoe_hdr *) skb->mac.raw;
514 memset(h, 0, sizeof *h + sizeof *ch);
516 memset(h->dst, 0xff, sizeof h->dst);
517 memcpy(h->src, ifp->dev_addr, sizeof h->src);
518 h->type = __constant_cpu_to_be16(ETH_P_AOE);
520 h->major = cpu_to_be16(aoemajor);
527 read_unlock(&dev_base_lock);
/*
 * aoecmd_ata_id - build the ATA identify command for device d.
 *
 * Takes a free frame (getframe with FREETAG), zeroes and initialises the
 * AoE and ATA headers in it, resets d->rttavg to MAXTIMER, sets the ATA
 * command to WIN_IDENTIFY and builds the skb with skb_prepare.  It then
 * clears DEVFL_TKILL and arms d->timer to call rexmit_timer with d as
 * its argument, TIMERTICK jiffies from now.
 *
 * NOTE(review): the remaining ATA header fields, the check around the
 * missing-frame message and the return statement are not visible in
 * this excerpt; the declared return type is struct sk_buff pointer.
 */
533 * Since we only call this in one place (and it only prepares one frame)
534 * we just return the skb. Usually we'd chain it up to the d->skblist.
536 static struct sk_buff *
537 aoecmd_ata_id(struct aoedev *d)
540 struct aoe_atahdr *ah;
544 f = getframe(d, FREETAG);
546 printk(KERN_CRIT "aoe: aoecmd_ata_id: can't get a frame. "
547 "This shouldn't happen.\n");
551 /* initialize the headers & frame */
552 h = (struct aoe_hdr *) f->data;
553 ah = (struct aoe_atahdr *) (h+1);
554 f->ndata = sizeof *h + sizeof *ah;
555 memset(h, 0, f->ndata);
556 f->tag = aoehdr_atainit(d, h);
560 /* this message initializes the device, so we reset the rttavg */
561 d->rttavg = MAXTIMER;
563 /* set up ata header */
565 ah->cmdstat = WIN_IDENTIFY;
568 skb = skb_prepare(d, f);
570 /* we now want to start the rexmit tracking */
571 d->flags &= ~DEVFL_TKILL;
572 d->timer.data = (ulong) d;
573 d->timer.function = rexmit_timer;
574 d->timer.expires = jiffies + TIMERTICK;
575 add_timer(&d->timer);
/*
 * aoecmd_cfg_rsp - handle a received config response carried in skb.
 *
 * Reads the major address from the AoE header and logs a loud warning
 * when it equals 0xfff.  The system minor is computed with SYSMINOR and
 * checked against MINORMASK after scaling by AOE_PARTITIONS.  The
 * advertised buffer count is read from the config header and compared
 * with MAXFRAMES (8).  The device is then registered or refreshed with
 * aoedev_set using the sender MAC and receiving interface.
 *
 * With d->lock held: a device already flagged DEVFL_UP or
 * DEVFL_CLOSEWAIT only has the lock released; otherwise the firmware
 * version is recorded and an identify command is built with
 * aoecmd_ata_id before the lock is released.
 *
 * NOTE(review): the returns after each failure, the statement executed
 * when bufcnt exceeds MAXFRAMES and everything after the final unlock
 * (including what is done with sl) are not visible in this excerpt.
 */
581 aoecmd_cfg_rsp(struct sk_buff *skb)
585 struct aoe_cfghdr *ch;
586 ulong flags, sysminor, aoemajor;
589 enum { MAXFRAMES = 8 };
591 h = (struct aoe_hdr *) skb->mac.raw;
592 ch = (struct aoe_cfghdr *) (h+1);
595 * Enough people have their dip switches set backwards to
596 * warrant a loud message for this special case.
598 aoemajor = be16_to_cpu(h->major);
599 if (aoemajor == 0xfff) {
600 printk(KERN_CRIT "aoe: aoecmd_cfg_rsp: Warning: shelf "
601 "address is all ones. Check shelf dip switches\n");
605 sysminor = SYSMINOR(aoemajor, h->minor);
606 if (sysminor * AOE_PARTITIONS + AOE_PARTITIONS > MINORMASK) {
608 "aoe: e%ld.%d: minor number too large\n",
609 aoemajor, (int) h->minor);
613 bufcnt = be16_to_cpu(ch->bufcnt);
614 if (bufcnt > MAXFRAMES) /* keep it reasonable */
617 d = aoedev_set(sysminor, h->src, skb->dev, bufcnt);
619 printk(KERN_INFO "aoe: aoecmd_cfg_rsp: device set failure\n");
623 spin_lock_irqsave(&d->lock, flags);
625 if (d->flags & (DEVFL_UP | DEVFL_CLOSEWAIT)) {
626 spin_unlock_irqrestore(&d->lock, flags);
630 d->fw_ver = be16_to_cpu(ch->fwver);
632 /* we get here only if the device is new */
633 sl = aoecmd_ata_id(d);
635 spin_unlock_irqrestore(&d->lock, flags);