/* Last available index we saw. */
u16 last_avail_idx;
+ /* How many are used since we sent last irq? */
+ unsigned int pending_used;
+
/* Eventfd where Guest notifications arrive. */
int eventfd;
* threads and so we need to make sure that changes visible to the Guest happen
* in precise order. */
#define wmb() __asm__ __volatile__("" : : : "memory")
+/* A full barrier: unlike wmb() above, which only stops the compiler from
+ * reordering, this also stops the CPU from letting a later load pass an
+ * earlier store.  wait_for_vq_desc() relies on that when it clears
+ * VRING_USED_F_NO_NOTIFY and then re-reads avail->idx. */
+#define mb() __sync_synchronize()
/* Convert an iovec element to the given type.
*
/* Each buffer in the virtqueues is actually a chain of descriptors. This
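- * function returns the next descriptor in the chain, or vq->vring.num if we're
- * at the end. */
+ * function returns the next descriptor in the chain, or max (the size of the
+ * descriptor table we're walking) if we're at the end. */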
-static unsigned next_desc(struct virtqueue *vq, unsigned int i)
+static unsigned next_desc(struct vring_desc *desc,
+ unsigned int i, unsigned int max)
{
+ /* desc is the descriptor table being walked, i the current index into
+ * it, and max the number of entries in that table. */
unsigned int next;
/* If this descriptor says it doesn't chain, we're done. */
- if (!(vq->vring.desc[i].flags & VRING_DESC_F_NEXT))
- return vq->vring.num;
+ if (!(desc[i].flags & VRING_DESC_F_NEXT))
+ return max;
/* Check they're not leading us off end of descriptors. */
- next = vq->vring.desc[i].next;
+ next = desc[i].next;
/* Make sure compiler knows to grab that: we don't want it changing! */
wmb();
- if (next >= vq->vring.num)
+ if (next >= max)
errx(1, "Desc next is %u", next);
return next;
}
+/* Interrupt the Guest to say buffers on this virtqueue have been used.
+ * Nothing is sent if no buffer was used since the last call, or if the Guest
+ * asked for no interrupts while it still has buffers outstanding for us. */
+static void trigger_irq(struct virtqueue *vq)
+{
+	unsigned long irq_request[] = { LHREQ_IRQ, vq->config.irq };
+
+	/* Nothing used since the last interrupt: nothing to report. */
+	if (vq->pending_used == 0)
+		return;
+	vq->pending_used = 0;
+
+	/* The Guest may suppress interrupts, but we still send one when we
+	 * have caught up with everything it made available. */
+	if (vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT) {
+		if (lg_last_avail(vq) != vq->vring.avail->idx)
+			return;
+	}
+
+	/* Ask for the interrupt to be delivered; a zero return is success. */
+	if (write(lguest_fd, irq_request, sizeof(irq_request)) == 0)
+		return;
+	err(1, "Triggering irq %i", vq->config.irq);
+}
+
/* This looks in the virtqueue and for the first available buffer, and converts
* it to an iovec for convenient access. Since descriptors consist of some
* number of output then some number of input descriptors, it's actually two
struct iovec iov[],
unsigned int *out_num, unsigned int *in_num)
{
- unsigned int i, head;
+ unsigned int i, head, max;
+ struct vring_desc *desc;
u16 last_avail = lg_last_avail(vq);
while (last_avail == vq->vring.avail->idx) {
u64 event;
+ /* OK, tell Guest about progress up to now. */
+ trigger_irq(vq);
+
+ /* OK, now we need to know about added descriptors. */
+ vq->vring.used->flags &= ~VRING_USED_F_NO_NOTIFY;
+
+ /* They could have slipped one in as we were doing that: make
+ * sure it's written, then check again. */
+ mb();
+ if (last_avail != vq->vring.avail->idx) {
+ vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY;
+ break;
+ }
+
/* Nothing new? Wait for eventfd to tell us they refilled. */
if (read(vq->eventfd, &event, sizeof(event)) != sizeof(event))
errx(1, "Event read failed?");
+
+ /* We don't need to be notified again. */
+ vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY;
}
/* Check it isn't doing very strange things with descriptor numbers. */
/* When we start there are none of either input nor output. */
*out_num = *in_num = 0;
+ max = vq->vring.num;
+ desc = vq->vring.desc;
i = head;
+
+ /* If this is an indirect entry, then this buffer contains a descriptor
+ * table which we handle as if it's any normal descriptor chain. */
+ if (desc[i].flags & VRING_DESC_F_INDIRECT) {
+ if (desc[i].len % sizeof(struct vring_desc))
+ errx(1, "Invalid size for indirect buffer table");
+
+ max = desc[i].len / sizeof(struct vring_desc);
+ desc = check_pointer(desc[i].addr, desc[i].len);
+ i = 0;
+ }
+
do {
/* Grab the first descriptor, and check it's OK. */
- iov[*out_num + *in_num].iov_len = vq->vring.desc[i].len;
+ iov[*out_num + *in_num].iov_len = desc[i].len;
iov[*out_num + *in_num].iov_base
- = check_pointer(vq->vring.desc[i].addr,
- vq->vring.desc[i].len);
+ = check_pointer(desc[i].addr, desc[i].len);
/* If this is an input descriptor, increment that count. */
- if (vq->vring.desc[i].flags & VRING_DESC_F_WRITE)
+ if (desc[i].flags & VRING_DESC_F_WRITE)
(*in_num)++;
else {
/* If it's an output descriptor, they're all supposed
}
/* If we've got too many, that implies a descriptor loop. */
- if (*out_num + *in_num > vq->vring.num)
+ if (*out_num + *in_num > max)
errx(1, "Looped descriptor");
- } while ((i = next_desc(vq, i)) != vq->vring.num);
+ } while ((i = next_desc(desc, i, max)) != max);
return head;
}
/* Make sure buffer is written before we update index. */
wmb();
vq->vring.used->idx++;
-}
-
-/* This actually sends the interrupt for this virtqueue */
-static void trigger_irq(struct virtqueue *vq)
-{
- unsigned long buf[] = { LHREQ_IRQ, vq->config.irq };
-
- /* If they don't want an interrupt, don't send one, unless empty. */
- if ((vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT)
- && lg_last_avail(vq) != vq->vring.avail->idx)
- return;
-
- /* Send the Guest an interrupt tell them we used something up. */
- if (write(lguest_fd, buf, sizeof(buf)) != 0)
- err(1, "Triggering irq %i", vq->config.irq);
+ vq->pending_used++;
}
/* And here's the combo meal deal. Supersize me! */
err(1, "Write to stdout gave %i", len);
iov_consume(iov, out, len);
}
- add_used_and_trigger(vq, head, 0);
+ add_used(vq, head, 0);
}
/*
errx(1, "Input buffers in net output queue?");
if (writev(net_info->tunfd, iov, out) < 0)
errx(1, "Write to tun failed?");
- add_used_and_trigger(vq, head, 0);
+ add_used(vq, head, 0);
+}
+
+/* Would a read from this file descriptor block right now?  The select() call
+ * uses a zero timeout, so asking the question never sleeps. */
+static bool will_block(int fd)
+{
+	struct timeval no_wait = { 0, 0 };
+	fd_set readable;
+	int nready;
+
+	FD_ZERO(&readable);
+	FD_SET(fd, &readable);
+	nready = select(fd+1, &readable, NULL, NULL, &no_wait);
+
+	/* Anything other than exactly one ready descriptor (a select()
+	 * failure included) is reported as "would block". */
+	return nready != 1;
}
/* This is where we handle packets coming in from the tun device to our
head = wait_for_vq_desc(vq, iov, &out, &in);
if (out)
errx(1, "Output buffers in net input queue?");
+
+ /* Deliver interrupt now, since we're about to sleep. */
+ if (vq->pending_used && will_block(net_info->tunfd))
+ trigger_irq(vq);
+
len = readv(net_info->tunfd, iov, in);
if (len <= 0)
err(1, "Failed to read from tun.");
- add_used_and_trigger(vq, head, len);
+ add_used(vq, head, len);
}
/* This is the helper to create threads. */
add_feature(dev, VIRTIO_NET_F_HOST_TSO4);
add_feature(dev, VIRTIO_NET_F_HOST_TSO6);
add_feature(dev, VIRTIO_NET_F_HOST_ECN);
+ /* We handle indirect ring entries */
+ add_feature(dev, VIRTIO_RING_F_INDIRECT_DESC);
set_config(dev, sizeof(conf), &conf);
/* We don't need the socket any more; setup is done. */
if (out->type & VIRTIO_BLK_T_BARRIER)
fdatasync(vblk->fd);
- add_used_and_trigger(vq, head, wlen);
+ add_used(vq, head, wlen);
}
/*L:198 This actually sets up a virtual block device. */
}
/* Tell the Guest about the new input. */
- add_used_and_trigger(vq, head, totlen);
+ add_used(vq, head, totlen);
}
/* And this creates a "hardware" random number device for the Guest. */