firewire: Fix dualbuffer iso receive mode and drop buffer fill mode.
[pandora-kernel.git] / drivers / firewire / fw-ohci.c
index 1b4b53d..17e13d0 100644 (file)
@@ -45,6 +45,7 @@
 #define descriptor_irq_error           (1 << 4)
 #define descriptor_irq_always          (3 << 4)
 #define descriptor_branch_always       (3 << 2)
+#define descriptor_wait                        (3 << 0)
 
 struct descriptor {
        __le16 req_count;
@@ -55,36 +56,58 @@ struct descriptor {
        __le16 transfer_status;
 } __attribute__((aligned(16)));
 
-struct ar_context {
-       struct fw_ohci *ohci;
-       struct descriptor descriptor;
-       __le32 buffer[512];
-       dma_addr_t descriptor_bus;
-       dma_addr_t buffer_bus;
+struct db_descriptor {
+       __le16 first_size;
+       __le16 control;
+       __le16 second_req_count;
+       __le16 first_req_count;
+       __le32 branch_address;
+       __le16 second_res_count;
+       __le16 first_res_count;
+       __le32 reserved0;
+       __le32 first_buffer;
+       __le32 second_buffer;
+       __le32 reserved1;
+} __attribute__((aligned(16)));
 
-       u32 command_ptr;
-       u32 control_set;
-       u32 control_clear;
+#define control_set(regs)      (regs)
+#define control_clear(regs)    ((regs) + 4)
+#define command_ptr(regs)      ((regs) + 12)
+#define context_match(regs)    ((regs) + 16)
 
-       struct tasklet_struct tasklet;
+struct ar_buffer {
+       struct descriptor descriptor;
+       struct ar_buffer *next;
+       __le32 data[0];
 };
 
-struct at_context {
+struct ar_context {
        struct fw_ohci *ohci;
-       dma_addr_t descriptor_bus;
-       dma_addr_t buffer_bus;
+       struct ar_buffer *current_buffer;
+       struct ar_buffer *last_buffer;
+       void *pointer;
+       u32 regs;
+       struct tasklet_struct tasklet;
+};
 
-       struct list_head list;
+struct context;
 
-       struct {
-               struct descriptor more;
-               __le32 header[4];
-               struct descriptor last;
-       } d;
+typedef int (*descriptor_callback_t)(struct context *ctx,
+                                    struct descriptor *d,
+                                    struct descriptor *last);
+struct context {
+       struct fw_ohci *ohci;
+       u32 regs;
 
-       u32 command_ptr;
-       u32 control_set;
-       u32 control_clear;
+       struct descriptor *buffer;
+       dma_addr_t buffer_bus;
+       size_t buffer_size;
+       struct descriptor *head_descriptor;
+       struct descriptor *tail_descriptor;
+       struct descriptor *tail_descriptor_last;
+       struct descriptor *prev_descriptor;
+
+       descriptor_callback_t callback;
 
        struct tasklet_struct tasklet;
 };
@@ -98,18 +121,9 @@ struct at_context {
 
 struct iso_context {
        struct fw_iso_context base;
-       struct tasklet_struct tasklet;
-       u32 control_set;
-       u32 control_clear;
-       u32 command_ptr;
-       u32 context_match;
-
-       struct descriptor *buffer;
-       dma_addr_t buffer_bus;
-       struct descriptor *head_descriptor;
-       struct descriptor *tail_descriptor;
-       struct descriptor *tail_descriptor_last;
-       struct descriptor *prev_descriptor;
+       struct context context;
+       void *header;
+       size_t header_length;
 };
 
 #define CONFIG_ROM_SIZE 1024
@@ -117,12 +131,15 @@ struct iso_context {
 struct fw_ohci {
        struct fw_card card;
 
+       u32 version;
        __iomem char *registers;
        dma_addr_t self_id_bus;
        __le32 *self_id_cpu;
        struct tasklet_struct bus_reset_tasklet;
+       int node_id;
        int generation;
        int request_generation;
+       u32 bus_seconds;
 
        /* Spinlock for accessing fw_ohci data.  Never call out of
         * this driver with this lock held. */
@@ -138,8 +155,8 @@ struct fw_ohci {
 
        struct ar_context ar_request_ctx;
        struct ar_context ar_response_ctx;
-       struct at_context at_request_ctx;
-       struct at_context at_response_ctx;
+       struct context at_request_ctx;
+       struct context at_response_ctx;
 
        u32 it_context_mask;
        struct iso_context *it_context_list;
@@ -152,7 +169,12 @@ static inline struct fw_ohci *fw_ohci(struct fw_card *card)
        return container_of(card, struct fw_ohci, card);
 }
 
-#define CONTEXT_CYCLE_MATCH_ENABLE     0x80000000
+#define IT_CONTEXT_CYCLE_MATCH_ENABLE  0x80000000
+#define IR_CONTEXT_BUFFER_FILL         0x80000000
+#define IR_CONTEXT_ISOCH_HEADER                0x40000000
+#define IR_CONTEXT_CYCLE_MATCH_ENABLE  0x20000000
+#define IR_CONTEXT_MULTI_CHANNEL_MODE  0x10000000
+#define IR_CONTEXT_DUAL_BUFFER_MODE    0x08000000
 
 #define CONTEXT_RUN    0x8000
 #define CONTEXT_WAKE   0x1000
@@ -168,9 +190,10 @@ static inline struct fw_ohci *fw_ohci(struct fw_card *card)
 #define OHCI_LOOP_COUNT                        500
 #define OHCI1394_PCI_HCI_Control       0x40
 #define SELF_ID_BUF_SIZE               0x800
-
-/* FIXME: Move this to linux/pci_ids.h */
-#define PCI_CLASS_SERIAL_FIREWIRE_OHCI 0x0c0010
+#define OHCI_TCODE_PHY_PACKET          0x0e
+#define OHCI_VERSION_1_1               0x010010
+#define ISO_BUFFER_SIZE                        (64 * 1024)
+#define AT_BUFFER_SIZE                 4096
 
 static char ohci_driver_name[] = KBUILD_MODNAME;
 
@@ -213,35 +236,97 @@ ohci_update_phy_reg(struct fw_card *card, int addr,
        return 0;
 }
 
-static void ar_context_run(struct ar_context *ctx)
+static int ar_context_add_page(struct ar_context *ctx)
 {
-       reg_write(ctx->ohci, ctx->command_ptr, ctx->descriptor_bus | 1);
-       reg_write(ctx->ohci, ctx->control_set, CONTEXT_RUN);
+       struct device *dev = ctx->ohci->card.device;
+       struct ar_buffer *ab;
+       dma_addr_t ab_bus;
+       size_t offset;
+
+       ab = (struct ar_buffer *) __get_free_page(GFP_ATOMIC);
+       if (ab == NULL)
+               return -ENOMEM;
+
+       ab_bus = dma_map_single(dev, ab, PAGE_SIZE, DMA_BIDIRECTIONAL);
+       if (dma_mapping_error(ab_bus)) {
+               free_page((unsigned long) ab);
+               return -ENOMEM;
+       }
+
+       memset(&ab->descriptor, 0, sizeof ab->descriptor);
+       ab->descriptor.control        = cpu_to_le16(descriptor_input_more |
+                                                   descriptor_status |
+                                                   descriptor_branch_always);
+       offset = offsetof(struct ar_buffer, data);
+       ab->descriptor.req_count      = cpu_to_le16(PAGE_SIZE - offset);
+       ab->descriptor.data_address   = cpu_to_le32(ab_bus + offset);
+       ab->descriptor.res_count      = cpu_to_le16(PAGE_SIZE - offset);
+       ab->descriptor.branch_address = 0;
+
+       dma_sync_single_for_device(dev, ab_bus, PAGE_SIZE, DMA_BIDIRECTIONAL);
+
+       ctx->last_buffer->descriptor.branch_address = ab_bus | 1;
+       ctx->last_buffer->next = ab;
+       ctx->last_buffer = ab;
+
+       reg_write(ctx->ohci, control_set(ctx->regs), CONTEXT_WAKE);
        flush_writes(ctx->ohci);
+
+       return 0;
 }
 
-static void ar_context_tasklet(unsigned long data)
+static __le32 *handle_ar_packet(struct ar_context *ctx, __le32 *buffer)
 {
-       struct ar_context *ctx = (struct ar_context *)data;
        struct fw_ohci *ohci = ctx->ohci;
-       u32 status;
-       int length, speed, ack, timestamp, tcode;
+       struct fw_packet p;
+       u32 status, length, tcode;
+
+       p.header[0] = le32_to_cpu(buffer[0]);
+       p.header[1] = le32_to_cpu(buffer[1]);
+       p.header[2] = le32_to_cpu(buffer[2]);
+
+       tcode = (p.header[0] >> 4) & 0x0f;
+       switch (tcode) {
+       case TCODE_WRITE_QUADLET_REQUEST:
+       case TCODE_READ_QUADLET_RESPONSE:
+               p.header[3] = (__force __u32) buffer[3];
+               p.header_length = 16;
+               p.payload_length = 0;
+               break;
+
+       case TCODE_READ_BLOCK_REQUEST :
+               p.header[3] = le32_to_cpu(buffer[3]);
+               p.header_length = 16;
+               p.payload_length = 0;
+               break;
+
+       case TCODE_WRITE_BLOCK_REQUEST:
+       case TCODE_READ_BLOCK_RESPONSE:
+       case TCODE_LOCK_REQUEST:
+       case TCODE_LOCK_RESPONSE:
+               p.header[3] = le32_to_cpu(buffer[3]);
+               p.header_length = 16;
+               p.payload_length = p.header[3] >> 16;
+               break;
+
+       case TCODE_WRITE_RESPONSE:
+       case TCODE_READ_QUADLET_REQUEST:
+       case OHCI_TCODE_PHY_PACKET:
+               p.header_length = 12;
+               p.payload_length = 0;
+               break;
+       }
+
+       p.payload = (void *) buffer + p.header_length;
 
        /* FIXME: What to do about evt_* errors? */
-       length    = le16_to_cpu(ctx->descriptor.req_count) -
-               le16_to_cpu(ctx->descriptor.res_count) - 4;
-       status    = le32_to_cpu(ctx->buffer[length / 4]);
-       ack       = ((status >> 16) & 0x1f) - 16;
-       speed     = (status >> 21) & 0x7;
-       timestamp = status & 0xffff;
-
-       ctx->buffer[0] = le32_to_cpu(ctx->buffer[0]);
-       ctx->buffer[1] = le32_to_cpu(ctx->buffer[1]);
-       ctx->buffer[2] = le32_to_cpu(ctx->buffer[2]);
-
-       tcode = (ctx->buffer[0] >> 4) & 0x0f;
-       if (TCODE_IS_BLOCK_PACKET(tcode))
-               ctx->buffer[3] = le32_to_cpu(ctx->buffer[3]);
+       length = (p.header_length + p.payload_length + 3) / 4;
+       status = le32_to_cpu(buffer[length]);
+
+       p.ack        = ((status >> 16) & 0x1f) - 16;
+       p.speed      = (status >> 21) & 0x7;
+       p.timestamp  = status & 0xffff;
+       p.generation = ohci->request_generation;
 
        /* The OHCI bus reset handler synthesizes a phy packet with
         * the new generation number when a bus reset happens (see
@@ -251,298 +336,546 @@ static void ar_context_tasklet(unsigned long data)
         * we use the unique tlabel for finding the matching
         * request. */
 
-       if (ack + 16 == 0x09)
-               ohci->request_generation = (ctx->buffer[2] >> 16) & 0xff;
+       if (p.ack + 16 == 0x09)
+               ohci->request_generation = (buffer[2] >> 16) & 0xff;
        else if (ctx == &ohci->ar_request_ctx)
-               fw_core_handle_request(&ohci->card, speed, ack, timestamp,
-                                      ohci->request_generation,
-                                      length, ctx->buffer);
+               fw_core_handle_request(&ohci->card, &p);
        else
-               fw_core_handle_response(&ohci->card, speed, ack, timestamp,
-                                       length, ctx->buffer);
+               fw_core_handle_response(&ohci->card, &p);
+
+       return buffer + length + 1;
+}
+
+static void ar_context_tasklet(unsigned long data)
+{
+       struct ar_context *ctx = (struct ar_context *)data;
+       struct fw_ohci *ohci = ctx->ohci;
+       struct ar_buffer *ab;
+       struct descriptor *d;
+       void *buffer, *end;
+
+       ab = ctx->current_buffer;
+       d = &ab->descriptor;
+
+       if (d->res_count == 0) {
+               size_t size, rest, offset;
+
+               /* This descriptor is finished and we may have a
+                * packet split across this and the next buffer. We
+                * reuse the page for reassembling the split packet. */
+
+               offset = offsetof(struct ar_buffer, data);
+               dma_unmap_single(ohci->card.device,
+                                ab->descriptor.data_address - offset,
+                                PAGE_SIZE, DMA_BIDIRECTIONAL);
+
+               buffer = ab;
+               ab = ab->next;
+               d = &ab->descriptor;
+               size = buffer + PAGE_SIZE - ctx->pointer;
+               rest = le16_to_cpu(d->req_count) - le16_to_cpu(d->res_count);
+               memmove(buffer, ctx->pointer, size);
+               memcpy(buffer + size, ab->data, rest);
+               ctx->current_buffer = ab;
+               ctx->pointer = (void *) ab->data + rest;
+               end = buffer + size + rest;
+
+               while (buffer < end)
+                       buffer = handle_ar_packet(ctx, buffer);
+
+               free_page((unsigned long)buffer);
+               ar_context_add_page(ctx);
+       } else {
+               buffer = ctx->pointer;
+               ctx->pointer = end =
+                       (void *) ab + PAGE_SIZE - le16_to_cpu(d->res_count);
+
+               while (buffer < end)
+                       buffer = handle_ar_packet(ctx, buffer);
+       }
+}
+
+static int
+ar_context_init(struct ar_context *ctx, struct fw_ohci *ohci, u32 regs)
+{
+       struct ar_buffer ab;
+
+       ctx->regs        = regs;
+       ctx->ohci        = ohci;
+       ctx->last_buffer = &ab;
+       tasklet_init(&ctx->tasklet, ar_context_tasklet, (unsigned long)ctx);
+
+       ar_context_add_page(ctx);
+       ar_context_add_page(ctx);
+       ctx->current_buffer = ab.next;
+       ctx->pointer = ctx->current_buffer->data;
+
+       reg_write(ctx->ohci, command_ptr(ctx->regs), ab.descriptor.branch_address);
+       reg_write(ctx->ohci, control_set(ctx->regs), CONTEXT_RUN);
+       flush_writes(ctx->ohci);
+
+       return 0;
+}
 
-       ctx->descriptor.data_address = cpu_to_le32(ctx->buffer_bus);
-       ctx->descriptor.req_count    = cpu_to_le16(sizeof ctx->buffer);
-       ctx->descriptor.res_count    = cpu_to_le16(sizeof ctx->buffer);
+static void context_tasklet(unsigned long data)
+{
+       struct context *ctx = (struct context *) data;
+       struct fw_ohci *ohci = ctx->ohci;
+       struct descriptor *d, *last;
+       u32 address;
+       int z;
+
+       dma_sync_single_for_cpu(ohci->card.device, ctx->buffer_bus,
+                               ctx->buffer_size, DMA_TO_DEVICE);
+
+       d    = ctx->tail_descriptor;
+       last = ctx->tail_descriptor_last;
 
-       dma_sync_single_for_device(ohci->card.device, ctx->descriptor_bus,
-                                  sizeof ctx->descriptor_bus, DMA_TO_DEVICE);
+       while (last->branch_address != 0) {
+               address = le32_to_cpu(last->branch_address);
+               z = address & 0xf;
+               d = ctx->buffer + (address - ctx->buffer_bus) / sizeof *d;
+               last = (z == 2) ? d : d + z - 1;
 
-       /* FIXME: We stop and restart the ar context here, what if we
-        * stop while a receive is in progress? Maybe we could just
-        * loop the context back to itself and use it in buffer fill
-        * mode as intended... */
+               if (!ctx->callback(ctx, d, last))
+                       break;
 
-       reg_write(ctx->ohci, ctx->control_clear, CONTEXT_RUN);
-       ar_context_run(ctx);
+               ctx->tail_descriptor      = d;
+               ctx->tail_descriptor_last = last;
+       }
 }
 
 static int
-ar_context_init(struct ar_context *ctx, struct fw_ohci *ohci, u32 control_set)
+context_init(struct context *ctx, struct fw_ohci *ohci,
+            size_t buffer_size, u32 regs,
+            descriptor_callback_t callback)
 {
-       ctx->descriptor_bus =
-               dma_map_single(ohci->card.device, &ctx->descriptor,
-                              sizeof ctx->descriptor, DMA_TO_DEVICE);
-       if (ctx->descriptor_bus == 0)
+       ctx->ohci = ohci;
+       ctx->regs = regs;
+       ctx->buffer_size = buffer_size;
+       ctx->buffer = kmalloc(buffer_size, GFP_KERNEL);
+       if (ctx->buffer == NULL)
                return -ENOMEM;
 
-       if (ctx->descriptor_bus & 0xf)
-               fw_notify("descriptor not 16-byte aligned: 0x%08lx\n",
-                         (unsigned long)ctx->descriptor_bus);
+       tasklet_init(&ctx->tasklet, context_tasklet, (unsigned long)ctx);
+       ctx->callback = callback;
 
        ctx->buffer_bus =
                dma_map_single(ohci->card.device, ctx->buffer,
-                              sizeof ctx->buffer, DMA_FROM_DEVICE);
-
-       if (ctx->buffer_bus == 0) {
-               dma_unmap_single(ohci->card.device, ctx->descriptor_bus,
-                                sizeof ctx->descriptor, DMA_TO_DEVICE);
+                              buffer_size, DMA_TO_DEVICE);
+       if (dma_mapping_error(ctx->buffer_bus)) {
+               kfree(ctx->buffer);
                return -ENOMEM;
        }
 
-       memset(&ctx->descriptor, 0, sizeof ctx->descriptor);
-       ctx->descriptor.control      = cpu_to_le16(descriptor_input_more |
-                                                  descriptor_status |
-                                                  descriptor_branch_always);
-       ctx->descriptor.req_count    = cpu_to_le16(sizeof ctx->buffer);
-       ctx->descriptor.data_address = cpu_to_le32(ctx->buffer_bus);
-       ctx->descriptor.res_count    = cpu_to_le16(sizeof ctx->buffer);
-
-       ctx->control_set   = control_set;
-       ctx->control_clear = control_set + 4;
-       ctx->command_ptr   = control_set + 12;
-       ctx->ohci          = ohci;
+       ctx->head_descriptor      = ctx->buffer;
+       ctx->prev_descriptor      = ctx->buffer;
+       ctx->tail_descriptor      = ctx->buffer;
+       ctx->tail_descriptor_last = ctx->buffer;
 
-       tasklet_init(&ctx->tasklet, ar_context_tasklet, (unsigned long)ctx);
+       /* We put a dummy descriptor in the buffer that has a NULL
+        * branch address and looks like it's been sent.  That way we
+        * have a descriptor to append DMA programs to.  Also, the
+        * ring buffer invariant is that it always has at least one
+        * element so that head == tail means buffer full. */
 
-       ar_context_run(ctx);
+       memset(ctx->head_descriptor, 0, sizeof *ctx->head_descriptor);
+       ctx->head_descriptor->control = cpu_to_le16(descriptor_output_last);
+       ctx->head_descriptor->transfer_status = cpu_to_le16(0x8011);
+       ctx->head_descriptor++;
 
        return 0;
 }
 
 static void
-do_packet_callbacks(struct fw_ohci *ohci, struct list_head *list)
+context_release(struct context *ctx)
 {
-       struct fw_packet *p, *next;
+       struct fw_card *card = &ctx->ohci->card;
 
-       list_for_each_entry_safe(p, next, list, link)
-               p->callback(p, &ohci->card, p->status);
+       dma_unmap_single(card->device, ctx->buffer_bus,
+                        ctx->buffer_size, DMA_TO_DEVICE);
+       kfree(ctx->buffer);
 }
 
-static void
-complete_transmission(struct fw_packet *packet,
-                     int status, struct list_head *list)
+static struct descriptor *
+context_get_descriptors(struct context *ctx, int z, dma_addr_t *d_bus)
 {
-       list_move_tail(&packet->link, list);
-       packet->status = status;
+       struct descriptor *d, *tail, *end;
+
+       d = ctx->head_descriptor;
+       tail = ctx->tail_descriptor;
+       end = ctx->buffer + ctx->buffer_size / sizeof(struct descriptor);
+
+       if (d + z <= tail) {
+               goto has_space;
+       } else if (d > tail && d + z <= end) {
+               goto has_space;
+       } else if (d > tail && ctx->buffer + z <= tail) {
+               d = ctx->buffer;
+               goto has_space;
+       }
+
+       return NULL;
+
+ has_space:
+       memset(d, 0, z * sizeof *d);
+       *d_bus = ctx->buffer_bus + (d - ctx->buffer) * sizeof *d;
+
+       return d;
 }
 
-/* This function prepares the first packet in the context queue for
- * transmission.  Must always be called with the ochi->lock held to
- * ensure proper generation handling and locking around packet queue
- * manipulation. */
-static void
-at_context_setup_packet(struct at_context *ctx, struct list_head *list)
+static void context_run(struct context *ctx, u32 extra)
 {
-       struct fw_packet *packet;
        struct fw_ohci *ohci = ctx->ohci;
-       int z, tcode;
 
-       packet = fw_packet(ctx->list.next);
+       reg_write(ohci, command_ptr(ctx->regs),
+                 le32_to_cpu(ctx->tail_descriptor_last->branch_address));
+       reg_write(ohci, control_clear(ctx->regs), ~0);
+       reg_write(ohci, control_set(ctx->regs), CONTEXT_RUN | extra);
+       flush_writes(ohci);
+}
 
-       memset(&ctx->d, 0, sizeof ctx->d);
-       if (packet->payload_length > 0) {
-               packet->payload_bus = dma_map_single(ohci->card.device,
-                                                    packet->payload,
-                                                    packet->payload_length,
-                                                    DMA_TO_DEVICE);
-               if (packet->payload_bus == 0) {
-                       complete_transmission(packet, -ENOMEM, list);
-                       return;
-               }
+static void context_append(struct context *ctx,
+                          struct descriptor *d, int z, int extra)
+{
+       dma_addr_t d_bus;
 
-               ctx->d.more.control      =
-                       cpu_to_le16(descriptor_output_more |
-                                   descriptor_key_immediate);
-               ctx->d.more.req_count    = cpu_to_le16(packet->header_length);
-               ctx->d.more.res_count    = cpu_to_le16(packet->timestamp);
-               ctx->d.last.control      =
-                       cpu_to_le16(descriptor_output_last |
-                                   descriptor_irq_always |
-                                   descriptor_branch_always);
-               ctx->d.last.req_count    = cpu_to_le16(packet->payload_length);
-               ctx->d.last.data_address = cpu_to_le32(packet->payload_bus);
-               z = 3;
-       } else {
-               ctx->d.more.control   =
-                       cpu_to_le16(descriptor_output_last |
-                                   descriptor_key_immediate |
-                                   descriptor_irq_always |
-                                   descriptor_branch_always);
-               ctx->d.more.req_count = cpu_to_le16(packet->header_length);
-               ctx->d.more.res_count = cpu_to_le16(packet->timestamp);
-               z = 2;
+       d_bus = ctx->buffer_bus + (d - ctx->buffer) * sizeof *d;
+
+       ctx->head_descriptor = d + z + extra;
+       ctx->prev_descriptor->branch_address = cpu_to_le32(d_bus | z);
+       ctx->prev_descriptor = z == 2 ? d : d + z - 1;
+
+       dma_sync_single_for_device(ctx->ohci->card.device, ctx->buffer_bus,
+                                  ctx->buffer_size, DMA_TO_DEVICE);
+
+       reg_write(ctx->ohci, control_set(ctx->regs), CONTEXT_WAKE);
+       flush_writes(ctx->ohci);
+}
+
+static void context_stop(struct context *ctx)
+{
+       u32 reg;
+       int i;
+
+       reg_write(ctx->ohci, control_clear(ctx->regs), CONTEXT_RUN);
+       flush_writes(ctx->ohci);
+
+       for (i = 0; i < 10; i++) {
+               reg = reg_read(ctx->ohci, control_set(ctx->regs));
+               if ((reg & CONTEXT_ACTIVE) == 0)
+                       break;
+
+               fw_notify("context_stop: still active (0x%08x)\n", reg);
+               msleep(1);
+       }
+}
+
+struct driver_data {
+       struct fw_packet *packet;
+};
+
+/* This function appends a packet to the DMA queue for transmission.
+ * Must always be called with the ohci->lock held to ensure proper
+ * generation handling and locking around packet queue manipulation. */
+static int
+at_context_queue_packet(struct context *ctx, struct fw_packet *packet)
+{
+       struct fw_ohci *ohci = ctx->ohci;
+       dma_addr_t d_bus, payload_bus;
+       struct driver_data *driver_data;
+       struct descriptor *d, *last;
+       __le32 *header;
+       int z, tcode;
+       u32 reg;
+
+       d = context_get_descriptors(ctx, 4, &d_bus);
+       if (d == NULL) {
+               packet->ack = RCODE_SEND_ERROR;
+               return -1;
        }
 
+       d[0].control   = cpu_to_le16(descriptor_key_immediate);
+       d[0].res_count = cpu_to_le16(packet->timestamp);
+
        /* The DMA format for asyncronous link packets is different
         * from the IEEE1394 layout, so shift the fields around
         * accordingly.  If header_length is 8, it's a PHY packet, to
         * which we need to prepend an extra quadlet. */
+
+       header = (__le32 *) &d[1];
        if (packet->header_length > 8) {
-               ctx->d.header[0] = cpu_to_le32((packet->header[0] & 0xffff) |
-                                              (packet->speed << 16));
-               ctx->d.header[1] = cpu_to_le32((packet->header[1] & 0xffff) |
-                                              (packet->header[0] & 0xffff0000));
-               ctx->d.header[2] = cpu_to_le32(packet->header[2]);
+               header[0] = cpu_to_le32((packet->header[0] & 0xffff) |
+                                       (packet->speed << 16));
+               header[1] = cpu_to_le32((packet->header[1] & 0xffff) |
+                                       (packet->header[0] & 0xffff0000));
+               header[2] = cpu_to_le32(packet->header[2]);
 
                tcode = (packet->header[0] >> 4) & 0x0f;
                if (TCODE_IS_BLOCK_PACKET(tcode))
-                       ctx->d.header[3] = cpu_to_le32(packet->header[3]);
+                       header[3] = cpu_to_le32(packet->header[3]);
                else
-                       ctx->d.header[3] = packet->header[3];
+                       header[3] = (__force __le32) packet->header[3];
+
+               d[0].req_count = cpu_to_le16(packet->header_length);
        } else {
-               ctx->d.header[0] =
-                       cpu_to_le32((OHCI1394_phy_tcode << 4) |
-                                   (packet->speed << 16));
-               ctx->d.header[1] = cpu_to_le32(packet->header[0]);
-               ctx->d.header[2] = cpu_to_le32(packet->header[1]);
-               ctx->d.more.req_count = cpu_to_le16(12);
+               header[0] = cpu_to_le32((OHCI1394_phy_tcode << 4) |
+                                       (packet->speed << 16));
+               header[1] = cpu_to_le32(packet->header[0]);
+               header[2] = cpu_to_le32(packet->header[1]);
+               d[0].req_count = cpu_to_le16(12);
        }
 
-       /* FIXME: Document how the locking works. */
-       if (ohci->generation == packet->generation) {
-               reg_write(ctx->ohci, ctx->command_ptr,
-                         ctx->descriptor_bus | z);
-               reg_write(ctx->ohci, ctx->control_set,
-                         CONTEXT_RUN | CONTEXT_WAKE);
+       driver_data = (struct driver_data *) &d[3];
+       driver_data->packet = packet;
+       
+       if (packet->payload_length > 0) {
+               payload_bus =
+                       dma_map_single(ohci->card.device, packet->payload,
+                                      packet->payload_length, DMA_TO_DEVICE);
+               if (dma_mapping_error(payload_bus)) {
+                       packet->ack = RCODE_SEND_ERROR;
+                       return -1;
+               }
+
+               d[2].req_count    = cpu_to_le16(packet->payload_length);
+               d[2].data_address = cpu_to_le32(payload_bus);
+               last = &d[2];
+               z = 3;
        } else {
-               /* We dont return error codes from this function; all
-                * transmission errors are reported through the
-                * callback. */
-               complete_transmission(packet, -ESTALE, list);
+               last = &d[0];
+               z = 2;
        }
-}
 
-static void at_context_stop(struct at_context *ctx)
-{
-       u32 reg;
+       last->control |= cpu_to_le16(descriptor_output_last |
+                                    descriptor_irq_always |
+                                    descriptor_branch_always);
 
-       reg_write(ctx->ohci, ctx->control_clear, CONTEXT_RUN);
+       /* FIXME: Document how the locking works. */
+       if (ohci->generation != packet->generation) {
+               packet->ack = RCODE_GENERATION;
+               return -1;
+       }
 
-       reg = reg_read(ctx->ohci, ctx->control_set);
-       if (reg & CONTEXT_ACTIVE)
-               fw_notify("Tried to stop context, but it is still active "
-                         "(0x%08x).\n", reg);
+       context_append(ctx, d, z, 4 - z);
+
+       /* If the context isn't already running, start it up. */
+       reg = reg_read(ctx->ohci, control_set(ctx->regs));
+       if ((reg & CONTEXT_ACTIVE) == 0)
+               context_run(ctx, 0);
+
+       return 0;
 }
 
-static void at_context_tasklet(unsigned long data)
+static int handle_at_packet(struct context *context,
+                           struct descriptor *d,
+                           struct descriptor *last)
 {
-       struct at_context *ctx = (struct at_context *)data;
-       struct fw_ohci *ohci = ctx->ohci;
+       struct driver_data *driver_data;
        struct fw_packet *packet;
-       LIST_HEAD(list);
-       unsigned long flags;
+       struct fw_ohci *ohci = context->ohci;
+       dma_addr_t payload_bus;
        int evt;
 
-       spin_lock_irqsave(&ohci->lock, flags);
-
-       packet = fw_packet(ctx->list.next);
+       if (last->transfer_status == 0)
+               /* This descriptor isn't done yet, stop iteration. */
+               return 0;
 
-       at_context_stop(ctx);
+       driver_data = (struct driver_data *) &d[3];
+       packet = driver_data->packet;
+       if (packet == NULL)
+               /* This packet was cancelled, just continue. */
+               return 1;
 
-       if (packet->payload_length > 0) {
-               dma_unmap_single(ohci->card.device, packet->payload_bus,
+       payload_bus = le32_to_cpu(last->data_address);
+       if (payload_bus != 0)
+               dma_unmap_single(ohci->card.device, payload_bus,
                                 packet->payload_length, DMA_TO_DEVICE);
-               evt = le16_to_cpu(ctx->d.last.transfer_status) & 0x1f;
-               packet->timestamp = le16_to_cpu(ctx->d.last.res_count);
-       }
-       else {
-               evt = le16_to_cpu(ctx->d.more.transfer_status) & 0x1f;
-               packet->timestamp = le16_to_cpu(ctx->d.more.res_count);
-       }
 
-       if (evt < 16) {
-               switch (evt) {
-               case OHCI1394_evt_timeout:
-                       /* Async response transmit timed out. */
-                       complete_transmission(packet, -ETIMEDOUT, &list);
-                       break;
+       evt = le16_to_cpu(last->transfer_status) & 0x1f;
+       packet->timestamp = le16_to_cpu(last->res_count);
+
+       switch (evt) {
+       case OHCI1394_evt_timeout:
+               /* Async response transmit timed out. */
+               packet->ack = RCODE_CANCELLED;
+               break;
+
+       case OHCI1394_evt_flushed:
+               /* A flushed packet should give the same error as
+                * when we try to use a stale generation count. */
+               packet->ack = RCODE_GENERATION;
+               break;
+
+       case OHCI1394_evt_missing_ack:
+               /* Using a valid (current) generation count, but the
+                * node is not on the bus or not sending acks. */
+               packet->ack = RCODE_NO_ACK;
+               break;
+
+       case ACK_COMPLETE + 0x10:
+       case ACK_PENDING + 0x10:
+       case ACK_BUSY_X + 0x10:
+       case ACK_BUSY_A + 0x10:
+       case ACK_BUSY_B + 0x10:
+       case ACK_DATA_ERROR + 0x10:
+       case ACK_TYPE_ERROR + 0x10:
+               packet->ack = evt - 0x10;
+               break;
+
+       default:
+               packet->ack = RCODE_SEND_ERROR;
+               break;
+       }
 
-               case OHCI1394_evt_flushed:
-                       /* The packet was flushed should give same
-                        * error as when we try to use a stale
-                        * generation count. */
-                       complete_transmission(packet, -ESTALE, &list);
-                       break;
+       packet->callback(packet, &ohci->card, packet->ack);
 
-               case OHCI1394_evt_missing_ack:
-                       /* This would be a higher level software
-                        * error, it is using a valid (current)
-                        * generation count, but the node is not on
-                        * the bus. */
-                       complete_transmission(packet, -ENODEV, &list);
-                       break;
+       return 1;
+}
 
-               default:
-                       complete_transmission(packet, -EIO, &list);
-                       break;
-               }
-       } else
-               complete_transmission(packet, evt - 16, &list);
+#define header_get_destination(q)      (((q) >> 16) & 0xffff)
+#define header_get_tcode(q)            (((q) >> 4) & 0x0f)
+#define header_get_offset_high(q)      (((q) >> 0) & 0xffff)
+#define header_get_data_length(q)      (((q) >> 16) & 0xffff)
+#define header_get_extended_tcode(q)   (((q) >> 0) & 0xffff)
 
-       /* If more packets are queued, set up the next one. */
-       if (!list_empty(&ctx->list))
-               at_context_setup_packet(ctx, &list);
+/* Answer a request that targets the local node's config ROM.
+ *
+ * csr is the address of the access relative to CSR_REGISTER_BASE (as
+ * computed by handle_local_request()).  Block packets take their length
+ * from the data length field of header[3]; every other tcode is treated
+ * as a quadlet (4 byte) access.  The response is built on the stack and
+ * handed straight to fw_core_handle_response(). */
+static void
+handle_local_rom(struct fw_ohci *ohci, struct fw_packet *packet, u32 csr)
+{
+       struct fw_packet response;
+       int tcode, length, i;
 
-       spin_unlock_irqrestore(&ohci->lock, flags);
+       tcode = header_get_tcode(packet->header[0]);
+       if (TCODE_IS_BLOCK_PACKET(tcode))
+               length = header_get_data_length(packet->header[3]);
+       else
+               length = 4;
+
+       /* i is the byte offset of the access within the config ROM. */
+       i = csr - CSR_CONFIG_ROM;
+       if (i + length > CONFIG_ROM_SIZE) {
+               /* The access would run past the end of the ROM. */
+               fw_fill_response(&response, packet->header,
+                                RCODE_ADDRESS_ERROR, NULL, 0);
+       } else if (!TCODE_IS_READ_REQUEST(tcode)) {
+               /* Only read requests are served from the ROM. */
+               fw_fill_response(&response, packet->header,
+                                RCODE_TYPE_ERROR, NULL, 0);
+       } else {
+               fw_fill_response(&response, packet->header, RCODE_COMPLETE,
+                                (void *) ohci->config_rom + i, length);
+       }
 
-       do_packet_callbacks(ohci, &list);
+       fw_core_handle_response(&ohci->card, &response);
 }
 
-static int
-at_context_init(struct at_context *ctx, struct fw_ohci *ohci, u32 control_set)
+/* Service a request addressed to one of the local bus management CSRs
+ * (CSR_BUS_MANAGER_ID and the registers following it) through the
+ * controller's CSRData / CSRCompareData / CSRControl registers.
+ *
+ * Only compare-swap lock requests with an 8 byte payload and quadlet
+ * reads are accepted; anything else is answered with RCODE_TYPE_ERROR.
+ * A quadlet read is carried out as a compare-swap of 0 with 0, which
+ * cannot change the register but still returns its current value.
+ *
+ * The response is handed to fw_core_handle_response().  If the
+ * controller does not report the swap as finished, RCODE_BUSY is sent
+ * without payload rather than returning an unread "old value". */
+static void
+handle_local_lock(struct fw_ohci *ohci, struct fw_packet *packet, u32 csr)
 {
-       INIT_LIST_HEAD(&ctx->list);
+       struct fw_packet response;
+       int tcode, length, ext_tcode, sel;
+       __be32 *payload, lock_old;
+       u32 lock_arg, lock_data;
+
+       tcode = header_get_tcode(packet->header[0]);
+       length = header_get_data_length(packet->header[3]);
+       payload = packet->payload;
+       ext_tcode = header_get_extended_tcode(packet->header[3]);
+
+       if (tcode == TCODE_LOCK_REQUEST &&
+           ext_tcode == EXTCODE_COMPARE_SWAP && length == 8) {
+               lock_arg = be32_to_cpu(payload[0]);
+               lock_data = be32_to_cpu(payload[1]);
+       } else if (tcode == TCODE_READ_QUADLET_REQUEST) {
+               lock_arg = 0;
+               lock_data = 0;
+       } else {
+               fw_fill_response(&response, packet->header,
+                                RCODE_TYPE_ERROR, NULL, 0);
+               goto out;
+       }
 
-       ctx->descriptor_bus =
-               dma_map_single(ohci->card.device, &ctx->d,
-                              sizeof ctx->d, DMA_TO_DEVICE);
-       if (ctx->descriptor_bus == 0)
-               return -ENOMEM;
+       /* The selector is the quadlet index of the register relative to
+        * CSR_BUS_MANAGER_ID.  Writing it to CSRControl starts the
+        * compare-swap on the previously loaded data/compare values. */
+       sel = (csr - CSR_BUS_MANAGER_ID) / 4;
+       reg_write(ohci, OHCI1394_CSRData, lock_data);
+       reg_write(ohci, OHCI1394_CSRCompareData, lock_arg);
+       reg_write(ohci, OHCI1394_CSRControl, sel);
 
-       ctx->control_set   = control_set;
-       ctx->control_clear = control_set + 4;
-       ctx->command_ptr   = control_set + 12;
-       ctx->ohci          = ohci;
+       /* Bit 31 of CSRControl tells whether the swap has completed.
+        * lock_old is only valid (and only sent) in that case. */
+       if (reg_read(ohci, OHCI1394_CSRControl) & 0x80000000) {
+               lock_old = cpu_to_be32(reg_read(ohci, OHCI1394_CSRData));
+               fw_fill_response(&response, packet->header,
+                                RCODE_COMPLETE, &lock_old, sizeof lock_old);
+       } else {
+               fw_notify("swap not done yet\n");
+               fw_fill_response(&response, packet->header,
+                                RCODE_BUSY, NULL, 0);
+       }
 
-       tasklet_init(&ctx->tasklet, at_context_tasklet, (unsigned long)ctx);
+ out:
+       fw_core_handle_response(&ohci->card, &response);
+}
 
-       return 0;
+/* Handle a packet that is addressed to the local node instead of
+ * putting it on the wire.
+ *
+ * For packets from the AT request context the sender's callback is
+ * invoked up front with ACK_PENDING; for packets from the AT response
+ * context it is invoked at the end with ACK_COMPLETE.  In between, the
+ * destination offset decides who gets the packet: config ROM accesses
+ * and the bus management CSRs are answered here, everything else is
+ * passed on to the core. */
+static void
+handle_local_request(struct context *ctx, struct fw_packet *packet)
+{
+       u64 offset;
+       u32 csr;
+
+       if (ctx == &ctx->ohci->at_request_ctx) {
+               packet->ack = ACK_PENDING;
+               packet->callback(packet, &ctx->ohci->card, packet->ack);
+       }
+
+       /* Assemble the destination offset from the low 16 bits of
+        * header[1] (high part) and all of header[2] (low part). */
+       offset =
+               ((unsigned long long)
+                header_get_offset_high(packet->header[1]) << 32) |
+               packet->header[2];
+       csr = offset - CSR_REGISTER_BASE;
+
+       /* Handle config rom reads. */
+       if (csr >= CSR_CONFIG_ROM && csr < CSR_CONFIG_ROM_END)
+               handle_local_rom(ctx->ohci, packet, csr);
+       else switch (csr) {
+       case CSR_BUS_MANAGER_ID:
+       case CSR_BANDWIDTH_AVAILABLE:
+       case CSR_CHANNELS_AVAILABLE_HI:
+       case CSR_CHANNELS_AVAILABLE_LO:
+               handle_local_lock(ctx->ohci, packet, csr);
+               break;
+       default:
+               /* Not a register served by this driver; let the core
+                * treat the packet as an incoming request or response. */
+               if (ctx == &ctx->ohci->at_request_ctx)
+                       fw_core_handle_request(&ctx->ohci->card, packet);
+               else
+                       fw_core_handle_response(&ctx->ohci->card, packet);
+               break;
+       }
+
+       if (ctx == &ctx->ohci->at_response_ctx) {
+               packet->ack = ACK_COMPLETE;
+               packet->callback(packet, &ctx->ohci->card, packet->ack);
+       }
 }
 
+/* Transmit a packet on the given AT context.
+ *
+ * Packets whose destination is the local node ID in the current bus
+ * generation never reach the hardware; they are passed to
+ * handle_local_request() after the lock has been dropped.  All other
+ * packets are queued under ohci->lock.  If queueing fails the packet's
+ * callback is invoked right away with packet->ack (presumably set by
+ * at_context_queue_packet() on failure -- TODO confirm). */
 static void
-at_context_transmit(struct at_context *ctx, struct fw_packet *packet)
+at_context_transmit(struct context *ctx, struct fw_packet *packet)
 {
-       LIST_HEAD(list);
        unsigned long flags;
-       int was_empty;
+       int retval;
 
        spin_lock_irqsave(&ctx->ohci->lock, flags);
 
-       was_empty = list_empty(&ctx->list);
-       list_add_tail(&packet->link, &ctx->list);
-       if (was_empty)
-               at_context_setup_packet(ctx, &list);
+       if (header_get_destination(packet->header[0]) == ctx->ohci->node_id &&
+           ctx->ohci->generation == packet->generation) {
+               spin_unlock_irqrestore(&ctx->ohci->lock, flags);
+               handle_local_request(ctx, packet);
+               return;
+       }
 
+       retval = at_context_queue_packet(ctx, packet);
        spin_unlock_irqrestore(&ctx->ohci->lock, flags);
 
-       do_packet_callbacks(ctx->ohci, &list);
+       if (retval < 0)
+               packet->callback(packet, &ctx->ohci->card, packet->ack);
+       
 }
 
 static void bus_reset_tasklet(unsigned long data)
 {
        struct fw_ohci *ohci = (struct fw_ohci *)data;
-       int self_id_count, i, j, reg, node_id;
+       int self_id_count, i, j, reg;
        int generation, new_generation;
        unsigned long flags;
 
@@ -551,7 +884,7 @@ static void bus_reset_tasklet(unsigned long data)
                fw_error("node ID not valid, new bus reset in progress\n");
                return;
        }
-       node_id = reg & 0xffff;
+       ohci->node_id = reg & 0xffff;
 
        /* The count in the SelfIDCount register is the number of
         * bytes in the self ID receive buffer.  Since we also receive
@@ -590,8 +923,8 @@ static void bus_reset_tasklet(unsigned long data)
        spin_lock_irqsave(&ohci->lock, flags);
 
        ohci->generation = generation;
-       at_context_stop(&ohci->at_request_ctx);
-       at_context_stop(&ohci->at_response_ctx);
+       context_stop(&ohci->at_request_ctx);
+       context_stop(&ohci->at_response_ctx);
        reg_write(ohci, OHCI1394_IntEventClear, OHCI1394_busReset);
 
        /* This next bit is unrelated to the AT context stuff but we
@@ -620,14 +953,14 @@ static void bus_reset_tasklet(unsigned long data)
 
        spin_unlock_irqrestore(&ohci->lock, flags);
 
-       fw_core_handle_bus_reset(&ohci->card, node_id, generation,
+       fw_core_handle_bus_reset(&ohci->card, ohci->node_id, generation,
                                 self_id_count, ohci->self_id_buffer);
 }
 
 static irqreturn_t irq_handler(int irq, void *data)
 {
        struct fw_ohci *ohci = data;
-       u32 event, iso_event;
+       u32 event, iso_event, cycle_time;
        int i;
 
        event = reg_read(ohci, OHCI1394_IntEventClear);
@@ -652,24 +985,30 @@ static irqreturn_t irq_handler(int irq, void *data)
        if (event & OHCI1394_respTxComplete)
                tasklet_schedule(&ohci->at_response_ctx.tasklet);
 
-       iso_event = reg_read(ohci, OHCI1394_IsoRecvIntEventSet);
+       iso_event = reg_read(ohci, OHCI1394_IsoRecvIntEventClear);
        reg_write(ohci, OHCI1394_IsoRecvIntEventClear, iso_event);
 
        while (iso_event) {
                i = ffs(iso_event) - 1;
-               tasklet_schedule(&ohci->ir_context_list[i].tasklet);
+               tasklet_schedule(&ohci->ir_context_list[i].context.tasklet);
                iso_event &= ~(1 << i);
        }
 
-       iso_event = reg_read(ohci, OHCI1394_IsoXmitIntEventSet);
+       iso_event = reg_read(ohci, OHCI1394_IsoXmitIntEventClear);
        reg_write(ohci, OHCI1394_IsoXmitIntEventClear, iso_event);
 
        while (iso_event) {
                i = ffs(iso_event) - 1;
-               tasklet_schedule(&ohci->it_context_list[i].tasklet);
+               tasklet_schedule(&ohci->it_context_list[i].context.tasklet);
                iso_event &= ~(1 << i);
        }
 
+       if (event & OHCI1394_cycle64Seconds) {
+               cycle_time = reg_read(ohci, OHCI1394_IsochronousCycleTimer);
+               if ((cycle_time & 0x80000000) == 0)
+                       ohci->bus_seconds++;
+       }
+
        return IRQ_HANDLED;
 }
 
@@ -714,7 +1053,7 @@ static int ohci_enable(struct fw_card *card, u32 *config_rom, size_t length)
        reg_write(ohci, OHCI1394_AsReqFilterHiSet, 0x80000000);
 
        if (request_irq(dev->irq, irq_handler,
-                       SA_SHIRQ, ohci_driver_name, ohci)) {
+                       IRQF_SHARED, ohci_driver_name, ohci)) {
                fw_error("Failed to allocate shared interrupt %d.\n",
                         dev->irq);
                dma_free_coherent(ohci->card.device, CONFIG_ROM_SIZE,
@@ -826,15 +1165,38 @@ static void ohci_send_response(struct fw_card *card, struct fw_packet *packet)
        at_context_transmit(&ohci->at_response_ctx, packet);
 }
 
+/* Cancel a packet that was submitted on the AT request context.
+ *
+ * The context's tasklet is disabled for the duration of the call.  A
+ * packet whose ack is already non-zero is left alone and -ENOENT is
+ * returned (presumably it has completed already -- TODO confirm).
+ * Otherwise the packet is detached from its driver data, completed
+ * through its callback with RCODE_CANCELLED, and 0 is returned. */
+static int ohci_cancel_packet(struct fw_card *card, struct fw_packet *packet)
+{
+       struct fw_ohci *ohci = fw_ohci(card);
+       struct context *ctx = &ohci->at_request_ctx;
+       struct driver_data *driver_data = packet->driver_data;
+       int retval = -ENOENT;
+
+       tasklet_disable(&ctx->tasklet);
+
+       if (packet->ack != 0)
+               goto out;
+
+       driver_data->packet = NULL;
+       packet->ack = RCODE_CANCELLED;
+       packet->callback(packet, &ohci->card, packet->ack);
+       retval = 0;
+
+ out:
+       tasklet_enable(&ctx->tasklet);
+
+       return retval;
+}
+
 static int
 ohci_enable_phys_dma(struct fw_card *card, int node_id, int generation)
 {
        struct fw_ohci *ohci = fw_ohci(card);
        unsigned long flags;
-       int retval = 0;
+       int n, retval = 0;
 
-       /* FIXME: make sure this bitmask is cleared when we clear the
-        * busReset interrupt bit. */
+       /* FIXME:  Make sure this bitmask is cleared when we clear the busReset
+        * interrupt bit.  Clear physReqResourceAllBuses on bus reset. */
 
        spin_lock_irqsave(&ohci->lock, flags);
 
@@ -843,87 +1205,120 @@ ohci_enable_phys_dma(struct fw_card *card, int node_id, int generation)
                goto out;
        }
 
-       if (node_id < 32) {
-               reg_write(ohci, OHCI1394_PhyReqFilterLoSet, 1 << node_id);
-       } else {
-               reg_write(ohci, OHCI1394_PhyReqFilterHiSet,
-                         1 << (node_id - 32));
-       }
+       /* NOTE, if the node ID contains a non-local bus ID, physical DMA is
+        * enabled for _all_ nodes on remote buses. */
+
+       n = (node_id & 0xffc0) == LOCAL_BUS ? node_id & 0x3f : 63;
+       if (n < 32)
+               reg_write(ohci, OHCI1394_PhyReqFilterLoSet, 1 << n);
+       else
+               reg_write(ohci, OHCI1394_PhyReqFilterHiSet, 1 << (n - 32));
+
        flush_writes(ohci);
  out:
        spin_unlock_irqrestore(&ohci->lock, flags);
        return retval;
 }
 
-static void ir_context_tasklet(unsigned long data)
+/* Return a 64 bit bus time: the driver's bus_seconds counter in the
+ * upper 32 bits and the current value of the IsochronousCycleTimer
+ * register in the lower 32 bits.  bus_seconds is read without taking
+ * any lock here. */
+static u64
+ohci_get_bus_time(struct fw_card *card)
 {
-       struct iso_context *ctx = (struct iso_context *)data;
+       struct fw_ohci *ohci = fw_ohci(card);
+       u32 cycle_time;
+       u64 bus_time;
 
-       (void)ctx;
-}
+       cycle_time = reg_read(ohci, OHCI1394_IsochronousCycleTimer);
+       bus_time = ((u64) ohci->bus_seconds << 32) | cycle_time;
 
-#define ISO_BUFFER_SIZE (64 * 1024)
+       return bus_time;
+}
 
-static void flush_iso_context(struct iso_context *ctx)
+/* Descriptor callback for isochronous receive contexts running in
+ * dual-buffer mode.
+ *
+ * Returns 0 if the descriptor still has space left in both of its
+ * buffers (not done yet), 1 once it has been processed.  The headers
+ * the controller stored in the first buffer, which directly follows
+ * the descriptor, are copied into ctx->header; when the descriptor
+ * asked for an interrupt the client callback is invoked with the
+ * accumulated headers and the accumulator is reset. */
+static int handle_ir_dualbuffer_packet(struct context *context,
+                                      struct descriptor *d,
+                                      struct descriptor *last)
 {
-       struct fw_ohci *ohci = fw_ohci(ctx->base.card);
-       struct descriptor *d, *last;
-       u32 address;
-       int z;
+       struct iso_context *ctx =
+               container_of(context, struct iso_context, context);
+       struct db_descriptor *db = (struct db_descriptor *) d;
+       __le32 *ir_header;
+       size_t header_length;
+       void *p, *end;
+       int i;
 
-       dma_sync_single_for_cpu(ohci->card.device, ctx->buffer_bus,
-                               ISO_BUFFER_SIZE, DMA_TO_DEVICE);
+       if (db->first_res_count > 0 && db->second_res_count > 0)
+               /* This descriptor isn't done yet, stop iteration. */
+               return 0;
 
-       d    = ctx->tail_descriptor;
-       last = ctx->tail_descriptor_last;
+       /* Number of bytes actually written to the header buffer. */
+       header_length = le16_to_cpu(db->first_req_count) -
+               le16_to_cpu(db->first_res_count);
 
-       while (last->branch_address != 0 && last->transfer_status != 0) {
-               address = le32_to_cpu(last->branch_address);
-               z = address & 0xf;
-               d = ctx->buffer + (address - ctx->buffer_bus) / sizeof *d;
+       /* Each record in the header buffer is 4 bytes followed by
+        * header_size bytes of header; only the latter are copied.
+        * (The leading 4 bytes are presumably the status word the
+        * controller stores per packet -- TODO confirm.)  Copying
+        * stops when the one-page accumulation buffer is full. */
+       i = ctx->header_length;
+       p = db + 1;
+       end = p + header_length;
+       while (p < end && i + ctx->base.header_size <= PAGE_SIZE) {
+               memcpy(ctx->header + i, p + 4, ctx->base.header_size);
+               i += ctx->base.header_size;
+               p += ctx->base.header_size + 4;
+       }
 
-               if (z == 2)
-                       last = d;
-               else
-                       last = d + z - 1;
+       ctx->header_length = i;
 
-               if (le16_to_cpu(last->control) & descriptor_irq_always)
-                       ctx->base.callback(&ctx->base,
-                                          0, le16_to_cpu(last->res_count),
-                                          ctx->base.callback_data);
+       if (le16_to_cpu(db->control) & descriptor_irq_always) {
+               /* The low 16 bits of the first quadlet in the header
+                * buffer are passed as the callback's first value. */
+               ir_header = (__le32 *) (db + 1);
+               ctx->base.callback(&ctx->base,
+                                  le32_to_cpu(ir_header[0]) & 0xffff,
+                                  ctx->header_length, ctx->header,
+                                  ctx->base.callback_data);
+               ctx->header_length = 0;
        }
 
-       ctx->tail_descriptor      = d;
-       ctx->tail_descriptor_last = last;
+       return 1;
 }
 
-static void it_context_tasklet(unsigned long data)
+/* Descriptor callback for isochronous transmit contexts.
+ *
+ * Returns 0 while the last descriptor of the block has no transfer
+ * status yet, 1 once it has been processed.  If that descriptor
+ * requested an interrupt, the client callback is invoked with the
+ * descriptor's res_count and no header data. */
+static int handle_it_packet(struct context *context,
+                           struct descriptor *d,
+                           struct descriptor *last)
 {
-       struct iso_context *ctx = (struct iso_context *)data;
+       struct iso_context *ctx =
+               container_of(context, struct iso_context, context);
+
+       if (last->transfer_status == 0)
+               /* This descriptor isn't done yet, stop iteration. */
+               return 0;
 
-       flush_iso_context(ctx);
+       if (le16_to_cpu(last->control) & descriptor_irq_always)
+               ctx->base.callback(&ctx->base, le16_to_cpu(last->res_count),
+                                  0, NULL, ctx->base.callback_data);
+
+       return 1;
 }
 
-static struct fw_iso_context *ohci_allocate_iso_context(struct fw_card *card,
-                                                       int type)
+static struct fw_iso_context *
+ohci_allocate_iso_context(struct fw_card *card, int type,
+                         int sync, int tags, size_t header_size)
 {
        struct fw_ohci *ohci = fw_ohci(card);
        struct iso_context *ctx, *list;
-       void (*tasklet) (unsigned long data);
-       u32 *mask;
+       descriptor_callback_t callback;
+       u32 *mask, regs;
        unsigned long flags;
-       int index;
+       int index, retval = -ENOMEM;
 
        if (type == FW_ISO_CONTEXT_TRANSMIT) {
                mask = &ohci->it_context_mask;
                list = ohci->it_context_list;
-               tasklet = it_context_tasklet;
+               callback = handle_it_packet;
        } else {
                mask = &ohci->ir_context_mask;
                list = ohci->ir_context_list;
-               tasklet = ir_context_tasklet;
+               callback = handle_ir_dualbuffer_packet;
        }
 
+       /* FIXME: We need a fallback for pre 1.1 OHCI. */
+       if (callback == handle_ir_dualbuffer_packet &&
+           ohci->version < OHCI_VERSION_1_1)
+               return ERR_PTR(-EINVAL);
+
        spin_lock_irqsave(&ohci->lock, flags);
        index = ffs(*mask) - 1;
        if (index >= 0)
@@ -933,60 +1328,82 @@ static struct fw_iso_context *ohci_allocate_iso_context(struct fw_card *card,
        if (index < 0)
                return ERR_PTR(-EBUSY);
 
+       if (type == FW_ISO_CONTEXT_TRANSMIT)
+               regs = OHCI1394_IsoXmitContextBase(index);
+       else
+               regs = OHCI1394_IsoRcvContextBase(index);
+
        ctx = &list[index];
        memset(ctx, 0, sizeof *ctx);
-       tasklet_init(&ctx->tasklet, tasklet, (unsigned long)ctx);
-
-       ctx->buffer = kmalloc(ISO_BUFFER_SIZE, GFP_KERNEL);
-       if (ctx->buffer == NULL) {
-               spin_lock_irqsave(&ohci->lock, flags);
-               *mask |= 1 << index;
-               spin_unlock_irqrestore(&ohci->lock, flags);
-               return ERR_PTR(-ENOMEM);
-       }
-
-       ctx->buffer_bus =
-           dma_map_single(card->device, ctx->buffer,
-                          ISO_BUFFER_SIZE, DMA_TO_DEVICE);
+       ctx->header_length = 0;
+       ctx->header = (void *) __get_free_page(GFP_KERNEL);
+       if (ctx->header == NULL)
+               goto out;
 
-       ctx->head_descriptor      = ctx->buffer;
-       ctx->prev_descriptor      = ctx->buffer;
-       ctx->tail_descriptor      = ctx->buffer;
-       ctx->tail_descriptor_last = ctx->buffer;
+       retval = context_init(&ctx->context, ohci, ISO_BUFFER_SIZE,
+                             regs, callback);
+       if (retval < 0)
+               goto out_with_header;
 
-       /* We put a dummy descriptor in the buffer that has a NULL
-        * branch address and looks like it's been sent.  That way we
-        * have a descriptor to append DMA programs to.  Also, the
-        * ring buffer invariant is that it always has at least one
-        * element so that head == tail means buffer full. */
+       return &ctx->base;
 
-       memset(ctx->head_descriptor, 0, sizeof *ctx->head_descriptor);
-       ctx->head_descriptor->control = cpu_to_le16(descriptor_output_last);
-       ctx->head_descriptor->transfer_status = cpu_to_le16(0x8011);
-       ctx->head_descriptor++;
+ out_with_header:
+       free_page((unsigned long)ctx->header);
+ out:
+       spin_lock_irqsave(&ohci->lock, flags);
+       *mask |= 1 << index;
+       spin_unlock_irqrestore(&ohci->lock, flags);
 
-       return &ctx->base;
+       return ERR_PTR(retval);
 }
 
-static int ohci_send_iso(struct fw_iso_context *base, s32 cycle)
+/* Start an isochronous context.  Always returns 0.
+ *
+ * Transmit: a cycle value greater than 0 enables cycle matching on the
+ * low 15 bits of cycle; any other value starts without cycle match.
+ * Receive: the context match register is programmed from the context's
+ * tags, sync and channel fields and the context is run in dual-buffer
+ * mode with isochronous headers enabled.  In both cases a stale
+ * interrupt event for the context is cleared and its interrupt is
+ * unmasked before the context is set running. */
+static int ohci_start_iso(struct fw_iso_context *base, s32 cycle)
 {
-       struct iso_context *ctx = (struct iso_context *)base;
-       struct fw_ohci *ohci = fw_ohci(ctx->base.card);
+       struct iso_context *ctx = container_of(base, struct iso_context, base);
+       struct fw_ohci *ohci = ctx->context.ohci;
        u32 cycle_match = 0;
        int index;
 
-       index = ctx - ohci->it_context_list;
-       if (cycle > 0)
-               cycle_match = CONTEXT_CYCLE_MATCH_ENABLE |
-                       (cycle & 0x7fff) << 16;
+       if (ctx->base.type == FW_ISO_CONTEXT_TRANSMIT) {
+               /* The context's position in the list is its hardware
+                * context number. */
+               index = ctx - ohci->it_context_list;
+               if (cycle > 0)
+                       cycle_match = IT_CONTEXT_CYCLE_MATCH_ENABLE |
+                               (cycle & 0x7fff) << 16;
 
-       reg_write(ohci, OHCI1394_IsoXmitIntMaskSet, 1 << index);
-       reg_write(ohci, OHCI1394_IsoXmitCommandPtr(index),
-                 le32_to_cpu(ctx->tail_descriptor_last->branch_address));
-       reg_write(ohci, OHCI1394_IsoXmitContextControlClear(index), ~0);
-       reg_write(ohci, OHCI1394_IsoXmitContextControlSet(index),
-                 CONTEXT_RUN | cycle_match);
+               reg_write(ohci, OHCI1394_IsoXmitIntEventClear, 1 << index);
+               reg_write(ohci, OHCI1394_IsoXmitIntMaskSet, 1 << index);
+               context_run(&ctx->context, cycle_match);
+       } else {
+               index = ctx - ohci->ir_context_list;
+
+               reg_write(ohci, OHCI1394_IsoRecvIntEventClear, 1 << index);
+               reg_write(ohci, OHCI1394_IsoRecvIntMaskSet, 1 << index);
+               reg_write(ohci, context_match(ctx->context.regs),
+                         (ctx->base.tags << 28) |
+                         (ctx->base.sync << 8) | ctx->base.channel);
+               context_run(&ctx->context,
+                           IR_CONTEXT_DUAL_BUFFER_MODE |
+                           IR_CONTEXT_ISOCH_HEADER);
+       }
+
+       return 0;
+}
+
+/* Stop an isochronous context: mask its interrupt, flush the register
+ * write, then stop the DMA context.  Always returns 0. */
+static int ohci_stop_iso(struct fw_iso_context *base)
+{
+       struct fw_ohci *ohci = fw_ohci(base->card);
+       struct iso_context *ctx = container_of(base, struct iso_context, base);
+       int index;
+
+       if (ctx->base.type == FW_ISO_CONTEXT_TRANSMIT) {
+               index = ctx - ohci->it_context_list;
+               reg_write(ohci, OHCI1394_IsoXmitIntMaskClear, 1 << index);
+       } else {
+               index = ctx - ohci->ir_context_list;
+               reg_write(ohci, OHCI1394_IsoRecvIntMaskClear, 1 << index);
+       }
        flush_writes(ohci);
+       context_stop(&ctx->context);
 
        return 0;
 }
@@ -994,55 +1411,47 @@ static int ohci_send_iso(struct fw_iso_context *base, s32 cycle)
+/* Release an isochronous context: stop it, release its descriptor
+ * context, free the header page, and finally mark the context slot as
+ * available again in the matching it/ir context mask (under
+ * ohci->lock). */
 static void ohci_free_iso_context(struct fw_iso_context *base)
 {
        struct fw_ohci *ohci = fw_ohci(base->card);
-       struct iso_context *ctx = (struct iso_context *)base;
+       struct iso_context *ctx = container_of(base, struct iso_context, base);
        unsigned long flags;
        int index;
 
-       flush_iso_context(ctx);
+       ohci_stop_iso(base);
+       context_release(&ctx->context);
+       free_page((unsigned long)ctx->header);
 
        spin_lock_irqsave(&ohci->lock, flags);
 
        if (ctx->base.type == FW_ISO_CONTEXT_TRANSMIT) {
                index = ctx - ohci->it_context_list;
-               reg_write(ohci, OHCI1394_IsoXmitContextControlClear(index), ~0);
-               reg_write(ohci, OHCI1394_IsoXmitIntMaskClear, 1 << index);
                ohci->it_context_mask |= 1 << index;
        } else {
                index = ctx - ohci->ir_context_list;
-               reg_write(ohci, OHCI1394_IsoRcvContextControlClear(index), ~0);
-               reg_write(ohci, OHCI1394_IsoRecvIntMaskClear, 1 << index);
                ohci->ir_context_mask |= 1 << index;
        }
-       flush_writes(ohci);
-
-       dma_unmap_single(ohci->card.device, ctx->buffer_bus,
-                        ISO_BUFFER_SIZE, DMA_TO_DEVICE);
 
        spin_unlock_irqrestore(&ohci->lock, flags);
 }
 
 static int
-ohci_queue_iso(struct fw_iso_context *base,
-              struct fw_iso_packet *packet, void *payload)
+ohci_queue_iso_transmit(struct fw_iso_context *base,
+                       struct fw_iso_packet *packet,
+                       struct fw_iso_buffer *buffer,
+                       unsigned long payload)
 {
-       struct iso_context *ctx = (struct iso_context *)base;
-       struct fw_ohci *ohci = fw_ohci(ctx->base.card);
-       struct descriptor *d, *end, *last, *tail, *pd;
+       struct iso_context *ctx = container_of(base, struct iso_context, base);
+       struct descriptor *d, *last, *pd;
        struct fw_iso_packet *p;
        __le32 *header;
-       dma_addr_t d_bus;
+       dma_addr_t d_bus, page_bus;
        u32 z, header_z, payload_z, irq;
        u32 payload_index, payload_end_index, next_page_index;
-       int index, page, end_page, i, length, offset;
+       int page, end_page, i, length, offset;
 
        /* FIXME: Cycle lost behavior should be configurable: lose
         * packet, retransmit or terminate.. */
 
        p = packet;
-       payload_index = payload - ctx->base.buffer;
-       d = ctx->head_descriptor;
-       tail = ctx->tail_descriptor;
-       end = ctx->buffer + ISO_BUFFER_SIZE / sizeof(struct descriptor);
+       payload_index = payload;
 
        if (p->skip)
                z = 1;
@@ -1063,21 +1472,9 @@ ohci_queue_iso(struct fw_iso_context *base,
        /* Get header size in number of descriptors. */
        header_z = DIV_ROUND_UP(p->header_length, sizeof *d);
 
-       if (d + z + header_z <= tail) {
-               goto has_space;
-       } else if (d > tail && d + z + header_z <= end) {
-               goto has_space;
-       } else if (d > tail && ctx->buffer + z + header_z <= tail) {
-               d = ctx->buffer;
-               goto has_space;
-       }
-
-       /* No space in buffer */
-       return -1;
-
- has_space:
-       memset(d, 0, (z + header_z) * sizeof *d);
-       d_bus = ctx->buffer_bus + (d - ctx->buffer) * sizeof *d;
+       d = context_get_descriptors(&ctx->context, z + header_z, &d_bus);
+       if (d == NULL)
+               return -ENOMEM;
 
        if (!p->skip) {
                d[0].control   = cpu_to_le16(descriptor_key_immediate);
@@ -1109,40 +1506,129 @@ ohci_queue_iso(struct fw_iso_context *base,
                length             =
                        min(next_page_index, payload_end_index) - payload_index;
                pd[i].req_count    = cpu_to_le16(length);
-               pd[i].data_address = cpu_to_le32(ctx->base.pages[page] + offset);
+
+               page_bus = page_private(buffer->pages[page]);
+               pd[i].data_address = cpu_to_le32(page_bus + offset);
 
                payload_index += length;
        }
 
-       if (z == 2)
-               last = d;
-       else
-               last = d + z - 1;
-
        if (p->interrupt)
                irq = descriptor_irq_always;
        else
                irq = descriptor_no_irq;
 
-       last->control = cpu_to_le16(descriptor_output_last |
-                                   descriptor_status |
-                                   descriptor_branch_always |
-                                   irq);
+       last = z == 2 ? d : d + z - 1;
+       last->control |= cpu_to_le16(descriptor_output_last |
+                                    descriptor_status |
+                                    descriptor_branch_always |
+                                    irq);
 
-       dma_sync_single_for_device(ohci->card.device, ctx->buffer_bus,
-                                  ISO_BUFFER_SIZE, DMA_TO_DEVICE);
+       context_append(&ctx->context, d, z, header_z);
 
-       ctx->head_descriptor = d + z + header_z;
-       ctx->prev_descriptor->branch_address = cpu_to_le32(d_bus | z);
-       ctx->prev_descriptor = last;
+       return 0;
+}
 
-       index = ctx - ohci->it_context_list;
-       reg_write(ohci, OHCI1394_IsoXmitContextControlSet(index), CONTEXT_WAKE);
-       flush_writes(ohci);
+/* Queue receive descriptors in dual-buffer mode for one fw_iso_packet.
+ *
+ * payload is a byte offset into buffer; the payload area is split into
+ * one dual-buffer descriptor per page it touches.  Every descriptor
+ * gets its own header area directly behind it in the descriptor
+ * buffer.  If the packet requests an interrupt, only the descriptor
+ * covering the end of the payload raises it.  A packet with the skip
+ * flag set first queues an extra descriptor with the wait bits set.
+ *
+ * Returns 0 on success or -ENOMEM if no descriptors are available; in
+ * the latter case descriptors appended earlier in this call stay
+ * queued.
+ *
+ * NOTE(review): ctx->base.header_size is used as a divisor below; this
+ * assumes it is non-zero for receive contexts -- confirm the caller
+ * guarantees that. */
+static int
+ohci_queue_iso_receive_dualbuffer(struct fw_iso_context *base,
+                                 struct fw_iso_packet *packet,
+                                 struct fw_iso_buffer *buffer,
+                                 unsigned long payload)
+{
+       struct iso_context *ctx = container_of(base, struct iso_context, base);
+       struct db_descriptor *db = NULL;
+       struct descriptor *d;
+       struct fw_iso_packet *p;
+       dma_addr_t d_bus, page_bus;
+       u32 z, header_z, length, rest;
+       int page, offset, packet_count, header_size;
+
+       /* FIXME: Cycle lost behavior should be configurable: lose
+        * packet, retransmit or terminate.. */
+
+       if (packet->skip) {
+               d = context_get_descriptors(&ctx->context, 2, &d_bus);
+               if (d == NULL)
+                       return -ENOMEM;
+
+               db = (struct db_descriptor *) d;
+               db->control = cpu_to_le16(descriptor_status |
+                                         descriptor_branch_always |
+                                         descriptor_wait);
+               db->first_size = cpu_to_le16(ctx->base.header_size + 4);
+               context_append(&ctx->context, d, 2, 0);
+       }
+
+       p = packet;
+       /* A db_descriptor occupies two regular descriptor slots. */
+       z = 2;
+
+       /* The OHCI controller puts the status word in the header
+        * buffer too, so we need 4 extra bytes per packet. */
+       packet_count = p->header_length / ctx->base.header_size;
+       header_size = packet_count * (ctx->base.header_size + 4);
+
+       /* Get header size in number of descriptors. */
+       header_z = DIV_ROUND_UP(header_size, sizeof *d);
+       page     = payload >> PAGE_SHIFT;
+       offset   = payload & ~PAGE_MASK;
+       rest     = p->payload_length;
+
+       /* FIXME: OHCI 1.0 doesn't support dual buffer receive */
+       /* FIXME: make packet-per-buffer/dual-buffer a context option */
+       while (rest > 0) {
+               d = context_get_descriptors(&ctx->context,
+                                           z + header_z, &d_bus);
+               if (d == NULL)
+                       return -ENOMEM;
+
+               db = (struct db_descriptor *) d;
+               db->control = cpu_to_le16(descriptor_status |
+                                         descriptor_branch_always);
+               db->first_size = cpu_to_le16(ctx->base.header_size + 4);
+               db->first_req_count = cpu_to_le16(header_size);
+               db->first_res_count = db->first_req_count;
+               /* The header buffer directly follows the descriptor. */
+               db->first_buffer = cpu_to_le32(d_bus + sizeof *db);
+
+               /* Never let a payload buffer cross a page boundary. */
+               if (offset + rest < PAGE_SIZE)
+                       length = rest;
+               else
+                       length = PAGE_SIZE - offset;
+
+               db->second_req_count = cpu_to_le16(length);
+               db->second_res_count = db->second_req_count;
+               /* The page's bus address is kept in its private field. */
+               page_bus = page_private(buffer->pages[page]);
+               db->second_buffer = cpu_to_le32(page_bus + offset);
+
+               /* Interrupt only on the descriptor that ends the payload. */
+               if (p->interrupt && length == rest)
+                       db->control |= cpu_to_le16(descriptor_irq_always);
+
+               context_append(&ctx->context, d, z, header_z);
+               offset = (offset + length) & ~PAGE_MASK;
+               rest -= length;
+               page++;
+       }
 
        return 0;
 }
 
+/* Queue an isochronous packet on a context.  Transmit contexts go to
+ * ohci_queue_iso_transmit(); receive contexts use dual-buffer mode,
+ * which is only selected when the controller version is at least
+ * OHCI_VERSION_1_1.  Returns the helper's result, or -EINVAL for a
+ * receive context on an older controller. */
+static int
+ohci_queue_iso(struct fw_iso_context *base,
+              struct fw_iso_packet *packet,
+              struct fw_iso_buffer *buffer,
+              unsigned long payload)
+{
+       struct iso_context *ctx = container_of(base, struct iso_context, base);
+
+       if (base->type == FW_ISO_CONTEXT_TRANSMIT)
+               return ohci_queue_iso_transmit(base, packet, buffer, payload);
+       else if (ctx->context.ohci->version >= OHCI_VERSION_1_1)
+               return ohci_queue_iso_receive_dualbuffer(base, packet,
+                                                        buffer, payload);
+       else
+               /* FIXME: Implement fallback for OHCI 1.0 controllers. */
+               return -EINVAL;
+}
+
 static const struct fw_card_driver ohci_driver = {
        .name                   = ohci_driver_name,
        .enable                 = ohci_enable,
@@ -1150,12 +1636,15 @@ static const struct fw_card_driver ohci_driver = {
        .set_config_rom         = ohci_set_config_rom,
        .send_request           = ohci_send_request,
        .send_response          = ohci_send_response,
+       .cancel_packet          = ohci_cancel_packet,
        .enable_phys_dma        = ohci_enable_phys_dma,
+       .get_bus_time           = ohci_get_bus_time,
 
        .allocate_iso_context   = ohci_allocate_iso_context,
        .free_iso_context       = ohci_free_iso_context,
        .queue_iso              = ohci_queue_iso,
-       .send_iso               = ohci_send_iso,
+       .start_iso              = ohci_start_iso,
+       .stop_iso               = ohci_stop_iso,
 };
 
 static int software_reset(struct fw_ohci *ohci)
@@ -1279,11 +1768,11 @@ pci_probe(struct pci_dev *dev, const struct pci_device_id *ent)
        ar_context_init(&ohci->ar_response_ctx, ohci,
                        OHCI1394_AsRspRcvContextControlSet);
 
-       at_context_init(&ohci->at_request_ctx, ohci,
-                       OHCI1394_AsReqTrContextControlSet);
+       context_init(&ohci->at_request_ctx, ohci, AT_BUFFER_SIZE,
+                    OHCI1394_AsReqTrContextControlSet, handle_at_packet);
 
-       at_context_init(&ohci->at_response_ctx, ohci,
-                       OHCI1394_AsRspTrContextControlSet);
+       context_init(&ohci->at_response_ctx, ohci, AT_BUFFER_SIZE,
+                    OHCI1394_AsRspTrContextControlSet, handle_at_packet);
 
        reg_write(ohci, OHCI1394_ATRetries,
                  OHCI1394_MAX_AT_REQ_RETRIES |
@@ -1326,7 +1815,8 @@ pci_probe(struct pci_dev *dev, const struct pci_device_id *ent)
                  OHCI1394_RQPkt | OHCI1394_RSPkt |
                  OHCI1394_reqTxComplete | OHCI1394_respTxComplete |
                  OHCI1394_isochRx | OHCI1394_isochTx |
-                 OHCI1394_masterIntEnable);
+                 OHCI1394_masterIntEnable |
+                 OHCI1394_cycle64Seconds);
 
        bus_options = reg_read(ohci, OHCI1394_BusOptions);
        max_receive = (bus_options >> 12) & 0xf;
@@ -1338,7 +1828,9 @@ pci_probe(struct pci_dev *dev, const struct pci_device_id *ent)
        if (error_code < 0)
                return cleanup(ohci, CLEANUP_SELF_ID, error_code);
 
-       fw_notify("Added fw-ohci device %s.\n", dev->dev.bus_id);
+       ohci->version = reg_read(ohci, OHCI1394_Version) & 0x00ff00ff;
+       fw_notify("Added fw-ohci device %s, OHCI version %x.%x\n",
+                 dev->dev.bus_id, ohci->version >> 16, ohci->version & 0xff);
 
        return 0;
 }
@@ -1348,7 +1840,8 @@ static void pci_remove(struct pci_dev *dev)
        struct fw_ohci *ohci;
 
        ohci = pci_get_drvdata(dev);
-       reg_write(ohci, OHCI1394_IntMaskClear, OHCI1394_masterIntEnable);
+       reg_write(ohci, OHCI1394_IntMaskClear, ~0);
+       flush_writes(ohci);
        fw_core_remove_card(&ohci->card);
 
        /* FIXME: Fail all pending packets here, now that the upper