Merge branch 'i2c-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jdelvar...
[pandora-kernel.git] / fs / xfs / xfs_log.c
index 2be0191..3038dd5 100644 (file)
 
 kmem_zone_t    *xfs_log_ticket_zone;
 
-#define xlog_write_adv_cnt(ptr, len, off, bytes) \
-       { (ptr) += (bytes); \
-         (len) -= (bytes); \
-         (off) += (bytes);}
-
 /* Local miscellaneous function prototypes */
-STATIC int      xlog_commit_record(xfs_mount_t *mp, xlog_ticket_t *ticket,
+STATIC int      xlog_commit_record(struct log *log, struct xlog_ticket *ticket,
                                    xlog_in_core_t **, xfs_lsn_t *);
 STATIC xlog_t *  xlog_alloc_log(xfs_mount_t    *mp,
                                xfs_buftarg_t   *log_target,
@@ -59,11 +54,9 @@ STATIC xlog_t *  xlog_alloc_log(xfs_mount_t  *mp,
 STATIC int      xlog_space_left(xlog_t *log, int cycle, int bytes);
 STATIC int      xlog_sync(xlog_t *log, xlog_in_core_t *iclog);
 STATIC void     xlog_dealloc_log(xlog_t *log);
-STATIC int      xlog_write(xfs_mount_t *mp, xfs_log_iovec_t region[],
-                           int nentries, struct xlog_ticket *tic,
-                           xfs_lsn_t *start_lsn,
-                           xlog_in_core_t **commit_iclog,
-                           uint flags);
+STATIC int      xlog_write(struct log *log, struct xfs_log_vec *log_vector,
+                           struct xlog_ticket *tic, xfs_lsn_t *start_lsn,
+                           xlog_in_core_t **commit_iclog, uint flags);
 
 /* local state machine functions */
 STATIC void xlog_state_done_syncing(xlog_in_core_t *iclog, int);
@@ -102,7 +95,7 @@ STATIC xlog_ticket_t *xlog_ticket_alloc(xlog_t *log,
                                         uint   flags);
 
 #if defined(DEBUG)
-STATIC void    xlog_verify_dest_ptr(xlog_t *log, __psint_t ptr);
+STATIC void    xlog_verify_dest_ptr(xlog_t *log, char *ptr);
 STATIC void    xlog_verify_grant_head(xlog_t *log, int equals);
 STATIC void    xlog_verify_iclog(xlog_t *log, xlog_in_core_t *iclog,
                                  int count, boolean_t syncing);
@@ -258,7 +251,7 @@ xfs_log_done(
             * If we get an error, just continue and give back the log ticket.
             */
            (((ticket->t_flags & XLOG_TIC_INITED) == 0) &&
-            (xlog_commit_record(mp, ticket, iclog, &lsn)))) {
+            (xlog_commit_record(log, ticket, iclog, &lsn)))) {
                lsn = (xfs_lsn_t) -1;
                if (ticket->t_flags & XLOG_TIC_PERM_RESERV) {
                        flags |= XFS_LOG_REL_PERM_RESERV;
@@ -516,18 +509,10 @@ xfs_log_unmount_write(xfs_mount_t *mp)
 #ifdef DEBUG
        xlog_in_core_t   *first_iclog;
 #endif
-       xfs_log_iovec_t  reg[1];
        xlog_ticket_t   *tic = NULL;
        xfs_lsn_t        lsn;
        int              error;
 
-       /* the data section must be 32 bit size aligned */
-       struct {
-           __uint16_t magic;
-           __uint16_t pad1;
-           __uint32_t pad2; /* may as well make it 64 bits */
-       } magic = { XLOG_UNMOUNT_TYPE, 0, 0 };
-
        /*
         * Don't write out unmount record on read-only mounts.
         * Or, if we are doing a forced umount (typically because of IO errors).
@@ -549,16 +534,30 @@ xfs_log_unmount_write(xfs_mount_t *mp)
        } while (iclog != first_iclog);
 #endif
        if (! (XLOG_FORCED_SHUTDOWN(log))) {
-               reg[0].i_addr = (void*)&magic;
-               reg[0].i_len  = sizeof(magic);
-               reg[0].i_type = XLOG_REG_TYPE_UNMOUNT;
-
                error = xfs_log_reserve(mp, 600, 1, &tic,
                                        XFS_LOG, 0, XLOG_UNMOUNT_REC_TYPE);
                if (!error) {
+                       /* the data section must be 32 bit size aligned */
+                       struct {
+                           __uint16_t magic;
+                           __uint16_t pad1;
+                           __uint32_t pad2; /* may as well make it 64 bits */
+                       } magic = {
+                               .magic = XLOG_UNMOUNT_TYPE,
+                       };
+                       struct xfs_log_iovec reg = {
+                               .i_addr = (void *)&magic,
+                               .i_len = sizeof(magic),
+                               .i_type = XLOG_REG_TYPE_UNMOUNT,
+                       };
+                       struct xfs_log_vec vec = {
+                               .lv_niovecs = 1,
+                               .lv_iovecp = &reg,
+                       };
+
                        /* remove inited flag */
-                       ((xlog_ticket_t *)tic)->t_flags = 0;
-                       error = xlog_write(mp, reg, 1, tic, &lsn,
+                       tic->t_flags = 0;
+                       error = xlog_write(log, &vec, tic, &lsn,
                                           NULL, XLOG_UNMOUNT_TRANS);
                        /*
                         * At this point, we're umounting anyway,
@@ -648,10 +647,26 @@ xfs_log_unmount(xfs_mount_t *mp)
        xlog_dealloc_log(mp->m_log);
 }
 
+/*
+ * Fill in the fields that are set the same way for every log item: the
+ * owning mount, that mount's m_ail pointer, the item type and the item
+ * operations vector.
+ */
+void
+xfs_log_item_init(
+       struct xfs_mount        *mp,
+       struct xfs_log_item     *item,
+       int                     type,
+       struct xfs_item_ops     *ops)
+{
+       item->li_type = type;
+       item->li_ops = ops;
+       item->li_mountp = mp;
+       item->li_ailp = mp->m_ail;
+}
+
 /*
  * Write region vectors to log.  The write happens using the space reservation
  * of the ticket (tic).  It is not a requirement that all writes for a given
- * transaction occur with one call to xfs_log_write().
+ * transaction occur with one call to xfs_log_write(). However, it is important
+ * to note that the transaction reservation code makes an assumption about the
+ * number of log headers a transaction requires that may be violated if you
+ * don't pass all the transaction vectors in one call....
  */
 int
 xfs_log_write(
@@ -663,11 +678,15 @@ xfs_log_write(
 {
        struct log              *log = mp->m_log;
        int                     error;
+       struct xfs_log_vec      vec = {
+               .lv_niovecs = nentries,
+               .lv_iovecp = reg,
+       };
 
        if (XLOG_FORCED_SHUTDOWN(log))
                return XFS_ERROR(EIO);
 
-       error = xlog_write(mp, reg, nentries, tic, start_lsn, NULL, 0);
+       error = xlog_write(log, &vec, tic, start_lsn, NULL, 0);
        if (error)
                xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
        return error;
@@ -1020,6 +1039,7 @@ xlog_alloc_log(xfs_mount_t        *mp,
        int                     i;
        int                     iclogsize;
        int                     error = ENOMEM;
+       uint                    log2_size = 0;
 
        log = kmem_zalloc(sizeof(xlog_t), KM_MAYFAIL);
        if (!log) {
@@ -1045,29 +1065,30 @@ xlog_alloc_log(xfs_mount_t      *mp,
 
        error = EFSCORRUPTED;
        if (xfs_sb_version_hassector(&mp->m_sb)) {
-               log->l_sectbb_log = mp->m_sb.sb_logsectlog - BBSHIFT;
-               if (log->l_sectbb_log < 0 ||
-                   log->l_sectbb_log > mp->m_sectbb_log) {
-                       xlog_warn("XFS: Log sector size (0x%x) out of range.",
-                                               log->l_sectbb_log);
+               log2_size = mp->m_sb.sb_logsectlog;
+               if (log2_size < BBSHIFT) {
+                       xlog_warn("XFS: Log sector size too small "
+                               "(0x%x < 0x%x)", log2_size, BBSHIFT);
                        goto out_free_log;
                }
 
-               /* for larger sector sizes, must have v2 or external log */
-               if (log->l_sectbb_log != 0 &&
-                   (log->l_logBBstart != 0 &&
-                    !xfs_sb_version_haslogv2(&mp->m_sb))) {
-                       xlog_warn("XFS: log sector size (0x%x) invalid "
-                                 "for configuration.", log->l_sectbb_log);
+               log2_size -= BBSHIFT;
+               if (log2_size > mp->m_sectbb_log) {
+                       xlog_warn("XFS: Log sector size too large "
+                               "(0x%x > 0x%x)", log2_size, mp->m_sectbb_log);
                        goto out_free_log;
                }
-               if (mp->m_sb.sb_logsectlog < BBSHIFT) {
-                       xlog_warn("XFS: Log sector log (0x%x) too small.",
-                                               mp->m_sb.sb_logsectlog);
+
+               /* for larger sector sizes, must have v2 or external log */
+               if (log2_size && log->l_logBBstart > 0 &&
+                           !xfs_sb_version_haslogv2(&mp->m_sb)) {
+
+                       xlog_warn("XFS: log sector size (0x%x) invalid "
+                                 "for configuration.", log2_size);
                        goto out_free_log;
                }
        }
-       log->l_sectbb_mask = (1 << log->l_sectbb_log) - 1;
+       log->l_sectBBsize = 1 << log2_size;
 
        xlog_get_iclog_buffer_size(mp, log);
 
@@ -1174,26 +1195,31 @@ out:
  * ticket.  Return the lsn of the commit record.
  */
 STATIC int
-xlog_commit_record(xfs_mount_t  *mp,
-                  xlog_ticket_t *ticket,
-                  xlog_in_core_t **iclog,
-                  xfs_lsn_t    *commitlsnp)
+xlog_commit_record(
+       struct log              *log,
+       struct xlog_ticket      *ticket,
+       struct xlog_in_core     **iclog,
+       xfs_lsn_t               *commitlsnp)
 {
-       int             error;
-       xfs_log_iovec_t reg[1];
-
-       reg[0].i_addr = NULL;
-       reg[0].i_len = 0;
-       reg[0].i_type = XLOG_REG_TYPE_COMMIT;
+       struct xfs_mount *mp = log->l_mp;
+       int     error;
+       struct xfs_log_iovec reg = {
+               .i_addr = NULL,
+               .i_len = 0,
+               .i_type = XLOG_REG_TYPE_COMMIT,
+       };
+       struct xfs_log_vec vec = {
+               .lv_niovecs = 1,
+               .lv_iovecp = &reg,
+       };
 
        ASSERT_ALWAYS(iclog);
-       if ((error = xlog_write(mp, reg, 1, ticket, commitlsnp,
-                              iclog, XLOG_COMMIT_TRANS))) {
+       error = xlog_write(log, &vec, ticket, commitlsnp, iclog,
+                                       XLOG_COMMIT_TRANS);
+       if (error)
                xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
-       }
        return error;
-}      /* xlog_commit_record */
-
+}
 
 /*
  * Push on the buffer cache code if we ever use more than 75% of the on-disk
@@ -1613,6 +1639,192 @@ xlog_print_tic_res(xfs_mount_t *mp, xlog_ticket_t *ticket)
        }
 }
 
+/*
+ * Work out how many bytes writing this chain of log vectors may consume.
+ * Every region is preceded by its own xlog_op_header, and a ticket that
+ * still has XLOG_TIC_INITED set needs one more op header for the start
+ * record.  As a side effect each region is recorded against the ticket with
+ * xlog_tic_add_region() and the ticket's op header count is bumped.
+ *
+ * Returns the region bytes plus the space taken by all the op headers.
+ */
+static int
+xlog_write_calc_vec_length(
+       struct xlog_ticket      *ticket,
+       struct xfs_log_vec      *log_vector)
+{
+       struct xfs_log_vec      *cur = log_vector;
+       int                     nr_ophdrs;
+       int                     nbytes = 0;
+
+       /* one op header for the transaction start record, if still due */
+       nr_ophdrs = (ticket->t_flags & XLOG_TIC_INITED) ? 1 : 0;
+
+       while (cur) {
+               int             idx = 0;
+
+               nr_ophdrs += cur->lv_niovecs;
+
+               while (idx < cur->lv_niovecs) {
+                       struct xfs_log_iovec    *iov = &cur->lv_iovecp[idx++];
+
+                       nbytes += iov->i_len;
+                       xlog_tic_add_region(ticket, iov->i_len, iov->i_type);
+               }
+               cur = cur->lv_next;
+       }
+
+       ticket->t_res_num_ophdrs += nr_ophdrs;
+
+       return nbytes + nr_ophdrs * sizeof(struct xlog_op_header);
+}
+
+/*
+ * Emit the transaction start record when this is the first write made with
+ * the ticket, i.e. XLOG_TIC_INITED is still set.  The op header at @ophdr is
+ * filled in as a zero length XLOG_START_TRANS record and the INITED flag is
+ * cleared so that the start record is written only once.
+ *
+ * Returns the number of bytes used (one op header), or 0 if no start record
+ * was needed, in which case @ophdr is left untouched.
+ */
+static int
+xlog_write_start_rec(
+       struct xlog_op_header   *ophdr,
+       struct xlog_ticket      *ticket)
+{
+       if (ticket->t_flags & XLOG_TIC_INITED) {
+               ophdr->oh_flags = XLOG_START_TRANS;
+               ophdr->oh_len = 0;
+               ophdr->oh_res2 = 0;
+               ophdr->oh_clientid = ticket->t_clientid;
+               ophdr->oh_tid = cpu_to_be32(ticket->t_tid);
+
+               ticket->t_flags &= ~XLOG_TIC_INITED;
+               return sizeof(struct xlog_op_header);
+       }
+
+       return 0;
+}
+
+/*
+ * Fill in the op header at @ophdr for a region written under @ticket: the
+ * transaction id, the client id and the caller supplied @flags.
+ *
+ * Logs have been seen corrupted with bad transaction client ids, so the
+ * client id is checked here to make sure XFS does not generate one itself.
+ * Returns @ophdr on success.  For an unrecognised client id a warning is
+ * logged and NULL is returned, which xlog_write() turns into EIO.
+ */
+static xlog_op_header_t *
+xlog_write_setup_ophdr(
+       struct log              *log,
+       struct xlog_op_header   *ophdr,
+       struct xlog_ticket      *ticket,
+       uint                    flags)
+{
+       /* @flags says whether a commit or unmount record is being copied */
+       ophdr->oh_flags = flags;
+       ophdr->oh_res2 = 0;
+       ophdr->oh_clientid = ticket->t_clientid;
+       ophdr->oh_tid = cpu_to_be32(ticket->t_tid);
+
+       if (ophdr->oh_clientid == XFS_TRANSACTION ||
+           ophdr->oh_clientid == XFS_VOLUME ||
+           ophdr->oh_clientid == XFS_LOG)
+               return ophdr;
+
+       xfs_fs_cmn_err(CE_WARN, log->l_mp,
+               "Bad XFS transaction clientid 0x%x in ticket 0x%p",
+               ophdr->oh_clientid, ticket);
+       return NULL;
+}
+
+/*
+ * Work out how much of a region can be copied into the current iclog.
+ * A region may have to be split across several log buffers; the progress
+ * through a split region is kept by the caller in @last_was_partial_copy and
+ * @bytes_consumed, so this function carries no state of its own.
+ *
+ * @space_available is the room left in the iclog and @space_required the full
+ * length of the region.  On return *copy_off is the offset into the region to
+ * copy from and *copy_len the number of bytes to copy; the op header length
+ * and continuation flags are set to match.
+ *
+ * Returns 0 if the region is completed by this copy.  Otherwise the region
+ * was split, another op header has been charged to the ticket, and the size
+ * of that op header is returned.
+ */
+static int
+xlog_write_setup_copy(
+       struct xlog_ticket      *ticket,
+       struct xlog_op_header   *ophdr,
+       int                     space_available,
+       int                     space_required,
+       int                     *copy_off,
+       int                     *copy_len,
+       int                     *last_was_partial_copy,
+       int                     *bytes_consumed)
+{
+       int                     remaining = space_required - *bytes_consumed;
+
+       *copy_off = *bytes_consumed;
+
+       if (remaining > space_available) {
+               /* region does not fit: use up the space and continue later */
+               *copy_len = space_available;
+               ophdr->oh_len = cpu_to_be32(*copy_len);
+               ophdr->oh_flags |= XLOG_CONTINUE_TRANS;
+               if (*last_was_partial_copy)
+                       ophdr->oh_flags |= XLOG_WAS_CONT_TRANS;
+               *bytes_consumed += *copy_len;
+               (*last_was_partial_copy)++;
+
+               /* the continuation needs an op header of its own */
+               ticket->t_res_num_ophdrs++;
+               ticket->t_curr_res -= sizeof(struct xlog_op_header);
+
+               return sizeof(struct xlog_op_header);
+       }
+
+       /* the rest of the region fits, so the write completes here */
+       *copy_len = remaining;
+       ophdr->oh_len = cpu_to_be32(*copy_len);
+       if (*last_was_partial_copy)
+               ophdr->oh_flags |= (XLOG_END_TRANS|XLOG_WAS_CONT_TRANS);
+       *last_was_partial_copy = 0;
+       *bytes_consumed = 0;
+
+       return 0;
+}
+
+/*
+ * Finish off the copy of one region into @iclog.
+ *
+ * If the region was only partially copied (*partial_copy set), the record and
+ * data counts are accounted to the iclog, reset, and the iclog is released;
+ * the partial copy state is left alone for the caller's next pass.
+ *
+ * Otherwise the partial copy state is cleared.  If the space left after
+ * @log_offset is no larger than an op header, the iclog is pushed: the counts
+ * are accounted and reset, the iclog is marked for sync under l_icloglock and
+ * it is then either released or, when @commit_iclog is supplied, handed back
+ * through it without being released.
+ *
+ * Returns 0, or the result of xlog_state_release_iclog() when the iclog was
+ * released here.
+ */
+static int
+xlog_write_copy_finish(
+       struct log              *log,
+       struct xlog_in_core     *iclog,
+       uint                    flags,
+       int                     *record_cnt,
+       int                     *data_cnt,
+       int                     *partial_copy,
+       int                     *partial_copy_len,
+       int                     log_offset,
+       struct xlog_in_core     **commit_iclog)
+{
+       if (*partial_copy) {
+               /*
+                * xlog_state_get_iclog_space has already marked this iclog
+                * WANT_SYNC, so it only needs to be accounted and released.
+                */
+               xlog_state_finish_copy(log, iclog, *record_cnt, *data_cnt);
+               *record_cnt = 0;
+               *data_cnt = 0;
+               return xlog_state_release_iclog(log, iclog);
+       }
+
+       *partial_copy = 0;
+       *partial_copy_len = 0;
+
+       /* more than an op header's worth of room left: keep this iclog */
+       if (iclog->ic_size - log_offset > sizeof(xlog_op_header_t))
+               return 0;
+
+       /* no more space in this iclog - push it. */
+       xlog_state_finish_copy(log, iclog, *record_cnt, *data_cnt);
+       *record_cnt = 0;
+       *data_cnt = 0;
+
+       spin_lock(&log->l_icloglock);
+       xlog_state_want_sync(log, iclog);
+       spin_unlock(&log->l_icloglock);
+
+       if (commit_iclog) {
+               ASSERT(flags & XLOG_COMMIT_TRANS);
+               *commit_iclog = iclog;
+               return 0;
+       }
+
+       return xlog_state_release_iclog(log, iclog);
+}
+
 /*
  * Write some region out to in-core log
  *
@@ -1655,209 +1867,157 @@ xlog_print_tic_res(xfs_mount_t *mp, xlog_ticket_t *ticket)
  */
 STATIC int
 xlog_write(
-       struct xfs_mount        *mp,
-       struct xfs_log_iovec    reg[],
-       int                     nentries,
+       struct log              *log,
+       struct xfs_log_vec      *log_vector,
        struct xlog_ticket      *ticket,
        xfs_lsn_t               *start_lsn,
        struct xlog_in_core     **commit_iclog,
        uint                    flags)
 {
-    xlog_t          *log = mp->m_log;
-    xlog_in_core_t   *iclog = NULL;  /* ptr to current in-core log */
-    xlog_op_header_t *logop_head;    /* ptr to log operation header */
-    __psint_t       ptr;            /* copy address into data region */
-    int                     len;            /* # xlog_write() bytes 2 still copy */
-    int                     index;          /* region index currently copying */
-    int                     log_offset;     /* offset (from 0) into data region */
-    int                     start_rec_copy; /* # bytes to copy for start record */
-    int                     partial_copy;   /* did we split a region? */
-    int                     partial_copy_len;/* # bytes copied if split region */
-    int                     need_copy;      /* # bytes need to memcpy this region */
-    int                     copy_len;       /* # bytes actually memcpy'ing */
-    int                     copy_off;       /* # bytes from entry start */
-    int                     contwr;         /* continued write of in-core log? */
-    int                     error;
-    int                     record_cnt = 0, data_cnt = 0;
-
-    partial_copy_len = partial_copy = 0;
-
-    /* Calculate potential maximum space.  Each region gets its own
-     * xlog_op_header_t and may need to be double word aligned.
-     */
-    len = 0;
-    if (ticket->t_flags & XLOG_TIC_INITED) {    /* acct for start rec of xact */
-       len += sizeof(xlog_op_header_t);
-       ticket->t_res_num_ophdrs++;
-    }
+       struct xlog_in_core     *iclog = NULL;
+       struct xfs_log_iovec    *vecp;
+       struct xfs_log_vec      *lv;
+       int                     len;
+       int                     index;
+       int                     partial_copy = 0;
+       int                     partial_copy_len = 0;
+       int                     contwr = 0;
+       int                     record_cnt = 0;
+       int                     data_cnt = 0;
+       int                     error;
 
-    for (index = 0; index < nentries; index++) {
-       len += sizeof(xlog_op_header_t);            /* each region gets >= 1 */
-       ticket->t_res_num_ophdrs++;
-       len += reg[index].i_len;
-       xlog_tic_add_region(ticket, reg[index].i_len, reg[index].i_type);
-    }
-    contwr = *start_lsn = 0;
+       *start_lsn = 0;
 
-    if (ticket->t_curr_res < len) {
-       xlog_print_tic_res(mp, ticket);
+       len = xlog_write_calc_vec_length(ticket, log_vector);
+       if (ticket->t_curr_res < len) {
+               xlog_print_tic_res(log->l_mp, ticket);
 #ifdef DEBUG
-       xlog_panic(
-               "xfs_log_write: reservation ran out. Need to up reservation");
+               xlog_panic(
+       "xfs_log_write: reservation ran out. Need to up reservation");
 #else
-       /* Customer configurable panic */
-       xfs_cmn_err(XFS_PTAG_LOGRES, CE_ALERT, mp,
-               "xfs_log_write: reservation ran out. Need to up reservation");
-       /* If we did not panic, shutdown the filesystem */
-       xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
+               /* Customer configurable panic */
+               xfs_cmn_err(XFS_PTAG_LOGRES, CE_ALERT, log->l_mp,
+       "xfs_log_write: reservation ran out. Need to up reservation");
+
+               /* If we did not panic, shutdown the filesystem */
+               xfs_force_shutdown(log->l_mp, SHUTDOWN_CORRUPT_INCORE);
 #endif
-    } else
+       }
+
        ticket->t_curr_res -= len;
 
-    for (index = 0; index < nentries; ) {
-       if ((error = xlog_state_get_iclog_space(log, len, &iclog, ticket,
-                                              &contwr, &log_offset)))
-               return error;
+       index = 0;
+       lv = log_vector;
+       vecp = lv->lv_iovecp;
+       while (lv && index < lv->lv_niovecs) {
+               void            *ptr;
+               int             log_offset;
 
-       ASSERT(log_offset <= iclog->ic_size - 1);
-       ptr = (__psint_t) ((char *)iclog->ic_datap+log_offset);
+               error = xlog_state_get_iclog_space(log, len, &iclog, ticket,
+                                                  &contwr, &log_offset);
+               if (error)
+                       return error;
 
-       /* start_lsn is the first lsn written to. That's all we need. */
-       if (! *start_lsn)
-           *start_lsn = be64_to_cpu(iclog->ic_header.h_lsn);
+               ASSERT(log_offset <= iclog->ic_size - 1);
+               ptr = iclog->ic_datap + log_offset;
 
-       /* This loop writes out as many regions as can fit in the amount
-        * of space which was allocated by xlog_state_get_iclog_space().
-        */
-       while (index < nentries) {
-           ASSERT(reg[index].i_len % sizeof(__int32_t) == 0);
-           ASSERT((__psint_t)ptr % sizeof(__int32_t) == 0);
-           start_rec_copy = 0;
-
-           /* If first write for transaction, insert start record.
-            * We can't be trying to commit if we are inited.  We can't
-            * have any "partial_copy" if we are inited.
-            */
-           if (ticket->t_flags & XLOG_TIC_INITED) {
-               logop_head              = (xlog_op_header_t *)ptr;
-               logop_head->oh_tid      = cpu_to_be32(ticket->t_tid);
-               logop_head->oh_clientid = ticket->t_clientid;
-               logop_head->oh_len      = 0;
-               logop_head->oh_flags    = XLOG_START_TRANS;
-               logop_head->oh_res2     = 0;
-               ticket->t_flags         &= ~XLOG_TIC_INITED;    /* clear bit */
-               record_cnt++;
-
-               start_rec_copy = sizeof(xlog_op_header_t);
-               xlog_write_adv_cnt(ptr, len, log_offset, start_rec_copy);
-           }
+               /* start_lsn is the first lsn written to. That's all we need. */
+               if (!*start_lsn)
+                       *start_lsn = be64_to_cpu(iclog->ic_header.h_lsn);
 
-           /* Copy log operation header directly into data section */
-           logop_head                  = (xlog_op_header_t *)ptr;
-           logop_head->oh_tid          = cpu_to_be32(ticket->t_tid);
-           logop_head->oh_clientid     = ticket->t_clientid;
-           logop_head->oh_res2         = 0;
+               /*
+                * This loop writes out as many regions as can fit in the amount
+                * of space which was allocated by xlog_state_get_iclog_space().
+                */
+               while (lv && index < lv->lv_niovecs) {
+                       struct xfs_log_iovec    *reg = &vecp[index];
+                       struct xlog_op_header   *ophdr;
+                       int                     start_rec_copy;
+                       int                     copy_len;
+                       int                     copy_off;
+
+                       ASSERT(reg->i_len % sizeof(__int32_t) == 0);
+                       ASSERT((unsigned long)ptr % sizeof(__int32_t) == 0);
+
+                       start_rec_copy = xlog_write_start_rec(ptr, ticket);
+                       if (start_rec_copy) {
+                               record_cnt++;
+                               xlog_write_adv_cnt(&ptr, &len, &log_offset,
+                                                  start_rec_copy);
+                       }
 
-           /* header copied directly */
-           xlog_write_adv_cnt(ptr, len, log_offset, sizeof(xlog_op_header_t));
+                       ophdr = xlog_write_setup_ophdr(log, ptr, ticket, flags);
+                       if (!ophdr)
+                               return XFS_ERROR(EIO);
 
-           /* are we copying a commit or unmount record? */
-           logop_head->oh_flags = flags;
+                       xlog_write_adv_cnt(&ptr, &len, &log_offset,
+                                          sizeof(struct xlog_op_header));
+
+                       len += xlog_write_setup_copy(ticket, ophdr,
+                                                    iclog->ic_size-log_offset,
+                                                    reg->i_len,
+                                                    &copy_off, &copy_len,
+                                                    &partial_copy,
+                                                    &partial_copy_len);
+                       xlog_verify_dest_ptr(log, ptr);
+
+                       /* copy region */
+                       ASSERT(copy_len >= 0);
+                       memcpy(ptr, reg->i_addr + copy_off, copy_len);
+                       xlog_write_adv_cnt(&ptr, &len, &log_offset, copy_len);
+
+                       copy_len += start_rec_copy + sizeof(xlog_op_header_t);
+                       record_cnt++;
+                       data_cnt += contwr ? copy_len : 0;
+
+                       error = xlog_write_copy_finish(log, iclog, flags,
+                                                      &record_cnt, &data_cnt,
+                                                      &partial_copy,
+                                                      &partial_copy_len,
+                                                      log_offset,
+                                                      commit_iclog);
+                       if (error)
+                               return error;
 
-           /*
-            * We've seen logs corrupted with bad transaction client
-            * ids.  This makes sure that XFS doesn't generate them on.
-            * Turn this into an EIO and shut down the filesystem.
-            */
-           switch (logop_head->oh_clientid)  {
-           case XFS_TRANSACTION:
-           case XFS_VOLUME:
-           case XFS_LOG:
-               break;
-           default:
-               xfs_fs_cmn_err(CE_WARN, mp,
-                   "Bad XFS transaction clientid 0x%x in ticket 0x%p",
-                   logop_head->oh_clientid, ticket);
-               return XFS_ERROR(EIO);
-           }
+                       /*
+                        * if we had a partial copy, we need to get more iclog
+                        * space but we don't want to increment the region
+                        * index because there is still more in this region to
+                        * write.
+                        *
+                        * If we completed writing this region, and we flushed
+                        * the iclog (indicated by resetting of the record
+                        * count), then we also need to get more log space. If
+                        * this was the last record, though, we are done and
+                        * can just return.
+                        */
+                       if (partial_copy)
+                               break;
 
-           /* Partial write last time? => (partial_copy != 0)
-            * need_copy is the amount we'd like to copy if everything could
-            * fit in the current memcpy.
-            */
-           need_copy = reg[index].i_len - partial_copy_len;
-
-           copy_off = partial_copy_len;
-           if (need_copy <= iclog->ic_size - log_offset) { /*complete write */
-               copy_len = need_copy;
-               logop_head->oh_len = cpu_to_be32(copy_len);
-               if (partial_copy)
-                   logop_head->oh_flags|= (XLOG_END_TRANS|XLOG_WAS_CONT_TRANS);
-               partial_copy_len = partial_copy = 0;
-           } else {                                        /* partial write */
-               copy_len = iclog->ic_size - log_offset;
-               logop_head->oh_len = cpu_to_be32(copy_len);
-               logop_head->oh_flags |= XLOG_CONTINUE_TRANS;
-               if (partial_copy)
-                       logop_head->oh_flags |= XLOG_WAS_CONT_TRANS;
-               partial_copy_len += copy_len;
-               partial_copy++;
-               len += sizeof(xlog_op_header_t); /* from splitting of region */
-               /* account for new log op header */
-               ticket->t_curr_res -= sizeof(xlog_op_header_t);
-               ticket->t_res_num_ophdrs++;
-           }
-           xlog_verify_dest_ptr(log, ptr);
-
-           /* copy region */
-           ASSERT(copy_len >= 0);
-           memcpy((xfs_caddr_t)ptr, reg[index].i_addr + copy_off, copy_len);
-           xlog_write_adv_cnt(ptr, len, log_offset, copy_len);
-
-           /* make copy_len total bytes copied, including headers */
-           copy_len += start_rec_copy + sizeof(xlog_op_header_t);
-           record_cnt++;
-           data_cnt += contwr ? copy_len : 0;
-           if (partial_copy) {                 /* copied partial region */
-                   /* already marked WANT_SYNC by xlog_state_get_iclog_space */
-                   xlog_state_finish_copy(log, iclog, record_cnt, data_cnt);
-                   record_cnt = data_cnt = 0;
-                   if ((error = xlog_state_release_iclog(log, iclog)))
-                           return error;
-                   break;                      /* don't increment index */
-           } else {                            /* copied entire region */
-               index++;
-               partial_copy_len = partial_copy = 0;
-
-               if (iclog->ic_size - log_offset <= sizeof(xlog_op_header_t)) {
-                   xlog_state_finish_copy(log, iclog, record_cnt, data_cnt);
-                   record_cnt = data_cnt = 0;
-                   spin_lock(&log->l_icloglock);
-                   xlog_state_want_sync(log, iclog);
-                   spin_unlock(&log->l_icloglock);
-                   if (commit_iclog) {
-                       ASSERT(flags & XLOG_COMMIT_TRANS);
-                       *commit_iclog = iclog;
-                   } else if ((error = xlog_state_release_iclog(log, iclog)))
-                          return error;
-                   if (index == nentries)
-                           return 0;           /* we are done */
-                   else
-                           break;
+                       if (++index == lv->lv_niovecs) {
+                               lv = lv->lv_next;
+                               index = 0;
+                               if (lv)
+                                       vecp = lv->lv_iovecp;
+                       }
+                       if (record_cnt == 0) {
+                               if (!lv)
+                                       return 0;
+                               break;
+                       }
                }
-           } /* if (partial_copy) */
-       } /* while (index < nentries) */
-    } /* for (index = 0; index < nentries; ) */
-    ASSERT(len == 0);
+       }
+
+       ASSERT(len == 0);
+
+       xlog_state_finish_copy(log, iclog, record_cnt, data_cnt);
+       if (!commit_iclog)
+               return xlog_state_release_iclog(log, iclog);
 
-    xlog_state_finish_copy(log, iclog, record_cnt, data_cnt);
-    if (commit_iclog) {
        ASSERT(flags & XLOG_COMMIT_TRANS);
        *commit_iclog = iclog;
        return 0;
-    }
-    return xlog_state_release_iclog(log, iclog);
-}      /* xlog_write */
+}
 
 
 /*****************************************************************************
@@ -3157,14 +3317,16 @@ xfs_log_ticket_get(
  * Allocate and initialise a new log ticket.
  */
 STATIC xlog_ticket_t *
-xlog_ticket_alloc(xlog_t               *log,
-               int             unit_bytes,
-               int             cnt,
-               char            client,
-               uint            xflags)
+xlog_ticket_alloc(
+       struct log      *log,
+       int             unit_bytes,
+       int             cnt,
+       char            client,
+       uint            xflags)
 {
-       xlog_ticket_t   *tic;
+       struct xlog_ticket *tic;
        uint            num_headers;
+       int             iclog_space;
 
        tic = kmem_zone_zalloc(xfs_log_ticket_zone, KM_SLEEP|KM_MAYFAIL);
        if (!tic)
@@ -3208,16 +3370,40 @@ xlog_ticket_alloc(xlog_t                *log,
        /* for start-rec */
        unit_bytes += sizeof(xlog_op_header_t);
 
-       /* for LR headers */
-       num_headers = ((unit_bytes + log->l_iclog_size-1) >> log->l_iclog_size_log);
+       /*
+        * for LR headers - the space for data in an iclog is the size minus
+        * the space used for the headers. If we use the iclog size, then we
+        * undercalculate the number of headers required.
+        *
+        * Furthermore - the addition of op headers for split-recs might
+        * increase the space required enough to require more log and op
+        * headers, so take that into account too.
+        *
+        * IMPORTANT: This reservation makes the assumption that if this
+        * transaction is the first in an iclog and hence has the LR headers
+        * accounted to it, then the remaining space in the iclog is
+        * exclusively for this transaction.  i.e. if the transaction is larger
+        * than the iclog, it will be the only thing in that iclog.
+        * Fundamentally, this means we must pass the entire log vector to
+        * xlog_write to guarantee this.
+        */
+       iclog_space = log->l_iclog_size - log->l_iclog_hsize;
+       num_headers = howmany(unit_bytes, iclog_space);
+
+       /* for split-recs - ophdrs added when data split over LRs */
+       unit_bytes += sizeof(xlog_op_header_t) * num_headers;
+
+       /* add extra header reservations if we overrun */
+       while (!num_headers ||
+              howmany(unit_bytes, iclog_space) > num_headers) {
+               unit_bytes += sizeof(xlog_op_header_t);
+               num_headers++;
+       }
        unit_bytes += log->l_iclog_hsize * num_headers;
 
        /* for commit-rec LR header - note: padding will subsume the ophdr */
        unit_bytes += log->l_iclog_hsize;
 
-       /* for split-recs - ophdrs added when data split over LRs */
-       unit_bytes += sizeof(xlog_op_header_t) * num_headers;
-
        /* for roundoff padding for transaction data and one for commit record */
        if (xfs_sb_version_haslogv2(&log->l_mp->m_sb) &&
            log->l_mp->m_sb.sb_logsunit > 1) {
@@ -3233,13 +3419,13 @@ xlog_ticket_alloc(xlog_t                *log,
        tic->t_curr_res         = unit_bytes;
        tic->t_cnt              = cnt;
        tic->t_ocnt             = cnt;
-       tic->t_tid              = (xlog_tid_t)((__psint_t)tic & 0xffffffff);
+       tic->t_tid              = random32();
        tic->t_clientid         = client;
        tic->t_flags            = XLOG_TIC_INITED;
        tic->t_trans_type       = 0;
        if (xflags & XFS_LOG_PERM_RESERV)
                tic->t_flags |= XLOG_TIC_PERM_RESERV;
-       sv_init(&(tic->t_wait), SV_DEFAULT, "logtick");
+       sv_init(&tic->t_wait, SV_DEFAULT, "logtick");
 
        xlog_tic_reset_res(tic);
 
@@ -3260,20 +3446,22 @@ xlog_ticket_alloc(xlog_t                *log,
  * part of the log in case we trash the log structure.
  */
 void
-xlog_verify_dest_ptr(xlog_t     *log,
-                    __psint_t  ptr)
+xlog_verify_dest_ptr(
+       struct log      *log,
+       char            *ptr)
 {
        int i;
        int good_ptr = 0;
 
-       for (i=0; i < log->l_iclog_bufs; i++) {
-               if (ptr >= (__psint_t)log->l_iclog_bak[i] &&
-                   ptr <= (__psint_t)log->l_iclog_bak[i]+log->l_iclog_size)
+       for (i = 0; i < log->l_iclog_bufs; i++) {
+               if (ptr >= log->l_iclog_bak[i] &&
+                   ptr <= log->l_iclog_bak[i] + log->l_iclog_size)
                        good_ptr++;
        }
-       if (! good_ptr)
+
+       if (!good_ptr)
                xlog_panic("xlog_verify_dest_ptr: invalid ptr");
-}      /* xlog_verify_dest_ptr */
+}
 
 STATIC void
 xlog_verify_grant_head(xlog_t *log, int equals)