[XFS] Convert l_flushsema to a sv_t
author Matthew Wilcox <willy@linux.intel.com>
Mon, 19 May 2008 06:34:27 +0000 (16:34 +1000)
committer Niv Sardi <xaiki@debian.org>
Mon, 28 Jul 2008 06:58:12 +0000 (16:58 +1000)
The l_flushsema doesn't exactly have completion semantics, nor mutex
semantics. It's used as a list of tasks which are waiting to be notified
that a flush has completed. It was also being used in a way that was
potentially racy, depending on the semaphore implementation.

By using an sv_t instead of a semaphore we avoid the need for a separate
waiter counter (l_flushcnt), since we just need to wake everything on the
queue.

Original waitqueue implementation from Matthew Wilcox. Cleanup and
conversion to sv_t by Christoph Hellwig.

SGI-PV: 981507
SGI-Modid: xfs-linux-melb:xfs-kern:31059a

Signed-off-by: Matthew Wilcox <willy@linux.intel.com>
Signed-off-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
fs/xfs/xfs_log.c
fs/xfs/xfs_log_priv.h

index 6195cc8..91b00a5 100644 (file)
@@ -1232,7 +1232,7 @@ xlog_alloc_log(xfs_mount_t        *mp,
 
        spin_lock_init(&log->l_icloglock);
        spin_lock_init(&log->l_grant_lock);
-       initnsema(&log->l_flushsema, 0, "ic-flush");
+       sv_init(&log->l_flush_wait, 0, "flush_wait");
 
        /* log record size must be multiple of BBSIZE; see xlog_rec_header_t */
        ASSERT((XFS_BUF_SIZE(bp) & BBMASK) == 0);
@@ -1577,7 +1577,6 @@ xlog_dealloc_log(xlog_t *log)
                kmem_free(iclog);
                iclog = next_iclog;
        }
-       freesema(&log->l_flushsema);
        spinlock_destroy(&log->l_icloglock);
        spinlock_destroy(&log->l_grant_lock);
 
@@ -2101,6 +2100,7 @@ xlog_state_do_callback(
        int                funcdidcallbacks; /* flag: function did callbacks */
        int                repeats;     /* for issuing console warnings if
                                         * looping too many times */
+       int                wake = 0;
 
        spin_lock(&log->l_icloglock);
        first_iclog = iclog = log->l_iclog;
@@ -2282,15 +2282,13 @@ xlog_state_do_callback(
        }
 #endif
 
-       flushcnt = 0;
-       if (log->l_iclog->ic_state & (XLOG_STATE_ACTIVE|XLOG_STATE_IOERROR)) {
-               flushcnt = log->l_flushcnt;
-               log->l_flushcnt = 0;
-       }
+       if (log->l_iclog->ic_state & (XLOG_STATE_ACTIVE|XLOG_STATE_IOERROR))
+               wake = 1;
        spin_unlock(&log->l_icloglock);
-       while (flushcnt--)
-               vsema(&log->l_flushsema);
-}      /* xlog_state_do_callback */
+
+       if (wake)
+               sv_broadcast(&log->l_flush_wait);
+}
 
 
 /*
@@ -2388,16 +2386,15 @@ restart:
        }
 
        iclog = log->l_iclog;
-       if (! (iclog->ic_state == XLOG_STATE_ACTIVE)) {
-               log->l_flushcnt++;
-               spin_unlock(&log->l_icloglock);
+       if (iclog->ic_state != XLOG_STATE_ACTIVE) {
                xlog_trace_iclog(iclog, XLOG_TRACE_SLEEP_FLUSH);
                XFS_STATS_INC(xs_log_noiclogs);
-               /* Ensure that log writes happen */
-               psema(&log->l_flushsema, PINOD);
+
+               /* Wait for log writes to have flushed */
+               sv_wait(&log->l_flush_wait, 0, &log->l_icloglock, 0);
                goto restart;
        }
-       ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE);
+
        head = &iclog->ic_header;
 
        atomic_inc(&iclog->ic_refcnt);  /* prevents sync */
index 8952a39..6245913 100644 (file)
@@ -423,10 +423,8 @@ typedef struct log {
        int                     l_logBBsize;    /* size of log in BB chunks */
 
        /* The following block of fields are changed while holding icloglock */
-       sema_t                  l_flushsema ____cacheline_aligned_in_smp;
-                                               /* iclog flushing semaphore */
-       int                     l_flushcnt;     /* # of procs waiting on this
-                                                * sema */
+       sv_t                    l_flush_wait ____cacheline_aligned_in_smp;
+                                               /* waiting for iclog flush */
        int                     l_covered_state;/* state of "covering disk
                                                 * log entries" */
        xlog_in_core_t          *l_iclog;       /* head log queue       */