ext4: defer clearing of PageWriteback after extent conversion
authorJan Kara <jack@suse.cz>
Tue, 4 Jun 2013 18:23:41 +0000 (14:23 -0400)
committerTheodore Ts'o <tytso@mit.edu>
Tue, 4 Jun 2013 18:23:41 +0000 (14:23 -0400)
Currently PageWriteback bit gets cleared from put_io_page() called
from ext4_end_bio().  This is somewhat inconvenient as extent tree is
not fully updated at that time (unwritten extents are not marked as
written) so we cannot read the data back yet.  This design was
dictated by lock ordering as we cannot start a transaction while
PageWriteback bit is set (we could easily deadlock with
ext4_da_writepages()).  But now that we use transaction reservation
for extent conversion, locking issues are solved and we can move
PageWriteback bit clearing after extent conversion is done.  As a
result we can remove wait for unwritten extent conversion from
ext4_sync_file() because it already implicitely happens through
wait_on_page_writeback().

We implement deferring of PageWriteback clearing by queueing completed
bios to appropriate io_end and processing all the pages when io_end is
going to be freed instead of at the moment ext4_io_end() is called.

Reviewed-by: Zheng Liu <wenqing.lz@taobao.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
fs/ext4/ext4.h
fs/ext4/fsync.c
fs/ext4/page-io.c

index b69a733..74db579 100644 (file)
@@ -180,8 +180,7 @@ struct ext4_map_blocks {
  * Flags for ext4_io_end->flags
  */
 #define        EXT4_IO_END_UNWRITTEN   0x0001
-#define EXT4_IO_END_ERROR      0x0002
-#define EXT4_IO_END_DIRECT     0x0004
+#define EXT4_IO_END_DIRECT     0x0002
 
 /*
  * For converting uninitialized extents on a work queue. 'handle' is used for
@@ -192,6 +191,8 @@ typedef struct ext4_io_end {
        handle_t                *handle;        /* handle reserved for extent
                                                 * conversion */
        struct inode            *inode;         /* file being written to */
+       struct bio              *bio;           /* Linked list of completed
+                                                * bios covering the extent */
        unsigned int            flag;           /* unwritten or not */
        loff_t                  offset;         /* offset in the file */
        ssize_t                 size;           /* size of the extent */
index e0ba8a4..dcc881b 100644 (file)
@@ -132,10 +132,6 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
        if (inode->i_sb->s_flags & MS_RDONLY)
                goto out;
 
-       ret = ext4_flush_unwritten_io(inode);
-       if (ret < 0)
-               goto out;
-
        if (!journal) {
                ret = __sync_inode(inode, datasync);
                if (!ret && !hlist_empty(&inode->i_dentry))
index bcdfd6b..755741c 100644 (file)
@@ -64,14 +64,83 @@ void ext4_ioend_shutdown(struct inode *inode)
                cancel_work_sync(&EXT4_I(inode)->i_unrsv_conversion_work);
 }
 
+/*
+ * Print an buffer I/O error compatible with the fs/buffer.c.  This
+ * provides compatibility with dmesg scrapers that look for a specific
+ * buffer I/O error message.  We really need a unified error reporting
+ * structure to userspace ala Digital Unix's uerf system, but it's
+ * probably not going to happen in my lifetime, due to LKML politics...
+ */
+static void buffer_io_error(struct buffer_head *bh)
+{
+       char b[BDEVNAME_SIZE];
+       printk(KERN_ERR "Buffer I/O error on device %s, logical block %llu\n",
+                       bdevname(bh->b_bdev, b),
+                       (unsigned long long)bh->b_blocknr);
+}
+
+static void ext4_finish_bio(struct bio *bio)
+{
+       int i;
+       int error = !test_bit(BIO_UPTODATE, &bio->bi_flags);
+
+       for (i = 0; i < bio->bi_vcnt; i++) {
+               struct bio_vec *bvec = &bio->bi_io_vec[i];
+               struct page *page = bvec->bv_page;
+               struct buffer_head *bh, *head;
+               unsigned bio_start = bvec->bv_offset;
+               unsigned bio_end = bio_start + bvec->bv_len;
+               unsigned under_io = 0;
+               unsigned long flags;
+
+               if (!page)
+                       continue;
+
+               if (error) {
+                       SetPageError(page);
+                       set_bit(AS_EIO, &page->mapping->flags);
+               }
+               bh = head = page_buffers(page);
+               /*
+                * We check all buffers in the page under BH_Uptodate_Lock
+                * to avoid races with other end io clearing async_write flags
+                */
+               local_irq_save(flags);
+               bit_spin_lock(BH_Uptodate_Lock, &head->b_state);
+               do {
+                       if (bh_offset(bh) < bio_start ||
+                           bh_offset(bh) + bh->b_size > bio_end) {
+                               if (buffer_async_write(bh))
+                                       under_io++;
+                               continue;
+                       }
+                       clear_buffer_async_write(bh);
+                       if (error)
+                               buffer_io_error(bh);
+               } while ((bh = bh->b_this_page) != head);
+               bit_spin_unlock(BH_Uptodate_Lock, &head->b_state);
+               local_irq_restore(flags);
+               if (!under_io)
+                       end_page_writeback(page);
+       }
+}
+
 static void ext4_release_io_end(ext4_io_end_t *io_end)
 {
+       struct bio *bio, *next_bio;
+
        BUG_ON(!list_empty(&io_end->list));
        BUG_ON(io_end->flag & EXT4_IO_END_UNWRITTEN);
        WARN_ON(io_end->handle);
 
        if (atomic_dec_and_test(&EXT4_I(io_end->inode)->i_ioend_count))
                wake_up_all(ext4_ioend_wq(io_end->inode));
+
+       for (bio = io_end->bio; bio; bio = next_bio) {
+               next_bio = bio->bi_private;
+               ext4_finish_bio(bio);
+               bio_put(bio);
+       }
        if (io_end->flag & EXT4_IO_END_DIRECT)
                inode_dio_done(io_end->inode);
        if (io_end->iocb)
@@ -267,79 +336,31 @@ ext4_io_end_t *ext4_get_io_end(ext4_io_end_t *io_end)
        return io_end;
 }
 
-/*
- * Print an buffer I/O error compatible with the fs/buffer.c.  This
- * provides compatibility with dmesg scrapers that look for a specific
- * buffer I/O error message.  We really need a unified error reporting
- * structure to userspace ala Digital Unix's uerf system, but it's
- * probably not going to happen in my lifetime, due to LKML politics...
- */
-static void buffer_io_error(struct buffer_head *bh)
-{
-       char b[BDEVNAME_SIZE];
-       printk(KERN_ERR "Buffer I/O error on device %s, logical block %llu\n",
-                       bdevname(bh->b_bdev, b),
-                       (unsigned long long)bh->b_blocknr);
-}
-
 static void ext4_end_bio(struct bio *bio, int error)
 {
        ext4_io_end_t *io_end = bio->bi_private;
-       struct inode *inode;
-       int i;
-       int blocksize;
        sector_t bi_sector = bio->bi_sector;
 
        BUG_ON(!io_end);
-       inode = io_end->inode;
-       blocksize = 1 << inode->i_blkbits;
-       bio->bi_private = NULL;
        bio->bi_end_io = NULL;
        if (test_bit(BIO_UPTODATE, &bio->bi_flags))
                error = 0;
-       for (i = 0; i < bio->bi_vcnt; i++) {
-               struct bio_vec *bvec = &bio->bi_io_vec[i];
-               struct page *page = bvec->bv_page;
-               struct buffer_head *bh, *head;
-               unsigned bio_start = bvec->bv_offset;
-               unsigned bio_end = bio_start + bvec->bv_len;
-               unsigned under_io = 0;
-               unsigned long flags;
 
-               if (!page)
-                       continue;
-
-               if (error) {
-                       SetPageError(page);
-                       set_bit(AS_EIO, &page->mapping->flags);
-               }
-               bh = head = page_buffers(page);
+       if (io_end->flag & EXT4_IO_END_UNWRITTEN) {
                /*
-                * We check all buffers in the page under BH_Uptodate_Lock
-                * to avoid races with other end io clearing async_write flags
+                * Link bio into list hanging from io_end. We have to do it
+                * atomically as bio completions can be racing against each
+                * other.
                 */
-               local_irq_save(flags);
-               bit_spin_lock(BH_Uptodate_Lock, &head->b_state);
-               do {
-                       if (bh_offset(bh) < bio_start ||
-                           bh_offset(bh) + blocksize > bio_end) {
-                               if (buffer_async_write(bh))
-                                       under_io++;
-                               continue;
-                       }
-                       clear_buffer_async_write(bh);
-                       if (error)
-                               buffer_io_error(bh);
-               } while ((bh = bh->b_this_page) != head);
-               bit_spin_unlock(BH_Uptodate_Lock, &head->b_state);
-               local_irq_restore(flags);
-               if (!under_io)
-                       end_page_writeback(page);
+               bio->bi_private = xchg(&io_end->bio, bio);
+       } else {
+               ext4_finish_bio(bio);
+               bio_put(bio);
        }
-       bio_put(bio);
 
        if (error) {
-               io_end->flag |= EXT4_IO_END_ERROR;
+               struct inode *inode = io_end->inode;
+
                ext4_warning(inode->i_sb, "I/O error writing to inode %lu "
                             "(offset %llu size %ld starting block %llu)",
                             inode->i_ino,
@@ -348,7 +369,6 @@ static void ext4_end_bio(struct bio *bio, int error)
                             (unsigned long long)
                             bi_sector >> (inode->i_blkbits - 9));
        }
-
        ext4_put_io_end_defer(io_end);
 }