Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/hch/vfs...

[pandora-kernel.git] / fs / gfs2 / file.c
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c

index edeb9e8..ce36a56 100644 (file)
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -59,15 +59,24 @@ static loff_t gfs2_llseek(struct file *file, loff_t offset, int origin)
         struct gfs2_holder i_gh;
         loff_t error;
  
-       if (origin == 2) {
+       switch (origin) {
+       case SEEK_END: /* These reference inode->i_size */
+       case SEEK_DATA:
+       case SEEK_HOLE:
                 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY,
                                            &i_gh);
                 if (!error) {
-                       error = generic_file_llseek_unlocked(file, offset, origin);
+                       error = generic_file_llseek(file, offset, origin);
                         gfs2_glock_dq_uninit(&i_gh);
                 }
-       } else
-               error = generic_file_llseek_unlocked(file, offset, origin);
+               break;
+       case SEEK_CUR:
+       case SEEK_SET:
+               error = generic_file_llseek(file, offset, origin);
+               break;
+       default:
+               error = -EINVAL;
+       }
  
         return error;
  }
@@ -357,8 +366,15 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
         unsigned int data_blocks, ind_blocks, rblocks;
         struct gfs2_holder gh;
         struct gfs2_alloc *al;
+       loff_t size;
         int ret;
  
+       /* Wait if the fs is frozen. This is racy, so we check again later
+        * on and retry if the fs has been frozen after the page lock has
+        * been acquired.
+        */
+       vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
+
         gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
         ret = gfs2_glock_nq(&gh);
         if (ret)
@@ -367,8 +383,15 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
         set_bit(GLF_DIRTY, &ip->i_gl->gl_flags);
         set_bit(GIF_SW_PAGED, &ip->i_flags);
  
-       if (!gfs2_write_alloc_required(ip, pos, PAGE_CACHE_SIZE))
+       if (!gfs2_write_alloc_required(ip, pos, PAGE_CACHE_SIZE)) {
+               lock_page(page);
+               if (!PageUptodate(page) || page->mapping != inode->i_mapping) {
+                       ret = -EAGAIN;
+                       unlock_page(page);
+               }
                 goto out_unlock;
+       }
+
         ret = -ENOMEM;
         al = gfs2_alloc_get(ip);
         if (al == NULL)
@@ -388,7 +411,7 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
                 rblocks += data_blocks ? data_blocks : 1;
         if (ind_blocks || data_blocks) {
                 rblocks += RES_STATFS + RES_QUOTA;
-               rblocks += gfs2_rg_blocks(al);
+               rblocks += gfs2_rg_blocks(ip);
         }
         ret = gfs2_trans_begin(sdp, rblocks, 0);
         if (ret)
@@ -396,21 +419,29 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
  
         lock_page(page);
         ret = -EINVAL;
-       last_index = ip->i_inode.i_size >> PAGE_CACHE_SHIFT;
-       if (page->index > last_index)
-               goto out_unlock_page;
+       size = i_size_read(inode);
+       last_index = (size - 1) >> PAGE_CACHE_SHIFT;
+       /* Check page index against inode size */
+       if (size == 0 || (page->index > last_index))
+               goto out_trans_end;
+
+       ret = -EAGAIN;
+       /* If truncated, we must retry the operation; we may have raced
+        * with the glock demotion code.
+        */
+       if (!PageUptodate(page) || page->mapping != inode->i_mapping)
+               goto out_trans_end;
+
+       /* Unstuff, if required, and allocate backing blocks for page */
         ret = 0;
-       if (!PageUptodate(page) || page->mapping != ip->i_inode.i_mapping)
-               goto out_unlock_page;
-       if (gfs2_is_stuffed(ip)) {
+       if (gfs2_is_stuffed(ip))
                 ret = gfs2_unstuff_dinode(ip, page);
-               if (ret)
-                       goto out_unlock_page;
-       }
-       ret = gfs2_allocate_page_backing(page);
+       if (ret == 0)
+               ret = gfs2_allocate_page_backing(page);
  
-out_unlock_page:
-       unlock_page(page);
+out_trans_end:
+       if (ret)
+               unlock_page(page);
         gfs2_trans_end(sdp);
  out_trans_fail:
         gfs2_inplace_release(ip);
@@ -422,11 +453,17 @@ out_unlock:
         gfs2_glock_dq(&gh);
  out:
         gfs2_holder_uninit(&gh);
-       if (ret == -ENOMEM)
-               ret = VM_FAULT_OOM;
-       else if (ret)
-               ret = VM_FAULT_SIGBUS;
-       return ret;
+       if (ret == 0) {
+               set_page_dirty(page);
+               /* This check must come after the transaction lock is dropped */
+               if (inode->i_sb->s_frozen == SB_UNFROZEN) {
+                       wait_on_page_writeback(page);
+               } else {
+                       ret = -EAGAIN;
+                       unlock_page(page);
+               }
+       }
+       return block_page_mkwrite_return(ret);
  }
  
  static const struct vm_operations_struct gfs2_vm_ops = {
@@ -551,8 +588,16 @@ static int gfs2_close(struct inode *inode, struct file *file)
   * @end: the end position in the file to sync
   * @datasync: set if we can ignore timestamp changes
   *
- * The VFS will flush data for us. We only need to worry
- * about metadata here.
+ * We split the data flushing here so that we don't wait for the data
+ * until after we've also sent the metadata to disk. Note that for
+ * data=ordered, we will write & wait for the data at the log flush
+ * stage anyway, so this is unlikely to make much of a difference
+ * except in the data=writeback case.
+ *
+ * If the fdatawrite fails for any reason other than -EIO, we will
+ * continue with the remainder of the fsync, although we'll still
+ * report the error at the end. This matches the behaviour of
+ * filemap_write_and_wait_range().
   *
   * Returns: errno
   */
@@ -560,30 +605,34 @@ static int gfs2_close(struct inode *inode, struct file *file)
  static int gfs2_fsync(struct file *file, loff_t start, loff_t end,
                       int datasync)
  {
-       struct inode *inode = file->f_mapping->host;
+       struct address_space *mapping = file->f_mapping;
+       struct inode *inode = mapping->host;
         int sync_state = inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC);
         struct gfs2_inode *ip = GFS2_I(inode);
-       int ret;
+       int ret, ret1 = 0;
  
-       ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
-       if (ret)
-               return ret;
-       mutex_lock(&inode->i_mutex);
+       if (mapping->nrpages) {
+               ret1 = filemap_fdatawrite_range(mapping, start, end);
+               if (ret1 == -EIO)
+                       return ret1;
+       }
  
         if (datasync)
                 sync_state &= ~I_DIRTY_SYNC;
  
         if (sync_state) {
                 ret = sync_inode_metadata(inode, 1);
-               if (ret) {
-                       mutex_unlock(&inode->i_mutex);
+               if (ret)
                         return ret;
-               }
-               gfs2_ail_flush(ip->i_gl);
+               if (gfs2_is_jdata(ip))
+                       filemap_write_and_wait(mapping);
+               gfs2_ail_flush(ip->i_gl, 1);
         }
  
-       mutex_unlock(&inode->i_mutex);
-       return 0;
+       if (mapping->nrpages)
+               ret = filemap_fdatawait_range(mapping, start, end);
+
+       return ret ? ret : ret1;
  }
  
  /**
@@ -620,135 +669,18 @@ static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
         return generic_file_aio_write(iocb, iov, nr_segs, pos);
  }
  
-static int empty_write_end(struct page *page, unsigned from,
-                          unsigned to, int mode)
-{
-       struct inode *inode = page->mapping->host;
-       struct gfs2_inode *ip = GFS2_I(inode);
-       struct buffer_head *bh;
-       unsigned offset, blksize = 1 << inode->i_blkbits;
-       pgoff_t end_index = i_size_read(inode) >> PAGE_CACHE_SHIFT;
-
-       zero_user(page, from, to-from);
-       mark_page_accessed(page);
-
-       if (page->index < end_index || !(mode & FALLOC_FL_KEEP_SIZE)) {
-               if (!gfs2_is_writeback(ip))
-                       gfs2_page_add_databufs(ip, page, from, to);
-
-               block_commit_write(page, from, to);
-               return 0;
-       }
-
-       offset = 0;
-       bh = page_buffers(page);
-       while (offset < to) {
-               if (offset >= from) {
-                       set_buffer_uptodate(bh);
-                       mark_buffer_dirty(bh);
-                       clear_buffer_new(bh);
-                       write_dirty_buffer(bh, WRITE);
-               }
-               offset += blksize;
-               bh = bh->b_this_page;
-       }
-
-       offset = 0;
-       bh = page_buffers(page);
-       while (offset < to) {
-               if (offset >= from) {
-                       wait_on_buffer(bh);
-                       if (!buffer_uptodate(bh))
-                               return -EIO;
-               }
-               offset += blksize;
-               bh = bh->b_this_page;
-       }
-       return 0;
-}
-
-static int needs_empty_write(sector_t block, struct inode *inode)
-{
-       int error;
-       struct buffer_head bh_map = { .b_state = 0, .b_blocknr = 0 };
-
-       bh_map.b_size = 1 << inode->i_blkbits;
-       error = gfs2_block_map(inode, block, &bh_map, 0);
-       if (unlikely(error))
-               return error;
-       return !buffer_mapped(&bh_map);
-}
-
-static int write_empty_blocks(struct page *page, unsigned from, unsigned to,
-                             int mode)
-{
-       struct inode *inode = page->mapping->host;
-       unsigned start, end, next, blksize;
-       sector_t block = page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
-       int ret;
-
-       blksize = 1 << inode->i_blkbits;
-       next = end = 0;
-       while (next < from) {
-               next += blksize;
-               block++;
-       }
-       start = next;
-       do {
-               next += blksize;
-               ret = needs_empty_write(block, inode);
-               if (unlikely(ret < 0))
-                       return ret;
-               if (ret == 0) {
-                       if (end) {
-                               ret = __block_write_begin(page, start, end - start,
-                                                         gfs2_block_map);
-                               if (unlikely(ret))
-                                       return ret;
-                               ret = empty_write_end(page, start, end, mode);
-                               if (unlikely(ret))
-                                       return ret;
-                               end = 0;
-                       }
-                       start = next;
-               }
-               else
-                       end = next;
-               block++;
-       } while (next < to);
-
-       if (end) {
-               ret = __block_write_begin(page, start, end - start, gfs2_block_map);
-               if (unlikely(ret))
-                       return ret;
-               ret = empty_write_end(page, start, end, mode);
-               if (unlikely(ret))
-                       return ret;
-       }
-
-       return 0;
-}
-
  static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
                            int mode)
  {
         struct gfs2_inode *ip = GFS2_I(inode);
         struct buffer_head *dibh;
         int error;
-       u64 start = offset >> PAGE_CACHE_SHIFT;
-       unsigned int start_offset = offset & ~PAGE_CACHE_MASK;
-       u64 end = (offset + len - 1) >> PAGE_CACHE_SHIFT;
-       pgoff_t curr;
-       struct page *page;
-       unsigned int end_offset = (offset + len) & ~PAGE_CACHE_MASK;
-       unsigned int from, to;
-
-       if (!end_offset)
-               end_offset = PAGE_CACHE_SIZE;
+       unsigned int nr_blks;
+       sector_t lblock = offset >> inode->i_blkbits;
  
         error = gfs2_meta_inode_buffer(ip, &dibh);
         if (unlikely(error))
-               goto out;
+               return error;
  
         gfs2_trans_add_bh(ip->i_gl, dibh, 1);
  
@@ -758,40 +690,31 @@ static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
                         goto out;
         }
  
-       curr = start;
-       offset = start << PAGE_CACHE_SHIFT;
-       from = start_offset;
-       to = PAGE_CACHE_SIZE;
-       while (curr <= end) {
-               page = grab_cache_page_write_begin(inode->i_mapping, curr,
-                                                  AOP_FLAG_NOFS);
-               if (unlikely(!page)) {
-                       error = -ENOMEM;
-                       goto out;
-               }
+       while (len) {
+               struct buffer_head bh_map = { .b_state = 0, .b_blocknr = 0 };
+               bh_map.b_size = len;
+               set_buffer_zeronew(&bh_map);
  
-               if (curr == end)
-                       to = end_offset;
-               error = write_empty_blocks(page, from, to, mode);
-               if (!error && offset + to > inode->i_size &&
-                   !(mode & FALLOC_FL_KEEP_SIZE)) {
-                       i_size_write(inode, offset + to);
-               }
-               unlock_page(page);
-               page_cache_release(page);
-               if (error)
+               error = gfs2_block_map(inode, lblock, &bh_map, 1);
+               if (unlikely(error))
                         goto out;
-               curr++;
-               offset += PAGE_CACHE_SIZE;
-               from = 0;
+               len -= bh_map.b_size;
+               nr_blks = bh_map.b_size >> inode->i_blkbits;
+               lblock += nr_blks;
+               if (!buffer_new(&bh_map))
+                       continue;
+               if (unlikely(!buffer_zeronew(&bh_map))) {
+                       error = -EIO;
+                       goto out;
+               }
         }
+       if (offset + len > inode->i_size && !(mode & FALLOC_FL_KEEP_SIZE))
+               i_size_write(inode, offset + len);
  
-       gfs2_dinode_out(ip, dibh->b_data);
         mark_inode_dirty(inode);
  
-       brelse(dibh);
-
  out:
+       brelse(dibh);
         return error;
  }
  
@@ -799,7 +722,7 @@ static void calc_max_reserv(struct gfs2_inode *ip, loff_t max, loff_t *len,
                             unsigned int *data_blocks, unsigned int *ind_blocks)
  {
         const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
-       unsigned int max_blocks = ip->i_alloc->al_rgd->rd_free_clone;
+       unsigned int max_blocks = ip->i_rgd->rd_free_clone;
         unsigned int tmp, max_data = max_blocks - 3 * (sdp->sd_max_height - 1);
  
         for (tmp = max_data; tmp > sdp->sd_diptrs;) {
@@ -831,6 +754,7 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
         int error;
         loff_t bsize_mask = ~((loff_t)sdp->sd_sb.sb_bsize - 1);
         loff_t next = (offset + len - 1) >> sdp->sd_sb.sb_bsize_shift;
+       loff_t max_chunk_size = UINT_MAX & bsize_mask;
         next = (next + 1) << sdp->sd_sb.sb_bsize_shift;
  
         /* We only support the FALLOC_FL_KEEP_SIZE mode */
@@ -884,11 +808,12 @@ retry:
                         goto out_qunlock;
                 }
                 max_bytes = bytes;
-               calc_max_reserv(ip, len, &max_bytes, &data_blocks, &ind_blocks);
+               calc_max_reserv(ip, (len > max_chunk_size)? max_chunk_size: len,
+                               &max_bytes, &data_blocks, &ind_blocks);
                 al->al_requested = data_blocks + ind_blocks;
  
                 rblocks = RES_DINODE + ind_blocks + RES_STATFS + RES_QUOTA +
-                         RES_RG_HDR + gfs2_rg_blocks(al);
+                         RES_RG_HDR + gfs2_rg_blocks(ip);
                 if (gfs2_is_jdata(ip))
                         rblocks += data_blocks ? data_blocks : 1;