fs: introduce new truncate sequence

[pandora-kernel.git] / fs / buffer.c
diff --git a/fs/buffer.c b/fs/buffer.c

index e8aa708..d54812b 100644 (file)
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1949,14 +1949,11 @@ static int __block_commit_write(struct inode *inode, struct page *page,
  }
  
  /*
- * block_write_begin takes care of the basic task of block allocation and
- * bringing partial write blocks uptodate first.
- *
- * If *pagep is not NULL, then block_write_begin uses the locked page
- * at *pagep rather than allocating its own. In this case, the page will
- * not be unlocked or deallocated on failure.
+ * Filesystems implementing the new truncate sequence should use the
+ * _newtrunc postfix variant which won't incorrectly call vmtruncate.
+ * The filesystem needs to handle block truncation upon failure.
   */
-int block_write_begin(struct file *file, struct address_space *mapping,
+int block_write_begin_newtrunc(struct file *file, struct address_space *mapping,
                         loff_t pos, unsigned len, unsigned flags,
                         struct page **pagep, void **fsdata,
                         get_block_t *get_block)
@@ -1992,20 +1989,50 @@ int block_write_begin(struct file *file, struct address_space *mapping,
                         unlock_page(page);
                         page_cache_release(page);
                         *pagep = NULL;
-
-                       /*
-                        * prepare_write() may have instantiated a few blocks
-                        * outside i_size.  Trim these off again. Don't need
-                        * i_size_read because we hold i_mutex.
-                        */
-                       if (pos + len > inode->i_size)
-                               vmtruncate(inode, inode->i_size);
                 }
         }
  
  out:
         return status;
  }
+EXPORT_SYMBOL(block_write_begin_newtrunc);
+
+/*
+ * block_write_begin takes care of the basic task of block allocation and
+ * bringing partial write blocks uptodate first.
+ *
+ * If *pagep is not NULL, then block_write_begin uses the locked page
+ * at *pagep rather than allocating its own. In this case, the page will
+ * not be unlocked or deallocated on failure.
+ */
+int block_write_begin(struct file *file, struct address_space *mapping,
+                       loff_t pos, unsigned len, unsigned flags,
+                       struct page **pagep, void **fsdata,
+                       get_block_t *get_block)
+{
+       int ret;
+
+       ret = block_write_begin_newtrunc(file, mapping, pos, len, flags,
+                                       pagep, fsdata, get_block);
+
+       /*
+        * prepare_write() may have instantiated a few blocks
+        * outside i_size.  Trim these off again. Don't need
+        * i_size_read because we hold i_mutex.
+        *
+        * Filesystems which pass down their own page also cannot
+        * call into vmtruncate here because it would lead to lock
+        * inversion problems (*pagep is locked). This is a further
+        * example of where the old truncate sequence is inadequate.
+        */
+       if (unlikely(ret) && *pagep == NULL) {
+               loff_t isize = mapping->host->i_size;
+               if (pos + len > isize)
+                       vmtruncate(mapping->host, isize);
+       }
+
+       return ret;
+}
  EXPORT_SYMBOL(block_write_begin);
  
  int block_write_end(struct file *file, struct address_space *mapping,
@@ -2324,7 +2351,7 @@ out:
   * For moronic filesystems that do not allow holes in file.
   * We may have to extend the file.
   */
-int cont_write_begin(struct file *file, struct address_space *mapping,
+int cont_write_begin_newtrunc(struct file *file, struct address_space *mapping,
                         loff_t pos, unsigned len, unsigned flags,
                         struct page **pagep, void **fsdata,
                         get_block_t *get_block, loff_t *bytes)
@@ -2345,11 +2372,30 @@ int cont_write_begin(struct file *file, struct address_space *mapping,
         }
  
         *pagep = NULL;
-       err = block_write_begin(file, mapping, pos, len,
+       err = block_write_begin_newtrunc(file, mapping, pos, len,
                                 flags, pagep, fsdata, get_block);
  out:
         return err;
  }
+EXPORT_SYMBOL(cont_write_begin_newtrunc);
+
+int cont_write_begin(struct file *file, struct address_space *mapping,
+                       loff_t pos, unsigned len, unsigned flags,
+                       struct page **pagep, void **fsdata,
+                       get_block_t *get_block, loff_t *bytes)
+{
+       int ret;
+
+       ret = cont_write_begin_newtrunc(file, mapping, pos, len, flags,
+                                       pagep, fsdata, get_block, bytes);
+       if (unlikely(ret)) {
+               loff_t isize = mapping->host->i_size;
+               if (pos + len > isize)
+                       vmtruncate(mapping->host, isize);
+       }
+
+       return ret;
+}
  EXPORT_SYMBOL(cont_write_begin);
  
  int block_prepare_write(struct page *page, unsigned from, unsigned to,
@@ -2381,7 +2427,7 @@ EXPORT_SYMBOL(block_commit_write);
   *
   * We are not allowed to take the i_mutex here so we have to play games to
   * protect against truncate races as the page could now be beyond EOF.  Because
- * vmtruncate() writes the inode size before removing pages, once we have the
+ * truncate writes the inode size before removing pages, once we have the
   * page lock we can determine safely if the page is beyond EOF. If it is not
   * beyond EOF, then the page is guaranteed safe against truncation until we
   * unlock the page.
@@ -2464,10 +2510,11 @@ static void attach_nobh_buffers(struct page *page, struct buffer_head *head)
  }
  
  /*
- * On entry, the page is fully not uptodate.
- * On exit the page is fully uptodate in the areas outside (from,to)
+ * Filesystems implementing the new truncate sequence should use the
+ * _newtrunc postfix variant which won't incorrectly call vmtruncate.
+ * The filesystem needs to handle block truncation upon failure.
   */
-int nobh_write_begin(struct file *file, struct address_space *mapping,
+int nobh_write_begin_newtrunc(struct file *file, struct address_space *mapping,
                         loff_t pos, unsigned len, unsigned flags,
                         struct page **pagep, void **fsdata,
                         get_block_t *get_block)
@@ -2500,8 +2547,8 @@ int nobh_write_begin(struct file *file, struct address_space *mapping,
                 unlock_page(page);
                 page_cache_release(page);
                 *pagep = NULL;
-               return block_write_begin(file, mapping, pos, len, flags, pagep,
-                                       fsdata, get_block);
+               return block_write_begin_newtrunc(file, mapping, pos, len,
+                                       flags, pagep, fsdata, get_block);
         }
  
         if (PageMappedToDisk(page))
@@ -2605,8 +2652,34 @@ out_release:
         page_cache_release(page);
         *pagep = NULL;
  
-       if (pos + len > inode->i_size)
-               vmtruncate(inode, inode->i_size);
+       return ret;
+}
+EXPORT_SYMBOL(nobh_write_begin_newtrunc);
+
+/*
+ * On entry, the page is fully not uptodate.
+ * On exit the page is fully uptodate in the areas outside (from,to)
+ */
+int nobh_write_begin(struct file *file, struct address_space *mapping,
+                       loff_t pos, unsigned len, unsigned flags,
+                       struct page **pagep, void **fsdata,
+                       get_block_t *get_block)
+{
+       int ret;
+
+       ret = nobh_write_begin_newtrunc(file, mapping, pos, len, flags,
+                                       pagep, fsdata, get_block);
+
+       /*
+        * prepare_write() may have instantiated a few blocks
+        * outside i_size.  Trim these off again. Don't need
+        * i_size_read because we hold i_mutex.
+        */
+       if (unlikely(ret)) {
+               loff_t isize = mapping->host->i_size;
+               if (pos + len > isize)
+                       vmtruncate(mapping->host, isize);
+       }
  
         return ret;
  }