ocfs2: Remove data locks

author Mark Fasheh <mark.fasheh@oracle.com>

Thu, 18 Oct 2007 22:23:46 +0000 (15:23 -0700)

committer Mark Fasheh <mark.fasheh@oracle.com>

Fri, 25 Jan 2008 22:45:57 +0000 (14:45 -0800)
author Mark Fasheh <mark.fasheh@oracle.com>
Thu, 18 Oct 2007 22:23:46 +0000 (15:23 -0700)
committer Mark Fasheh <mark.fasheh@oracle.com>
Fri, 25 Jan 2008 22:45:57 +0000 (14:45 -0800)
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c

index 56f7790..5fc27cf 100644 (file)
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -305,21 +305,12 @@ static int ocfs2_readpage(struct file *file, struct page *page)
                 goto out_alloc;
         }
  
-       ret = ocfs2_data_lock_with_page(inode, 0, page);
-       if (ret != 0) {
-               if (ret == AOP_TRUNCATED_PAGE)
-                       unlock = 0;
-               mlog_errno(ret);
-               goto out_alloc;
-       }
-
         if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL)
                 ret = ocfs2_readpage_inline(inode, page);
         else
                 ret = block_read_full_page(page, ocfs2_get_block);
         unlock = 0;
  
-       ocfs2_data_unlock(inode, 0);
  out_alloc:
         up_read(&OCFS2_I(inode)->ip_alloc_sem);
  out_meta_unlock:
@@ -638,34 +629,12 @@ static ssize_t ocfs2_direct_IO(int rw,
         if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
                 return 0;
  
-       if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) {
-               /*
-                * We get PR data locks even for O_DIRECT.  This
-                * allows concurrent O_DIRECT I/O but doesn't let
-                * O_DIRECT with extending and buffered zeroing writes
-                * race.  If they did race then the buffered zeroing
-                * could be written back after the O_DIRECT I/O.  It's
-                * one thing to tell people not to mix buffered and
-                * O_DIRECT writes, but expecting them to understand
-                * that file extension is also an implicit buffered
-                * write is too much.  By getting the PR we force
-                * writeback of the buffered zeroing before
-                * proceeding.
-                */
-               ret = ocfs2_data_lock(inode, 0);
-               if (ret < 0) {
-                       mlog_errno(ret);
-                       goto out;
-               }
-               ocfs2_data_unlock(inode, 0);
-       }
-
         ret = blockdev_direct_IO_no_locking(rw, iocb, inode,
                                             inode->i_sb->s_bdev, iov, offset,
                                             nr_segs, 
                                             ocfs2_direct_IO_get_blocks,
                                             ocfs2_dio_end_io);
-out:
+
         mlog_exit(ret);
         return ret;
  }
@@ -1769,25 +1738,17 @@ static int ocfs2_write_begin(struct file *file, struct address_space *mapping,
          */
         down_write(&OCFS2_I(inode)->ip_alloc_sem);
  
-       ret = ocfs2_data_lock(inode, 1);
-       if (ret) {
-               mlog_errno(ret);
-               goto out_fail;
-       }
-
         ret = ocfs2_write_begin_nolock(mapping, pos, len, flags, pagep,
                                        fsdata, di_bh, NULL);
         if (ret) {
                 mlog_errno(ret);
-               goto out_fail_data;
+               goto out_fail;
         }
  
         brelse(di_bh);
  
         return 0;
  
-out_fail_data:
-       ocfs2_data_unlock(inode, 1);
  out_fail:
         up_write(&OCFS2_I(inode)->ip_alloc_sem);
  
@@ -1908,7 +1869,6 @@ static int ocfs2_write_end(struct file *file, struct address_space *mapping,
  
         ret = ocfs2_write_end_nolock(mapping, pos, len, copied, page, fsdata);
  
-       ocfs2_data_unlock(inode, 1);
         up_write(&OCFS2_I(inode)->ip_alloc_sem);
         ocfs2_meta_unlock(inode, 1);
  
diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h

index 79bd666..b2e832a 100644 (file)
--- a/fs/ocfs2/cluster/tcp_internal.h
+++ b/fs/ocfs2/cluster/tcp_internal.h
@@ -38,6 +38,9 @@
   * locking semantics of the file system using the protocol.  It should 
   * be somewhere else, I'm sure, but right now it isn't.
   *
+ * New in version 10:
+ *     - Meta/data locks combined
+ *
   * New in version 9:
   *     - All votes removed
   *
@@ -63,7 +66,7 @@
   *     - full 64 bit i_size in the metadata lock lvbs
   *     - introduction of "rw" lock and pushing meta/data locking down
   */
-#define O2NET_PROTOCOL_VERSION 9ULL
+#define O2NET_PROTOCOL_VERSION 10ULL
  struct o2net_handshake {
         __be64  protocol_version;
         __be64  connector_id;
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c

index 7e36abe..ecf58c6 100644 (file)
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -232,12 +232,6 @@ static struct ocfs2_lock_res_ops ocfs2_inode_meta_lops = {
         .flags          = LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB,
  };
  
-static struct ocfs2_lock_res_ops ocfs2_inode_data_lops = {
-       .get_osb        = ocfs2_get_inode_osb,
-       .downconvert_worker = ocfs2_data_convert_worker,
-       .flags          = 0,
-};
-
  static struct ocfs2_lock_res_ops ocfs2_super_lops = {
         .flags          = LOCK_TYPE_REQUIRES_REFRESH,
  };
@@ -261,7 +255,6 @@ static struct ocfs2_lock_res_ops ocfs2_inode_open_lops = {
  static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres)
  {
         return lockres->l_type == OCFS2_LOCK_TYPE_META ||
-               lockres->l_type == OCFS2_LOCK_TYPE_DATA ||
                 lockres->l_type == OCFS2_LOCK_TYPE_RW ||
                 lockres->l_type == OCFS2_LOCK_TYPE_OPEN;
  }
@@ -405,9 +398,6 @@ void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res,
                 case OCFS2_LOCK_TYPE_META:
                         ops = &ocfs2_inode_meta_lops;
                         break;
-               case OCFS2_LOCK_TYPE_DATA:
-                       ops = &ocfs2_inode_data_lops;
-                       break;
                 case OCFS2_LOCK_TYPE_OPEN:
                         ops = &ocfs2_inode_open_lops;
                         break;
@@ -1154,12 +1144,6 @@ int ocfs2_create_new_inode_locks(struct inode *inode)
                 goto bail;
         }
  
-       ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_data_lockres, 1, 1);
-       if (ret) {
-               mlog_errno(ret);
-               goto bail;
-       }
-
         ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_open_lockres, 0, 0);
         if (ret) {
                 mlog_errno(ret);
@@ -1312,67 +1296,6 @@ out:
         mlog_exit_void();
  }
  
-int ocfs2_data_lock_full(struct inode *inode,
-                        int write,
-                        int arg_flags)
-{
-       int status = 0, level;
-       struct ocfs2_lock_res *lockres;
-       struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
-
-       BUG_ON(!inode);
-
-       mlog_entry_void();
-
-       mlog(0, "inode %llu take %s DATA lock\n",
-            (unsigned long long)OCFS2_I(inode)->ip_blkno,
-            write ? "EXMODE" : "PRMODE");
-
-       /* We'll allow faking a readonly data lock for
-        * rodevices. */
-       if (ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb))) {
-               if (write) {
-                       status = -EROFS;
-                       mlog_errno(status);
-               }
-               goto out;
-       }
-
-       if (ocfs2_mount_local(osb))
-               goto out;
-
-       lockres = &OCFS2_I(inode)->ip_data_lockres;
-
-       level = write ? LKM_EXMODE : LKM_PRMODE;
-
-       status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, level,
-                                   0, arg_flags);
-       if (status < 0 && status != -EAGAIN)
-               mlog_errno(status);
-
-out:
-       mlog_exit(status);
-       return status;
-}
-
-/* see ocfs2_meta_lock_with_page() */
-int ocfs2_data_lock_with_page(struct inode *inode,
-                             int write,
-                             struct page *page)
-{
-       int ret;
-
-       ret = ocfs2_data_lock_full(inode, write, OCFS2_LOCK_NONBLOCK);
-       if (ret == -EAGAIN) {
-               unlock_page(page);
-               if (ocfs2_data_lock(inode, write) == 0)
-                       ocfs2_data_unlock(inode, write);
-               ret = AOP_TRUNCATED_PAGE;
-       }
-
-       return ret;
-}
-
  static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb,
                                         struct ocfs2_lock_res *lockres)
  {
@@ -1404,26 +1327,6 @@ static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb,
         mlog_exit_void();
  }
  
-void ocfs2_data_unlock(struct inode *inode,
-                      int write)
-{
-       int level = write ? LKM_EXMODE : LKM_PRMODE;
-       struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_data_lockres;
-       struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
-
-       mlog_entry_void();
-
-       mlog(0, "inode %llu drop %s DATA lock\n",
-            (unsigned long long)OCFS2_I(inode)->ip_blkno,
-            write ? "EXMODE" : "PRMODE");
-
-       if (!ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb)) &&
-           !ocfs2_mount_local(osb))
-               ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level);
-
-       mlog_exit_void();
-}
-
  #define OCFS2_SEC_BITS   34
  #define OCFS2_SEC_SHIFT  (64 - 34)
  #define OCFS2_NSEC_MASK  ((1ULL << OCFS2_SEC_SHIFT) - 1)
@@ -2591,13 +2494,6 @@ int ocfs2_drop_inode_locks(struct inode *inode)
  
         status = err;
  
-       err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb),
-                             &OCFS2_I(inode)->ip_data_lockres);
-       if (err < 0)
-               mlog_errno(err);
-       if (err < 0 && !status)
-               status = err;
-
         err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb),
                               &OCFS2_I(inode)->ip_meta_lockres);
         if (err < 0)
diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h

index 931f6ee..3fd7729 100644 (file)
--- a/fs/ocfs2/dlmglue.h
+++ b/fs/ocfs2/dlmglue.h
@@ -49,7 +49,7 @@ struct ocfs2_meta_lvb {
         __be32       lvb_reserved2;
  };
  
-/* ocfs2_meta_lock_full() and ocfs2_data_lock_full() 'arg_flags' flags */
+/* ocfs2_meta_lock_full() 'arg_flags' flags */
  /* don't wait on recovery. */
  #define OCFS2_META_LOCK_RECOVERY       (0x01)
  /* Instruct the dlm not to queue ourselves on the other node. */
@@ -69,15 +69,6 @@ void ocfs2_dentry_lock_res_init(struct ocfs2_dentry_lock *dl,
  void ocfs2_lock_res_free(struct ocfs2_lock_res *res);
  int ocfs2_create_new_inode_locks(struct inode *inode);
  int ocfs2_drop_inode_locks(struct inode *inode);
-int ocfs2_data_lock_full(struct inode *inode,
-                        int write,
-                        int arg_flags);
-#define ocfs2_data_lock(inode, write) ocfs2_data_lock_full(inode, write, 0)
-int ocfs2_data_lock_with_page(struct inode *inode,
-                             int write,
-                             struct page *page);
-void ocfs2_data_unlock(struct inode *inode,
-                      int write);
  int ocfs2_rw_lock(struct inode *inode, int write);
  void ocfs2_rw_unlock(struct inode *inode, int write);
  int ocfs2_open_lock(struct inode *inode);
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c

index b75b2e1..c5c183a 100644 (file)
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -382,18 +382,13 @@ static int ocfs2_truncate_file(struct inode *inode,
  
         down_write(&OCFS2_I(inode)->ip_alloc_sem);
  
-       /* This forces other nodes to sync and drop their pages. Do
-        * this even if we have a truncate without allocation change -
-        * ocfs2 cluster sizes can be much greater than page size, so
-        * we have to truncate them anyway.  */
-       status = ocfs2_data_lock(inode, 1);
-       if (status < 0) {
-               up_write(&OCFS2_I(inode)->ip_alloc_sem);
-
-               mlog_errno(status);
-               goto bail;
-       }
-
+       /*
+        * The inode lock forced other nodes to sync and drop their
+        * pages, which (correctly) happens even if we have a truncate
+        * without allocation change - ocfs2 cluster sizes can be much
+        * greater than page size, so we have to truncate them
+        * anyway.
+        */
         unmap_mapping_range(inode->i_mapping, new_i_size + PAGE_SIZE - 1, 0, 1);
         truncate_inode_pages(inode->i_mapping, new_i_size);
  
@@ -403,7 +398,7 @@ static int ocfs2_truncate_file(struct inode *inode,
                 if (status)
                         mlog_errno(status);
  
-               goto bail_unlock_data;
+               goto bail_unlock_sem;
         }
  
         /* alright, we're going to need to do a full blown alloc size
@@ -413,25 +408,23 @@ static int ocfs2_truncate_file(struct inode *inode,
         status = ocfs2_orphan_for_truncate(osb, inode, di_bh, new_i_size);
         if (status < 0) {
                 mlog_errno(status);
-               goto bail_unlock_data;
+               goto bail_unlock_sem;
         }
  
         status = ocfs2_prepare_truncate(osb, inode, di_bh, &tc);
         if (status < 0) {
                 mlog_errno(status);
-               goto bail_unlock_data;
+               goto bail_unlock_sem;
         }
  
         status = ocfs2_commit_truncate(osb, inode, di_bh, tc);
         if (status < 0) {
                 mlog_errno(status);
-               goto bail_unlock_data;
+               goto bail_unlock_sem;
         }
  
         /* TODO: orphan dir cleanup here. */
-bail_unlock_data:
-       ocfs2_data_unlock(inode, 1);
-
+bail_unlock_sem:
         up_write(&OCFS2_I(inode)->ip_alloc_sem);
  
  bail:
@@ -917,7 +910,7 @@ static int ocfs2_extend_file(struct inode *inode,
                              struct buffer_head *di_bh,
                              u64 new_i_size)
  {
-       int ret = 0, data_locked = 0;
+       int ret = 0;
         struct ocfs2_inode_info *oi = OCFS2_I(inode);
  
         BUG_ON(!di_bh);
@@ -943,20 +936,6 @@ static int ocfs2_extend_file(struct inode *inode,
             && ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
                 goto out_update_size;
  
-       /* 
-        * protect the pages that ocfs2_zero_extend is going to be
-        * pulling into the page cache.. we do this before the
-        * metadata extend so that we don't get into the situation
-        * where we've extended the metadata but can't get the data
-        * lock to zero.
-        */
-       ret = ocfs2_data_lock(inode, 1);
-       if (ret < 0) {
-               mlog_errno(ret);
-               goto out;
-       }
-       data_locked = 1;
-
         /*
          * The alloc sem blocks people in read/write from reading our
          * allocation until we're done changing it. We depend on
@@ -980,7 +959,7 @@ static int ocfs2_extend_file(struct inode *inode,
                         up_write(&oi->ip_alloc_sem);
  
                         mlog_errno(ret);
-                       goto out_unlock;
+                       goto out;
                 }
         }
  
@@ -991,7 +970,7 @@ static int ocfs2_extend_file(struct inode *inode,
  
         if (ret < 0) {
                 mlog_errno(ret);
-               goto out_unlock;
+               goto out;
         }
  
  out_update_size:
@@ -999,10 +978,6 @@ out_update_size:
         if (ret < 0)
                 mlog_errno(ret);
  
-out_unlock:
-       if (data_locked)
-               ocfs2_data_unlock(inode, 1);
-
  out:
         return ret;
  }
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c

index 86cf073..8ff201d 100644 (file)
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -332,10 +332,6 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
                                   OCFS2_LOCK_TYPE_RW, inode->i_generation,
                                   inode);
  
-       ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_data_lockres,
-                                 OCFS2_LOCK_TYPE_DATA, inode->i_generation,
-                                 inode);
-
         ocfs2_set_inode_flags(inode);
  
         status = 0;
@@ -1014,7 +1010,6 @@ void ocfs2_clear_inode(struct inode *inode)
          * the downconvert thread while waiting to destroy the locks. */
         ocfs2_mark_lockres_freeing(&oi->ip_rw_lockres);
         ocfs2_mark_lockres_freeing(&oi->ip_meta_lockres);
-       ocfs2_mark_lockres_freeing(&oi->ip_data_lockres);
         ocfs2_mark_lockres_freeing(&oi->ip_open_lockres);
  
         /* We very well may get a clear_inode before all an inodes
@@ -1038,7 +1033,6 @@ void ocfs2_clear_inode(struct inode *inode)
  
         ocfs2_lock_res_free(&oi->ip_rw_lockres);
         ocfs2_lock_res_free(&oi->ip_meta_lockres);
-       ocfs2_lock_res_free(&oi->ip_data_lockres);
         ocfs2_lock_res_free(&oi->ip_open_lockres);
  
         ocfs2_metadata_cache_purge(inode);
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h

index 70e881c..d1c54da 100644 (file)
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -35,7 +35,6 @@ struct ocfs2_inode_info
  
         struct ocfs2_lock_res           ip_rw_lockres;
         struct ocfs2_lock_res           ip_meta_lockres;
-       struct ocfs2_lock_res           ip_data_lockres;
         struct ocfs2_lock_res           ip_open_lockres;
  
         /* protects allocation changes on this inode. */
diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c

index 9875615..a7f0ccc 100644 (file)
--- a/fs/ocfs2/mmap.c
+++ b/fs/ocfs2/mmap.c
@@ -181,17 +181,8 @@ static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct page *page)
          */
         down_write(&OCFS2_I(inode)->ip_alloc_sem);
  
-       ret = ocfs2_data_lock(inode, 1);
-       if (ret < 0) {
-               mlog_errno(ret);
-               goto out_meta_unlock;
-       }
-
         ret = __ocfs2_page_mkwrite(inode, di_bh, page);
  
-       ocfs2_data_unlock(inode, 1);
-
-out_meta_unlock:
         up_write(&OCFS2_I(inode)->ip_alloc_sem);
  
         brelse(di_bh);
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c

index 1996820..064eba0 100644 (file)
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1020,7 +1020,6 @@ static void ocfs2_inode_init_once(struct kmem_cache *cachep, void *data)
  
         ocfs2_lock_res_init_once(&oi->ip_rw_lockres);
         ocfs2_lock_res_init_once(&oi->ip_meta_lockres);
-       ocfs2_lock_res_init_once(&oi->ip_data_lockres);
         ocfs2_lock_res_init_once(&oi->ip_open_lockres);
  
         ocfs2_metadata_cache_init(&oi->vfs_inode);
author	Mark Fasheh <mark.fasheh@oracle.com>
	Thu, 18 Oct 2007 22:23:46 +0000 (15:23 -0700)
committer	Mark Fasheh <mark.fasheh@oracle.com>
	Fri, 25 Jan 2008 22:45:57 +0000 (14:45 -0800)
fs/ocfs2/aops.c		patch \| blob \| history
fs/ocfs2/cluster/tcp_internal.h		patch \| blob \| history
fs/ocfs2/dlmglue.c		patch \| blob \| history
fs/ocfs2/dlmglue.h		patch \| blob \| history
fs/ocfs2/file.c		patch \| blob \| history
fs/ocfs2/inode.c		patch \| blob \| history
fs/ocfs2/inode.h		patch \| blob \| history
fs/ocfs2/mmap.c		patch \| blob \| history
fs/ocfs2/super.c		patch \| blob \| history