Merge branch 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jlbec...
author Linus Torvalds <torvalds@linux-foundation.org>
Wed, 24 Jun 2009 02:36:02 +0000 (19:36 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Wed, 24 Jun 2009 02:36:02 +0000 (19:36 -0700)
* 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jlbec/ocfs2:
  ocfs2/trivial: Wrap ocfs2_sysfile_cluster_lock_key within define.
  ocfs2: Add lockdep annotations
  vfs: Set special lockdep map for dirs only if not set by fs
  ocfs2: Disable orphan scanning for local and hard-ro mounts
  ocfs2: Do not initialize lvb in ocfs2_orphan_scan_lock_res_init()
  ocfs2: Stop orphan scan as early as possible during umount
  ocfs2: Fix ocfs2_osb_dump()
  ocfs2: Pin journal head before accessing jh->b_committed_data
  ocfs2: Update atime in splice read if necessary.
  ocfs2: Provide the ocfs2_dlm_lvb_valid() stack API.

17 files changed:
fs/inode.c
fs/ocfs2/dlmglue.c
fs/ocfs2/dlmglue.h
fs/ocfs2/file.c
fs/ocfs2/inode.c
fs/ocfs2/journal.c
fs/ocfs2/journal.h
fs/ocfs2/namei.c
fs/ocfs2/ocfs2.h
fs/ocfs2/stack_o2cb.c
fs/ocfs2/stack_user.c
fs/ocfs2/stackglue.c
fs/ocfs2/stackglue.h
fs/ocfs2/suballoc.c
fs/ocfs2/super.c
fs/ocfs2/sysfile.c
include/linux/lockdep.h

index f643be5..04c785b 100644 (file)
@@ -665,12 +665,17 @@ void unlock_new_inode(struct inode *inode)
        if (inode->i_mode & S_IFDIR) {
                struct file_system_type *type = inode->i_sb->s_type;
 
-               /*
-                * ensure nobody is actually holding i_mutex
-                */
-               mutex_destroy(&inode->i_mutex);
-               mutex_init(&inode->i_mutex);
-               lockdep_set_class(&inode->i_mutex, &type->i_mutex_dir_key);
+               /* Set new key only if fs left the default class (i_mutex_key) */
+               if (lockdep_match_class(&inode->i_mutex,
+                   &type->i_mutex_key)) {
+                       /*
+                        * ensure nobody is actually holding i_mutex
+                        */
+                       mutex_destroy(&inode->i_mutex);
+                       mutex_init(&inode->i_mutex);
+                       lockdep_set_class(&inode->i_mutex,
+                                         &type->i_mutex_dir_key);
+               }
        }
 #endif
        /*
index 6cdeaa7..110bb57 100644 (file)
@@ -92,6 +92,9 @@ struct ocfs2_unblock_ctl {
        enum ocfs2_unblock_action unblock_action;
 };
 
+/* Lockdep class keys */
+struct lock_class_key lockdep_keys[OCFS2_NUM_LOCK_TYPES];
+
 static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres,
                                        int new_level);
 static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres);
@@ -317,9 +320,16 @@ static int ocfs2_lock_create(struct ocfs2_super *osb,
                             u32 dlm_flags);
 static inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres,
                                                     int wanted);
-static void ocfs2_cluster_unlock(struct ocfs2_super *osb,
-                                struct ocfs2_lock_res *lockres,
-                                int level);
+static void __ocfs2_cluster_unlock(struct ocfs2_super *osb,
+                                  struct ocfs2_lock_res *lockres,
+                                  int level, unsigned long caller_ip);
+static inline void ocfs2_cluster_unlock(struct ocfs2_super *osb,
+                                       struct ocfs2_lock_res *lockres,
+                                       int level)
+{
+       __ocfs2_cluster_unlock(osb, lockres, level, _RET_IP_);
+}
+
 static inline void ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res *lockres);
 static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lockres);
 static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *lockres);
@@ -489,6 +499,13 @@ static void ocfs2_lock_res_init_common(struct ocfs2_super *osb,
        ocfs2_add_lockres_tracking(res, osb->osb_dlm_debug);
 
        ocfs2_init_lock_stats(res);
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+       if (type != OCFS2_LOCK_TYPE_OPEN)
+               lockdep_init_map(&res->l_lockdep_map, ocfs2_lock_type_strings[type],
+                                &lockdep_keys[type], 0);
+       else
+               res->l_lockdep_map.key = NULL;
+#endif
 }
 
 void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res)
@@ -644,14 +661,10 @@ static void ocfs2_nfs_sync_lock_res_init(struct ocfs2_lock_res *res,
 static void ocfs2_orphan_scan_lock_res_init(struct ocfs2_lock_res *res,
                                            struct ocfs2_super *osb)
 {
-       struct ocfs2_orphan_scan_lvb *lvb;
-
        ocfs2_lock_res_init_once(res);
        ocfs2_build_lock_name(OCFS2_LOCK_TYPE_ORPHAN_SCAN, 0, 0, res->l_name);
        ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_ORPHAN_SCAN,
                                   &ocfs2_orphan_scan_lops, osb);
-       lvb = ocfs2_dlm_lvb(&res->l_lksb);
-       lvb->lvb_version = OCFS2_ORPHAN_LVB_VERSION;
 }
 
 void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres,
@@ -1256,11 +1269,13 @@ static int ocfs2_wait_for_mask_interruptible(struct ocfs2_mask_waiter *mw,
        return ret;
 }
 
-static int ocfs2_cluster_lock(struct ocfs2_super *osb,
-                             struct ocfs2_lock_res *lockres,
-                             int level,
-                             u32 lkm_flags,
-                             int arg_flags)
+static int __ocfs2_cluster_lock(struct ocfs2_super *osb,
+                               struct ocfs2_lock_res *lockres,
+                               int level,
+                               u32 lkm_flags,
+                               int arg_flags,
+                               int l_subclass,
+                               unsigned long caller_ip)
 {
        struct ocfs2_mask_waiter mw;
        int wait, catch_signals = !(osb->s_mount_opt & OCFS2_MOUNT_NOINTR);
@@ -1403,13 +1418,37 @@ out:
        }
        ocfs2_update_lock_stats(lockres, level, &mw, ret);
 
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+       if (!ret && lockres->l_lockdep_map.key != NULL) {
+               if (level == DLM_LOCK_PR)
+                       rwsem_acquire_read(&lockres->l_lockdep_map, l_subclass,
+                               !!(arg_flags & OCFS2_META_LOCK_NOQUEUE),
+                               caller_ip);
+               else
+                       rwsem_acquire(&lockres->l_lockdep_map, l_subclass,
+                               !!(arg_flags & OCFS2_META_LOCK_NOQUEUE),
+                               caller_ip);
+       }
+#endif
        mlog_exit(ret);
        return ret;
 }
 
-static void ocfs2_cluster_unlock(struct ocfs2_super *osb,
-                                struct ocfs2_lock_res *lockres,
-                                int level)
+static inline int ocfs2_cluster_lock(struct ocfs2_super *osb,
+                                    struct ocfs2_lock_res *lockres,
+                                    int level,
+                                    u32 lkm_flags,
+                                    int arg_flags)
+{
+       return __ocfs2_cluster_lock(osb, lockres, level, lkm_flags, arg_flags,
+                                   0, _RET_IP_);
+}
+
+
+static void __ocfs2_cluster_unlock(struct ocfs2_super *osb,
+                                  struct ocfs2_lock_res *lockres,
+                                  int level,
+                                  unsigned long caller_ip)
 {
        unsigned long flags;
 
@@ -1418,6 +1457,10 @@ static void ocfs2_cluster_unlock(struct ocfs2_super *osb,
        ocfs2_dec_holders(lockres, level);
        ocfs2_downconvert_on_unlock(osb, lockres);
        spin_unlock_irqrestore(&lockres->l_lock, flags);
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+       if (lockres->l_lockdep_map.key != NULL)
+               rwsem_release(&lockres->l_lockdep_map, 1, caller_ip);
+#endif
        mlog_exit_void();
 }
 
@@ -1989,7 +2032,8 @@ static inline int ocfs2_meta_lvb_is_trustable(struct inode *inode,
 {
        struct ocfs2_meta_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
 
-       if (lvb->lvb_version == OCFS2_LVB_VERSION
+       if (ocfs2_dlm_lvb_valid(&lockres->l_lksb)
+           && lvb->lvb_version == OCFS2_LVB_VERSION
            && be32_to_cpu(lvb->lvb_igeneration) == inode->i_generation)
                return 1;
        return 0;
@@ -2162,10 +2206,11 @@ static int ocfs2_assign_bh(struct inode *inode,
  * returns < 0 error if the callback will never be called, otherwise
  * the result of the lock will be communicated via the callback.
  */
-int ocfs2_inode_lock_full(struct inode *inode,
-                        struct buffer_head **ret_bh,
-                        int ex,
-                        int arg_flags)
+int ocfs2_inode_lock_full_nested(struct inode *inode,
+                                struct buffer_head **ret_bh,
+                                int ex,
+                                int arg_flags,
+                                int subclass)
 {
        int status, level, acquired;
        u32 dlm_flags;
@@ -2203,7 +2248,8 @@ int ocfs2_inode_lock_full(struct inode *inode,
        if (arg_flags & OCFS2_META_LOCK_NOQUEUE)
                dlm_flags |= DLM_LKF_NOQUEUE;
 
-       status = ocfs2_cluster_lock(osb, lockres, level, dlm_flags, arg_flags);
+       status = __ocfs2_cluster_lock(osb, lockres, level, dlm_flags,
+                                     arg_flags, subclass, _RET_IP_);
        if (status < 0) {
                if (status != -EAGAIN && status != -EIOCBRETRY)
                        mlog_errno(status);
@@ -2369,35 +2415,45 @@ void ocfs2_inode_unlock(struct inode *inode,
        mlog_exit_void();
 }
 
-int ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno, int ex)
+int ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno)
 {
        struct ocfs2_lock_res *lockres;
        struct ocfs2_orphan_scan_lvb *lvb;
-       int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
        int status = 0;
 
+       if (ocfs2_is_hard_readonly(osb))
+               return -EROFS;
+
+       if (ocfs2_mount_local(osb))
+               return 0;
+
        lockres = &osb->osb_orphan_scan.os_lockres;
-       status = ocfs2_cluster_lock(osb, lockres, level, 0, 0);
+       status = ocfs2_cluster_lock(osb, lockres, DLM_LOCK_EX, 0, 0);
        if (status < 0)
                return status;
 
        lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
-       if (lvb->lvb_version == OCFS2_ORPHAN_LVB_VERSION)
+       if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) &&
+           lvb->lvb_version == OCFS2_ORPHAN_LVB_VERSION)
                *seqno = be32_to_cpu(lvb->lvb_os_seqno);
+       else
+               *seqno = osb->osb_orphan_scan.os_seqno + 1;
+
        return status;
 }
 
-void ocfs2_orphan_scan_unlock(struct ocfs2_super *osb, u32 seqno, int ex)
+void ocfs2_orphan_scan_unlock(struct ocfs2_super *osb, u32 seqno)
 {
        struct ocfs2_lock_res *lockres;
        struct ocfs2_orphan_scan_lvb *lvb;
-       int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
 
-       lockres = &osb->osb_orphan_scan.os_lockres;
-       lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
-       lvb->lvb_version = OCFS2_ORPHAN_LVB_VERSION;
-       lvb->lvb_os_seqno = cpu_to_be32(seqno);
-       ocfs2_cluster_unlock(osb, lockres, level);
+       if (!ocfs2_is_hard_readonly(osb) && !ocfs2_mount_local(osb)) {
+               lockres = &osb->osb_orphan_scan.os_lockres;
+               lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
+               lvb->lvb_version = OCFS2_ORPHAN_LVB_VERSION;
+               lvb->lvb_os_seqno = cpu_to_be32(seqno);
+               ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX);
+       }
 }
 
 int ocfs2_super_lock(struct ocfs2_super *osb,
@@ -3627,7 +3683,8 @@ static int ocfs2_refresh_qinfo(struct ocfs2_mem_dqinfo *oinfo)
        struct ocfs2_global_disk_dqinfo *gdinfo;
        int status = 0;
 
-       if (lvb->lvb_version == OCFS2_QINFO_LVB_VERSION) {
+       if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) &&
+           lvb->lvb_version == OCFS2_QINFO_LVB_VERSION) {
                info->dqi_bgrace = be32_to_cpu(lvb->lvb_bgrace);
                info->dqi_igrace = be32_to_cpu(lvb->lvb_igrace);
                oinfo->dqi_syncms = be32_to_cpu(lvb->lvb_syncms);
index 31b90d7..7553836 100644 (file)
@@ -78,6 +78,14 @@ struct ocfs2_orphan_scan_lvb {
 /* don't block waiting for the downconvert thread, instead return -EAGAIN */
 #define OCFS2_LOCK_NONBLOCK            (0x04)
 
+/* Locking subclasses of inode cluster lock */
+enum {
+       OI_LS_NORMAL = 0,
+       OI_LS_PARENT,
+       OI_LS_RENAME1,
+       OI_LS_RENAME2,
+};
+
 int ocfs2_dlm_init(struct ocfs2_super *osb);
 void ocfs2_dlm_shutdown(struct ocfs2_super *osb, int hangup_pending);
 void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res);
@@ -104,25 +112,31 @@ void ocfs2_open_unlock(struct inode *inode);
 int ocfs2_inode_lock_atime(struct inode *inode,
                          struct vfsmount *vfsmnt,
                          int *level);
-int ocfs2_inode_lock_full(struct inode *inode,
+int ocfs2_inode_lock_full_nested(struct inode *inode,
                         struct buffer_head **ret_bh,
                         int ex,
-                        int arg_flags);
+                        int arg_flags,
+                        int subclass);
 int ocfs2_inode_lock_with_page(struct inode *inode,
                              struct buffer_head **ret_bh,
                              int ex,
                              struct page *page);
+/* Variants without special locking class or flags */
+#define ocfs2_inode_lock_full(i, r, e, f)\
+               ocfs2_inode_lock_full_nested(i, r, e, f, OI_LS_NORMAL)
+#define ocfs2_inode_lock_nested(i, b, e, s)\
+               ocfs2_inode_lock_full_nested(i, b, e, 0, s)
 /* 99% of the time we don't want to supply any additional flags --
  * those are for very specific cases only. */
-#define ocfs2_inode_lock(i, b, e) ocfs2_inode_lock_full(i, b, e, 0)
+#define ocfs2_inode_lock(i, b, e) ocfs2_inode_lock_full_nested(i, b, e, 0, OI_LS_NORMAL)
 void ocfs2_inode_unlock(struct inode *inode,
                       int ex);
 int ocfs2_super_lock(struct ocfs2_super *osb,
                     int ex);
 void ocfs2_super_unlock(struct ocfs2_super *osb,
                        int ex);
-int ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno, int ex);
-void ocfs2_orphan_scan_unlock(struct ocfs2_super *osb, u32 seqno, int ex);
+int ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno);
+void ocfs2_orphan_scan_unlock(struct ocfs2_super *osb, u32 seqno);
 
 int ocfs2_rename_lock(struct ocfs2_super *osb);
 void ocfs2_rename_unlock(struct ocfs2_super *osb);
index 07267e0..62442e4 100644 (file)
@@ -2026,7 +2026,7 @@ static ssize_t ocfs2_file_splice_read(struct file *in,
                                      size_t len,
                                      unsigned int flags)
 {
-       int ret = 0;
+       int ret = 0, lock_level = 0;
        struct inode *inode = in->f_path.dentry->d_inode;
 
        mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", in, pipe,
@@ -2037,12 +2037,12 @@ static ssize_t ocfs2_file_splice_read(struct file *in,
        /*
         * See the comment in ocfs2_file_aio_read()
         */
-       ret = ocfs2_inode_lock(inode, NULL, 0);
+       ret = ocfs2_inode_lock_atime(inode, in->f_vfsmnt, &lock_level);
        if (ret < 0) {
                mlog_errno(ret);
                goto bail;
        }
-       ocfs2_inode_unlock(inode, 0);
+       ocfs2_inode_unlock(inode, lock_level);
 
        ret = generic_file_splice_read(in, ppos, pipe, len, flags);
 
index 10e1fa8..4dc8890 100644 (file)
@@ -215,6 +215,8 @@ bail:
 static int ocfs2_init_locked_inode(struct inode *inode, void *opaque)
 {
        struct ocfs2_find_inode_args *args = opaque;
+       static struct lock_class_key ocfs2_quota_ip_alloc_sem_key,
+                                    ocfs2_file_ip_alloc_sem_key;
 
        mlog_entry("inode = %p, opaque = %p\n", inode, opaque);
 
@@ -223,6 +225,15 @@ static int ocfs2_init_locked_inode(struct inode *inode, void *opaque)
        if (args->fi_sysfile_type != 0)
                lockdep_set_class(&inode->i_mutex,
                        &ocfs2_sysfile_lock_key[args->fi_sysfile_type]);
+       if (args->fi_sysfile_type == USER_QUOTA_SYSTEM_INODE ||
+           args->fi_sysfile_type == GROUP_QUOTA_SYSTEM_INODE ||
+           args->fi_sysfile_type == LOCAL_USER_QUOTA_SYSTEM_INODE ||
+           args->fi_sysfile_type == LOCAL_GROUP_QUOTA_SYSTEM_INODE)
+               lockdep_set_class(&OCFS2_I(inode)->ip_alloc_sem,
+                                 &ocfs2_quota_ip_alloc_sem_key);
+       else
+               lockdep_set_class(&OCFS2_I(inode)->ip_alloc_sem,
+                                 &ocfs2_file_ip_alloc_sem_key);
 
        mlog_exit(0);
        return 0;
index 4a3b9e6..f033760 100644 (file)
@@ -1880,13 +1880,20 @@ void ocfs2_queue_orphan_scan(struct ocfs2_super *osb)
 
        os = &osb->osb_orphan_scan;
 
-       status = ocfs2_orphan_scan_lock(osb, &seqno, DLM_LOCK_EX);
+       if (atomic_read(&os->os_state) == ORPHAN_SCAN_INACTIVE)
+               goto out;
+
+       status = ocfs2_orphan_scan_lock(osb, &seqno);
        if (status < 0) {
                if (status != -EAGAIN)
                        mlog_errno(status);
                goto out;
        }
 
+       /* Do not queue the tasks if the volume is being umounted */
+       if (atomic_read(&os->os_state) == ORPHAN_SCAN_INACTIVE)
+               goto unlock;
+
        if (os->os_seqno != seqno) {
                os->os_seqno = seqno;
                goto unlock;
@@ -1903,7 +1910,7 @@ void ocfs2_queue_orphan_scan(struct ocfs2_super *osb)
        os->os_count++;
        os->os_scantime = CURRENT_TIME;
 unlock:
-       ocfs2_orphan_scan_unlock(osb, seqno, DLM_LOCK_EX);
+       ocfs2_orphan_scan_unlock(osb, seqno);
 out:
        return;
 }
@@ -1920,8 +1927,9 @@ void ocfs2_orphan_scan_work(struct work_struct *work)
 
        mutex_lock(&os->os_lock);
        ocfs2_queue_orphan_scan(osb);
-       schedule_delayed_work(&os->os_orphan_scan_work,
-                             ocfs2_orphan_scan_timeout());
+       if (atomic_read(&os->os_state) == ORPHAN_SCAN_ACTIVE)
+               schedule_delayed_work(&os->os_orphan_scan_work,
+                                     ocfs2_orphan_scan_timeout());
        mutex_unlock(&os->os_lock);
 }
 
@@ -1930,26 +1938,33 @@ void ocfs2_orphan_scan_stop(struct ocfs2_super *osb)
        struct ocfs2_orphan_scan *os;
 
        os = &osb->osb_orphan_scan;
-       mutex_lock(&os->os_lock);
-       cancel_delayed_work(&os->os_orphan_scan_work);
-       mutex_unlock(&os->os_lock);
+       if (atomic_read(&os->os_state) == ORPHAN_SCAN_ACTIVE) {
+               atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE);
+               mutex_lock(&os->os_lock);
+               cancel_delayed_work(&os->os_orphan_scan_work);
+               mutex_unlock(&os->os_lock);
+       }
 }
 
-int ocfs2_orphan_scan_init(struct ocfs2_super *osb)
+void ocfs2_orphan_scan_init(struct ocfs2_super *osb)
 {
        struct ocfs2_orphan_scan *os;
 
        os = &osb->osb_orphan_scan;
        os->os_osb = osb;
        os->os_count = 0;
+       os->os_seqno = 0;
        os->os_scantime = CURRENT_TIME;
        mutex_init(&os->os_lock);
-
-       INIT_DELAYED_WORK(&os->os_orphan_scan_work,
-                         ocfs2_orphan_scan_work);
-       schedule_delayed_work(&os->os_orphan_scan_work,
-                             ocfs2_orphan_scan_timeout());
-       return 0;
+       INIT_DELAYED_WORK(&os->os_orphan_scan_work, ocfs2_orphan_scan_work);
+
+       if (ocfs2_is_hard_readonly(osb) || ocfs2_mount_local(osb))
+               atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE);
+       else {
+               atomic_set(&os->os_state, ORPHAN_SCAN_ACTIVE);
+               schedule_delayed_work(&os->os_orphan_scan_work,
+                                     ocfs2_orphan_scan_timeout());
+       }
 }
 
 struct ocfs2_orphan_filldir_priv {
index 61045ee..5432c7f 100644 (file)
@@ -144,7 +144,7 @@ static inline void ocfs2_inode_set_new(struct ocfs2_super *osb,
 }
 
 /* Exported only for the journal struct init code in super.c. Do not call. */
-int ocfs2_orphan_scan_init(struct ocfs2_super *osb);
+void ocfs2_orphan_scan_init(struct ocfs2_super *osb);
 void ocfs2_orphan_scan_stop(struct ocfs2_super *osb);
 void ocfs2_orphan_scan_exit(struct ocfs2_super *osb);
 
index 33464c6..8601f93 100644 (file)
@@ -118,7 +118,7 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry,
        mlog(0, "find name %.*s in directory %llu\n", dentry->d_name.len,
             dentry->d_name.name, (unsigned long long)OCFS2_I(dir)->ip_blkno);
 
-       status = ocfs2_inode_lock(dir, NULL, 0);
+       status = ocfs2_inode_lock_nested(dir, NULL, 0, OI_LS_PARENT);
        if (status < 0) {
                if (status != -ENOENT)
                        mlog_errno(status);
@@ -636,7 +636,7 @@ static int ocfs2_link(struct dentry *old_dentry,
        if (S_ISDIR(inode->i_mode))
                return -EPERM;
 
-       err = ocfs2_inode_lock(dir, &parent_fe_bh, 1);
+       err = ocfs2_inode_lock_nested(dir, &parent_fe_bh, 1, OI_LS_PARENT);
        if (err < 0) {
                if (err != -ENOENT)
                        mlog_errno(err);
@@ -800,7 +800,8 @@ static int ocfs2_unlink(struct inode *dir,
                return -EPERM;
        }
 
-       status = ocfs2_inode_lock(dir, &parent_node_bh, 1);
+       status = ocfs2_inode_lock_nested(dir, &parent_node_bh, 1,
+                                        OI_LS_PARENT);
        if (status < 0) {
                if (status != -ENOENT)
                        mlog_errno(status);
@@ -978,7 +979,8 @@ static int ocfs2_double_lock(struct ocfs2_super *osb,
                        inode1 = tmpinode;
                }
                /* lock id2 */
-               status = ocfs2_inode_lock(inode2, bh2, 1);
+               status = ocfs2_inode_lock_nested(inode2, bh2, 1,
+                                                OI_LS_RENAME1);
                if (status < 0) {
                        if (status != -ENOENT)
                                mlog_errno(status);
@@ -987,7 +989,7 @@ static int ocfs2_double_lock(struct ocfs2_super *osb,
        }
 
        /* lock id1 */
-       status = ocfs2_inode_lock(inode1, bh1, 1);
+       status = ocfs2_inode_lock_nested(inode1, bh1, 1, OI_LS_RENAME2);
        if (status < 0) {
                /*
                 * An error return must mean that no cluster locks
@@ -1103,7 +1105,8 @@ static int ocfs2_rename(struct inode *old_dir,
         * won't have to concurrently downconvert the inode and the
         * dentry locks.
         */
-       status = ocfs2_inode_lock(old_inode, &old_inode_bh, 1);
+       status = ocfs2_inode_lock_nested(old_inode, &old_inode_bh, 1,
+                                        OI_LS_PARENT);
        if (status < 0) {
                if (status != -ENOENT)
                        mlog_errno(status);
index 18c1d9e..c9345eb 100644 (file)
@@ -34,6 +34,7 @@
 #include <linux/workqueue.h>
 #include <linux/kref.h>
 #include <linux/mutex.h>
+#include <linux/lockdep.h>
 #ifndef CONFIG_OCFS2_COMPAT_JBD
 # include <linux/jbd2.h>
 #else
@@ -152,6 +153,14 @@ struct ocfs2_lock_res {
        unsigned int             l_lock_max_exmode;        /* Max wait for EX */
        unsigned int             l_lock_refresh;           /* Disk refreshes */
 #endif
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+       struct lockdep_map       l_lockdep_map;
+#endif
+};
+
+enum ocfs2_orphan_scan_state {
+       ORPHAN_SCAN_ACTIVE,
+       ORPHAN_SCAN_INACTIVE
 };
 
 struct ocfs2_orphan_scan {
@@ -162,6 +171,7 @@ struct ocfs2_orphan_scan {
        struct timespec         os_scantime;  /* time this node ran the scan */
        u32                     os_count;      /* tracks node specific scans */
        u32                     os_seqno;       /* tracks cluster wide scans */
+       atomic_t                os_state;              /* ACTIVE or INACTIVE */
 };
 
 struct ocfs2_dlm_debug {
index fcd120f..3f66137 100644 (file)
@@ -236,6 +236,16 @@ static int o2cb_dlm_lock_status(union ocfs2_dlm_lksb *lksb)
        return dlm_status_to_errno(lksb->lksb_o2dlm.status);
 }
 
+/*
+ * o2dlm always has a "valid" LVB. If the dlm loses track of the LVB
+ * contents, it will zero out the LVB.  Thus the caller can always trust
+ * the contents.
+ */
+static int o2cb_dlm_lvb_valid(union ocfs2_dlm_lksb *lksb)
+{
+       return 1;
+}
+
 static void *o2cb_dlm_lvb(union ocfs2_dlm_lksb *lksb)
 {
        return (void *)(lksb->lksb_o2dlm.lvb);
@@ -354,6 +364,7 @@ static struct ocfs2_stack_operations o2cb_stack_ops = {
        .dlm_lock       = o2cb_dlm_lock,
        .dlm_unlock     = o2cb_dlm_unlock,
        .lock_status    = o2cb_dlm_lock_status,
+       .lvb_valid      = o2cb_dlm_lvb_valid,
        .lock_lvb       = o2cb_dlm_lvb,
        .dump_lksb      = o2cb_dump_lksb,
 };
index 9b76d41..ff4c798 100644 (file)
@@ -738,6 +738,13 @@ static int user_dlm_lock_status(union ocfs2_dlm_lksb *lksb)
        return lksb->lksb_fsdlm.sb_status;
 }
 
+static int user_dlm_lvb_valid(union ocfs2_dlm_lksb *lksb)
+{
+       int invalid = lksb->lksb_fsdlm.sb_flags & DLM_SBF_VALNOTVALID;
+
+       return !invalid;
+}
+
 static void *user_dlm_lvb(union ocfs2_dlm_lksb *lksb)
 {
        if (!lksb->lksb_fsdlm.sb_lvbptr)
@@ -873,6 +880,7 @@ static struct ocfs2_stack_operations ocfs2_user_plugin_ops = {
        .dlm_lock       = user_dlm_lock,
        .dlm_unlock     = user_dlm_unlock,
        .lock_status    = user_dlm_lock_status,
+       .lvb_valid      = user_dlm_lvb_valid,
        .lock_lvb       = user_dlm_lvb,
        .plock          = user_plock,
        .dump_lksb      = user_dlm_dump_lksb,
index 68b668b..3f2f1c4 100644 (file)
@@ -6,7 +6,7 @@
  * Code which implements an OCFS2 specific interface to underlying
  * cluster stacks.
  *
- * Copyright (C) 2007 Oracle.  All rights reserved.
+ * Copyright (C) 2007, 2009 Oracle.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public
@@ -271,11 +271,12 @@ int ocfs2_dlm_lock_status(union ocfs2_dlm_lksb *lksb)
 }
 EXPORT_SYMBOL_GPL(ocfs2_dlm_lock_status);
 
-/*
- * Why don't we cast to ocfs2_meta_lvb?  The "clean" answer is that we
- * don't cast at the glue level.  The real answer is that the header
- * ordering is nigh impossible.
- */
+int ocfs2_dlm_lvb_valid(union ocfs2_dlm_lksb *lksb)
+{
+       return active_stack->sp_ops->lvb_valid(lksb);
+}
+EXPORT_SYMBOL_GPL(ocfs2_dlm_lvb_valid);
+
 void *ocfs2_dlm_lvb(union ocfs2_dlm_lksb *lksb)
 {
        return active_stack->sp_ops->lock_lvb(lksb);
index c571af3..03a44d6 100644 (file)
@@ -185,6 +185,11 @@ struct ocfs2_stack_operations {
         */
        int (*lock_status)(union ocfs2_dlm_lksb *lksb);
 
+       /*
+        * Return non-zero if the LVB is valid.
+        */
+       int (*lvb_valid)(union ocfs2_dlm_lksb *lksb);
+
        /*
         * Pull the lvb pointer off of the stack-specific lksb.
         */
@@ -252,6 +257,7 @@ int ocfs2_dlm_unlock(struct ocfs2_cluster_connection *conn,
                     struct ocfs2_lock_res *astarg);
 
 int ocfs2_dlm_lock_status(union ocfs2_dlm_lksb *lksb);
+int ocfs2_dlm_lvb_valid(union ocfs2_dlm_lksb *lksb);
 void *ocfs2_dlm_lvb(union ocfs2_dlm_lksb *lksb);
 void ocfs2_dlm_dump_lksb(union ocfs2_dlm_lksb *lksb);
 
index 8439f6b..73a16d4 100644 (file)
@@ -923,14 +923,23 @@ static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh,
                                         int nr)
 {
        struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data;
+       int ret;
 
        if (ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap))
                return 0;
-       if (!buffer_jbd(bg_bh) || !bh2jh(bg_bh)->b_committed_data)
+
+       if (!buffer_jbd(bg_bh))
                return 1;
 
+       jbd_lock_bh_state(bg_bh);
        bg = (struct ocfs2_group_desc *) bh2jh(bg_bh)->b_committed_data;
-       return !ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap);
+       if (bg)
+               ret = !ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap);
+       else
+               ret = 1;
+       jbd_unlock_bh_state(bg_bh);
+
+       return ret;
 }
 
 static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb,
@@ -1885,6 +1894,7 @@ static inline int ocfs2_block_group_clear_bits(handle_t *handle,
        unsigned int tmp;
        int journal_type = OCFS2_JOURNAL_ACCESS_WRITE;
        struct ocfs2_group_desc *undo_bg = NULL;
+       int cluster_bitmap = 0;
 
        mlog_entry_void();
 
@@ -1905,18 +1915,28 @@ static inline int ocfs2_block_group_clear_bits(handle_t *handle,
        }
 
        if (ocfs2_is_cluster_bitmap(alloc_inode))
-               undo_bg = (struct ocfs2_group_desc *) bh2jh(group_bh)->b_committed_data;
+               cluster_bitmap = 1;
+
+       if (cluster_bitmap) {
+               jbd_lock_bh_state(group_bh);
+               undo_bg = (struct ocfs2_group_desc *)
+                                       bh2jh(group_bh)->b_committed_data;
+               BUG_ON(!undo_bg);
+       }
 
        tmp = num_bits;
        while(tmp--) {
                ocfs2_clear_bit((bit_off + tmp),
                                (unsigned long *) bg->bg_bitmap);
-               if (ocfs2_is_cluster_bitmap(alloc_inode))
+               if (cluster_bitmap)
                        ocfs2_set_bit(bit_off + tmp,
                                      (unsigned long *) undo_bg->bg_bitmap);
        }
        le16_add_cpu(&bg->bg_free_bits_count, num_bits);
 
+       if (cluster_bitmap)
+               jbd_unlock_bh_state(group_bh);
+
        status = ocfs2_journal_dirty(handle, group_bh);
        if (status < 0)
                mlog_errno(status);
index 0d3ed74..7efb349 100644 (file)
@@ -205,11 +205,10 @@ static const match_table_t tokens = {
 #ifdef CONFIG_DEBUG_FS
 static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len)
 {
-       int out = 0;
-       int i;
        struct ocfs2_cluster_connection *cconn = osb->cconn;
        struct ocfs2_recovery_map *rm = osb->recovery_map;
-       struct ocfs2_orphan_scan *os;
+       struct ocfs2_orphan_scan *os = &osb->osb_orphan_scan;
+       int i, out = 0;
 
        out += snprintf(buf + out, len - out,
                        "%10s => Id: %-s  Uuid: %-s  Gen: 0x%X  Label: %-s\n",
@@ -234,20 +233,24 @@ static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len)
                        "%10s => Opts: 0x%lX  AtimeQuanta: %u\n", "Mount",
                        osb->s_mount_opt, osb->s_atime_quantum);
 
-       out += snprintf(buf + out, len - out,
-                       "%10s => Stack: %s  Name: %*s  Version: %d.%d\n",
-                       "Cluster",
-                       (*osb->osb_cluster_stack == '\0' ?
-                        "o2cb" : osb->osb_cluster_stack),
-                       cconn->cc_namelen, cconn->cc_name,
-                       cconn->cc_version.pv_major, cconn->cc_version.pv_minor);
+       if (cconn) {
+               out += snprintf(buf + out, len - out,
+                               "%10s => Stack: %s  Name: %*s  "
+                               "Version: %d.%d\n", "Cluster",
+                               (*osb->osb_cluster_stack == '\0' ?
+                                "o2cb" : osb->osb_cluster_stack),
+                               cconn->cc_namelen, cconn->cc_name,
+                               cconn->cc_version.pv_major,
+                               cconn->cc_version.pv_minor);
+       }
 
        spin_lock(&osb->dc_task_lock);
        out += snprintf(buf + out, len - out,
                        "%10s => Pid: %d  Count: %lu  WakeSeq: %lu  "
                        "WorkSeq: %lu\n", "DownCnvt",
-                       task_pid_nr(osb->dc_task), osb->blocked_lock_count,
-                       osb->dc_wake_sequence, osb->dc_work_sequence);
+                       (osb->dc_task ?  task_pid_nr(osb->dc_task) : -1),
+                       osb->blocked_lock_count, osb->dc_wake_sequence,
+                       osb->dc_work_sequence);
        spin_unlock(&osb->dc_task_lock);
 
        spin_lock(&osb->osb_lock);
@@ -267,14 +270,15 @@ static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len)
 
        out += snprintf(buf + out, len - out,
                        "%10s => Pid: %d  Interval: %lu  Needs: %d\n", "Commit",
-                       task_pid_nr(osb->commit_task), osb->osb_commit_interval,
+                       (osb->commit_task ? task_pid_nr(osb->commit_task) : -1),
+                       osb->osb_commit_interval,
                        atomic_read(&osb->needs_checkpoint));
 
        out += snprintf(buf + out, len - out,
-                       "%10s => State: %d  NumTxns: %d  TxnId: %lu\n",
+                       "%10s => State: %d  TxnId: %lu  NumTxns: %d\n",
                        "Journal", osb->journal->j_state,
-                       atomic_read(&osb->journal->j_num_trans),
-                       osb->journal->j_trans_id);
+                       osb->journal->j_trans_id,
+                       atomic_read(&osb->journal->j_num_trans));
 
        out += snprintf(buf + out, len - out,
                        "%10s => GlobalAllocs: %d  LocalAllocs: %d  "
@@ -300,9 +304,18 @@ static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len)
                        atomic_read(&osb->s_num_inodes_stolen));
        spin_unlock(&osb->osb_lock);
 
+       out += snprintf(buf + out, len - out, "OrphanScan => ");
+       out += snprintf(buf + out, len - out, "Local: %u  Global: %u ",
+                       os->os_count, os->os_seqno);
+       out += snprintf(buf + out, len - out, " Last Scan: ");
+       if (atomic_read(&os->os_state) == ORPHAN_SCAN_INACTIVE)
+               out += snprintf(buf + out, len - out, "Disabled\n");
+       else
+               out += snprintf(buf + out, len - out, "%lu seconds ago\n",
+                               (get_seconds() - os->os_scantime.tv_sec));
+
        out += snprintf(buf + out, len - out, "%10s => %3s  %10s\n",
                        "Slots", "Num", "RecoGen");
-
        for (i = 0; i < osb->max_slots; ++i) {
                out += snprintf(buf + out, len - out,
                                "%10s  %c %3d  %10d\n",
@@ -311,13 +324,6 @@ static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len)
                                i, osb->slot_recovery_generations[i]);
        }
 
-       os = &osb->osb_orphan_scan;
-       out += snprintf(buf + out, len - out, "Orphan Scan=> ");
-       out += snprintf(buf + out, len - out, "Local: %u  Global: %u ",
-                       os->os_count, os->os_seqno);
-       out += snprintf(buf + out, len - out, " Last Scan: %lu seconds ago\n",
-                       (get_seconds() - os->os_scantime.tv_sec));
-
        return out;
 }
 
@@ -1175,6 +1181,9 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
        atomic_set(&osb->vol_state, VOLUME_MOUNTED_QUOTAS);
        wake_up(&osb->osb_mount_event);
 
+       /* Start this when the mount is almost sure of being successful */
+       ocfs2_orphan_scan_init(osb);
+
        mlog_exit(status);
        return status;
 
@@ -1810,14 +1819,15 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
 
        debugfs_remove(osb->osb_ctxt);
 
+       /* Orphan scan should be stopped as early as possible */
+       ocfs2_orphan_scan_stop(osb);
+
        ocfs2_disable_quotas(osb);
 
        ocfs2_shutdown_local_alloc(osb);
 
        ocfs2_truncate_log_shutdown(osb);
 
-       ocfs2_orphan_scan_stop(osb);
-
        /* This will disable recovery and flush any recovery work. */
        ocfs2_recovery_exit(osb);
 
@@ -1978,13 +1988,6 @@ static int ocfs2_initialize_super(struct super_block *sb,
                goto bail;
        }
 
-       status = ocfs2_orphan_scan_init(osb);
-       if (status) {
-               mlog(ML_ERROR, "Unable to initialize delayed orphan scan\n");
-               mlog_errno(status);
-               goto bail;
-       }
-
        init_waitqueue_head(&osb->checkpoint_event);
        atomic_set(&osb->needs_checkpoint, 0);
 
index ab713eb..40e5370 100644 (file)
@@ -50,6 +50,10 @@ static inline int is_in_system_inode_array(struct ocfs2_super *osb,
                                           int type,
                                           u32 slot);
 
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+static struct lock_class_key ocfs2_sysfile_cluster_lock_key[NUM_SYSTEM_INODES];
+#endif
+
 static inline int is_global_system_inode(int type)
 {
        return type >= OCFS2_FIRST_ONLINE_SYSTEM_INODE &&
@@ -118,6 +122,21 @@ static struct inode * _ocfs2_get_system_file_inode(struct ocfs2_super *osb,
                inode = NULL;
                goto bail;
        }
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+       if (type == LOCAL_USER_QUOTA_SYSTEM_INODE ||
+           type == LOCAL_GROUP_QUOTA_SYSTEM_INODE ||
+           type == JOURNAL_SYSTEM_INODE) {
+               /* Ignore inode lock on these inodes as the lock does not
+                * really belong to any process and lockdep cannot handle
+                * that */
+               OCFS2_I(inode)->ip_inode_lockres.l_lockdep_map.key = NULL;
+       } else {
+               lockdep_init_map(&OCFS2_I(inode)->ip_inode_lockres.
+                                                               l_lockdep_map,
+                                ocfs2_system_inodes[type].si_name,
+                                &ocfs2_sysfile_cluster_lock_key[type], 0);
+       }
+#endif
 bail:
 
        return inode;
index da5a5a1..b25d1b5 100644 (file)
@@ -258,6 +258,16 @@ extern void lockdep_init_map(struct lockdep_map *lock, const char *name,
 #define lockdep_set_subclass(lock, sub)        \
                lockdep_init_map(&(lock)->dep_map, #lock, \
                                 (lock)->dep_map.key, sub)
+/*
+ * Compare locking classes: nonzero iff the lock's current class key is @key.
+ */
+#define lockdep_match_class(lock, key) lockdep_match_key(&(lock)->dep_map, key)
+
+static inline int lockdep_match_key(struct lockdep_map *lock,
+                                   struct lock_class_key *key)
+{
+       return lock->key == key;
+}
 
 /*
  * Acquire a lock.
@@ -326,6 +336,11 @@ static inline void lockdep_on(void)
 #define lockdep_set_class_and_subclass(lock, key, sub) \
                do { (void)(key); } while (0)
 #define lockdep_set_subclass(lock, sub)                do { } while (0)
+/*
+ * We don't define lockdep_match_class() and lockdep_match_key() for the
+ * !LOCKDEP case since the result is not well defined; callers should
+ * #ifdef the call themselves.
+ */
 
 # define INIT_LOCKDEP
 # define lockdep_reset()               do { debug_locks = 1; } while (0)