Merge branch 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jlbec/ocfs2
author: Linus Torvalds <torvalds@linux-foundation.org>
Tue, 11 Jan 2011 19:28:34 +0000 (11:28 -0800)
committer: Linus Torvalds <torvalds@linux-foundation.org>
Tue, 11 Jan 2011 19:28:34 +0000 (11:28 -0800)
* 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jlbec/ocfs2: (22 commits)
  MAINTAINERS: Update Joel Becker's email address
  ocfs2: Remove unused truncate function from alloc.c
  ocfs2/cluster: dereferencing before checking in nst_seq_show()
  ocfs2: fix build for OCFS2_FS_STATS not enabled
  ocfs2/cluster: Show o2net timing statistics
  ocfs2/cluster: Track process message timing stats for each socket
  ocfs2/cluster: Track send message timing stats for each socket
  ocfs2/cluster: Use ktime instead of timeval in struct o2net_sock_container
  ocfs2/cluster: Replace timeval with ktime in struct o2net_send_tracking
  ocfs2: Add DEBUG_FS dependency
  ocfs2/dlm: Hard code the values for enums
  ocfs2/dlm: Minor cleanup
  ocfs2/dlm: Cleanup dlmdebug.c
  ocfs2: Release buffer_head in case of error in ocfs2_double_lock.
  ocfs2/cluster: Pin the local node when o2hb thread starts
  ocfs2/cluster: Show pin state for each o2hb region
  ocfs2/cluster: Pin/unpin o2hb regions
  ocfs2/cluster: Remove dropped region from o2hb quorum region bitmap
  ocfs2/cluster: Pin the remote node item in configfs
  ocfs2/dlm: make existing convertion precedent over new lock
  ...

18 files changed:
MAINTAINERS
fs/ocfs2/Kconfig
fs/ocfs2/alloc.c
fs/ocfs2/alloc.h
fs/ocfs2/aops.c
fs/ocfs2/cluster/heartbeat.c
fs/ocfs2/cluster/netdebug.c
fs/ocfs2/cluster/tcp.c
fs/ocfs2/cluster/tcp_internal.h
fs/ocfs2/dlm/dlmast.c
fs/ocfs2/dlm/dlmcommon.h
fs/ocfs2/dlm/dlmdebug.c
fs/ocfs2/dlm/dlmdebug.h
fs/ocfs2/dlm/dlmdomain.c
fs/ocfs2/dlm/dlmlock.c
fs/ocfs2/dlm/dlmthread.c
fs/ocfs2/namei.c
fs/ocfs2/ocfs2.h

index bb6c1ac..42f991e 100644 (file)
@@ -1785,7 +1785,8 @@ S:        Maintained
 F:     drivers/usb/atm/cxacru.c
 
 CONFIGFS
-M:     Joel Becker <joel.becker@oracle.com>
+M:     Joel Becker <jlbec@evilplan.org>
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/jlbec/configfs.git
 S:     Supported
 F:     fs/configfs/
 F:     include/linux/configfs.h
@@ -4549,7 +4550,7 @@ F:        include/linux/oprofile.h
 
 ORACLE CLUSTER FILESYSTEM 2 (OCFS2)
 M:     Mark Fasheh <mfasheh@suse.com>
-M:     Joel Becker <joel.becker@oracle.com>
+M:     Joel Becker <jlbec@evilplan.org>
 L:     ocfs2-devel@oss.oracle.com (moderated for non-subscribers)
 W:     http://oss.oracle.com/projects/ocfs2/
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/jlbec/ocfs2.git
index 0d84066..ab152c0 100644 (file)
@@ -51,7 +51,7 @@ config OCFS2_FS_USERSPACE_CLUSTER
 
 config OCFS2_FS_STATS
        bool "OCFS2 statistics"
-       depends on OCFS2_FS
+       depends on OCFS2_FS && DEBUG_FS
        default y
        help
          This option allows some fs statistics to be captured. Enabling
index 592fae5..e4984e2 100644 (file)
@@ -565,7 +565,6 @@ static inline int ocfs2_et_sanity_check(struct ocfs2_extent_tree *et)
        return ret;
 }
 
-static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc);
 static int ocfs2_cache_extent_block_free(struct ocfs2_cached_dealloc_ctxt *ctxt,
                                         struct ocfs2_extent_block *eb);
 static void ocfs2_adjust_rightmost_records(handle_t *handle,
@@ -5858,6 +5857,7 @@ int ocfs2_truncate_log_append(struct ocfs2_super *osb,
 
        ocfs2_journal_dirty(handle, tl_bh);
 
+       osb->truncated_clusters += num_clusters;
 bail:
        mlog_exit(status);
        return status;
@@ -5929,6 +5929,8 @@ static int ocfs2_replay_truncate_records(struct ocfs2_super *osb,
                i--;
        }
 
+       osb->truncated_clusters = 0;
+
 bail:
        mlog_exit(status);
        return status;
@@ -7138,64 +7140,6 @@ bail:
        return status;
 }
 
-/*
- * Expects the inode to already be locked.
- */
-int ocfs2_prepare_truncate(struct ocfs2_super *osb,
-                          struct inode *inode,
-                          struct buffer_head *fe_bh,
-                          struct ocfs2_truncate_context **tc)
-{
-       int status;
-       unsigned int new_i_clusters;
-       struct ocfs2_dinode *fe;
-       struct ocfs2_extent_block *eb;
-       struct buffer_head *last_eb_bh = NULL;
-
-       mlog_entry_void();
-
-       *tc = NULL;
-
-       new_i_clusters = ocfs2_clusters_for_bytes(osb->sb,
-                                                 i_size_read(inode));
-       fe = (struct ocfs2_dinode *) fe_bh->b_data;
-
-       mlog(0, "fe->i_clusters = %u, new_i_clusters = %u, fe->i_size ="
-            "%llu\n", le32_to_cpu(fe->i_clusters), new_i_clusters,
-            (unsigned long long)le64_to_cpu(fe->i_size));
-
-       *tc = kzalloc(sizeof(struct ocfs2_truncate_context), GFP_KERNEL);
-       if (!(*tc)) {
-               status = -ENOMEM;
-               mlog_errno(status);
-               goto bail;
-       }
-       ocfs2_init_dealloc_ctxt(&(*tc)->tc_dealloc);
-
-       if (fe->id2.i_list.l_tree_depth) {
-               status = ocfs2_read_extent_block(INODE_CACHE(inode),
-                                                le64_to_cpu(fe->i_last_eb_blk),
-                                                &last_eb_bh);
-               if (status < 0) {
-                       mlog_errno(status);
-                       goto bail;
-               }
-               eb = (struct ocfs2_extent_block *) last_eb_bh->b_data;
-       }
-
-       (*tc)->tc_last_eb_bh = last_eb_bh;
-
-       status = 0;
-bail:
-       if (status < 0) {
-               if (*tc)
-                       ocfs2_free_truncate_context(*tc);
-               *tc = NULL;
-       }
-       mlog_exit_void();
-       return status;
-}
-
 /*
  * 'start' is inclusive, 'end' is not.
  */
@@ -7270,18 +7214,3 @@ out_commit:
 out:
        return ret;
 }
-
-static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc)
-{
-       /*
-        * The caller is responsible for completing deallocation
-        * before freeing the context.
-        */
-       if (tc->tc_dealloc.c_first_suballocator != NULL)
-               mlog(ML_NOTICE,
-                    "Truncate completion has non-empty dealloc context\n");
-
-       brelse(tc->tc_last_eb_bh);
-
-       kfree(tc);
-}
index 55762b5..3bd08a0 100644 (file)
@@ -228,10 +228,6 @@ struct ocfs2_truncate_context {
 
 int ocfs2_zero_range_for_truncate(struct inode *inode, handle_t *handle,
                                  u64 range_start, u64 range_end);
-int ocfs2_prepare_truncate(struct ocfs2_super *osb,
-                          struct inode *inode,
-                          struct buffer_head *fe_bh,
-                          struct ocfs2_truncate_context **tc);
 int ocfs2_commit_truncate(struct ocfs2_super *osb,
                          struct inode *inode,
                          struct buffer_head *di_bh);
index 0d7c554..1fbb0e2 100644 (file)
@@ -1630,6 +1630,43 @@ static int ocfs2_zero_tail(struct inode *inode, struct buffer_head *di_bh,
        return ret;
 }
 
+/*
+ * Try to flush the truncate log if doing so could free enough clusters
+ * for an allocation of "needed" clusters.
+ *
+ * osb->truncated_clusters is sampled while holding the truncate log
+ * inode's i_mutex. If that count is smaller than "needed" nothing is
+ * flushed. Otherwise the truncate log is flushed and, when
+ * jbd2_journal_start_commit() returns non-zero, we wait for that commit
+ * with jbd2_log_wait_commit().
+ *
+ * Return value: "< 0" means the flush failed, "0" means there is no point
+ * in retrying (too few truncated clusters, or the flush succeeded but no
+ * journal commit was started) and "1" means the flush has been committed
+ * and the caller should try to allocate again.
+ */
+static int ocfs2_try_to_free_truncate_log(struct ocfs2_super *osb,
+                                         unsigned int needed)
+{
+       tid_t target;
+       int ret = 0;
+       unsigned int truncated_clusters;
+
+       mutex_lock(&osb->osb_tl_inode->i_mutex);
+       truncated_clusters = osb->truncated_clusters;
+       mutex_unlock(&osb->osb_tl_inode->i_mutex);
+
+       /*
+        * Check whether we can succeed in allocating if we free
+        * the truncate log. ret is still 0 here, so the caller is
+        * told not to retry.
+        */
+       if (truncated_clusters < needed)
+               goto out;
+
+       ret = ocfs2_flush_truncate_log(osb);
+       if (ret) {
+               mlog_errno(ret);
+               goto out;
+       }
+
+       if (jbd2_journal_start_commit(osb->journal->j_journal, &target)) {
+               jbd2_log_wait_commit(osb->journal->j_journal, target);
+               ret = 1;        /* committed: ask the caller to retry */
+       }
+out:
+       return ret;
+}
+
 int ocfs2_write_begin_nolock(struct file *filp,
                             struct address_space *mapping,
                             loff_t pos, unsigned len, unsigned flags,
@@ -1637,7 +1674,7 @@ int ocfs2_write_begin_nolock(struct file *filp,
                             struct buffer_head *di_bh, struct page *mmap_page)
 {
        int ret, cluster_of_pages, credits = OCFS2_INODE_UPDATE_CREDITS;
-       unsigned int clusters_to_alloc, extents_to_split;
+       unsigned int clusters_to_alloc, extents_to_split, clusters_need = 0;
        struct ocfs2_write_ctxt *wc;
        struct inode *inode = mapping->host;
        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
@@ -1646,7 +1683,9 @@ int ocfs2_write_begin_nolock(struct file *filp,
        struct ocfs2_alloc_context *meta_ac = NULL;
        handle_t *handle;
        struct ocfs2_extent_tree et;
+       int try_free = 1, ret1;
 
+try_again:
        ret = ocfs2_alloc_write_ctxt(&wc, osb, pos, len, di_bh);
        if (ret) {
                mlog_errno(ret);
@@ -1681,6 +1720,7 @@ int ocfs2_write_begin_nolock(struct file *filp,
                mlog_errno(ret);
                goto out;
        } else if (ret == 1) {
+               clusters_need = wc->w_clen;
                ret = ocfs2_refcount_cow(inode, filp, di_bh,
                                         wc->w_cpos, wc->w_clen, UINT_MAX);
                if (ret) {
@@ -1695,6 +1735,7 @@ int ocfs2_write_begin_nolock(struct file *filp,
                mlog_errno(ret);
                goto out;
        }
+       clusters_need += clusters_to_alloc;
 
        di = (struct ocfs2_dinode *)wc->w_di_bh->b_data;
 
@@ -1817,6 +1858,22 @@ out:
                ocfs2_free_alloc_context(data_ac);
        if (meta_ac)
                ocfs2_free_alloc_context(meta_ac);
+
+       if (ret == -ENOSPC && try_free) {
+               /*
+                * Try to free some truncate log so that we can have enough
+                * clusters to allocate.
+                */
+               try_free = 0;
+
+               ret1 = ocfs2_try_to_free_truncate_log(osb, clusters_need);
+               if (ret1 == 1)
+                       goto try_again;
+
+               if (ret1 < 0)
+                       mlog_errno(ret1);
+       }
+
        return ret;
 }
 
index 9e3d45b..a6cc053 100644 (file)
@@ -82,6 +82,7 @@ static unsigned long o2hb_failed_region_bitmap[BITS_TO_LONGS(O2NM_MAX_REGIONS)];
 #define O2HB_DB_TYPE_REGION_LIVENODES  4
 #define O2HB_DB_TYPE_REGION_NUMBER     5
 #define O2HB_DB_TYPE_REGION_ELAPSED_TIME       6
+#define O2HB_DB_TYPE_REGION_PINNED     7
 struct o2hb_debug_buf {
        int db_type;
        int db_size;
@@ -101,6 +102,7 @@ static struct o2hb_debug_buf *o2hb_db_failedregions;
 #define O2HB_DEBUG_FAILEDREGIONS       "failed_regions"
 #define O2HB_DEBUG_REGION_NUMBER       "num"
 #define O2HB_DEBUG_REGION_ELAPSED_TIME "elapsed_time_in_ms"
+#define O2HB_DEBUG_REGION_PINNED       "pinned"
 
 static struct dentry *o2hb_debug_dir;
 static struct dentry *o2hb_debug_livenodes;
@@ -132,6 +134,33 @@ char *o2hb_heartbeat_mode_desc[O2HB_HEARTBEAT_NUM_MODES] = {
 unsigned int o2hb_dead_threshold = O2HB_DEFAULT_DEAD_THRESHOLD;
 unsigned int o2hb_heartbeat_mode = O2HB_HEARTBEAT_LOCAL;
 
+/*
+ * o2hb_dependent_users tracks the number of registered callbacks that depend
+ * on heartbeat. o2net and o2dlm are two entities that register this callback.
+ * However only o2dlm depends on the heartbeat. It does not want the heartbeat
+ * to stop while a dlm domain is still active.
+ */
+unsigned int o2hb_dependent_users;
+
+/*
+ * In global heartbeat mode, all regions are pinned if there are one or more
+ * dependent users and the quorum region count is <= O2HB_PIN_CUT_OFF. All
+ * regions are unpinned if the region count exceeds the cut off or the number
+ * of dependent users falls to zero.
+ */
+#define O2HB_PIN_CUT_OFF               3
+
+/*
+ * In local heartbeat mode, we assume the dlm domain name to be the same as
+ * region uuid. This is true for domains created for the file system but not
+ * necessarily true for userdlm domains. This is a known limitation.
+ *
+ * In global heartbeat mode, we pin/unpin all o2hb regions. This solution
+ * works for both file system and userdlm domains.
+ */
+static int o2hb_region_pin(const char *region_uuid);
+static void o2hb_region_unpin(const char *region_uuid);
+
 /* Only sets a new threshold if there are no active regions.
  *
  * No locking or otherwise interesting code is required for reading
@@ -186,7 +215,9 @@ struct o2hb_region {
        struct config_item      hr_item;
 
        struct list_head        hr_all_item;
-       unsigned                hr_unclean_stop:1;
+       unsigned                hr_unclean_stop:1,
+                               hr_item_pinned:1,
+                               hr_item_dropped:1;
 
        /* protected by the hr_callback_sem */
        struct task_struct      *hr_task;
@@ -212,9 +243,11 @@ struct o2hb_region {
        struct dentry           *hr_debug_livenodes;
        struct dentry           *hr_debug_regnum;
        struct dentry           *hr_debug_elapsed_time;
+       struct dentry           *hr_debug_pinned;
        struct o2hb_debug_buf   *hr_db_livenodes;
        struct o2hb_debug_buf   *hr_db_regnum;
        struct o2hb_debug_buf   *hr_db_elapsed_time;
+       struct o2hb_debug_buf   *hr_db_pinned;
 
        /* let the person setting up hb wait for it to return until it
         * has reached a 'steady' state.  This will be fixed when we have
@@ -701,6 +734,14 @@ static void o2hb_set_quorum_device(struct o2hb_region *reg,
               config_item_name(&reg->hr_item));
 
        set_bit(reg->hr_region_num, o2hb_quorum_region_bitmap);
+
+       /*
+        * If global heartbeat active, unpin all regions if the
+        * region count > CUT_OFF
+        */
+       if (o2hb_pop_count(&o2hb_quorum_region_bitmap,
+                          O2NM_MAX_REGIONS) > O2HB_PIN_CUT_OFF)
+               o2hb_region_unpin(NULL);
 }
 
 static int o2hb_check_slot(struct o2hb_region *reg,
@@ -1041,6 +1082,9 @@ static int o2hb_thread(void *data)
 
        set_user_nice(current, -20);
 
+       /* Pin node */
+       o2nm_depend_this_node();
+
        while (!kthread_should_stop() && !reg->hr_unclean_stop) {
                /* We track the time spent inside
                 * o2hb_do_disk_heartbeat so that we avoid more than
@@ -1090,6 +1134,9 @@ static int o2hb_thread(void *data)
                mlog_errno(ret);
        }
 
+       /* Unpin node */
+       o2nm_undepend_this_node();
+
        mlog(ML_HEARTBEAT|ML_KTHREAD, "hb thread exiting\n");
 
        return 0;
@@ -1142,6 +1189,12 @@ static int o2hb_debug_open(struct inode *inode, struct file *file)
                                                 reg->hr_last_timeout_start));
                goto done;
 
+       case O2HB_DB_TYPE_REGION_PINNED:
+               reg = (struct o2hb_region *)db->db_data;
+               out += snprintf(buf + out, PAGE_SIZE - out, "%u\n",
+                               !!reg->hr_item_pinned);
+               goto done;
+
        default:
                goto done;
        }
@@ -1315,6 +1368,8 @@ int o2hb_init(void)
        memset(o2hb_quorum_region_bitmap, 0, sizeof(o2hb_quorum_region_bitmap));
        memset(o2hb_failed_region_bitmap, 0, sizeof(o2hb_failed_region_bitmap));
 
+       o2hb_dependent_users = 0;
+
        return o2hb_debug_init();
 }
 
@@ -1384,6 +1439,7 @@ static void o2hb_region_release(struct config_item *item)
        debugfs_remove(reg->hr_debug_livenodes);
        debugfs_remove(reg->hr_debug_regnum);
        debugfs_remove(reg->hr_debug_elapsed_time);
+       debugfs_remove(reg->hr_debug_pinned);
        debugfs_remove(reg->hr_debug_dir);
 
        spin_lock(&o2hb_live_lock);
@@ -1948,6 +2004,18 @@ static int o2hb_debug_region_init(struct o2hb_region *reg, struct dentry *dir)
                goto bail;
        }
 
+       reg->hr_debug_pinned =
+                       o2hb_debug_create(O2HB_DEBUG_REGION_PINNED,
+                                         reg->hr_debug_dir,
+                                         &(reg->hr_db_pinned),
+                                         sizeof(*(reg->hr_db_pinned)),
+                                         O2HB_DB_TYPE_REGION_PINNED,
+                                         0, 0, reg);
+       if (!reg->hr_debug_pinned) {
+               mlog_errno(ret);
+               goto bail;
+       }
+
        ret = 0;
 bail:
        return ret;
@@ -2002,15 +2070,20 @@ static void o2hb_heartbeat_group_drop_item(struct config_group *group,
 {
        struct task_struct *hb_task;
        struct o2hb_region *reg = to_o2hb_region(item);
+       int quorum_region = 0;
 
        /* stop the thread when the user removes the region dir */
        spin_lock(&o2hb_live_lock);
        if (o2hb_global_heartbeat_active()) {
                clear_bit(reg->hr_region_num, o2hb_region_bitmap);
                clear_bit(reg->hr_region_num, o2hb_live_region_bitmap);
+               if (test_bit(reg->hr_region_num, o2hb_quorum_region_bitmap))
+                       quorum_region = 1;
+               clear_bit(reg->hr_region_num, o2hb_quorum_region_bitmap);
        }
        hb_task = reg->hr_task;
        reg->hr_task = NULL;
+       reg->hr_item_dropped = 1;
        spin_unlock(&o2hb_live_lock);
 
        if (hb_task)
@@ -2028,7 +2101,27 @@ static void o2hb_heartbeat_group_drop_item(struct config_group *group,
        if (o2hb_global_heartbeat_active())
                printk(KERN_NOTICE "o2hb: Heartbeat stopped on region %s\n",
                       config_item_name(&reg->hr_item));
+
        config_item_put(item);
+
+       if (!o2hb_global_heartbeat_active() || !quorum_region)
+               return;
+
+       /*
+        * If global heartbeat active and there are dependent users,
+        * pin all regions if quorum region count <= CUT_OFF
+        */
+       spin_lock(&o2hb_live_lock);
+
+       if (!o2hb_dependent_users)
+               goto unlock;
+
+       if (o2hb_pop_count(&o2hb_quorum_region_bitmap,
+                          O2NM_MAX_REGIONS) <= O2HB_PIN_CUT_OFF)
+               o2hb_region_pin(NULL);
+
+unlock:
+       spin_unlock(&o2hb_live_lock);
 }
 
 struct o2hb_heartbeat_group_attribute {
@@ -2214,63 +2307,138 @@ void o2hb_setup_callback(struct o2hb_callback_func *hc,
 }
 EXPORT_SYMBOL_GPL(o2hb_setup_callback);
 
-static struct o2hb_region *o2hb_find_region(const char *region_uuid)
+/*
+ * In local heartbeat mode, region_uuid passed matches the dlm domain name.
+ * In global heartbeat mode, region_uuid passed is NULL.
+ *
+ * In local, we only pin the matching region. In global we pin all the active
+ * regions.
+ *
+ * The caller must hold o2hb_live_lock (checked with assert_spin_locked()).
+ * Regions that are already pinned or that have been dropped are skipped.
+ *
+ * Returns 0 on success, otherwise the error from o2nm_depend_item().
+ * On such an error the walk stops immediately; regions pinned earlier in
+ * the same call are left pinned.
+ */
+static int o2hb_region_pin(const char *region_uuid)
 {
-       struct o2hb_region *p, *reg = NULL;
+       int ret = 0, found = 0;
+       struct o2hb_region *reg;
+       char *uuid;
 
        assert_spin_locked(&o2hb_live_lock);
 
-       list_for_each_entry(p, &o2hb_all_regions, hr_all_item) {
-               if (!strcmp(region_uuid, config_item_name(&p->hr_item))) {
-                       reg = p;
-                       break;
+       list_for_each_entry(reg, &o2hb_all_regions, hr_all_item) {
+               uuid = config_item_name(&reg->hr_item);
+
+               /* local heartbeat: only the region named by region_uuid */
+               if (region_uuid) {
+                       if (strcmp(region_uuid, uuid))
+                               continue;
+                       found = 1;
+               }
+
+               if (reg->hr_item_pinned || reg->hr_item_dropped)
+                       goto skip_pin;
+
+               /*
+                * Ignore ENOENT only for local hb (userdlm domain).
+                * "found" is only ever set when region_uuid is non-NULL,
+                * so in global mode ENOENT is reported like any other error.
+                *
+                * NOTE(review): o2nm_depend_item() is called here with the
+                * o2hb_live_lock spinlock held - confirm that it cannot
+                * sleep.
+                */
+               ret = o2nm_depend_item(&reg->hr_item);
+               if (!ret) {
+                       mlog(ML_CLUSTER, "Pin region %s\n", uuid);
+                       reg->hr_item_pinned = 1;
+               } else {
+                       if (ret == -ENOENT && found)
+                               ret = 0;
+                       else {
+                               mlog(ML_ERROR, "Pin region %s fails with %d\n",
+                                    uuid, ret);
+                               break;
+                       }
                }
+skip_pin:
+               if (found)
+                       break;
        }
 
-       return reg;
+       return ret;
 }
 
-static int o2hb_region_get(const char *region_uuid)
+/*
+ * In local heartbeat mode, region_uuid passed matches the dlm domain name.
+ * In global heartbeat mode, region_uuid passed is NULL.
+ *
+ * In local, we only unpin the matching region. In global we unpin all the
+ * active regions.
+ *
+ * The caller must hold o2hb_live_lock (checked with assert_spin_locked()).
+ * Only regions whose hr_item_pinned flag is set are passed to
+ * o2nm_undepend_item(); the flag is cleared afterwards, so calling this on
+ * an unpinned region does nothing.
+ */
+static void o2hb_region_unpin(const char *region_uuid)
 {
-       int ret = 0;
        struct o2hb_region *reg;
+       char *uuid;
+       int found = 0;
 
-       spin_lock(&o2hb_live_lock);
+       assert_spin_locked(&o2hb_live_lock);
 
-       reg = o2hb_find_region(region_uuid);
-       if (!reg)
-               ret = -ENOENT;
-       spin_unlock(&o2hb_live_lock);
+       list_for_each_entry(reg, &o2hb_all_regions, hr_all_item) {
+               uuid = config_item_name(&reg->hr_item);
+               if (region_uuid) {
+                       if (strcmp(region_uuid, uuid))
+                               continue;
+                       found = 1;
+               }
 
-       if (ret)
-               goto out;
+               if (reg->hr_item_pinned) {
+                       mlog(ML_CLUSTER, "Unpin region %s\n", uuid);
+                       o2nm_undepend_item(&reg->hr_item);
+                       reg->hr_item_pinned = 0;
+               }
+               if (found)      /* local mode: the single match is done */
+                       break;
+       }
+}
 
-       ret = o2nm_depend_this_node();
-       if (ret)
-               goto out;
+static int o2hb_region_inc_user(const char *region_uuid)
+{
+       int ret = 0;
 
-       ret = o2nm_depend_item(&reg->hr_item);
-       if (ret)
-               o2nm_undepend_this_node();
+       spin_lock(&o2hb_live_lock);
 
-out:
+       /*
+        * local heartbeat: pin only the region named by region_uuid and
+        * return its result; o2hb_dependent_users is not touched.
+        */
+       if (!o2hb_global_heartbeat_active()) {
+           ret = o2hb_region_pin(region_uuid);
+           goto unlock;
+       }
+
+       /*
+        * if global heartbeat active and this is the first dependent user,
+        * pin all regions if quorum region count <= CUT_OFF
+        *
+        * NOTE(review): the counter is incremented before the pin attempt
+        * and is not decremented again when o2hb_region_pin() fails -
+        * confirm that callers compensate on a non-zero return.
+        */
+       o2hb_dependent_users++;
+       if (o2hb_dependent_users > 1)
+               goto unlock;
+
+       if (o2hb_pop_count(&o2hb_quorum_region_bitmap,
+                          O2NM_MAX_REGIONS) <= O2HB_PIN_CUT_OFF)
+               ret = o2hb_region_pin(NULL);
+
+unlock:
+       spin_unlock(&o2hb_live_lock);
        return ret;
 }
 
-static void o2hb_region_put(const char *region_uuid)
+void o2hb_region_dec_user(const char *region_uuid)
 {
-       struct o2hb_region *reg;
-
        spin_lock(&o2hb_live_lock);
 
-       reg = o2hb_find_region(region_uuid);
+       /*
+        * local heartbeat: unpin only the region named by region_uuid;
+        * o2hb_dependent_users is not touched.
+        */
+       if (!o2hb_global_heartbeat_active()) {
+           o2hb_region_unpin(region_uuid);
+           goto unlock;
+       }
 
-       spin_unlock(&o2hb_live_lock);
+       /*
+        * if global heartbeat active and there are no dependent users,
+        * unpin all quorum regions (o2hb_region_unpin(NULL) walks every
+        * region and unpins each one that is currently pinned)
+        *
+        * NOTE(review): o2hb_dependent_users is an unsigned int and is
+        * decremented without checking for zero - confirm that every call
+        * is paired with a successful o2hb_region_inc_user().
+        * NOTE(review): this function is not static although
+        * o2hb_region_inc_user() is - confirm that is intended.
+        */
+       o2hb_dependent_users--;
+       if (!o2hb_dependent_users)
+               o2hb_region_unpin(NULL);
 
-       if (reg) {
-               o2nm_undepend_item(&reg->hr_item);
-               o2nm_undepend_this_node();
-       }
+unlock:
+       spin_unlock(&o2hb_live_lock);
 }
 
 int o2hb_register_callback(const char *region_uuid,
@@ -2291,9 +2459,11 @@ int o2hb_register_callback(const char *region_uuid,
        }
 
        if (region_uuid) {
-               ret = o2hb_region_get(region_uuid);
-               if (ret)
+               ret = o2hb_region_inc_user(region_uuid);
+               if (ret) {
+                       mlog_errno(ret);
                        goto out;
+               }
        }
 
        down_write(&o2hb_callback_sem);
@@ -2311,7 +2481,7 @@ int o2hb_register_callback(const char *region_uuid,
        up_write(&o2hb_callback_sem);
        ret = 0;
 out:
-       mlog(ML_HEARTBEAT, "returning %d on behalf of %p for funcs %p\n",
+       mlog(ML_CLUSTER, "returning %d on behalf of %p for funcs %p\n",
             ret, __builtin_return_address(0), hc);
        return ret;
 }
@@ -2322,7 +2492,7 @@ void o2hb_unregister_callback(const char *region_uuid,
 {
        BUG_ON(hc->hc_magic != O2HB_CB_MAGIC);
 
-       mlog(ML_HEARTBEAT, "on behalf of %p for funcs %p\n",
+       mlog(ML_CLUSTER, "on behalf of %p for funcs %p\n",
             __builtin_return_address(0), hc);
 
        /* XXX Can this happen _with_ a region reference? */
@@ -2330,7 +2500,7 @@ void o2hb_unregister_callback(const char *region_uuid,
                return;
 
        if (region_uuid)
-               o2hb_region_put(region_uuid);
+               o2hb_region_dec_user(region_uuid);
 
        down_write(&o2hb_callback_sem);
 
index a3f150e..3a58359 100644 (file)
 #define O2NET_DEBUG_DIR                "o2net"
 #define SC_DEBUG_NAME          "sock_containers"
 #define NST_DEBUG_NAME         "send_tracking"
+#define STATS_DEBUG_NAME       "stats"
+
+#define SHOW_SOCK_CONTAINERS   0
+#define SHOW_SOCK_STATS                1
 
 static struct dentry *o2net_dentry;
 static struct dentry *sc_dentry;
 static struct dentry *nst_dentry;
+static struct dentry *stats_dentry;
 
 static DEFINE_SPINLOCK(o2net_debug_lock);
 
@@ -123,37 +128,42 @@ static void *nst_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 static int nst_seq_show(struct seq_file *seq, void *v)
 {
        struct o2net_send_tracking *nst, *dummy_nst = seq->private;
+       ktime_t now;
+       s64 sock, send, status;
 
        spin_lock(&o2net_debug_lock);
        nst = next_nst(dummy_nst);
+       if (!nst)
+               goto out;
 
-       if (nst != NULL) {
-               /* get_task_comm isn't exported.  oh well. */
-               seq_printf(seq, "%p:\n"
-                          "  pid:          %lu\n"
-                          "  tgid:         %lu\n"
-                          "  process name: %s\n"
-                          "  node:         %u\n"
-                          "  sc:           %p\n"
-                          "  message id:   %d\n"
-                          "  message type: %u\n"
-                          "  message key:  0x%08x\n"
-                          "  sock acquiry: %lu.%ld\n"
-                          "  send start:   %lu.%ld\n"
-                          "  wait start:   %lu.%ld\n",
-                          nst, (unsigned long)nst->st_task->pid,
-                          (unsigned long)nst->st_task->tgid,
-                          nst->st_task->comm, nst->st_node,
-                          nst->st_sc, nst->st_id, nst->st_msg_type,
-                          nst->st_msg_key,
-                          nst->st_sock_time.tv_sec,
-                          (long)nst->st_sock_time.tv_usec,
-                          nst->st_send_time.tv_sec,
-                          (long)nst->st_send_time.tv_usec,
-                          nst->st_status_time.tv_sec,
-                          (long)nst->st_status_time.tv_usec);
-       }
+       now = ktime_get();
+       sock = ktime_to_us(ktime_sub(now, nst->st_sock_time));
+       send = ktime_to_us(ktime_sub(now, nst->st_send_time));
+       status = ktime_to_us(ktime_sub(now, nst->st_status_time));
+
+       /* get_task_comm isn't exported.  oh well. */
+       seq_printf(seq, "%p:\n"
+                  "  pid:          %lu\n"
+                  "  tgid:         %lu\n"
+                  "  process name: %s\n"
+                  "  node:         %u\n"
+                  "  sc:           %p\n"
+                  "  message id:   %d\n"
+                  "  message type: %u\n"
+                  "  message key:  0x%08x\n"
+                  "  sock acquiry: %lld usecs ago\n"
+                  "  send start:   %lld usecs ago\n"
+                  "  wait start:   %lld usecs ago\n",
+                  nst, (unsigned long)task_pid_nr(nst->st_task),
+                  (unsigned long)nst->st_task->tgid,
+                  nst->st_task->comm, nst->st_node,
+                  nst->st_sc, nst->st_id, nst->st_msg_type,
+                  nst->st_msg_key,
+                  (long long)sock,
+                  (long long)send,
+                  (long long)status);
 
+out:
        spin_unlock(&o2net_debug_lock);
 
        return 0;
@@ -228,6 +238,11 @@ void o2net_debug_del_sc(struct o2net_sock_container *sc)
        spin_unlock(&o2net_debug_lock);
 }
 
+struct o2net_sock_debug {      /* stored in seq->private by the sc_seq_* iterators */
+       int dbg_ctxt;           /* NOTE(review): presumably SHOW_SOCK_CONTAINERS or SHOW_SOCK_STATS - confirm */
+       struct o2net_sock_container *dbg_sock;  /* cursor handed to next_sc() as the starting point */
+};
+
 static struct o2net_sock_container
                        *next_sc(struct o2net_sock_container *sc_start)
 {
@@ -253,7 +268,8 @@ static struct o2net_sock_container
 
 static void *sc_seq_start(struct seq_file *seq, loff_t *pos)
 {
-       struct o2net_sock_container *sc, *dummy_sc = seq->private;
+       struct o2net_sock_debug *sd = seq->private;
+       struct o2net_sock_container *sc, *dummy_sc = sd->dbg_sock;
 
        spin_lock(&o2net_debug_lock);
        sc = next_sc(dummy_sc);
@@ -264,7 +280,8 @@ static void *sc_seq_start(struct seq_file *seq, loff_t *pos)
 
 static void *sc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
-       struct o2net_sock_container *sc, *dummy_sc = seq->private;
+       struct o2net_sock_debug *sd = seq->private;
+       struct o2net_sock_container *sc, *dummy_sc = sd->dbg_sock;
 
        spin_lock(&o2net_debug_lock);
        sc = next_sc(dummy_sc);
@@ -276,65 +293,107 @@ static void *sc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
        return sc; /* unused, just needs to be null when done */
 }
 
-#define TV_SEC_USEC(TV) TV.tv_sec, (long)TV.tv_usec
+#ifdef CONFIG_OCFS2_FS_STATS   /* accessors for the per-socket stats fields of struct o2net_sock_container */
+# define sc_send_count(_s)             ((_s)->sc_send_count)
+# define sc_recv_count(_s)             ((_s)->sc_recv_count)
+# define sc_tv_acquiry_total_ns(_s)    (ktime_to_ns((_s)->sc_tv_acquiry_total))
+# define sc_tv_send_total_ns(_s)       (ktime_to_ns((_s)->sc_tv_send_total))
+# define sc_tv_status_total_ns(_s)     (ktime_to_ns((_s)->sc_tv_status_total))
+# define sc_tv_process_total_ns(_s)    (ktime_to_ns((_s)->sc_tv_process_total))
+#else  /* !CONFIG_OCFS2_FS_STATS: the fields are not compiled in, so report zeros */
+# define sc_send_count(_s)             (0U)
+# define sc_recv_count(_s)             (0U)
+# define sc_tv_acquiry_total_ns(_s)    (0LL)
+# define sc_tv_send_total_ns(_s)       (0LL)
+# define sc_tv_status_total_ns(_s)     (0LL)
+# define sc_tv_process_total_ns(_s)    (0LL)
+#endif /* CONFIG_OCFS2_FS_STATS */
+
+/* First field of every stats line, so that debugfs.ocfs2 can determine which format is being used */
+#define O2NET_STATS_STR_VERSION                1
+static void sc_show_sock_stats(struct seq_file *seq,
+                              struct o2net_sock_container *sc)
+{      /* print one comma-separated stats line for @sc into @seq; no-op when @sc is NULL */
+       if (!sc)
+               return;
+
+       seq_printf(seq, "%d,%u,%lu,%lld,%lld,%lld,%lu,%lld\n", O2NET_STATS_STR_VERSION,
+                  sc->sc_node->nd_num, (unsigned long)sc_send_count(sc),       /* node number, messages sent */
+                  (long long)sc_tv_acquiry_total_ns(sc),       /* totals below are in nanoseconds */
+                  (long long)sc_tv_send_total_ns(sc),
+                  (long long)sc_tv_status_total_ns(sc),
+                  (unsigned long)sc_recv_count(sc),    /* messages processed */
+                  (long long)sc_tv_process_total_ns(sc));
+}
+
+static void sc_show_sock_container(struct seq_file *seq,
+                                  struct o2net_sock_container *sc)
+{      /* print a multi-line, human-readable dump of @sc into @seq; no-op when @sc is NULL */
+       struct inet_sock *inet = NULL;
+       __be32 saddr = 0, daddr = 0;    /* stay zero when no socket is attached */
+       __be16 sport = 0, dport = 0;
+
+       if (!sc)
+               return;
+
+       if (sc->sc_sock) {      /* only a container with a socket has addresses to report */
+               inet = inet_sk(sc->sc_sock->sk);
+               /* the stack's structs aren't sparse endian clean */
+               saddr = (__force __be32)inet->inet_saddr;
+               daddr = (__force __be32)inet->inet_daddr;
+               sport = (__force __be16)inet->inet_sport;
+               dport = (__force __be16)inet->inet_dport;
+       }
+
+       /* XXX sigh, inet-> doesn't have sparse annotation so any
+        * use of it here generates a warning with -Wbitwise */
+       seq_printf(seq, "%p:\n"
+                  "  krefs:           %d\n"
+                  "  sock:            %pI4:%u -> "
+                                     "%pI4:%u\n"
+                  "  remote node:     %s\n"
+                  "  page off:        %zu\n"
+                  "  handshake ok:    %u\n"
+                  "  timer:           %lld usecs\n"
+                  "  data ready:      %lld usecs\n"
+                  "  advance start:   %lld usecs\n"
+                  "  advance stop:    %lld usecs\n"
+                  "  func start:      %lld usecs\n"
+                  "  func stop:       %lld usecs\n"
+                  "  func key:        0x%08x\n"
+                  "  func type:       %u\n",
+                  sc,
+                  atomic_read(&sc->sc_kref.refcount),
+                  &saddr, inet ? ntohs(sport) : 0,     /* ports print as 0 without a socket */
+                  &daddr, inet ? ntohs(dport) : 0,
+                  sc->sc_node->nd_name,
+                  sc->sc_page_off,
+                  sc->sc_handshake_ok,
+                  (long long)ktime_to_us(sc->sc_tv_timer),     /* stored timestamps converted to usecs, not deltas from now */
+                  (long long)ktime_to_us(sc->sc_tv_data_ready),
+                  (long long)ktime_to_us(sc->sc_tv_advance_start),
+                  (long long)ktime_to_us(sc->sc_tv_advance_stop),
+                  (long long)ktime_to_us(sc->sc_tv_func_start),
+                  (long long)ktime_to_us(sc->sc_tv_func_stop),
+                  sc->sc_msg_key,
+                  sc->sc_msg_type);
+}
 
 static int sc_seq_show(struct seq_file *seq, void *v)
 {
-       struct o2net_sock_container *sc, *dummy_sc = seq->private;
+       struct o2net_sock_debug *sd = seq->private;
+       struct o2net_sock_container *sc, *dummy_sc = sd->dbg_sock;
 
        spin_lock(&o2net_debug_lock);
        sc = next_sc(dummy_sc);
 
-       if (sc != NULL) {
-               struct inet_sock *inet = NULL;
-
-               __be32 saddr = 0, daddr = 0;
-               __be16 sport = 0, dport = 0;
-
-               if (sc->sc_sock) {
-                       inet = inet_sk(sc->sc_sock->sk);
-                       /* the stack's structs aren't sparse endian clean */
-                       saddr = (__force __be32)inet->inet_saddr;
-                       daddr = (__force __be32)inet->inet_daddr;
-                       sport = (__force __be16)inet->inet_sport;
-                       dport = (__force __be16)inet->inet_dport;
-               }
-
-               /* XXX sigh, inet-> doesn't have sparse annotation so any
-                * use of it here generates a warning with -Wbitwise */
-               seq_printf(seq, "%p:\n"
-                          "  krefs:           %d\n"
-                          "  sock:            %pI4:%u -> "
-                                             "%pI4:%u\n"
-                          "  remote node:     %s\n"
-                          "  page off:        %zu\n"
-                          "  handshake ok:    %u\n"
-                          "  timer:           %lu.%ld\n"
-                          "  data ready:      %lu.%ld\n"
-                          "  advance start:   %lu.%ld\n"
-                          "  advance stop:    %lu.%ld\n"
-                          "  func start:      %lu.%ld\n"
-                          "  func stop:       %lu.%ld\n"
-                          "  func key:        %u\n"
-                          "  func type:       %u\n",
-                          sc,
-                          atomic_read(&sc->sc_kref.refcount),
-                          &saddr, inet ? ntohs(sport) : 0,
-                          &daddr, inet ? ntohs(dport) : 0,
-                          sc->sc_node->nd_name,
-                          sc->sc_page_off,
-                          sc->sc_handshake_ok,
-                          TV_SEC_USEC(sc->sc_tv_timer),
-                          TV_SEC_USEC(sc->sc_tv_data_ready),
-                          TV_SEC_USEC(sc->sc_tv_advance_start),
-                          TV_SEC_USEC(sc->sc_tv_advance_stop),
-                          TV_SEC_USEC(sc->sc_tv_func_start),
-                          TV_SEC_USEC(sc->sc_tv_func_stop),
-                          sc->sc_msg_key,
-                          sc->sc_msg_type);
+       if (sc) {
+               if (sd->dbg_ctxt == SHOW_SOCK_CONTAINERS)
+                       sc_show_sock_container(seq, sc);
+               else
+                       sc_show_sock_stats(seq, sc);
        }
 
-
        spin_unlock(&o2net_debug_lock);
 
        return 0;
@@ -351,7 +410,7 @@ static const struct seq_operations sc_seq_ops = {
        .show = sc_seq_show,
 };
 
-static int sc_fop_open(struct inode *inode, struct file *file)
+static int sc_common_open(struct file *file, struct o2net_sock_debug *sd)
 {
        struct o2net_sock_container *dummy_sc;
        struct seq_file *seq;
@@ -369,7 +428,8 @@ static int sc_fop_open(struct inode *inode, struct file *file)
                goto out;
 
        seq = file->private_data;
-       seq->private = dummy_sc;
+       seq->private = sd;
+       sd->dbg_sock = dummy_sc;
        o2net_debug_add_sc(dummy_sc);
 
        dummy_sc = NULL;
@@ -382,12 +442,48 @@ out:
 static int sc_fop_release(struct inode *inode, struct file *file)
 {
        struct seq_file *seq = file->private_data;
-       struct o2net_sock_container *dummy_sc = seq->private;
+       struct o2net_sock_debug *sd = seq->private;
+       struct o2net_sock_container *dummy_sc = sd->dbg_sock;
 
        o2net_debug_del_sc(dummy_sc);
        return seq_release_private(inode, file);
 }
 
+static int stats_fop_open(struct inode *inode, struct file *file)
+{      /* open handler of stats_seq_fops; returns -ENOMEM or the result of sc_common_open() */
+       struct o2net_sock_debug *sd;
+
+       sd = kmalloc(sizeof(struct o2net_sock_debug), GFP_KERNEL);
+       if (sd == NULL)
+               return -ENOMEM;
+
+       sd->dbg_ctxt = SHOW_SOCK_STATS; /* makes sc_seq_show() call sc_show_sock_stats() */
+       sd->dbg_sock = NULL;    /* set to the dummy sc by sc_common_open() */
+
+       return sc_common_open(file, sd);        /* NOTE(review): sd looks leaked if this fails - confirm against sc_common_open()'s out: path */
+}
+
+static const struct file_operations stats_seq_fops = { /* file operations of the STATS_DEBUG_NAME debugfs file */
+       .open = stats_fop_open,
+       .read = seq_read,
+       .llseek = seq_lseek,
+       .release = sc_fop_release,      /* expects seq->private to be the o2net_sock_debug installed via sc_common_open() */
+};
+
+static int sc_fop_open(struct inode *inode, struct file *file)
+{      /* open handler of sc_seq_fops; returns -ENOMEM or the result of sc_common_open() */
+       struct o2net_sock_debug *sd;
+
+       sd = kmalloc(sizeof(struct o2net_sock_debug), GFP_KERNEL);
+       if (sd == NULL)
+               return -ENOMEM;
+
+       sd->dbg_ctxt = SHOW_SOCK_CONTAINERS;    /* makes sc_seq_show() call sc_show_sock_container() */
+       sd->dbg_sock = NULL;    /* set to the dummy sc by sc_common_open() */
+
+       return sc_common_open(file, sd);        /* NOTE(review): sd looks leaked if this fails - confirm against sc_common_open()'s out: path */
+}
+
 static const struct file_operations sc_seq_fops = {
        .open = sc_fop_open,
        .read = seq_read,
@@ -419,25 +515,29 @@ int o2net_debugfs_init(void)
                goto bail;
        }
 
+       stats_dentry = debugfs_create_file(STATS_DEBUG_NAME, S_IFREG|S_IRUSR,
+                                          o2net_dentry, NULL,
+                                          &stats_seq_fops);
+       if (!stats_dentry) {
+               mlog_errno(-ENOMEM);
+               goto bail;
+       }
+
        return 0;
 bail:
-       if (sc_dentry)
-               debugfs_remove(sc_dentry);
-       if (nst_dentry)
-               debugfs_remove(nst_dentry);
-       if (o2net_dentry)
-               debugfs_remove(o2net_dentry);
+       debugfs_remove(stats_dentry);
+       debugfs_remove(sc_dentry);
+       debugfs_remove(nst_dentry);
+       debugfs_remove(o2net_dentry);
        return -ENOMEM;
 }
 
 void o2net_debugfs_exit(void)
 {
-       if (sc_dentry)
-               debugfs_remove(sc_dentry);
-       if (nst_dentry)
-               debugfs_remove(nst_dentry);
-       if (o2net_dentry)
-               debugfs_remove(o2net_dentry);
+       debugfs_remove(stats_dentry);   /* NULL checks dropped: debugfs_remove() is expected to ignore a NULL dentry */
+       debugfs_remove(sc_dentry);
+       debugfs_remove(nst_dentry);
+       debugfs_remove(o2net_dentry);   /* removed last; it is the parent passed to debugfs_create_file() for the stats file */
 }
 
 #endif /* CONFIG_DEBUG_FS */
index 9aa426e..3b11cb1 100644 (file)
@@ -153,63 +153,114 @@ static void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype,
        nst->st_node = node;
 }
 
-static void o2net_set_nst_sock_time(struct o2net_send_tracking *nst)
+static inline void o2net_set_nst_sock_time(struct o2net_send_tracking *nst)    /* stamp st_sock_time with ktime_get() */
 {
-       do_gettimeofday(&nst->st_sock_time);
+       nst->st_sock_time = ktime_get();        /* start of the interval o2net_update_send_stats() adds to sc_tv_acquiry_total */
 }
 
-static void o2net_set_nst_send_time(struct o2net_send_tracking *nst)
+static inline void o2net_set_nst_send_time(struct o2net_send_tracking *nst)    /* stamp st_send_time with ktime_get() */
 {
-       do_gettimeofday(&nst->st_send_time);
+       nst->st_send_time = ktime_get();        /* ends the acquiry interval and starts the send interval in o2net_update_send_stats() */
 }
 
-static void o2net_set_nst_status_time(struct o2net_send_tracking *nst)
+static inline void o2net_set_nst_status_time(struct o2net_send_tracking *nst)  /* stamp st_status_time with ktime_get() */
 {
-       do_gettimeofday(&nst->st_status_time);
+       nst->st_status_time = ktime_get();      /* ends the send interval and starts the status interval in o2net_update_send_stats() */
 }
 
-static void o2net_set_nst_sock_container(struct o2net_send_tracking *nst,
-                                        struct o2net_sock_container *sc)
+static inline void o2net_set_nst_sock_container(struct o2net_send_tracking *nst,       /* record @sc in nst->st_sc */
+                                               struct o2net_sock_container *sc)
 {
        nst->st_sc = sc;
 }
 
-static void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, u32 msg_id)
+static inline void o2net_set_nst_msg_id(struct o2net_send_tracking *nst,       /* record @msg_id in nst->st_id */
+                                       u32 msg_id)
 {
        nst->st_id = msg_id;
 }
 
-#else  /* CONFIG_DEBUG_FS */
-
-static inline void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype,
-                                 u32 msgkey, struct task_struct *task, u8 node)
+static inline void o2net_set_sock_timer(struct o2net_sock_container *sc)       /* called from o2net_sc_reset_idle_timer() just before mod_timer() */
 {
+       sc->sc_tv_timer = ktime_get();  /* shown as "timer" in the sock container debug output */
 }
 
-static inline void o2net_set_nst_sock_time(struct o2net_send_tracking *nst)
+static inline void o2net_set_data_ready_time(struct o2net_sock_container *sc)  /* called from the o2net_data_ready() socket callback */
 {
+       sc->sc_tv_data_ready = ktime_get();     /* shown as "data ready" in the sock container debug output */
 }
 
-static inline void o2net_set_nst_send_time(struct o2net_send_tracking *nst)
+static inline void o2net_set_advance_start_time(struct o2net_sock_container *sc)       /* called on entry to o2net_advance_rx() */
 {
+       sc->sc_tv_advance_start = ktime_get();  /* shown as "advance start" in the sock container debug output */
 }
 
-static inline void o2net_set_nst_status_time(struct o2net_send_tracking *nst)
+static inline void o2net_set_advance_stop_time(struct o2net_sock_container *sc)        /* called at the out: label of o2net_advance_rx() */
 {
+       sc->sc_tv_advance_stop = ktime_get();   /* shown as "advance stop" in the sock container debug output */
 }
 
-static inline void o2net_set_nst_sock_container(struct o2net_send_tracking *nst,
-                                               struct o2net_sock_container *sc)
+static inline void o2net_set_func_start_time(struct o2net_sock_container *sc)  /* called in o2net_process_message() right before the message handler runs */
 {
+       sc->sc_tv_func_start = ktime_get();     /* start of the interval returned by o2net_get_func_run_time() */
 }
 
-static inline void o2net_set_nst_msg_id(struct o2net_send_tracking *nst,
-                                       u32 msg_id)
+static inline void o2net_set_func_stop_time(struct o2net_sock_container *sc)   /* called in o2net_process_message() right after the message handler returns */
 {
+       sc->sc_tv_func_stop = ktime_get();      /* end of the interval returned by o2net_get_func_run_time() */
 }
 
+static ktime_t o2net_get_func_run_time(struct o2net_sock_container *sc)        /* duration of the last handler call recorded on @sc */
+{
+       return ktime_sub(sc->sc_tv_func_stop, sc->sc_tv_func_start);    /* only meaningful once both stamps of the same call are set */
+}
+#else  /* !CONFIG_DEBUG_FS: the tracking helpers expand to nothing and do not evaluate their arguments */
+# define o2net_init_nst(a, b, c, d, e)
+# define o2net_set_nst_sock_time(a)
+# define o2net_set_nst_send_time(a)
+# define o2net_set_nst_status_time(a)
+# define o2net_set_nst_sock_container(a, b)
+# define o2net_set_nst_msg_id(a, b)
+# define o2net_set_sock_timer(a)
+# define o2net_set_data_ready_time(a)
+# define o2net_set_advance_start_time(a)
+# define o2net_set_advance_stop_time(a)
+# define o2net_set_func_start_time(a)
+# define o2net_set_func_stop_time(a)
+# define o2net_get_func_run_time(a)            (ktime_t)0      /* NOTE(review): unparenthesized cast, and it only compiles where ktime_t is a scalar type - confirm */
 #endif /* CONFIG_DEBUG_FS */
 
+#ifdef CONFIG_OCFS2_FS_STATS
+static void o2net_update_send_stats(struct o2net_send_tracking *nst,
+                                   struct o2net_sock_container *sc)
+{      /* fold the three timed phases of one sent message into @sc's running totals; NOTE(review): no lock is taken here - confirm callers serialize updates */
+       sc->sc_tv_status_total = ktime_add(sc->sc_tv_status_total,     /* += now - st_status_time */
+                                          ktime_sub(ktime_get(),
+                                                    nst->st_status_time));
+       sc->sc_tv_send_total = ktime_add(sc->sc_tv_send_total, /* += st_status_time - st_send_time */
+                                        ktime_sub(nst->st_status_time,
+                                                  nst->st_send_time));
+       sc->sc_tv_acquiry_total = ktime_add(sc->sc_tv_acquiry_total,   /* += st_send_time - st_sock_time */
+                                           ktime_sub(nst->st_send_time,
+                                                     nst->st_sock_time));
+       sc->sc_send_count++;    /* u32 message counter reported by sc_show_sock_stats() */
+}
+
+static void o2net_update_recv_stats(struct o2net_sock_container *sc)
+{      /* called from o2net_process_message() after the func stop stamp is taken */
+       sc->sc_tv_process_total = ktime_add(sc->sc_tv_process_total,   /* += func_stop - func_start of the handler that just ran */
+                                           o2net_get_func_run_time(sc));
+       sc->sc_recv_count++;    /* u32 message counter reported by sc_show_sock_stats() */
+}
+
+#else
+
+# define o2net_update_send_stats(a, b)
+
+# define o2net_update_recv_stats(sc)
+
+#endif /* CONFIG_OCFS2_FS_STATS */
+
 static inline int o2net_reconnect_delay(void)
 {
        return o2nm_single_cluster->cl_reconnect_delay_ms;
@@ -355,6 +406,7 @@ static void sc_kref_release(struct kref *kref)
                sc->sc_sock = NULL;
        }
 
+       o2nm_undepend_item(&sc->sc_node->nd_item);
        o2nm_node_put(sc->sc_node);
        sc->sc_node = NULL;
 
@@ -376,6 +428,7 @@ static struct o2net_sock_container *sc_alloc(struct o2nm_node *node)
 {
        struct o2net_sock_container *sc, *ret = NULL;
        struct page *page = NULL;
+       int status = 0;
 
        page = alloc_page(GFP_NOFS);
        sc = kzalloc(sizeof(*sc), GFP_NOFS);
@@ -386,6 +439,13 @@ static struct o2net_sock_container *sc_alloc(struct o2nm_node *node)
        o2nm_node_get(node);
        sc->sc_node = node;
 
+       /* pin the node item of the remote node */
+       status = o2nm_depend_item(&node->nd_item);
+       if (status) {
+               mlog_errno(status);
+               o2nm_node_put(node);
+               goto out;
+       }
        INIT_WORK(&sc->sc_connect_work, o2net_sc_connect_completed);
        INIT_WORK(&sc->sc_rx_work, o2net_rx_until_empty);
        INIT_WORK(&sc->sc_shutdown_work, o2net_shutdown_sc);
@@ -546,7 +606,7 @@ static void o2net_data_ready(struct sock *sk, int bytes)
        if (sk->sk_user_data) {
                struct o2net_sock_container *sc = sk->sk_user_data;
                sclog(sc, "data_ready hit\n");
-               do_gettimeofday(&sc->sc_tv_data_ready);
+               o2net_set_data_ready_time(sc);
                o2net_sc_queue_work(sc, &sc->sc_rx_work);
                ready = sc->sc_data_ready;
        } else {
@@ -1070,6 +1130,8 @@ int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec,
        o2net_set_nst_status_time(&nst);
        wait_event(nsw.ns_wq, o2net_nsw_completed(nn, &nsw));
 
+       o2net_update_send_stats(&nst, sc);
+
        /* Note that we avoid overwriting the callers status return
         * variable if a system error was reported on the other
         * side. Callers beware. */
@@ -1183,13 +1245,15 @@ static int o2net_process_message(struct o2net_sock_container *sc,
        if (syserr != O2NET_ERR_NONE)
                goto out_respond;
 
-       do_gettimeofday(&sc->sc_tv_func_start);
+       o2net_set_func_start_time(sc);
        sc->sc_msg_key = be32_to_cpu(hdr->key);
        sc->sc_msg_type = be16_to_cpu(hdr->msg_type);
        handler_status = (nmh->nh_func)(hdr, sizeof(struct o2net_msg) +
                                             be16_to_cpu(hdr->data_len),
                                        nmh->nh_func_data, &ret_data);
-       do_gettimeofday(&sc->sc_tv_func_stop);
+       o2net_set_func_stop_time(sc);
+
+       o2net_update_recv_stats(sc);
 
 out_respond:
        /* this destroys the hdr, so don't use it after this */
@@ -1300,7 +1364,7 @@ static int o2net_advance_rx(struct o2net_sock_container *sc)
        size_t datalen;
 
        sclog(sc, "receiving\n");
-       do_gettimeofday(&sc->sc_tv_advance_start);
+       o2net_set_advance_start_time(sc);
 
        if (unlikely(sc->sc_handshake_ok == 0)) {
                if(sc->sc_page_off < sizeof(struct o2net_handshake)) {
@@ -1375,7 +1439,7 @@ static int o2net_advance_rx(struct o2net_sock_container *sc)
 
 out:
        sclog(sc, "ret = %d\n", ret);
-       do_gettimeofday(&sc->sc_tv_advance_stop);
+       o2net_set_advance_stop_time(sc);
        return ret;
 }
 
@@ -1475,27 +1539,28 @@ static void o2net_idle_timer(unsigned long data)
 {
        struct o2net_sock_container *sc = (struct o2net_sock_container *)data;
        struct o2net_node *nn = o2net_nn_from_num(sc->sc_node->nd_num);
-       struct timeval now;
 
-       do_gettimeofday(&now);
+#ifdef CONFIG_DEBUG_FS
+       ktime_t now = ktime_get();
+#endif
 
        printk(KERN_NOTICE "o2net: connection to " SC_NODEF_FMT " has been idle for %u.%u "
             "seconds, shutting it down.\n", SC_NODEF_ARGS(sc),
                     o2net_idle_timeout() / 1000,
                     o2net_idle_timeout() % 1000);
-       mlog(ML_NOTICE, "here are some times that might help debug the "
-            "situation: (tmr %ld.%ld now %ld.%ld dr %ld.%ld adv "
-            "%ld.%ld:%ld.%ld func (%08x:%u) %ld.%ld:%ld.%ld)\n",
-            sc->sc_tv_timer.tv_sec, (long) sc->sc_tv_timer.tv_usec,
-            now.tv_sec, (long) now.tv_usec,
-            sc->sc_tv_data_ready.tv_sec, (long) sc->sc_tv_data_ready.tv_usec,
-            sc->sc_tv_advance_start.tv_sec,
-            (long) sc->sc_tv_advance_start.tv_usec,
-            sc->sc_tv_advance_stop.tv_sec,
-            (long) sc->sc_tv_advance_stop.tv_usec,
+
+#ifdef CONFIG_DEBUG_FS
+       mlog(ML_NOTICE, "Here are some times that might help debug the "
+            "situation: (Timer: %lld, Now %lld, DataReady %lld, Advance %lld-%lld, "
+            "Key 0x%08x, Func %u, FuncTime %lld-%lld)\n",
+            (long long)ktime_to_us(sc->sc_tv_timer), (long long)ktime_to_us(now),
+            (long long)ktime_to_us(sc->sc_tv_data_ready),
+            (long long)ktime_to_us(sc->sc_tv_advance_start),
+            (long long)ktime_to_us(sc->sc_tv_advance_stop),
             sc->sc_msg_key, sc->sc_msg_type,
-            sc->sc_tv_func_start.tv_sec, (long) sc->sc_tv_func_start.tv_usec,
-            sc->sc_tv_func_stop.tv_sec, (long) sc->sc_tv_func_stop.tv_usec);
+            (long long)ktime_to_us(sc->sc_tv_func_start),
+            (long long)ktime_to_us(sc->sc_tv_func_stop));
+#endif
 
        /*
         * Initialize the nn_timeout so that the next connection attempt
@@ -1511,7 +1576,7 @@ static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc)
        o2net_sc_cancel_delayed_work(sc, &sc->sc_keepalive_work);
        o2net_sc_queue_delayed_work(sc, &sc->sc_keepalive_work,
                      msecs_to_jiffies(o2net_keepalive_delay()));
-       do_gettimeofday(&sc->sc_tv_timer);
+       o2net_set_sock_timer(sc);
        mod_timer(&sc->sc_idle_timeout,
               jiffies + msecs_to_jiffies(o2net_idle_timeout()));
 }
index 15fdbdf..4cbcb65 100644 (file)
@@ -166,18 +166,27 @@ struct o2net_sock_container {
        /* original handlers for the sockets */
        void                    (*sc_state_change)(struct sock *sk);
        void                    (*sc_data_ready)(struct sock *sk, int bytes);
-#ifdef CONFIG_DEBUG_FS
-       struct list_head        sc_net_debug_item;
-#endif
-       struct timeval          sc_tv_timer;
-       struct timeval          sc_tv_data_ready;
-       struct timeval          sc_tv_advance_start;
-       struct timeval          sc_tv_advance_stop;
-       struct timeval          sc_tv_func_start;
-       struct timeval          sc_tv_func_stop;
+
        u32                     sc_msg_key;
        u16                     sc_msg_type;
 
+#ifdef CONFIG_DEBUG_FS
+       struct list_head        sc_net_debug_item;
+       ktime_t                 sc_tv_timer;
+       ktime_t                 sc_tv_data_ready;
+       ktime_t                 sc_tv_advance_start;
+       ktime_t                 sc_tv_advance_stop;
+       ktime_t                 sc_tv_func_start;
+       ktime_t                 sc_tv_func_stop;
+#endif
+#ifdef CONFIG_OCFS2_FS_STATS
+       ktime_t                 sc_tv_acquiry_total;
+       ktime_t                 sc_tv_send_total;
+       ktime_t                 sc_tv_status_total;
+       u32                     sc_send_count;
+       u32                     sc_recv_count;
+       ktime_t                 sc_tv_process_total;
+#endif
        struct mutex            sc_send_lock;
 };
 
@@ -220,9 +229,9 @@ struct o2net_send_tracking {
        u32                             st_msg_type;
        u32                             st_msg_key;
        u8                              st_node;
-       struct timeval                  st_sock_time;
-       struct timeval                  st_send_time;
-       struct timeval                  st_status_time;
+       ktime_t                         st_sock_time;
+       ktime_t                         st_send_time;
+       ktime_t                         st_status_time;
 };
 #else
 struct o2net_send_tracking {
index f449991..3a3ed4b 100644 (file)
@@ -90,19 +90,29 @@ static int dlm_should_cancel_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
 
 void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
 {
-       mlog_entry_void();
+       struct dlm_lock_resource *res;
 
        BUG_ON(!dlm);
        BUG_ON(!lock);
 
+       res = lock->lockres;
+
        assert_spin_locked(&dlm->ast_lock);
+
        if (!list_empty(&lock->ast_list)) {
-               mlog(ML_ERROR, "ast list not empty!!  pending=%d, newlevel=%d\n",
+               mlog(ML_ERROR, "%s: res %.*s, lock %u:%llu, "
+                    "AST list not empty, pending %d, newlevel %d\n",
+                    dlm->name, res->lockname.len, res->lockname.name,
+                    dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
+                    dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)),
                     lock->ast_pending, lock->ml.type);
                BUG();
        }
        if (lock->ast_pending)
-               mlog(0, "lock has an ast getting flushed right now\n");
+               mlog(0, "%s: res %.*s, lock %u:%llu, AST getting flushed\n",
+                    dlm->name, res->lockname.len, res->lockname.name,
+                    dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
+                    dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)));
 
        /* putting lock on list, add a ref */
        dlm_lock_get(lock);
@@ -110,9 +120,10 @@ void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
 
        /* check to see if this ast obsoletes the bast */
        if (dlm_should_cancel_bast(dlm, lock)) {
-               struct dlm_lock_resource *res = lock->lockres;
-               mlog(0, "%s: cancelling bast for %.*s\n",
-                    dlm->name, res->lockname.len, res->lockname.name);
+               mlog(0, "%s: res %.*s, lock %u:%llu, Cancelling BAST\n",
+                    dlm->name, res->lockname.len, res->lockname.name,
+                    dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
+                    dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)));
                lock->bast_pending = 0;
                list_del_init(&lock->bast_list);
                lock->ml.highest_blocked = LKM_IVMODE;
@@ -134,8 +145,6 @@ void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
 
 void dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
 {
-       mlog_entry_void();
-
        BUG_ON(!dlm);
        BUG_ON(!lock);
 
@@ -147,15 +156,21 @@ void dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
 
 void __dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
 {
-       mlog_entry_void();
+       struct dlm_lock_resource *res;
 
        BUG_ON(!dlm);
        BUG_ON(!lock);
+
        assert_spin_locked(&dlm->ast_lock);
 
+       res = lock->lockres;
+
        BUG_ON(!list_empty(&lock->bast_list));
        if (lock->bast_pending)
-               mlog(0, "lock has a bast getting flushed right now\n");
+               mlog(0, "%s: res %.*s, lock %u:%llu, BAST getting flushed\n",
+                    dlm->name, res->lockname.len, res->lockname.name,
+                    dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
+                    dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)));
 
        /* putting lock on list, add a ref */
        dlm_lock_get(lock);
@@ -167,8 +182,6 @@ void __dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
 
 void dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
 {
-       mlog_entry_void();
-
        BUG_ON(!dlm);
        BUG_ON(!lock);
 
@@ -213,7 +226,10 @@ void dlm_do_local_ast(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
        dlm_astlockfunc_t *fn;
        struct dlm_lockstatus *lksb;
 
-       mlog_entry_void();
+       mlog(0, "%s: res %.*s, lock %u:%llu, Local AST\n", dlm->name,
+            res->lockname.len, res->lockname.name,
+            dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
+            dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)));
 
        lksb = lock->lksb;
        fn = lock->ast;
@@ -231,7 +247,10 @@ int dlm_do_remote_ast(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
        struct dlm_lockstatus *lksb;
        int lksbflags;
 
-       mlog_entry_void();
+       mlog(0, "%s: res %.*s, lock %u:%llu, Remote AST\n", dlm->name,
+            res->lockname.len, res->lockname.name,
+            dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
+            dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)));
 
        lksb = lock->lksb;
        BUG_ON(lock->ml.node == dlm->node_num);
@@ -250,9 +269,14 @@ void dlm_do_local_bast(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
 {
        dlm_bastlockfunc_t *fn = lock->bast;
 
-       mlog_entry_void();
        BUG_ON(lock->ml.node != dlm->node_num);
 
+       mlog(0, "%s: res %.*s, lock %u:%llu, Local BAST, blocked %d\n",
+            dlm->name, res->lockname.len, res->lockname.name,
+            dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
+            dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)),
+            blocked_type);
+
        (*fn)(lock->astdata, blocked_type);
 }
 
@@ -332,7 +356,8 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data,
        /* cannot get a proxy ast message if this node owns it */
        BUG_ON(res->owner == dlm->node_num);
 
-       mlog(0, "lockres %.*s\n", res->lockname.len, res->lockname.name);
+       mlog(0, "%s: res %.*s\n", dlm->name, res->lockname.len,
+            res->lockname.name);
 
        spin_lock(&res->spinlock);
        if (res->state & DLM_LOCK_RES_RECOVERING) {
@@ -382,8 +407,12 @@ do_ast:
        if (past->type == DLM_AST) {
                /* do not alter lock refcount.  switching lists. */
                list_move_tail(&lock->list, &res->granted);
-               mlog(0, "ast: Adding to granted list... type=%d, "
-                    "convert_type=%d\n", lock->ml.type, lock->ml.convert_type);
+               mlog(0, "%s: res %.*s, lock %u:%llu, Granted type %d => %d\n",
+                    dlm->name, res->lockname.len, res->lockname.name,
+                    dlm_get_lock_cookie_node(be64_to_cpu(cookie)),
+                    dlm_get_lock_cookie_seq(be64_to_cpu(cookie)),
+                    lock->ml.type, lock->ml.convert_type);
+
                if (lock->ml.convert_type != LKM_IVMODE) {
                        lock->ml.type = lock->ml.convert_type;
                        lock->ml.convert_type = LKM_IVMODE;
@@ -426,9 +455,9 @@ int dlm_send_proxy_ast_msg(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
        size_t veclen = 1;
        int status;
 
-       mlog_entry("res %.*s, to=%u, type=%d, blocked_type=%d\n",
-                  res->lockname.len, res->lockname.name, lock->ml.node,
-                  msg_type, blocked_type);
+       mlog(0, "%s: res %.*s, to %u, type %d, blocked_type %d\n", dlm->name,
+            res->lockname.len, res->lockname.name, lock->ml.node, msg_type,
+            blocked_type);
 
        memset(&past, 0, sizeof(struct dlm_proxy_ast));
        past.node_idx = dlm->node_num;
@@ -441,7 +470,6 @@ int dlm_send_proxy_ast_msg(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
        vec[0].iov_len = sizeof(struct dlm_proxy_ast);
        vec[0].iov_base = &past;
        if (flags & DLM_LKSB_GET_LVB) {
-               mlog(0, "returning requested LVB data\n");
                be32_add_cpu(&past.flags, LKM_GET_LVB);
                vec[1].iov_len = DLM_LVB_LEN;
                vec[1].iov_base = lock->lksb->lvb;
@@ -451,8 +479,8 @@ int dlm_send_proxy_ast_msg(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
        ret = o2net_send_message_vec(DLM_PROXY_AST_MSG, dlm->key, vec, veclen,
                                     lock->ml.node, &status);
        if (ret < 0)
-               mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to "
-                    "node %u\n", ret, DLM_PROXY_AST_MSG, dlm->key,
+               mlog(ML_ERROR, "%s: res %.*s, error %d send AST to node %u\n",
+                    dlm->name, res->lockname.len, res->lockname.name, ret,
                     lock->ml.node);
        else {
                if (status == DLM_RECOVERING) {
index b36d0bf..4bdf7ba 100644 (file)
 #define dlm_lockid_hash(_n, _l) full_name_hash(_n, _l)
 
 enum dlm_mle_type {
-       DLM_MLE_BLOCK,
-       DLM_MLE_MASTER,
-       DLM_MLE_MIGRATION,
-       DLM_MLE_NUM_TYPES
+       DLM_MLE_BLOCK = 0,      /* explicit values; identical to the implicit numbering they replace */
+       DLM_MLE_MASTER = 1,
+       DLM_MLE_MIGRATION = 2,
+       DLM_MLE_NUM_TYPES = 3,  /* equals the number of types listed above */
 };
 
 struct dlm_master_list_entry {
@@ -82,8 +82,8 @@ struct dlm_master_list_entry {
 
 enum dlm_ast_type {
        DLM_AST = 0,
-       DLM_BAST,
-       DLM_ASTUNLOCK
+       DLM_BAST = 1,
+       DLM_ASTUNLOCK = 2,
 };
 
 
@@ -119,9 +119,9 @@ struct dlm_recovery_ctxt
 
 enum dlm_ctxt_state {
        DLM_CTXT_NEW = 0,
-       DLM_CTXT_JOINED,
-       DLM_CTXT_IN_SHUTDOWN,
-       DLM_CTXT_LEAVING,
+       DLM_CTXT_JOINED = 1,
+       DLM_CTXT_IN_SHUTDOWN = 2,
+       DLM_CTXT_LEAVING = 3,
 };
 
 struct dlm_ctxt
@@ -388,8 +388,8 @@ struct dlm_lock
 
 enum dlm_lockres_list {
        DLM_GRANTED_LIST = 0,
-       DLM_CONVERTING_LIST,
-       DLM_BLOCKED_LIST
+       DLM_CONVERTING_LIST = 1,
+       DLM_BLOCKED_LIST = 2,
 };
 
 static inline int dlm_lvb_is_empty(char *lvb)
@@ -427,27 +427,27 @@ struct dlm_node_iter
 
 
 enum {
-       DLM_MASTER_REQUEST_MSG    = 500,
-       DLM_UNUSED_MSG1,         /* 501 */
-       DLM_ASSERT_MASTER_MSG,   /* 502 */
-       DLM_CREATE_LOCK_MSG,     /* 503 */
-       DLM_CONVERT_LOCK_MSG,    /* 504 */
-       DLM_PROXY_AST_MSG,       /* 505 */
-       DLM_UNLOCK_LOCK_MSG,     /* 506 */
-       DLM_DEREF_LOCKRES_MSG,   /* 507 */
-       DLM_MIGRATE_REQUEST_MSG, /* 508 */
-       DLM_MIG_LOCKRES_MSG,     /* 509 */
-       DLM_QUERY_JOIN_MSG,      /* 510 */
-       DLM_ASSERT_JOINED_MSG,   /* 511 */
-       DLM_CANCEL_JOIN_MSG,     /* 512 */
-       DLM_EXIT_DOMAIN_MSG,     /* 513 */
-       DLM_MASTER_REQUERY_MSG,  /* 514 */
-       DLM_LOCK_REQUEST_MSG,    /* 515 */
-       DLM_RECO_DATA_DONE_MSG,  /* 516 */
-       DLM_BEGIN_RECO_MSG,      /* 517 */
-       DLM_FINALIZE_RECO_MSG,   /* 518 */
-       DLM_QUERY_REGION,        /* 519 */
-       DLM_QUERY_NODEINFO,      /* 520 */
+       DLM_MASTER_REQUEST_MSG          = 500,
+       DLM_UNUSED_MSG1                 = 501,
+       DLM_ASSERT_MASTER_MSG           = 502,
+       DLM_CREATE_LOCK_MSG             = 503,
+       DLM_CONVERT_LOCK_MSG            = 504,
+       DLM_PROXY_AST_MSG               = 505,
+       DLM_UNLOCK_LOCK_MSG             = 506,
+       DLM_DEREF_LOCKRES_MSG           = 507,
+       DLM_MIGRATE_REQUEST_MSG         = 508,
+       DLM_MIG_LOCKRES_MSG             = 509,
+       DLM_QUERY_JOIN_MSG              = 510,
+       DLM_ASSERT_JOINED_MSG           = 511,
+       DLM_CANCEL_JOIN_MSG             = 512,
+       DLM_EXIT_DOMAIN_MSG             = 513,
+       DLM_MASTER_REQUERY_MSG          = 514,
+       DLM_LOCK_REQUEST_MSG            = 515,
+       DLM_RECO_DATA_DONE_MSG          = 516,
+       DLM_BEGIN_RECO_MSG              = 517,
+       DLM_FINALIZE_RECO_MSG           = 518,
+       DLM_QUERY_REGION                = 519,
+       DLM_QUERY_NODEINFO              = 520,
 };
 
 struct dlm_reco_node_data
@@ -460,19 +460,19 @@ struct dlm_reco_node_data
 enum {
        DLM_RECO_NODE_DATA_DEAD = -1,
        DLM_RECO_NODE_DATA_INIT = 0,
-       DLM_RECO_NODE_DATA_REQUESTING,
-       DLM_RECO_NODE_DATA_REQUESTED,
-       DLM_RECO_NODE_DATA_RECEIVING,
-       DLM_RECO_NODE_DATA_DONE,
-       DLM_RECO_NODE_DATA_FINALIZE_SENT,
+       DLM_RECO_NODE_DATA_REQUESTING = 1,
+       DLM_RECO_NODE_DATA_REQUESTED = 2,
+       DLM_RECO_NODE_DATA_RECEIVING = 3,
+       DLM_RECO_NODE_DATA_DONE = 4,
+       DLM_RECO_NODE_DATA_FINALIZE_SENT = 5,
 };
 
 
 enum {
        DLM_MASTER_RESP_NO = 0,
-       DLM_MASTER_RESP_YES,
-       DLM_MASTER_RESP_MAYBE,
-       DLM_MASTER_RESP_ERROR
+       DLM_MASTER_RESP_YES = 1,
+       DLM_MASTER_RESP_MAYBE = 2,
+       DLM_MASTER_RESP_ERROR = 3,
 };
 
 
@@ -649,9 +649,9 @@ struct dlm_proxy_ast
 #define DLM_MOD_KEY (0x666c6172)
 enum dlm_query_join_response_code {
        JOIN_DISALLOW = 0,
-       JOIN_OK,
-       JOIN_OK_NO_MAP,
-       JOIN_PROTOCOL_MISMATCH,
+       JOIN_OK = 1,
+       JOIN_OK_NO_MAP = 2,
+       JOIN_PROTOCOL_MISMATCH = 3,
 };
 
 struct dlm_query_join_packet {
index 272ec86..04a32be 100644 (file)
@@ -370,92 +370,46 @@ static void dlm_debug_get(struct dlm_debug_ctxt *dc)
        kref_get(&dc->debug_refcnt);
 }
 
-static struct debug_buffer *debug_buffer_allocate(void)
+static int debug_release(struct inode *inode, struct file *file)
 {
-       struct debug_buffer *db = NULL;
-
-       db = kzalloc(sizeof(struct debug_buffer), GFP_KERNEL);
-       if (!db)
-               goto bail;
-
-       db->len = PAGE_SIZE;
-       db->buf = kmalloc(db->len, GFP_KERNEL);
-       if (!db->buf)
-               goto bail;
-
-       return db;
-bail:
-       kfree(db);
-       return NULL;
-}
-
-static ssize_t debug_buffer_read(struct file *file, char __user *buf,
-                                size_t nbytes, loff_t *ppos)
-{
-       struct debug_buffer *db = file->private_data;
-
-       return simple_read_from_buffer(buf, nbytes, ppos, db->buf, db->len);
-}
-
-static loff_t debug_buffer_llseek(struct file *file, loff_t off, int whence)
-{
-       struct debug_buffer *db = file->private_data;
-       loff_t new = -1;
-
-       switch (whence) {
-       case 0:
-               new = off;
-               break;
-       case 1:
-               new = file->f_pos + off;
-               break;
-       }
-
-       if (new < 0 || new > db->len)
-               return -EINVAL;
-
-       return (file->f_pos = new);
+       free_page((unsigned long)file->private_data);
+       return 0;
 }
 
-static int debug_buffer_release(struct inode *inode, struct file *file)
+static ssize_t debug_read(struct file *file, char __user *buf,
+                         size_t nbytes, loff_t *ppos)
 {
-       struct debug_buffer *db = file->private_data;
-
-       if (db)
-               kfree(db->buf);
-       kfree(db);
-
-       return 0;
+       return simple_read_from_buffer(buf, nbytes, ppos, file->private_data,
+                                      i_size_read(file->f_mapping->host));
 }
 /* end - util funcs */
 
 /* begin - purge list funcs */
-static int debug_purgelist_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
+static int debug_purgelist_print(struct dlm_ctxt *dlm, char *buf, int len)
 {
        struct dlm_lock_resource *res;
        int out = 0;
        unsigned long total = 0;
 
-       out += snprintf(db->buf + out, db->len - out,
+       out += snprintf(buf + out, len - out,
                        "Dumping Purgelist for Domain: %s\n", dlm->name);
 
        spin_lock(&dlm->spinlock);
        list_for_each_entry(res, &dlm->purge_list, purge) {
                ++total;
-               if (db->len - out < 100)
+               if (len - out < 100)
                        continue;
                spin_lock(&res->spinlock);
                out += stringify_lockname(res->lockname.name,
                                          res->lockname.len,
-                                         db->buf + out, db->len - out);
-               out += snprintf(db->buf + out, db->len - out, "\t%ld\n",
+                                         buf + out, len - out);
+               out += snprintf(buf + out, len - out, "\t%ld\n",
                                (jiffies - res->last_used)/HZ);
                spin_unlock(&res->spinlock);
        }
        spin_unlock(&dlm->spinlock);
 
-       out += snprintf(db->buf + out, db->len - out,
-                       "Total on list: %ld\n", total);
+       out += snprintf(buf + out, len - out, "Total on list: %ld\n", total);
 
        return out;
 }
@@ -463,15 +417,15 @@ static int debug_purgelist_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
 static int debug_purgelist_open(struct inode *inode, struct file *file)
 {
        struct dlm_ctxt *dlm = inode->i_private;
-       struct debug_buffer *db;
+       char *buf = NULL;
 
-       db = debug_buffer_allocate();
-       if (!db)
+       buf = (char *) get_zeroed_page(GFP_NOFS);
+       if (!buf)
                goto bail;
 
-       db->len = debug_purgelist_print(dlm, db);
+       i_size_write(inode, debug_purgelist_print(dlm, buf, PAGE_SIZE - 1));
 
-       file->private_data = db;
+       file->private_data = buf;
 
        return 0;
 bail:
@@ -480,14 +434,14 @@ bail:
 
 static const struct file_operations debug_purgelist_fops = {
        .open =         debug_purgelist_open,
-       .release =      debug_buffer_release,
-       .read =         debug_buffer_read,
-       .llseek =       debug_buffer_llseek,
+       .release =      debug_release,
+       .read =         debug_read,
+       .llseek =       generic_file_llseek,
 };
 /* end - purge list funcs */
 
 /* begin - debug mle funcs */
-static int debug_mle_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
+static int debug_mle_print(struct dlm_ctxt *dlm, char *buf, int len)
 {
        struct dlm_master_list_entry *mle;
        struct hlist_head *bucket;
@@ -495,7 +449,7 @@ static int debug_mle_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
        int i, out = 0;
        unsigned long total = 0, longest = 0, bucket_count = 0;
 
-       out += snprintf(db->buf + out, db->len - out,
+       out += snprintf(buf + out, len - out,
                        "Dumping MLEs for Domain: %s\n", dlm->name);
 
        spin_lock(&dlm->master_lock);
@@ -506,16 +460,16 @@ static int debug_mle_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
                                          master_hash_node);
                        ++total;
                        ++bucket_count;
-                       if (db->len - out < 200)
+                       if (len - out < 200)
                                continue;
-                       out += dump_mle(mle, db->buf + out, db->len - out);
+                       out += dump_mle(mle, buf + out, len - out);
                }
                longest = max(longest, bucket_count);
                bucket_count = 0;
        }
        spin_unlock(&dlm->master_lock);
 
-       out += snprintf(db->buf + out, db->len - out,
+       out += snprintf(buf + out, len - out,
                        "Total: %ld, Longest: %ld\n", total, longest);
        return out;
 }
@@ -523,15 +477,15 @@ static int debug_mle_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
 static int debug_mle_open(struct inode *inode, struct file *file)
 {
        struct dlm_ctxt *dlm = inode->i_private;
-       struct debug_buffer *db;
+       char *buf = NULL;
 
-       db = debug_buffer_allocate();
-       if (!db)
+       buf = (char *) get_zeroed_page(GFP_NOFS);
+       if (!buf)
                goto bail;
 
-       db->len = debug_mle_print(dlm, db);
+       i_size_write(inode, debug_mle_print(dlm, buf, PAGE_SIZE - 1));
 
-       file->private_data = db;
+       file->private_data = buf;
 
        return 0;
 bail:
@@ -540,9 +494,9 @@ bail:
 
 static const struct file_operations debug_mle_fops = {
        .open =         debug_mle_open,
-       .release =      debug_buffer_release,
-       .read =         debug_buffer_read,
-       .llseek =       debug_buffer_llseek,
+       .release =      debug_release,
+       .read =         debug_read,
+       .llseek =       generic_file_llseek,
 };
 
 /* end - debug mle funcs */
@@ -757,7 +711,7 @@ static const struct file_operations debug_lockres_fops = {
 /* end - debug lockres funcs */
 
 /* begin - debug state funcs */
-static int debug_state_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
+static int debug_state_print(struct dlm_ctxt *dlm, char *buf, int len)
 {
        int out = 0;
        struct dlm_reco_node_data *node;
@@ -781,35 +735,35 @@ static int debug_state_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
        }
 
        /* Domain: xxxxxxxxxx  Key: 0xdfbac769 */
-       out += snprintf(db->buf + out, db->len - out,
+       out += snprintf(buf + out, len - out,
                        "Domain: %s  Key: 0x%08x  Protocol: %d.%d\n",
                        dlm->name, dlm->key, dlm->dlm_locking_proto.pv_major,
                        dlm->dlm_locking_proto.pv_minor);
 
        /* Thread Pid: xxx  Node: xxx  State: xxxxx */
-       out += snprintf(db->buf + out, db->len - out,
+       out += snprintf(buf + out, len - out,
                        "Thread Pid: %d  Node: %d  State: %s\n",
-                       dlm->dlm_thread_task->pid, dlm->node_num, state);
+                       task_pid_nr(dlm->dlm_thread_task), dlm->node_num, state);
 
        /* Number of Joins: xxx  Joining Node: xxx */
-       out += snprintf(db->buf + out, db->len - out,
+       out += snprintf(buf + out, len - out,
                        "Number of Joins: %d  Joining Node: %d\n",
                        dlm->num_joins, dlm->joining_node);
 
        /* Domain Map: xx xx xx */
-       out += snprintf(db->buf + out, db->len - out, "Domain Map: ");
+       out += snprintf(buf + out, len - out, "Domain Map: ");
        out += stringify_nodemap(dlm->domain_map, O2NM_MAX_NODES,
-                                db->buf + out, db->len - out);
-       out += snprintf(db->buf + out, db->len - out, "\n");
+                                buf + out, len - out);
+       out += snprintf(buf + out, len - out, "\n");
 
        /* Live Map: xx xx xx */
-       out += snprintf(db->buf + out, db->len - out, "Live Map: ");
+       out += snprintf(buf + out, len - out, "Live Map: ");
        out += stringify_nodemap(dlm->live_nodes_map, O2NM_MAX_NODES,
-                                db->buf + out, db->len - out);
-       out += snprintf(db->buf + out, db->len - out, "\n");
+                                buf + out, len - out);
+       out += snprintf(buf + out, len - out, "\n");
 
        /* Lock Resources: xxx (xxx) */
-       out += snprintf(db->buf + out, db->len - out,
+       out += snprintf(buf + out, len - out,
                        "Lock Resources: %d (%d)\n",
                        atomic_read(&dlm->res_cur_count),
                        atomic_read(&dlm->res_tot_count));
@@ -821,29 +775,29 @@ static int debug_state_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
                cur_mles += atomic_read(&dlm->mle_cur_count[i]);
 
        /* MLEs: xxx (xxx) */
-       out += snprintf(db->buf + out, db->len - out,
+       out += snprintf(buf + out, len - out,
                        "MLEs: %d (%d)\n", cur_mles, tot_mles);
 
        /*  Blocking: xxx (xxx) */
-       out += snprintf(db->buf + out, db->len - out,
+       out += snprintf(buf + out, len - out,
                        "  Blocking: %d (%d)\n",
                        atomic_read(&dlm->mle_cur_count[DLM_MLE_BLOCK]),
                        atomic_read(&dlm->mle_tot_count[DLM_MLE_BLOCK]));
 
        /*  Mastery: xxx (xxx) */
-       out += snprintf(db->buf + out, db->len - out,
+       out += snprintf(buf + out, len - out,
                        "  Mastery: %d (%d)\n",
                        atomic_read(&dlm->mle_cur_count[DLM_MLE_MASTER]),
                        atomic_read(&dlm->mle_tot_count[DLM_MLE_MASTER]));
 
        /*  Migration: xxx (xxx) */
-       out += snprintf(db->buf + out, db->len - out,
+       out += snprintf(buf + out, len - out,
                        "  Migration: %d (%d)\n",
                        atomic_read(&dlm->mle_cur_count[DLM_MLE_MIGRATION]),
                        atomic_read(&dlm->mle_tot_count[DLM_MLE_MIGRATION]));
 
        /* Lists: Dirty=Empty  Purge=InUse  PendingASTs=Empty  ... */
-       out += snprintf(db->buf + out, db->len - out,
+       out += snprintf(buf + out, len - out,
                        "Lists: Dirty=%s  Purge=%s  PendingASTs=%s  "
                        "PendingBASTs=%s\n",
                        (list_empty(&dlm->dirty_list) ? "Empty" : "InUse"),
@@ -852,12 +806,12 @@ static int debug_state_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
                        (list_empty(&dlm->pending_basts) ? "Empty" : "InUse"));
 
        /* Purge Count: xxx  Refs: xxx */
-       out += snprintf(db->buf + out, db->len - out,
+       out += snprintf(buf + out, len - out,
                        "Purge Count: %d  Refs: %d\n", dlm->purge_count,
                        atomic_read(&dlm->dlm_refs.refcount));
 
        /* Dead Node: xxx */
-       out += snprintf(db->buf + out, db->len - out,
+       out += snprintf(buf + out, len - out,
                        "Dead Node: %d\n", dlm->reco.dead_node);
 
        /* What about DLM_RECO_STATE_FINALIZE? */
@@ -867,19 +821,19 @@ static int debug_state_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
                state = "INACTIVE";
 
        /* Recovery Pid: xxxx  Master: xxx  State: xxxx */
-       out += snprintf(db->buf + out, db->len - out,
+       out += snprintf(buf + out, len - out,
                        "Recovery Pid: %d  Master: %d  State: %s\n",
-                       dlm->dlm_reco_thread_task->pid,
+                       task_pid_nr(dlm->dlm_reco_thread_task),
                        dlm->reco.new_master, state);
 
        /* Recovery Map: xx xx */
-       out += snprintf(db->buf + out, db->len - out, "Recovery Map: ");
+       out += snprintf(buf + out, len - out, "Recovery Map: ");
        out += stringify_nodemap(dlm->recovery_map, O2NM_MAX_NODES,
-                                db->buf + out, db->len - out);
-       out += snprintf(db->buf + out, db->len - out, "\n");
+                                buf + out, len - out);
+       out += snprintf(buf + out, len - out, "\n");
 
        /* Recovery Node State: */
-       out += snprintf(db->buf + out, db->len - out, "Recovery Node State:\n");
+       out += snprintf(buf + out, len - out, "Recovery Node State:\n");
        list_for_each_entry(node, &dlm->reco.node_data, list) {
                switch (node->state) {
                case DLM_RECO_NODE_DATA_INIT:
@@ -907,7 +861,7 @@ static int debug_state_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
                        state = "BAD";
                        break;
                }
-               out += snprintf(db->buf + out, db->len - out, "\t%u - %s\n",
+               out += snprintf(buf + out, len - out, "\t%u - %s\n",
                                node->node_num, state);
        }
 
@@ -919,15 +873,15 @@ static int debug_state_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
 static int debug_state_open(struct inode *inode, struct file *file)
 {
        struct dlm_ctxt *dlm = inode->i_private;
-       struct debug_buffer *db = NULL;
+       char *buf = NULL;
 
-       db = debug_buffer_allocate();
-       if (!db)
+       buf = (char *) get_zeroed_page(GFP_NOFS);
+       if (!buf)
                goto bail;
 
-       db->len = debug_state_print(dlm, db);
+       i_size_write(inode, debug_state_print(dlm, buf, PAGE_SIZE - 1));
 
-       file->private_data = db;
+       file->private_data = buf;
 
        return 0;
 bail:
@@ -936,9 +890,9 @@ bail:
 
 static const struct file_operations debug_state_fops = {
        .open =         debug_state_open,
-       .release =      debug_buffer_release,
-       .read =         debug_buffer_read,
-       .llseek =       debug_buffer_llseek,
+       .release =      debug_release,
+       .read =         debug_read,
+       .llseek =       generic_file_llseek,
 };
 /* end  - debug state funcs */
 
@@ -1002,14 +956,10 @@ void dlm_debug_shutdown(struct dlm_ctxt *dlm)
        struct dlm_debug_ctxt *dc = dlm->dlm_debug_ctxt;
 
        if (dc) {
-               if (dc->debug_purgelist_dentry)
-                       debugfs_remove(dc->debug_purgelist_dentry);
-               if (dc->debug_mle_dentry)
-                       debugfs_remove(dc->debug_mle_dentry);
-               if (dc->debug_lockres_dentry)
-                       debugfs_remove(dc->debug_lockres_dentry);
-               if (dc->debug_state_dentry)
-                       debugfs_remove(dc->debug_state_dentry);
+               debugfs_remove(dc->debug_purgelist_dentry);
+               debugfs_remove(dc->debug_mle_dentry);
+               debugfs_remove(dc->debug_lockres_dentry);
+               debugfs_remove(dc->debug_state_dentry);
                dlm_debug_put(dc);
        }
 }
@@ -1040,8 +990,7 @@ bail:
 
 void dlm_destroy_debugfs_subroot(struct dlm_ctxt *dlm)
 {
-       if (dlm->dlm_debugfs_subroot)
-               debugfs_remove(dlm->dlm_debugfs_subroot);
+       debugfs_remove(dlm->dlm_debugfs_subroot);
 }
 
 /* debugfs root */
@@ -1057,7 +1006,6 @@ int dlm_create_debugfs_root(void)
 
 void dlm_destroy_debugfs_root(void)
 {
-       if (dlm_debugfs_root)
-               debugfs_remove(dlm_debugfs_root);
+       debugfs_remove(dlm_debugfs_root);
 }
 #endif /* CONFIG_DEBUG_FS */
index 8c686d2..1f27c48 100644 (file)
@@ -37,11 +37,6 @@ struct dlm_debug_ctxt {
        struct dentry *debug_purgelist_dentry;
 };
 
-struct debug_buffer {
-       int len;
-       char *buf;
-};
-
 struct debug_lockres {
        int dl_len;
        char *dl_buf;
index cc2aaa9..7e38a07 100644 (file)
@@ -460,8 +460,6 @@ redo_bucket:
                }
                cond_resched_lock(&dlm->spinlock);
                num += n;
-               mlog(0, "%s: touched %d lockreses in bucket %d "
-                    "(tot=%d)\n", dlm->name, n, i, num);
        }
        spin_unlock(&dlm->spinlock);
        wake_up(&dlm->dlm_thread_wq);
@@ -1661,8 +1659,8 @@ bail:
 
 static void dlm_unregister_domain_handlers(struct dlm_ctxt *dlm)
 {
-       o2hb_unregister_callback(NULL, &dlm->dlm_hb_up);
-       o2hb_unregister_callback(NULL, &dlm->dlm_hb_down);
+       o2hb_unregister_callback(dlm->name, &dlm->dlm_hb_up);
+       o2hb_unregister_callback(dlm->name, &dlm->dlm_hb_down);
        o2net_unregister_handler_list(&dlm->dlm_domain_handlers);
 }
 
@@ -1674,13 +1672,13 @@ static int dlm_register_domain_handlers(struct dlm_ctxt *dlm)
 
        o2hb_setup_callback(&dlm->dlm_hb_down, O2HB_NODE_DOWN_CB,
                            dlm_hb_node_down_cb, dlm, DLM_HB_NODE_DOWN_PRI);
-       status = o2hb_register_callback(NULL, &dlm->dlm_hb_down);
+       status = o2hb_register_callback(dlm->name, &dlm->dlm_hb_down);
        if (status)
                goto bail;
 
        o2hb_setup_callback(&dlm->dlm_hb_up, O2HB_NODE_UP_CB,
                            dlm_hb_node_up_cb, dlm, DLM_HB_NODE_UP_PRI);
-       status = o2hb_register_callback(NULL, &dlm->dlm_hb_up);
+       status = o2hb_register_callback(dlm->name, &dlm->dlm_hb_up);
        if (status)
                goto bail;
 
index 69cf369..7009292 100644 (file)
@@ -106,6 +106,9 @@ static int dlm_can_grant_new_lock(struct dlm_lock_resource *res,
 
                if (!dlm_lock_compatible(tmplock->ml.type, lock->ml.type))
                        return 0;
+               if (!dlm_lock_compatible(tmplock->ml.convert_type,
+                                        lock->ml.type))
+                       return 0;
        }
 
        return 1;
index 2211acf..1d6d1d2 100644 (file)
@@ -122,15 +122,13 @@ int __dlm_lockres_unused(struct dlm_lock_resource *res)
 void __dlm_lockres_calc_usage(struct dlm_ctxt *dlm,
                              struct dlm_lock_resource *res)
 {
-       mlog_entry("%.*s\n", res->lockname.len, res->lockname.name);
-
        assert_spin_locked(&dlm->spinlock);
        assert_spin_locked(&res->spinlock);
 
        if (__dlm_lockres_unused(res)){
                if (list_empty(&res->purge)) {
-                       mlog(0, "putting lockres %.*s:%p onto purge list\n",
-                            res->lockname.len, res->lockname.name, res);
+                       mlog(0, "%s: Adding res %.*s to purge list\n",
+                            dlm->name, res->lockname.len, res->lockname.name);
 
                        res->last_used = jiffies;
                        dlm_lockres_get(res);
@@ -138,8 +136,8 @@ void __dlm_lockres_calc_usage(struct dlm_ctxt *dlm,
                        dlm->purge_count++;
                }
        } else if (!list_empty(&res->purge)) {
-               mlog(0, "removing lockres %.*s:%p from purge list, owner=%u\n",
-                    res->lockname.len, res->lockname.name, res, res->owner);
+               mlog(0, "%s: Removing res %.*s from purge list\n",
+                    dlm->name, res->lockname.len, res->lockname.name);
 
                list_del_init(&res->purge);
                dlm_lockres_put(res);
@@ -150,7 +148,6 @@ void __dlm_lockres_calc_usage(struct dlm_ctxt *dlm,
 void dlm_lockres_calc_usage(struct dlm_ctxt *dlm,
                            struct dlm_lock_resource *res)
 {
-       mlog_entry("%.*s\n", res->lockname.len, res->lockname.name);
        spin_lock(&dlm->spinlock);
        spin_lock(&res->spinlock);
 
@@ -171,9 +168,8 @@ static void dlm_purge_lockres(struct dlm_ctxt *dlm,
 
        master = (res->owner == dlm->node_num);
 
-
-       mlog(0, "purging lockres %.*s, master = %d\n", res->lockname.len,
-            res->lockname.name, master);
+       mlog(0, "%s: Purging res %.*s, master %d\n", dlm->name,
+            res->lockname.len, res->lockname.name, master);
 
        if (!master) {
                res->state |= DLM_LOCK_RES_DROPPING_REF;
@@ -189,27 +185,25 @@ static void dlm_purge_lockres(struct dlm_ctxt *dlm,
                /* clear our bit from the master's refmap, ignore errors */
                ret = dlm_drop_lockres_ref(dlm, res);
                if (ret < 0) {
-                       mlog_errno(ret);
+                       mlog(ML_ERROR, "%s: deref %.*s failed %d\n", dlm->name,
+                            res->lockname.len, res->lockname.name, ret);
                        if (!dlm_is_host_down(ret))
                                BUG();
                }
-               mlog(0, "%s:%.*s: dlm_deref_lockres returned %d\n",
-                    dlm->name, res->lockname.len, res->lockname.name, ret);
                spin_lock(&dlm->spinlock);
                spin_lock(&res->spinlock);
        }
 
        if (!list_empty(&res->purge)) {
-               mlog(0, "removing lockres %.*s:%p from purgelist, "
-                    "master = %d\n", res->lockname.len, res->lockname.name,
-                    res, master);
+               mlog(0, "%s: Removing res %.*s from purgelist, master %d\n",
+                    dlm->name, res->lockname.len, res->lockname.name, master);
                list_del_init(&res->purge);
                dlm_lockres_put(res);
                dlm->purge_count--;
        }
 
        if (!__dlm_lockres_unused(res)) {
-               mlog(ML_ERROR, "found lockres %s:%.*s: in use after deref\n",
+               mlog(ML_ERROR, "%s: res %.*s in use after deref\n",
                     dlm->name, res->lockname.len, res->lockname.name);
                __dlm_print_one_lock_resource(res);
                BUG();
@@ -266,10 +260,10 @@ static void dlm_run_purge_list(struct dlm_ctxt *dlm,
                unused = __dlm_lockres_unused(lockres);
                if (!unused ||
                    (lockres->state & DLM_LOCK_RES_MIGRATING)) {
-                       mlog(0, "lockres %s:%.*s: is in use or "
-                            "being remastered, used %d, state %d\n",
-                            dlm->name, lockres->lockname.len,
-                            lockres->lockname.name, !unused, lockres->state);
+                       mlog(0, "%s: res %.*s is in use or being remastered, "
+                            "used %d, state %d\n", dlm->name,
+                            lockres->lockname.len, lockres->lockname.name,
+                            !unused, lockres->state);
                        list_move_tail(&dlm->purge_list, &lockres->purge);
                        spin_unlock(&lockres->spinlock);
                        continue;
@@ -296,15 +290,12 @@ static void dlm_shuffle_lists(struct dlm_ctxt *dlm,
        struct list_head *head;
        int can_grant = 1;
 
-       //mlog(0, "res->lockname.len=%d\n", res->lockname.len);
-       //mlog(0, "res->lockname.name=%p\n", res->lockname.name);
-       //mlog(0, "shuffle res %.*s\n", res->lockname.len,
-       //        res->lockname.name);
-
-       /* because this function is called with the lockres
+       /*
+        * Because this function is called with the lockres
         * spinlock, and because we know that it is not migrating/
         * recovering/in-progress, it is fine to reserve asts and
-        * basts right before queueing them all throughout */
+        * basts right before queueing them all throughout
+        */
        assert_spin_locked(&dlm->ast_lock);
        assert_spin_locked(&res->spinlock);
        BUG_ON((res->state & (DLM_LOCK_RES_MIGRATING|
@@ -314,13 +305,13 @@ static void dlm_shuffle_lists(struct dlm_ctxt *dlm,
 converting:
        if (list_empty(&res->converting))
                goto blocked;
-       mlog(0, "res %.*s has locks on a convert queue\n", res->lockname.len,
-            res->lockname.name);
+       mlog(0, "%s: res %.*s has locks on the convert queue\n", dlm->name,
+            res->lockname.len, res->lockname.name);
 
        target = list_entry(res->converting.next, struct dlm_lock, list);
        if (target->ml.convert_type == LKM_IVMODE) {
-               mlog(ML_ERROR, "%.*s: converting a lock with no "
-                    "convert_type!\n", res->lockname.len, res->lockname.name);
+               mlog(ML_ERROR, "%s: res %.*s converting lock to invalid mode\n",
+                    dlm->name, res->lockname.len, res->lockname.name);
                BUG();
        }
        head = &res->granted;
@@ -365,9 +356,12 @@ converting:
                spin_lock(&target->spinlock);
                BUG_ON(target->ml.highest_blocked != LKM_IVMODE);
 
-               mlog(0, "calling ast for converting lock: %.*s, have: %d, "
-                    "granting: %d, node: %u\n", res->lockname.len,
-                    res->lockname.name, target->ml.type,
+               mlog(0, "%s: res %.*s, AST for Converting lock %u:%llu, type "
+                    "%d => %d, node %u\n", dlm->name, res->lockname.len,
+                    res->lockname.name,
+                    dlm_get_lock_cookie_node(be64_to_cpu(target->ml.cookie)),
+                    dlm_get_lock_cookie_seq(be64_to_cpu(target->ml.cookie)),
+                    target->ml.type,
                     target->ml.convert_type, target->ml.node);
 
                target->ml.type = target->ml.convert_type;
@@ -428,11 +422,14 @@ blocked:
                spin_lock(&target->spinlock);
                BUG_ON(target->ml.highest_blocked != LKM_IVMODE);
 
-               mlog(0, "calling ast for blocked lock: %.*s, granting: %d, "
-                    "node: %u\n", res->lockname.len, res->lockname.name,
+               mlog(0, "%s: res %.*s, AST for Blocked lock %u:%llu, type %d, "
+                    "node %u\n", dlm->name, res->lockname.len,
+                    res->lockname.name,
+                    dlm_get_lock_cookie_node(be64_to_cpu(target->ml.cookie)),
+                    dlm_get_lock_cookie_seq(be64_to_cpu(target->ml.cookie)),
                     target->ml.type, target->ml.node);
 
-               // target->ml.type is already correct
+               /* target->ml.type is already correct */
                list_move_tail(&target->list, &res->granted);
 
                BUG_ON(!target->lksb);
@@ -453,7 +450,6 @@ leave:
 /* must have NO locks when calling this with res !=NULL * */
 void dlm_kick_thread(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
 {
-       mlog_entry("dlm=%p, res=%p\n", dlm, res);
        if (res) {
                spin_lock(&dlm->spinlock);
                spin_lock(&res->spinlock);
@@ -466,8 +462,6 @@ void dlm_kick_thread(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
 
 void __dlm_dirty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
 {
-       mlog_entry("dlm=%p, res=%p\n", dlm, res);
-
        assert_spin_locked(&dlm->spinlock);
        assert_spin_locked(&res->spinlock);
 
@@ -484,13 +478,16 @@ void __dlm_dirty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
                        res->state |= DLM_LOCK_RES_DIRTY;
                }
        }
+
+       mlog(0, "%s: res %.*s\n", dlm->name, res->lockname.len,
+            res->lockname.name);
 }
 
 
 /* Launch the NM thread for the mounted volume */
 int dlm_launch_thread(struct dlm_ctxt *dlm)
 {
-       mlog(0, "starting dlm thread...\n");
+       mlog(0, "Starting dlm_thread...\n");
 
        dlm->dlm_thread_task = kthread_run(dlm_thread, dlm, "dlm_thread");
        if (IS_ERR(dlm->dlm_thread_task)) {
@@ -505,7 +502,7 @@ int dlm_launch_thread(struct dlm_ctxt *dlm)
 void dlm_complete_thread(struct dlm_ctxt *dlm)
 {
        if (dlm->dlm_thread_task) {
-               mlog(ML_KTHREAD, "waiting for dlm thread to exit\n");
+               mlog(ML_KTHREAD, "Waiting for dlm thread to exit\n");
                kthread_stop(dlm->dlm_thread_task);
                dlm->dlm_thread_task = NULL;
        }
@@ -536,7 +533,12 @@ static void dlm_flush_asts(struct dlm_ctxt *dlm)
                /* get an extra ref on lock */
                dlm_lock_get(lock);
                res = lock->lockres;
-               mlog(0, "delivering an ast for this lockres\n");
+               mlog(0, "%s: res %.*s, Flush AST for lock %u:%llu, type %d, "
+                    "node %u\n", dlm->name, res->lockname.len,
+                    res->lockname.name,
+                    dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
+                    dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)),
+                    lock->ml.type, lock->ml.node);
 
                BUG_ON(!lock->ast_pending);
 
@@ -557,9 +559,9 @@ static void dlm_flush_asts(struct dlm_ctxt *dlm)
                /* possible that another ast was queued while
                 * we were delivering the last one */
                if (!list_empty(&lock->ast_list)) {
-                       mlog(0, "aha another ast got queued while "
-                            "we were finishing the last one.  will "
-                            "keep the ast_pending flag set.\n");
+                       mlog(0, "%s: res %.*s, AST queued while flushing last "
+                            "one\n", dlm->name, res->lockname.len,
+                            res->lockname.name);
                } else
                        lock->ast_pending = 0;
 
@@ -590,8 +592,12 @@ static void dlm_flush_asts(struct dlm_ctxt *dlm)
                dlm_lock_put(lock);
                spin_unlock(&dlm->ast_lock);
 
-               mlog(0, "delivering a bast for this lockres "
-                    "(blocked = %d\n", hi);
+               mlog(0, "%s: res %.*s, Flush BAST for lock %u:%llu, "
+                    "blocked %d, node %u\n",
+                    dlm->name, res->lockname.len, res->lockname.name,
+                    dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
+                    dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)),
+                    hi, lock->ml.node);
 
                if (lock->ml.node != dlm->node_num) {
                        ret = dlm_send_proxy_bast(dlm, res, lock, hi);
@@ -605,9 +611,9 @@ static void dlm_flush_asts(struct dlm_ctxt *dlm)
                /* possible that another bast was queued while
                 * we were delivering the last one */
                if (!list_empty(&lock->bast_list)) {
-                       mlog(0, "aha another bast got queued while "
-                            "we were finishing the last one.  will "
-                            "keep the bast_pending flag set.\n");
+                       mlog(0, "%s: res %.*s, BAST queued while flushing last "
+                            "one\n", dlm->name, res->lockname.len,
+                            res->lockname.name);
                } else
                        lock->bast_pending = 0;
 
@@ -675,11 +681,12 @@ static int dlm_thread(void *data)
                        spin_lock(&res->spinlock);
                        if (res->owner != dlm->node_num) {
                                __dlm_print_one_lock_resource(res);
-                               mlog(ML_ERROR, "inprog:%s, mig:%s, reco:%s, dirty:%s\n",
-                                    res->state & DLM_LOCK_RES_IN_PROGRESS ? "yes" : "no",
-                                    res->state & DLM_LOCK_RES_MIGRATING ? "yes" : "no",
-                                    res->state & DLM_LOCK_RES_RECOVERING ? "yes" : "no",
-                                    res->state & DLM_LOCK_RES_DIRTY ? "yes" : "no");
+                               mlog(ML_ERROR, "%s: inprog %d, mig %d, reco %d,"
+                                    " dirty %d\n", dlm->name,
+                                    !!(res->state & DLM_LOCK_RES_IN_PROGRESS),
+                                    !!(res->state & DLM_LOCK_RES_MIGRATING),
+                                    !!(res->state & DLM_LOCK_RES_RECOVERING),
+                                    !!(res->state & DLM_LOCK_RES_DIRTY));
                        }
                        BUG_ON(res->owner != dlm->node_num);
 
@@ -693,8 +700,8 @@ static int dlm_thread(void *data)
                                res->state &= ~DLM_LOCK_RES_DIRTY;
                                spin_unlock(&res->spinlock);
                                spin_unlock(&dlm->ast_lock);
-                               mlog(0, "delaying list shuffling for in-"
-                                    "progress lockres %.*s, state=%d\n",
+                               mlog(0, "%s: res %.*s, inprogress, delay list "
+                                    "shuffle, state %d\n", dlm->name,
                                     res->lockname.len, res->lockname.name,
                                     res->state);
                                delay = 1;
@@ -706,10 +713,6 @@ static int dlm_thread(void *data)
                         * spinlock and do NOT have the dlm lock.
                         * safe to reserve/queue asts and run the lists. */
 
-                       mlog(0, "calling dlm_shuffle_lists with dlm=%s, "
-                            "res=%.*s\n", dlm->name,
-                            res->lockname.len, res->lockname.name);
-
                        /* called while holding lockres lock */
                        dlm_shuffle_lists(dlm, res);
                        res->state &= ~DLM_LOCK_RES_DIRTY;
@@ -733,7 +736,8 @@ in_progress:
                        /* unlikely, but we may need to give time to
                         * other tasks */
                        if (!--n) {
-                               mlog(0, "throttling dlm_thread\n");
+                               mlog(0, "%s: Throttling dlm thread\n",
+                                    dlm->name);
                                break;
                        }
                }
index d14cad6..30c5231 100644 (file)
@@ -1017,8 +1017,11 @@ static int ocfs2_double_lock(struct ocfs2_super *osb,
                 * An error return must mean that no cluster locks
                 * were held on function exit.
                 */
-               if (oi1->ip_blkno != oi2->ip_blkno)
+               if (oi1->ip_blkno != oi2->ip_blkno) {
                        ocfs2_inode_unlock(inode2, 1);
+                       brelse(*bh2);
+                       *bh2 = NULL;
+               }
 
                if (status != -ENOENT)
                        mlog_errno(status);
index 70dd3b1..51cd689 100644 (file)
@@ -420,6 +420,11 @@ struct ocfs2_super
        struct inode                    *osb_tl_inode;
        struct buffer_head              *osb_tl_bh;
        struct delayed_work             osb_truncate_log_wq;
+       /*
+        * How many clusters in our truncate log.
+        * It must be protected by osb_tl_inode->i_mutex.
+        */
+       unsigned int truncated_clusters;
 
        struct ocfs2_node_map           osb_recovering_orphan_dirs;
        unsigned int                    *osb_orphan_wipes;