ocfs2: Cache extent records

author Mark Fasheh <mark.fasheh@oracle.com>

Tue, 24 Apr 2007 01:53:12 +0000 (18:53 -0700)

committer Mark Fasheh <mark.fasheh@oracle.com>

Thu, 26 Apr 2007 22:10:40 +0000 (15:10 -0700)
author Mark Fasheh <mark.fasheh@oracle.com>
Tue, 24 Apr 2007 01:53:12 +0000 (18:53 -0700)
committer Mark Fasheh <mark.fasheh@oracle.com>
Thu, 26 Apr 2007 22:10:40 +0000 (15:10 -0700)
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c

index 412a288..a0c8667 100644 (file)
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -2417,6 +2417,8 @@ out_add:
         status = ocfs2_do_insert_extent(inode, handle, fe_bh, &rec, &insert);
         if (status < 0)
                 mlog_errno(status);
+       else
+               ocfs2_extent_map_insert_rec(inode, &rec);
  
  bail:
         if (bh)
@@ -3640,6 +3642,9 @@ int ocfs2_commit_truncate(struct ocfs2_super *osb,
                 mlog_errno(status);
                 goto bail;
         }
+
+       ocfs2_extent_map_trunc(inode, new_highest_cpos);
+
  start:
         /*
          * Check that we still have allocation to delete.
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c

index 43267ee..27e43b0 100644 (file)
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -1613,6 +1613,8 @@ static int ocfs2_meta_lock_update(struct inode *inode,
          * for the inode metadata. */
         ocfs2_metadata_cache_purge(inode);
  
+       ocfs2_extent_map_trunc(inode, 0);
+
         if (ocfs2_meta_lvb_is_trustable(inode, lockres)) {
                 mlog(0, "Trusting LVB on inode %llu\n",
                      (unsigned long long)oi->ip_blkno);
diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c

index f35e04f..ba2b2ab 100644 (file)
--- a/fs/ocfs2/extent_map.c
+++ b/fs/ocfs2/extent_map.c
@@ -38,6 +38,254 @@
  
  #include "buffer_head_io.h"
  
+/*
+ * The extent caching implementation is intentionally trivial.
+ *
+ * We only cache a small number of extents stored directly on the
+ * inode, so linear order operations are acceptable. If we ever want
+ * to increase the size of the extent map, then these algorithms must
+ * get smarter.
+ */
+
+void ocfs2_extent_map_init(struct inode *inode)
+{
+       struct ocfs2_inode_info *oi = OCFS2_I(inode);
+
+       oi->ip_extent_map.em_num_items = 0;
+       INIT_LIST_HEAD(&oi->ip_extent_map.em_list);
+}
+
+static void __ocfs2_extent_map_lookup(struct ocfs2_extent_map *em,
+                                     unsigned int cpos,
+                                     struct ocfs2_extent_map_item **ret_emi)
+{
+       unsigned int range;
+       struct ocfs2_extent_map_item *emi;
+
+       *ret_emi = NULL;
+
+       list_for_each_entry(emi, &em->em_list, ei_list) {
+               range = emi->ei_cpos + emi->ei_clusters;
+
+               if (cpos >= emi->ei_cpos && cpos < range) {
+                       list_move(&emi->ei_list, &em->em_list);
+
+                       *ret_emi = emi;
+                       break;
+               }
+       }
+}
+
+static int ocfs2_extent_map_lookup(struct inode *inode, unsigned int cpos,
+                                  unsigned int *phys, unsigned int *len,
+                                  unsigned int *flags)
+{
+       unsigned int coff;
+       struct ocfs2_inode_info *oi = OCFS2_I(inode);
+       struct ocfs2_extent_map_item *emi;
+
+       spin_lock(&oi->ip_lock);
+
+       __ocfs2_extent_map_lookup(&oi->ip_extent_map, cpos, &emi);
+       if (emi) {
+               coff = cpos - emi->ei_cpos;
+               *phys = emi->ei_phys + coff;
+               if (len)
+                       *len = emi->ei_clusters - coff;
+               if (flags)
+                       *flags = emi->ei_flags;
+       }
+
+       spin_unlock(&oi->ip_lock);
+
+       if (emi == NULL)
+               return -ENOENT;
+
+       return 0;
+}
+
+/*
+ * Forget about all clusters equal to or greater than cpos.
+ */
+void ocfs2_extent_map_trunc(struct inode *inode, unsigned int cpos)
+{
+       struct list_head *p, *n;
+       struct ocfs2_extent_map_item *emi;
+       struct ocfs2_inode_info *oi = OCFS2_I(inode);
+       struct ocfs2_extent_map *em = &oi->ip_extent_map;
+       LIST_HEAD(tmp_list);
+       unsigned int range;
+
+       spin_lock(&oi->ip_lock);
+       list_for_each_safe(p, n, &em->em_list) {
+               emi = list_entry(p, struct ocfs2_extent_map_item, ei_list);
+
+               if (emi->ei_cpos >= cpos) {
+                       /* Full truncate of this record. */
+                       list_move(&emi->ei_list, &tmp_list);
+                       BUG_ON(em->em_num_items == 0);
+                       em->em_num_items--;
+                       continue;
+               }
+
+               range = emi->ei_cpos + emi->ei_clusters;
+               if (range > cpos) {
+                       /* Partial truncate */
+                       emi->ei_clusters = cpos - emi->ei_cpos;
+               }
+       }
+       spin_unlock(&oi->ip_lock);
+
+       list_for_each_safe(p, n, &tmp_list) {
+               emi = list_entry(p, struct ocfs2_extent_map_item, ei_list);
+               list_del(&emi->ei_list);
+               kfree(emi);
+       }
+}
+
+/*
+ * Is any part of emi2 contained within emi1
+ */
+static int ocfs2_ei_is_contained(struct ocfs2_extent_map_item *emi1,
+                                struct ocfs2_extent_map_item *emi2)
+{
+       unsigned int range1, range2;
+
+       /*
+        * Check if logical start of emi2 is inside emi1
+        */
+       range1 = emi1->ei_cpos + emi1->ei_clusters;
+       if (emi2->ei_cpos >= emi1->ei_cpos && emi2->ei_cpos < range1)
+               return 1;
+
+       /*
+        * Check if logical end of emi2 is inside emi1
+        */
+       range2 = emi2->ei_cpos + emi2->ei_clusters;
+       if (range2 > emi1->ei_cpos && range2 <= range1)
+               return 1;
+
+       return 0;
+}
+
+static void ocfs2_copy_emi_fields(struct ocfs2_extent_map_item *dest,
+                                 struct ocfs2_extent_map_item *src)
+{
+       dest->ei_cpos = src->ei_cpos;
+       dest->ei_phys = src->ei_phys;
+       dest->ei_clusters = src->ei_clusters;
+       dest->ei_flags = src->ei_flags;
+}
+
+/*
+ * Try to merge emi with ins. Returns 1 if merge succeeds, zero
+ * otherwise.
+ */
+static int ocfs2_try_to_merge_extent_map(struct ocfs2_extent_map_item *emi,
+                                        struct ocfs2_extent_map_item *ins)
+{
+       /*
+        * Handle contiguousness
+        */
+       if (ins->ei_phys == (emi->ei_phys + emi->ei_clusters) &&
+           ins->ei_cpos == (emi->ei_cpos + emi->ei_clusters) &&
+           ins->ei_flags == emi->ei_flags) {
+               emi->ei_clusters += ins->ei_clusters;
+               return 1;
+       } else if ((ins->ei_phys + ins->ei_clusters) == emi->ei_phys &&
+                  (ins->ei_cpos + ins->ei_clusters) == emi->ei_phys &&
+                  ins->ei_flags == emi->ei_flags) {
+               emi->ei_phys = ins->ei_phys;
+               emi->ei_cpos = ins->ei_cpos;
+               emi->ei_clusters += ins->ei_clusters;
+               return 1;
+       }
+
+       /*
+        * Overlapping extents - this shouldn't happen unless we've
+        * split an extent to change it's flags. That is exceedingly
+        * rare, so there's no sense in trying to optimize it yet.
+        */
+       if (ocfs2_ei_is_contained(emi, ins) ||
+           ocfs2_ei_is_contained(ins, emi)) {
+               ocfs2_copy_emi_fields(emi, ins);
+               return 1;
+       }
+
+       /* No merge was possible. */
+       return 0;
+}
+
+/*
+ * In order to reduce complexity on the caller, this insert function
+ * is intentionally liberal in what it will accept.
+ *
+ * The only rule is that the truncate call *must* be used whenever
+ * records have been deleted. This avoids inserting overlapping
+ * records with different physical mappings.
+ */
+void ocfs2_extent_map_insert_rec(struct inode *inode,
+                                struct ocfs2_extent_rec *rec)
+{
+       struct ocfs2_inode_info *oi = OCFS2_I(inode);
+       struct ocfs2_extent_map *em = &oi->ip_extent_map;
+       struct ocfs2_extent_map_item *emi, *new_emi = NULL;
+       struct ocfs2_extent_map_item ins;
+
+       ins.ei_cpos = le32_to_cpu(rec->e_cpos);
+       ins.ei_phys = ocfs2_blocks_to_clusters(inode->i_sb,
+                                              le64_to_cpu(rec->e_blkno));
+       ins.ei_clusters = le16_to_cpu(rec->e_leaf_clusters);
+       ins.ei_flags = rec->e_flags;
+
+search:
+       spin_lock(&oi->ip_lock);
+
+       list_for_each_entry(emi, &em->em_list, ei_list) {
+               if (ocfs2_try_to_merge_extent_map(emi, &ins)) {
+                       list_move(&emi->ei_list, &em->em_list);
+                       spin_unlock(&oi->ip_lock);
+                       goto out;
+               }
+       }
+
+       /*
+        * No item could be merged.
+        *
+        * Either allocate and add a new item, or overwrite the last recently
+        * inserted.
+        */
+
+       if (em->em_num_items < OCFS2_MAX_EXTENT_MAP_ITEMS) {
+               if (new_emi == NULL) {
+                       spin_unlock(&oi->ip_lock);
+
+                       new_emi = kmalloc(sizeof(*new_emi), GFP_NOFS);
+                       if (new_emi == NULL)
+                               goto out;
+
+                       goto search;
+               }
+
+               ocfs2_copy_emi_fields(new_emi, &ins);
+               list_add(&new_emi->ei_list, &em->em_list);
+               em->em_num_items++;
+               new_emi = NULL;
+       } else {
+               BUG_ON(list_empty(&em->em_list) || em->em_num_items == 0);
+               emi = list_entry(em->em_list.prev,
+                                struct ocfs2_extent_map_item, ei_list);
+               list_move(&emi->ei_list, &em->em_list);
+               ocfs2_copy_emi_fields(emi, &ins);
+       }
+
+       spin_unlock(&oi->ip_lock);
+
+out:
+       if (new_emi)
+               kfree(new_emi);
+}
+
  /*
   * Return the 1st index within el which contains an extent start
   * larger than v_cluster.
@@ -174,6 +422,11 @@ int ocfs2_get_clusters(struct inode *inode, u32 v_cluster,
         struct ocfs2_extent_rec *rec;
         u32 coff;
  
+       ret = ocfs2_extent_map_lookup(inode, v_cluster, p_cluster,
+                                     num_clusters, extent_flags);
+       if (ret == 0)
+               goto out;
+
         ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), OCFS2_I(inode)->ip_blkno,
                                &di_bh, OCFS2_BH_CACHED, inode);
         if (ret) {
@@ -245,6 +498,8 @@ int ocfs2_get_clusters(struct inode *inode, u32 v_cluster,
                         *num_clusters = ocfs2_rec_clusters(el, rec) - coff;
  
                 flags = rec->e_flags;
+
+               ocfs2_extent_map_insert_rec(inode, rec);
         }
  
         if (extent_flags)
diff --git a/fs/ocfs2/extent_map.h b/fs/ocfs2/extent_map.h

index 1d745e1..de91e3e 100644 (file)
--- a/fs/ocfs2/extent_map.h
+++ b/fs/ocfs2/extent_map.h
@@ -25,6 +25,26 @@
  #ifndef _EXTENT_MAP_H
  #define _EXTENT_MAP_H
  
+struct ocfs2_extent_map_item {
+       unsigned int                    ei_cpos;
+       unsigned int                    ei_phys;
+       unsigned int                    ei_clusters;
+       unsigned int                    ei_flags;
+
+       struct list_head                ei_list;
+};
+
+#define OCFS2_MAX_EXTENT_MAP_ITEMS                     3
+struct ocfs2_extent_map {
+       unsigned int                    em_num_items;
+       struct list_head                em_list;
+};
+
+void ocfs2_extent_map_init(struct inode *inode);
+void ocfs2_extent_map_trunc(struct inode *inode, unsigned int cluster);
+void ocfs2_extent_map_insert_rec(struct inode *inode,
+                                struct ocfs2_extent_rec *rec);
+
  int ocfs2_get_clusters(struct inode *inode, u32 v_cluster, u32 *p_cluster,
                        u32 *num_clusters, unsigned int *extent_flags);
  int ocfs2_extent_map_get_blocks(struct inode *inode, u64 v_blkno, u64 *p_blkno,
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c

index 4bfc98c..21a6050 100644 (file)
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -1008,6 +1008,8 @@ void ocfs2_clear_inode(struct inode *inode)
                         "Clear inode of %llu, inode has io markers\n",
                         (unsigned long long)oi->ip_blkno);
  
+       ocfs2_extent_map_trunc(inode, 0);
+
         status = ocfs2_drop_inode_locks(inode);
         if (status < 0)
                 mlog_errno(status);
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h

index aa84353..03ae075 100644 (file)
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -26,6 +26,8 @@
  #ifndef OCFS2_INODE_H
  #define OCFS2_INODE_H
  
+#include "extent_map.h"
+
  /* OCFS2 Inode Private Data */
  struct ocfs2_inode_info
  {
@@ -63,6 +65,8 @@ struct ocfs2_inode_info
  
         struct ocfs2_caching_info       ip_metadata_cache;
  
+       struct ocfs2_extent_map         ip_extent_map;
+
         struct inode                    vfs_inode;
  };
  
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c

index 6ab5235..5c9e824 100644 (file)
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -942,6 +942,7 @@ static void ocfs2_inode_init_once(void *data,
                 oi->ip_flags = 0;
                 oi->ip_open_count = 0;
                 spin_lock_init(&oi->ip_lock);
+               ocfs2_extent_map_init(&oi->vfs_inode);
                 INIT_LIST_HEAD(&oi->ip_io_markers);
                 oi->ip_created_trans = 0;
                 oi->ip_last_trans = 0;
author	Mark Fasheh <mark.fasheh@oracle.com>
	Tue, 24 Apr 2007 01:53:12 +0000 (18:53 -0700)
committer	Mark Fasheh <mark.fasheh@oracle.com>
	Thu, 26 Apr 2007 22:10:40 +0000 (15:10 -0700)
fs/ocfs2/alloc.c		patch \| blob \| history
fs/ocfs2/dlmglue.c		patch \| blob \| history
fs/ocfs2/extent_map.c		patch \| blob \| history
fs/ocfs2/extent_map.h		patch \| blob \| history
fs/ocfs2/inode.c		patch \| blob \| history
fs/ocfs2/inode.h		patch \| blob \| history
fs/ocfs2/super.c		patch \| blob \| history