Merge branch 'move_extents' of git://oss.oracle.com/git/tye/linux-2.6 into ocfs2...
[pandora-kernel.git] / fs / ocfs2 / ioctl.c
index 8f13c59..bc91072 100644 (file)
 #include "ioctl.h"
 #include "resize.h"
 #include "refcounttree.h"
+#include "sysfile.h"
+#include "dir.h"
+#include "buffer_head_io.h"
+#include "suballoc.h"
+#include "move_extents.h"
 
 #include <linux/ext2_fs.h>
 
  * be -EFAULT.  The error will be returned from the ioctl(2) call.  It's
  * just a best-effort to tell userspace that this request caused the error.
  */
-static inline void __o2info_set_request_error(struct ocfs2_info_request *kreq,
+static inline void o2info_set_request_error(struct ocfs2_info_request *kreq,
                                        struct ocfs2_info_request __user *req)
 {
        kreq->ir_flags |= OCFS2_INFO_FL_ERROR;
        (void)put_user(kreq->ir_flags, (__u32 __user *)&(req->ir_flags));
 }
 
-#define o2info_set_request_error(a, b) \
-               __o2info_set_request_error((struct ocfs2_info_request *)&(a), b)
-
-static inline void __o2info_set_request_filled(struct ocfs2_info_request *req)
+/* Set OCFS2_INFO_FL_FILLED in the request's ir_flags. */
+static inline void o2info_set_request_filled(struct ocfs2_info_request *req)
 {
        req->ir_flags |= OCFS2_INFO_FL_FILLED;
 }
 
-#define o2info_set_request_filled(a) \
-               __o2info_set_request_filled((struct ocfs2_info_request *)&(a))
-
-static inline void __o2info_clear_request_filled(struct ocfs2_info_request *req)
+/* Clear OCFS2_INFO_FL_FILLED in the request's ir_flags. */
+static inline void o2info_clear_request_filled(struct ocfs2_info_request *req)
 {
        req->ir_flags &= ~OCFS2_INFO_FL_FILLED;
 }
 
-#define o2info_clear_request_filled(a) \
-               __o2info_clear_request_filled((struct ocfs2_info_request *)&(a))
+/*
+ * Return 1 when the request does not carry OCFS2_INFO_FL_NON_COHERENT in
+ * ir_flags, 0 when it does.
+ */
+static inline int o2info_coherent(struct ocfs2_info_request *req)
+{
+       return (req->ir_flags & OCFS2_INFO_FL_NON_COHERENT) == 0;
+}
 
 static int ocfs2_get_inode_attr(struct inode *inode, unsigned *flags)
 {
@@ -153,7 +154,7 @@ int ocfs2_info_handle_blocksize(struct inode *inode,
 
        oib.ib_blocksize = inode->i_sb->s_blocksize;
 
-       o2info_set_request_filled(oib);
+       o2info_set_request_filled(&oib.ib_req);
 
        if (o2info_to_user(oib, req))
                goto bail;
@@ -161,7 +162,7 @@ int ocfs2_info_handle_blocksize(struct inode *inode,
        status = 0;
 bail:
        if (status)
-               o2info_set_request_error(oib, req);
+               o2info_set_request_error(&oib.ib_req, req);
 
        return status;
 }
@@ -178,7 +179,7 @@ int ocfs2_info_handle_clustersize(struct inode *inode,
 
        oic.ic_clustersize = osb->s_clustersize;
 
-       o2info_set_request_filled(oic);
+       o2info_set_request_filled(&oic.ic_req);
 
        if (o2info_to_user(oic, req))
                goto bail;
@@ -186,7 +187,7 @@ int ocfs2_info_handle_clustersize(struct inode *inode,
        status = 0;
 bail:
        if (status)
-               o2info_set_request_error(oic, req);
+               o2info_set_request_error(&oic.ic_req, req);
 
        return status;
 }
@@ -203,7 +204,7 @@ int ocfs2_info_handle_maxslots(struct inode *inode,
 
        oim.im_max_slots = osb->max_slots;
 
-       o2info_set_request_filled(oim);
+       o2info_set_request_filled(&oim.im_req);
 
        if (o2info_to_user(oim, req))
                goto bail;
@@ -211,7 +212,7 @@ int ocfs2_info_handle_maxslots(struct inode *inode,
        status = 0;
 bail:
        if (status)
-               o2info_set_request_error(oim, req);
+               o2info_set_request_error(&oim.im_req, req);
 
        return status;
 }
@@ -228,7 +229,7 @@ int ocfs2_info_handle_label(struct inode *inode,
 
        memcpy(oil.il_label, osb->vol_label, OCFS2_MAX_VOL_LABEL_LEN);
 
-       o2info_set_request_filled(oil);
+       o2info_set_request_filled(&oil.il_req);
 
        if (o2info_to_user(oil, req))
                goto bail;
@@ -236,7 +237,7 @@ int ocfs2_info_handle_label(struct inode *inode,
        status = 0;
 bail:
        if (status)
-               o2info_set_request_error(oil, req);
+               o2info_set_request_error(&oil.il_req, req);
 
        return status;
 }
@@ -253,7 +254,7 @@ int ocfs2_info_handle_uuid(struct inode *inode,
 
        memcpy(oiu.iu_uuid_str, osb->uuid_str, OCFS2_TEXT_UUID_LEN + 1);
 
-       o2info_set_request_filled(oiu);
+       o2info_set_request_filled(&oiu.iu_req);
 
        if (o2info_to_user(oiu, req))
                goto bail;
@@ -261,7 +262,7 @@ int ocfs2_info_handle_uuid(struct inode *inode,
        status = 0;
 bail:
        if (status)
-               o2info_set_request_error(oiu, req);
+               o2info_set_request_error(&oiu.iu_req, req);
 
        return status;
 }
@@ -280,7 +281,7 @@ int ocfs2_info_handle_fs_features(struct inode *inode,
        oif.if_incompat_features = osb->s_feature_incompat;
        oif.if_ro_compat_features = osb->s_feature_ro_compat;
 
-       o2info_set_request_filled(oif);
+       o2info_set_request_filled(&oif.if_req);
 
        if (o2info_to_user(oif, req))
                goto bail;
@@ -288,7 +289,7 @@ int ocfs2_info_handle_fs_features(struct inode *inode,
        status = 0;
 bail:
        if (status)
-               o2info_set_request_error(oif, req);
+               o2info_set_request_error(&oif.if_req, req);
 
        return status;
 }
@@ -305,7 +306,7 @@ int ocfs2_info_handle_journal_size(struct inode *inode,
 
        oij.ij_journal_size = osb->journal->j_inode->i_size;
 
-       o2info_set_request_filled(oij);
+       o2info_set_request_filled(&oij.ij_req);
 
        if (o2info_to_user(oij, req))
                goto bail;
@@ -313,7 +314,408 @@ int ocfs2_info_handle_journal_size(struct inode *inode,
        status = 0;
 bail:
        if (status)
-               o2info_set_request_error(oij, req);
+               o2info_set_request_error(&oij.ij_req, req);
+
+       return status;
+}
+
+/*
+ * Read one inode allocator dinode and record its counters in
+ * fi->ifi_stat[slot].
+ *
+ * For a coherent request the dinode buffer comes from ocfs2_inode_lock() on
+ * @inode_alloc; otherwise block @blkno is read with
+ * ocfs2_read_blocks_sync().  i_mutex is held around the read whenever
+ * @inode_alloc is non-NULL.
+ *
+ * lfi_total is bitmap1.i_total and lfi_free is i_total minus i_used.
+ * Returns 0 on success or the negative status of the failed lock/read.
+ */
+int ocfs2_info_scan_inode_alloc(struct ocfs2_super *osb,
+                               struct inode *inode_alloc, u64 blkno,
+                               struct ocfs2_info_freeinode *fi, u32 slot)
+{
+       int status, locked = 0;
+       u32 total, used;
+       struct buffer_head *di_bh = NULL;
+       struct ocfs2_dinode *di;
+
+       if (inode_alloc)
+               mutex_lock(&inode_alloc->i_mutex);
+
+       if (o2info_coherent(&fi->ifi_req)) {
+               status = ocfs2_inode_lock(inode_alloc, &di_bh, 0);
+               /* only undo the inode lock if it was actually taken */
+               locked = (status >= 0);
+       } else {
+               status = ocfs2_read_blocks_sync(osb, blkno, 1, &di_bh);
+       }
+
+       if (status < 0) {
+               mlog_errno(status);
+               goto out;
+       }
+
+       di = (struct ocfs2_dinode *)di_bh->b_data;
+       total = le32_to_cpu(di->id1.bitmap1.i_total);
+       used = le32_to_cpu(di->id1.bitmap1.i_used);
+
+       fi->ifi_stat[slot].lfi_total = total;
+       fi->ifi_stat[slot].lfi_free = total - used;
+
+out:
+       if (locked)
+               ocfs2_inode_unlock(inode_alloc, 0);
+
+       if (inode_alloc)
+               mutex_unlock(&inode_alloc->i_mutex);
+
+       brelse(di_bh);
+
+       return status;
+}
+
+/*
+ * Handle an OCFS2_INFO_FREEINODE request.
+ *
+ * Copies the request in from @req, sets ifi_slotnum to osb->max_slots and
+ * fills ifi_stat[] for every slot via ocfs2_info_scan_inode_alloc().  A
+ * coherent request resolves each slot's allocator through
+ * ocfs2_get_system_file_inode(); otherwise only its block number is looked
+ * up by name in the system directory.  On success the request is marked
+ * filled and copied back to userspace.
+ *
+ * Returns 0 on success, -ENOMEM if the kernel copy cannot be allocated,
+ * -EFAULT if a user copy fails, -EIO/-ENOENT if an allocator cannot be
+ * found, or the error from scanning a slot.  Except for -ENOMEM, a failure
+ * also sets the error flag on the user's request (best effort).
+ */
+int ocfs2_info_handle_freeinode(struct inode *inode,
+                               struct ocfs2_info_request __user *req)
+{
+       u32 i;
+       u64 blkno = -1;
+       char namebuf[40];
+       int status = -EFAULT, type = INODE_ALLOC_SYSTEM_INODE;
+       struct ocfs2_info_freeinode *oifi = NULL;
+       struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+       struct inode *inode_alloc = NULL;
+
+       oifi = kzalloc(sizeof(struct ocfs2_info_freeinode), GFP_KERNEL);
+       if (!oifi) {
+               status = -ENOMEM;
+               mlog_errno(status);
+               /*
+                * There is no kernel copy of the request to flag, and the
+                * common exit path would dereference the NULL pointer.
+                */
+               return status;
+       }
+
+       if (o2info_from_user(*oifi, req))
+               goto bail;
+
+       oifi->ifi_slotnum = osb->max_slots;
+
+       for (i = 0; i < oifi->ifi_slotnum; i++) {
+               if (o2info_coherent(&oifi->ifi_req)) {
+                       inode_alloc = ocfs2_get_system_file_inode(osb, type, i);
+                       if (!inode_alloc) {
+                               mlog(ML_ERROR, "unable to get alloc inode in "
+                                   "slot %u\n", i);
+                               status = -EIO;
+                               goto bail;
+                       }
+               } else {
+                       ocfs2_sprintf_system_inode_name(namebuf,
+                                                       sizeof(namebuf),
+                                                       type, i);
+                       status = ocfs2_lookup_ino_from_name(osb->sys_root_inode,
+                                                           namebuf,
+                                                           strlen(namebuf),
+                                                           &blkno);
+                       if (status < 0) {
+                               status = -ENOENT;
+                               goto bail;
+                       }
+               }
+
+               status = ocfs2_info_scan_inode_alloc(osb, inode_alloc, blkno, oifi, i);
+
+               /*
+                * Drop the reference before looking at status so a failed
+                * scan does not leak it.  inode_alloc is NULL for
+                * non-coherent requests.
+                */
+               iput(inode_alloc);
+               inode_alloc = NULL;
+
+               if (status < 0)
+                       goto bail;
+       }
+
+       o2info_set_request_filled(&oifi->ifi_req);
+
+       if (o2info_to_user(*oifi, req))
+               goto bail;
+
+       status = 0;
+bail:
+       if (status)
+               o2info_set_request_error(&oifi->ifi_req, req);
+
+       kfree(oifi);
+
+       return status;
+}
+
+/*
+ * Account one free extent of @chunksize clusters in the histogram.
+ *
+ * The bucket is __ilog2_u32(chunksize); any value at or beyond
+ * OCFS2_INFO_MAX_HIST is folded into the last bucket.  The chosen bucket
+ * gets one more chunk and @chunksize more clusters.
+ */
+static void o2ffg_update_histogram(struct ocfs2_info_free_chunk_list *hist,
+                                  unsigned int chunksize)
+{
+       int bucket = __ilog2_u32(chunksize);
+
+       if (bucket > OCFS2_INFO_MAX_HIST - 1)
+               bucket = OCFS2_INFO_MAX_HIST - 1;
+
+       hist->fc_clusters[bucket] += chunksize;
+       hist->fc_chunks[bucket]++;
+}
+
+/*
+ * Fold one free extent of @chunksize clusters into the running statistics:
+ * bump the extent count, add the size to ffs_avg (used here as a running
+ * sum) and widen ffs_min/ffs_max if needed.
+ */
+static void o2ffg_update_stats(struct ocfs2_info_freefrag_stats *stats,
+                              unsigned int chunksize)
+{
+       stats->ffs_free_chunks_real++;
+       stats->ffs_avg += chunksize;
+
+       if (stats->ffs_min > chunksize)
+               stats->ffs_min = chunksize;
+
+       if (stats->ffs_max < chunksize)
+               stats->ffs_max = chunksize;
+}
+
+/*
+ * Record one free extent of @chunksize clusters in @ffg: first in the
+ * histogram embedded in the stats, then in the stats themselves.
+ */
+void ocfs2_info_update_ffg(struct ocfs2_info_freefrag *ffg,
+                          unsigned int chunksize)
+{
+       struct ocfs2_info_freefrag_stats *stats = &ffg->iff_ffs;
+
+       o2ffg_update_histogram(&stats->ffs_fc_hist, chunksize);
+       o2ffg_update_stats(stats, chunksize);
+}
+
+/*
+ * Scan every group descriptor linked from chain record @rec and fold the
+ * free-space layout of each group's bitmap into @ffg.
+ *
+ * A chain whose c_free is zero is skipped entirely, as is any group whose
+ * bg_free_bits_count is zero.  Coherent requests read descriptors with
+ * ocfs2_read_group_descriptor(); others use ocfs2_read_blocks_sync().
+ *
+ * @chunks_in_group is the number of iff_chunksize-sized windows examined in
+ * each group.  Returns 0 on success or -EIO if a descriptor cannot be read.
+ */
+int ocfs2_info_freefrag_scan_chain(struct ocfs2_super *osb,
+                                  struct inode *gb_inode,
+                                  struct ocfs2_dinode *gb_dinode,
+                                  struct ocfs2_chain_rec *rec,
+                                  struct ocfs2_info_freefrag *ffg,
+                                  u32 chunks_in_group)
+{
+       int status = 0, used;
+       u64 blkno;
+
+       struct buffer_head *bh = NULL;
+       struct ocfs2_group_desc *bg = NULL;
+
+       unsigned int max_bits, num_clusters;
+       unsigned int offset = 0, cluster, chunk;
+       unsigned int chunk_free, last_chunksize = 0;
+
+       if (!le32_to_cpu(rec->c_free))
+               goto bail;
+
+       do {
+               /* first pass starts at the chain head, later ones follow the link */
+               if (!bg)
+                       blkno = le64_to_cpu(rec->c_blkno);
+               else
+                       blkno = le64_to_cpu(bg->bg_next_group);
+
+               if (bh) {
+                       brelse(bh);
+                       bh = NULL;
+               }
+
+               if (o2info_coherent(&ffg->iff_req))
+                       status = ocfs2_read_group_descriptor(gb_inode,
+                                                            gb_dinode,
+                                                            blkno, &bh);
+               else
+                       status = ocfs2_read_blocks_sync(osb, blkno, 1, &bh);
+
+               if (status < 0) {
+                       mlog(ML_ERROR, "Can't read the group descriptor # "
+                            "%llu from device.", (unsigned long long)blkno);
+                       status = -EIO;
+                       goto bail;
+               }
+
+               bg = (struct ocfs2_group_desc *)bh->b_data;
+
+               if (!le16_to_cpu(bg->bg_free_bits_count))
+                       continue;
+
+               max_bits = le16_to_cpu(bg->bg_bits);
+               offset = 0;
+
+               for (chunk = 0; chunk < chunks_in_group; chunk++) {
+                       /*
+                        * last chunk may be not an entire one.
+                        */
+                       if ((offset + ffg->iff_chunksize) > max_bits)
+                               num_clusters = max_bits - offset;
+                       else
+                               num_clusters = ffg->iff_chunksize;
+
+                       chunk_free = 0;
+                       for (cluster = 0; cluster < num_clusters; cluster++) {
+                               used = ocfs2_test_bit(offset,
+                                               (unsigned long *)bg->bg_bitmap);
+                               /*
+                                * - chunk_free counts free clusters in #N chunk.
+                                * - last_chunksize records the size (in clusters)
+                                *   of the free extent currently being counted.
+                                */
+                               if (!used) {
+                                       last_chunksize++;
+                                       chunk_free++;
+                               }
+
+                               if (used && last_chunksize) {
+                                       ocfs2_info_update_ffg(ffg,
+                                                             last_chunksize);
+                                       last_chunksize = 0;
+                               }
+
+                               offset++;
+                       }
+
+                       if (chunk_free == ffg->iff_chunksize)
+                               ffg->iff_ffs.ffs_free_chunks++;
+               }
+
+               /*
+                * Flush the free extent that runs up to the end of this
+                * group, and reset the counter so that it is neither
+                * reported a second time nor merged into the first free
+                * extent of the next group on the chain.
+                */
+               if (last_chunksize) {
+                       ocfs2_info_update_ffg(ffg, last_chunksize);
+                       last_chunksize = 0;
+               }
+
+       } while (le64_to_cpu(bg->bg_next_group));
+
+bail:
+       brelse(bh);
+
+       return status;
+}
+
+/*
+ * Collect free-fragment statistics for the global bitmap into @ffg.
+ *
+ * For a coherent request the bitmap dinode comes from ocfs2_inode_lock() on
+ * @gb_inode; otherwise block @blkno is read with ocfs2_read_blocks_sync().
+ * ffg->iff_chunksize must not exceed the chain list's cl_cpg, else -EINVAL.
+ * The stats are reset, seeded from the dinode's i_total/i_used, and every
+ * chain record below cl_next_free_rec is scanned.  ffs_avg ends up as the
+ * accumulated size divided by the number of free extents seen.
+ *
+ * The reference on @gb_inode (if any) is dropped with iput() on every path.
+ * Returns 0 on success or a negative errno.
+ */
+int ocfs2_info_freefrag_scan_bitmap(struct ocfs2_super *osb,
+                                   struct inode *gb_inode, u64 blkno,
+                                   struct ocfs2_info_freefrag *ffg)
+{
+       u32 chunks_in_group;
+       int status, locked = 0, i;
+       struct buffer_head *gb_bh = NULL;
+       struct ocfs2_dinode *gb_dinode;
+       struct ocfs2_chain_list *cl;
+       struct ocfs2_info_freefrag_stats *ffs = &ffg->iff_ffs;
+
+       if (gb_inode)
+               mutex_lock(&gb_inode->i_mutex);
+
+       if (o2info_coherent(&ffg->iff_req)) {
+               status = ocfs2_inode_lock(gb_inode, &gb_bh, 0);
+               /* only undo the inode lock if it was actually taken */
+               locked = (status >= 0);
+       } else {
+               status = ocfs2_read_blocks_sync(osb, blkno, 1, &gb_bh);
+       }
+
+       if (status < 0) {
+               mlog_errno(status);
+               goto out;
+       }
+
+       gb_dinode = (struct ocfs2_dinode *)gb_bh->b_data;
+       cl = &gb_dinode->id2.i_chain;
+
+       /*
+        * The chunk size (in clusters) supplied by userspace must not be
+        * larger than the number of clusters in a group.
+        */
+       if (ffg->iff_chunksize > le16_to_cpu(cl->cl_cpg)) {
+               status = -EINVAL;
+               goto out;
+       }
+
+       memset(ffs, 0, sizeof(*ffs));
+
+       ffs->ffs_min = ~0U;
+       ffs->ffs_clusters = le32_to_cpu(gb_dinode->id1.bitmap1.i_total);
+       ffs->ffs_free_clusters = ffs->ffs_clusters -
+                       le32_to_cpu(gb_dinode->id1.bitmap1.i_used);
+
+       chunks_in_group = le16_to_cpu(cl->cl_cpg) / ffg->iff_chunksize + 1;
+
+       for (i = 0; i < le16_to_cpu(cl->cl_next_free_rec); i++) {
+               status = ocfs2_info_freefrag_scan_chain(osb, gb_inode,
+                                                       gb_dinode,
+                                                       &cl->cl_recs[i], ffg,
+                                                       chunks_in_group);
+               if (status)
+                       goto out;
+       }
+
+       /* ffs_avg holds the sum of extent sizes up to this point */
+       if (ffs->ffs_free_chunks_real)
+               ffs->ffs_avg /= ffs->ffs_free_chunks_real;
+out:
+       if (locked)
+               ocfs2_inode_unlock(gb_inode, 0);
+
+       if (gb_inode) {
+               mutex_unlock(&gb_inode->i_mutex);
+               iput(gb_inode);
+       }
+
+       brelse(gb_bh);
+
+       return status;
+}
+
+/*
+ * Handle an OCFS2_INFO_FREEFRAG request.
+ *
+ * Copies the request in from @req and rejects an iff_chunksize that is zero
+ * or not a power of two with -EINVAL.  A coherent request resolves the
+ * global bitmap through ocfs2_get_system_file_inode(); otherwise only its
+ * block number is looked up by name in the system directory.  The bitmap is
+ * then scanned with ocfs2_info_freefrag_scan_bitmap(), which also drops the
+ * inode reference, and the filled request is copied back to userspace.
+ *
+ * Returns 0 on success, -ENOMEM if the kernel copy cannot be allocated,
+ * -EFAULT if a user copy fails, -EINVAL/-EIO/-ENOENT as described above, or
+ * the error from the scan.  Except for -ENOMEM, a failure also sets the
+ * error flag on the user's request (best effort).
+ */
+int ocfs2_info_handle_freefrag(struct inode *inode,
+                              struct ocfs2_info_request __user *req)
+{
+       u64 blkno = -1;
+       char namebuf[40];
+       int status = -EFAULT, type = GLOBAL_BITMAP_SYSTEM_INODE;
+
+       struct ocfs2_info_freefrag *oiff;
+       struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+       struct inode *gb_inode = NULL;
+
+       oiff = kzalloc(sizeof(struct ocfs2_info_freefrag), GFP_KERNEL);
+       if (!oiff) {
+               status = -ENOMEM;
+               mlog_errno(status);
+               /*
+                * There is no kernel copy of the request to flag, and the
+                * common exit path would dereference the NULL pointer.
+                */
+               return status;
+       }
+
+       if (o2info_from_user(*oiff, req))
+               goto bail;
+       /*
+        * chunksize from userspace should be power of 2.
+        */
+       if ((oiff->iff_chunksize & (oiff->iff_chunksize - 1)) ||
+           (!oiff->iff_chunksize)) {
+               status = -EINVAL;
+               goto bail;
+       }
+
+       if (o2info_coherent(&oiff->iff_req)) {
+               gb_inode = ocfs2_get_system_file_inode(osb, type,
+                                                      OCFS2_INVALID_SLOT);
+               if (!gb_inode) {
+                       mlog(ML_ERROR, "unable to get global_bitmap inode\n");
+                       status = -EIO;
+                       goto bail;
+               }
+       } else {
+               ocfs2_sprintf_system_inode_name(namebuf, sizeof(namebuf), type,
+                                               OCFS2_INVALID_SLOT);
+               status = ocfs2_lookup_ino_from_name(osb->sys_root_inode,
+                                                   namebuf,
+                                                   strlen(namebuf),
+                                                   &blkno);
+               if (status < 0) {
+                       status = -ENOENT;
+                       goto bail;
+               }
+       }
+
+       /* the scan drops the gb_inode reference on every path */
+       status = ocfs2_info_freefrag_scan_bitmap(osb, gb_inode, blkno, oiff);
+       if (status < 0)
+               goto bail;
+
+       o2info_set_request_filled(&oiff->iff_req);
+
+       if (o2info_to_user(*oiff, req))
+               goto bail;
+
+       status = 0;
+bail:
+       if (status)
+               o2info_set_request_error(&oiff->iff_req, req);
+
+       kfree(oiff);
 
        return status;
 }
@@ -327,7 +729,7 @@ int ocfs2_info_handle_unknown(struct inode *inode,
        if (o2info_from_user(oir, req))
                goto bail;
 
-       o2info_clear_request_filled(oir);
+       o2info_clear_request_filled(&oir);
 
        if (o2info_to_user(oir, req))
                goto bail;
@@ -335,7 +737,7 @@ int ocfs2_info_handle_unknown(struct inode *inode,
        status = 0;
 bail:
        if (status)
-               o2info_set_request_error(oir, req);
+               o2info_set_request_error(&oir, req);
 
        return status;
 }
@@ -389,6 +791,14 @@ int ocfs2_info_handle_request(struct inode *inode,
                if (oir.ir_size == sizeof(struct ocfs2_info_journal_size))
                        status = ocfs2_info_handle_journal_size(inode, req);
                break;
+       case OCFS2_INFO_FREEINODE:
+               if (oir.ir_size == sizeof(struct ocfs2_info_freeinode))
+                       status = ocfs2_info_handle_freeinode(inode, req);
+               break;
+       case OCFS2_INFO_FREEFRAG:
+               if (oir.ir_size == sizeof(struct ocfs2_info_freefrag))
+                       status = ocfs2_info_handle_freefrag(inode, req);
+               break;
        default:
                status = ocfs2_info_handle_unknown(inode, req);
                break;
@@ -542,6 +952,31 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
                        return -EFAULT;
 
                return ocfs2_info_handle(inode, &info, 0);
+       case FITRIM:
+       {
+               struct super_block *sb = inode->i_sb;
+               struct fstrim_range range;
+               int ret = 0;
+
+               if (!capable(CAP_SYS_ADMIN))
+                       return -EPERM;
+
+               if (copy_from_user(&range, (struct fstrim_range *)arg,
+                   sizeof(range)))
+                       return -EFAULT;
+
+               ret = ocfs2_trim_fs(sb, &range);
+               if (ret < 0)
+                       return ret;
+
+               if (copy_to_user((struct fstrim_range *)arg, &range,
+                   sizeof(range)))
+                       return -EFAULT;
+
+               return 0;
+       }
+       case OCFS2_IOC_MOVE_EXT:
+               return ocfs2_ioctl_move_extents(filp, (void __user *)arg);
        default:
                return -ENOTTY;
        }
@@ -569,6 +1004,7 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg)
        case OCFS2_IOC_GROUP_EXTEND:
        case OCFS2_IOC_GROUP_ADD:
        case OCFS2_IOC_GROUP_ADD64:
+       case FITRIM:
                break;
        case OCFS2_IOC_REFLINK:
                if (copy_from_user(&args, (struct reflink_arguments *)arg,
@@ -584,6 +1020,8 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg)
                        return -EFAULT;
 
                return ocfs2_info_handle(inode, &info, 1);
+       case OCFS2_IOC_MOVE_EXT:
+               break;
        default:
                return -ENOIOCTLCMD;
        }