diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 3c7b537..1f96ad6 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -87,9 +87,10 @@ struct cfq_rb_root {
        unsigned count;
        unsigned total_weight;
        u64 min_vdisktime;
+       struct cfq_ttime ttime;
 };
-#define CFQ_RB_ROOT    (struct cfq_rb_root) { .rb = RB_ROOT, .left = NULL, \
-                       .count = 0, .min_vdisktime = 0, }
+#define CFQ_RB_ROOT    (struct cfq_rb_root) { .rb = RB_ROOT, \
+                       .ttime = {.last_end_request = jiffies,},}
 
 /*
  * Per process-grouping structure
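
The initializers dropped from CFQ_RB_ROOT (.left, .count, .min_vdisktime)
are redundant rather than lost: in a C99 compound literal, members without a
designated initializer are zero-initialized, so only .ttime.last_end_request
needs an explicit value. A minimal userspace sketch with a hypothetical
struct demo_root showing the rule:

    #include <assert.h>

    struct demo_root {
            unsigned count;
            unsigned long long min_vdisktime;
            unsigned long last_end_request;
    };

    /* same shape as CFQ_RB_ROOT: name one member, rely on implicit zeroing */
    #define DEMO_ROOT(now) (struct demo_root) { .last_end_request = (now), }

    int main(void)
    {
            struct demo_root r = DEMO_ROOT(1000UL);

            assert(r.count == 0);                 /* implicitly zeroed */
            assert(r.min_vdisktime == 0);         /* implicitly zeroed */
            assert(r.last_end_request == 1000UL);
            return 0;
    }
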
@@ -129,14 +130,12 @@ struct cfq_queue {
        unsigned long slice_end;
        long slice_resid;
 
-       /* pending metadata requests */
-       int meta_pending;
        /* number of requests that are on the dispatch list or inside driver */
        int dispatched;
 
        /* io prio of this group */
        unsigned short ioprio, org_ioprio;
-       unsigned short ioprio_class, org_ioprio_class;
+       unsigned short ioprio_class;
 
        pid_t pid;
 
@@ -212,6 +211,7 @@ struct cfq_group {
 #endif
        /* number of requests that are on the dispatch list or inside driver */
        int dispatched;
+       struct cfq_ttime ttime;
 };
 
 /*
@@ -393,6 +393,18 @@ CFQ_CFQQ_FNS(wait_busy);
                        j++, st = i < IDLE_WORKLOAD ? \
                        &cfqg->service_trees[i][j]: NULL) \
 
+static inline bool cfq_io_thinktime_big(struct cfq_data *cfqd,
+       struct cfq_ttime *ttime, bool group_idle)
+{
+       unsigned long slice;
+       if (!sample_valid(ttime->ttime_samples))
+               return false;
+       if (group_idle)
+               slice = cfqd->cfq_group_idle;
+       else
+               slice = cfqd->cfq_slice_idle;
+       return ttime->ttime_mean > slice;
+}
 
 static inline bool iops_mode(struct cfq_data *cfqd)
 {
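
The new cfq_io_thinktime_big() helper gates idling decisions: once enough
samples have accumulated (sample_valid(), which in this kernel means more
than 80 decayed samples), it reports whether the observed mean think time
exceeds the idle window that would be spent waiting (cfq_slice_idle, or
cfq_group_idle when a whole group is being evaluated). A standalone sketch
of the arithmetic, assuming the usual defaults of this era (HZ = 250, idle
window HZ / 125 = 2 jiffies = 8ms); struct ttime_demo and thinktime_big()
are hypothetical stand-ins:

    #include <stdbool.h>
    #include <stdio.h>

    #define HZ              250
    #define SLICE_IDLE      (HZ / 125)      /* 2 jiffies = 8ms */
    #define sample_valid(s) ((s) > 80)

    struct ttime_demo {
            unsigned long ttime_mean;       /* decayed mean think time, jiffies */
            unsigned long ttime_samples;    /* decayed sample count */
    };

    static bool thinktime_big(const struct ttime_demo *t)
    {
            if (!sample_valid(t->ttime_samples))
                    return false;           /* too few samples: keep idling */
            return t->ttime_mean > SLICE_IDLE;
    }

    int main(void)
    {
            struct ttime_demo fresh = { .ttime_mean = 100, .ttime_samples = 10 };
            struct ttime_demo slow  = { .ttime_mean = 5,   .ttime_samples = 200 };

            printf("fresh: %d\n", thinktime_big(&fresh)); /* 0: too few samples */
            printf("slow:  %d\n", thinktime_big(&slow));  /* 1: 20ms mean > 8ms window */
            return 0;
    }
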
@@ -670,9 +682,6 @@ cfq_choose_req(struct cfq_data *cfqd, struct request *rq1, struct request *rq2,
        if (rq_is_sync(rq1) != rq_is_sync(rq2))
                return rq_is_sync(rq1) ? rq1 : rq2;
 
-       if ((rq1->cmd_flags ^ rq2->cmd_flags) & REQ_META)
-               return rq1->cmd_flags & REQ_META ? rq1 : rq2;
-
        s1 = blk_rq_pos(rq1);
        s2 = blk_rq_pos(rq2);
 
@@ -988,9 +997,10 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg,
 
        cfq_log_cfqg(cfqd, cfqg, "served: vt=%llu min_vt=%llu", cfqg->vdisktime,
                                        st->min_vdisktime);
-       cfq_log_cfqq(cfqq->cfqd, cfqq, "sl_used=%u disp=%u charge=%u iops=%u"
-                       " sect=%u", used_sl, cfqq->slice_dispatch, charge,
-                       iops_mode(cfqd), cfqq->nr_sectors);
+       cfq_log_cfqq(cfqq->cfqd, cfqq,
+                    "sl_used=%u disp=%u charge=%u iops=%u sect=%lu",
+                    used_sl, cfqq->slice_dispatch, charge,
+                    iops_mode(cfqd), cfqq->nr_sectors);
        cfq_blkiocg_update_timeslice_used(&cfqg->blkg, used_sl,
                                          unaccounted_sl);
        cfq_blkiocg_set_start_empty_time(&cfqg->blkg);
@@ -1004,8 +1014,8 @@ static inline struct cfq_group *cfqg_of_blkg(struct blkio_group *blkg)
        return NULL;
 }
 
-void cfq_update_blkio_group_weight(void *key, struct blkio_group *blkg,
-                                       unsigned int weight)
+static void cfq_update_blkio_group_weight(void *key, struct blkio_group *blkg,
+                                         unsigned int weight)
 {
        struct cfq_group *cfqg = cfqg_of_blkg(blkg);
        cfqg->new_weight = weight;
@@ -1058,6 +1068,8 @@ static struct cfq_group * cfq_alloc_cfqg(struct cfq_data *cfqd)
                *st = CFQ_RB_ROOT;
        RB_CLEAR_NODE(&cfqg->rb_node);
 
+       cfqg->ttime.last_end_request = jiffies;
+
        /*
         * Take the initial reference that will be released on destroy
         * This can be thought of a joint reference by cgroup and
@@ -1234,7 +1246,7 @@ static void cfq_release_cfq_groups(struct cfq_data *cfqd)
  * it should not be NULL: even if the elevator was exiting, the cgroup
  * deletion path got to it first.
  */
-void cfq_unlink_blkio_group(void *key, struct blkio_group *blkg)
+static void cfq_unlink_blkio_group(void *key, struct blkio_group *blkg)
 {
        unsigned long  flags;
        struct cfq_data *cfqd = key;
@@ -1501,16 +1513,11 @@ static void cfq_add_rq_rb(struct request *rq)
 {
        struct cfq_queue *cfqq = RQ_CFQQ(rq);
        struct cfq_data *cfqd = cfqq->cfqd;
-       struct request *__alias, *prev;
+       struct request *prev;
 
        cfqq->queued[rq_is_sync(rq)]++;
 
-       /*
-        * looks a little odd, but the first insert might return an alias.
-        * if that happens, put the alias on the dispatch list
-        */
-       while ((__alias = elv_rb_add(&cfqq->sort_list, rq)) != NULL)
-               cfq_dispatch_insert(cfqd->queue, __alias);
+       elv_rb_add(&cfqq->sort_list, rq);
 
        if (!cfq_cfqq_on_rr(cfqq))
                cfq_add_cfqq_rr(cfqd, cfqq);
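
The alias-dispatch loop can go because this same series changed elv_rb_add()
to insert unconditionally: requests with equal sort keys simply descend to
the right of the tree instead of being handed back to the caller as an
alias. A userspace analogue of that duplicate-tolerant ordered insert (a
plain binary search tree stands in for the rbtree; all names hypothetical):

    #include <stdio.h>

    struct node {
            unsigned long pos;              /* sort key: request position */
            struct node *left, *right;
    };

    /* no uniqueness check: ties descend right, duplicates coexist */
    static struct node *insert(struct node *root, struct node *n)
    {
            if (!root)
                    return n;
            if (n->pos < root->pos)
                    root->left = insert(root->left, n);
            else
                    root->right = insert(root->right, n);
            return root;
    }

    static void walk(const struct node *n)
    {
            if (!n)
                    return;
            walk(n->left);
            printf("%lu ", n->pos);
            walk(n->right);
    }

    int main(void)
    {
            struct node a = { .pos = 8 }, b = { .pos = 8 }, c = { .pos = 4 };
            struct node *root = NULL;

            root = insert(root, &a);
            root = insert(root, &b);        /* alias of a: kept, not rejected */
            root = insert(root, &c);
            walk(root);                     /* prints: 4 8 8 */
            printf("\n");
            return 0;
    }
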
@@ -1597,10 +1604,6 @@ static void cfq_remove_request(struct request *rq)
        cfqq->cfqd->rq_queued--;
        cfq_blkiocg_update_io_remove_stats(&(RQ_CFQG(rq))->blkg,
                                        rq_data_dir(rq), rq_is_sync(rq));
-       if (rq->cmd_flags & REQ_META) {
-               WARN_ON(!cfqq->meta_pending);
-               cfqq->meta_pending--;
-       }
 }
 
 static int cfq_merge(struct request_queue *q, struct request **req,
@@ -1968,7 +1971,8 @@ static bool cfq_should_idle(struct cfq_data *cfqd, struct cfq_queue *cfqq)
         * Otherwise, we do only if they are the last ones
         * in their service tree.
         */
-       if (service_tree->count == 1 && cfq_cfqq_sync(cfqq))
+       if (service_tree->count == 1 && cfq_cfqq_sync(cfqq) &&
+          !cfq_io_thinktime_big(cfqd, &service_tree->ttime, false))
                return true;
        cfq_log_cfqq(cfqd, cfqq, "Not idling. st->count:%d",
                        service_tree->count);
@@ -2021,10 +2025,10 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
         * slice, then don't idle. This avoids overrunning the allotted
         * time slice.
         */
-       if (sample_valid(cic->ttime_samples) &&
-           (cfqq->slice_end - jiffies < cic->ttime_mean)) {
-               cfq_log_cfqq(cfqd, cfqq, "Not idling. think_time:%d",
-                               cic->ttime_mean);
+       if (sample_valid(cic->ttime.ttime_samples) &&
+           (cfqq->slice_end - jiffies < cic->ttime.ttime_mean)) {
+               cfq_log_cfqq(cfqd, cfqq, "Not idling. think_time:%lu",
+                            cic->ttime.ttime_mean);
                return;
        }
 
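
The comparison above works in raw unsigned jiffies: because both operands
are unsigned long, slice_end - jiffies is modular arithmetic and yields the
true remaining distance even if the jiffies counter wraps before the slice
ends. A small demonstration (values hypothetical):

    #include <stdio.h>

    int main(void)
    {
            unsigned long jiffies    = (unsigned long)-2;   /* 2 ticks from wrap */
            unsigned long slice_end  = jiffies + 4;         /* wraps around to 2 */
            unsigned long ttime_mean = 6;

            /* modular subtraction recovers the true distance: 4 ticks */
            if (slice_end - jiffies < ttime_mean)
                    printf("not idling: %lu left < mean %lu\n",
                           slice_end - jiffies, ttime_mean);
            return 0;
    }
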
@@ -2380,8 +2384,9 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
         * this group, wait for requests to complete.
         */
 check_group_idle:
-       if (cfqd->cfq_group_idle && cfqq->cfqg->nr_cfqq == 1
-           && cfqq->cfqg->dispatched) {
+       if (cfqd->cfq_group_idle && cfqq->cfqg->nr_cfqq == 1 &&
+           cfqq->cfqg->dispatched &&
+           !cfq_io_thinktime_big(cfqd, &cfqq->cfqg->ttime, true)) {
                cfqq = NULL;
                goto keep_queue;
        }
@@ -2772,8 +2777,14 @@ static void __cfq_exit_single_io_context(struct cfq_data *cfqd,
        smp_wmb();
        cic->key = cfqd_dead_key(cfqd);
 
-       if (ioc->ioc_data == cic)
+       rcu_read_lock();
+       if (rcu_dereference(ioc->ioc_data) == cic) {
+               rcu_read_unlock();
+               spin_lock(&ioc->lock);
                rcu_assign_pointer(ioc->ioc_data, NULL);
+               spin_unlock(&ioc->lock);
+       } else
+               rcu_read_unlock();
 
        if (cic->cfqq[BLK_RW_ASYNC]) {
                cfq_exit_cfqq(cfqd, cic->cfqq[BLK_RW_ASYNC]);
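
The fix above replaces a plain load of the RCU-managed ioc->ioc_data with
the usual two-step pattern: peek under rcu_read_lock(), and only when the
cached pointer really is the dying cic take ioc->lock and publish NULL via
rcu_assign_pointer(). A userspace analogue of the same shape, with C11
atomics and a pthread mutex standing in for the RCU primitives and
ioc->lock (all names hypothetical; storing NULL is benign even if the
pointer changed between the peek and the locked store, since it only drops
a cache hint):

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stddef.h>

    struct hint_cache {
            pthread_mutex_t lock;           /* plays the role of ioc->lock */
            _Atomic(void *) hot;            /* plays the role of ioc->ioc_data */
    };

    static void invalidate(struct hint_cache *c, void *dying)
    {
            /* cheap unlocked peek, like rcu_dereference() under
             * rcu_read_lock(): usually the cache points elsewhere
             * and the lock is never taken */
            if (atomic_load_explicit(&c->hot, memory_order_acquire) != dying)
                    return;

            /* it referenced the dying object: clear under the lock,
             * like rcu_assign_pointer(ioc->ioc_data, NULL) above */
            pthread_mutex_lock(&c->lock);
            atomic_store_explicit(&c->hot, NULL, memory_order_release);
            pthread_mutex_unlock(&c->lock);
    }

    int main(void)
    {
            struct hint_cache c = { PTHREAD_MUTEX_INITIALIZER };
            int obj;

            atomic_store(&c.hot, &obj);
            invalidate(&c, &obj);           /* c.hot is NULL afterwards */
            return 0;
    }
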
@@ -2826,7 +2837,7 @@ cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
        cic = kmem_cache_alloc_node(cfq_ioc_pool, gfp_mask | __GFP_ZERO,
                                                        cfqd->queue->node);
        if (cic) {
-               cic->last_end_request = jiffies;
+               cic->ttime.last_end_request = jiffies;
                INIT_LIST_HEAD(&cic->queue_list);
                INIT_HLIST_NODE(&cic->cic_list);
                cic->dtor = cfq_free_io_context;
@@ -2876,7 +2887,6 @@ static void cfq_init_prio_data(struct cfq_queue *cfqq, struct io_context *ioc)
         * elevate the priority of this queue
         */
        cfqq->org_ioprio = cfqq->ioprio;
-       cfqq->org_ioprio_class = cfqq->ioprio_class;
        cfq_clear_cfqq_prio_changed(cfqq);
 }
 
@@ -3080,7 +3090,8 @@ cfq_drop_dead_cic(struct cfq_data *cfqd, struct io_context *ioc,
 
        spin_lock_irqsave(&ioc->lock, flags);
 
-       BUG_ON(ioc->ioc_data == cic);
+       BUG_ON(rcu_dereference_check(ioc->ioc_data,
+               lockdep_is_held(&ioc->lock)) == cic);
 
        radix_tree_delete(&ioc->radix_root, cfqd->cic_index);
        hlist_del_rcu(&cic->cic_list);
@@ -3213,14 +3224,28 @@ err:
 }
 
 static void
-cfq_update_io_thinktime(struct cfq_data *cfqd, struct cfq_io_context *cic)
+__cfq_update_io_thinktime(struct cfq_ttime *ttime, unsigned long slice_idle)
 {
-       unsigned long elapsed = jiffies - cic->last_end_request;
-       unsigned long ttime = min(elapsed, 2UL * cfqd->cfq_slice_idle);
+       unsigned long elapsed = jiffies - ttime->last_end_request;
+       elapsed = min(elapsed, 2UL * slice_idle);
 
-       cic->ttime_samples = (7*cic->ttime_samples + 256) / 8;
-       cic->ttime_total = (7*cic->ttime_total + 256*ttime) / 8;
-       cic->ttime_mean = (cic->ttime_total + 128) / cic->ttime_samples;
+       ttime->ttime_samples = (7*ttime->ttime_samples + 256) / 8;
+       ttime->ttime_total = (7*ttime->ttime_total + 256*elapsed) / 8;
+       ttime->ttime_mean = (ttime->ttime_total + 128) / ttime->ttime_samples;
+}
+
+static void
+cfq_update_io_thinktime(struct cfq_data *cfqd, struct cfq_queue *cfqq,
+       struct cfq_io_context *cic)
+{
+       if (cfq_cfqq_sync(cfqq)) {
+               __cfq_update_io_thinktime(&cic->ttime, cfqd->cfq_slice_idle);
+               __cfq_update_io_thinktime(&cfqq->service_tree->ttime,
+                       cfqd->cfq_slice_idle);
+       }
+#ifdef CONFIG_CFQ_GROUP_IOSCHED
+       __cfq_update_io_thinktime(&cfqq->cfqg->ttime, cfqd->cfq_group_idle);
+#endif
 }
 
 static void
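
For reference, __cfq_update_io_thinktime() above is a fixed-point
exponentially decayed average. The gap since the last completion is first
clamped to twice the idle window so one long sleep cannot blow up the mean;
ttime_samples then follows s = (7s + 256) / 8, whose fixed point is 256,
ttime_total is the matching decayed sum scaled by 256, and the +128 (half
the converged sample weight) rounds the final division. A small numeric
sketch of the convergence, feeding a constant 4-jiffy gap:

    #include <stdio.h>

    int main(void)
    {
            unsigned long samples = 0, total = 0, mean = 0;
            const unsigned long think = 4;  /* constant gap, in jiffies */
            int i;

            for (i = 0; i < 100; i++) {
                    samples = (7 * samples + 256) / 8;
                    total   = (7 * total + 256 * think) / 8;
                    mean    = (total + 128) / samples;
            }
            /* samples converges near 256; mean recovers the 4-jiffy gap */
            printf("samples=%lu mean=%lu\n", samples, mean);
            return 0;
    }
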
@@ -3269,8 +3294,8 @@ cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq,
        else if (!atomic_read(&cic->ioc->nr_tasks) || !cfqd->cfq_slice_idle ||
            (!cfq_cfqq_deep(cfqq) && CFQQ_SEEKY(cfqq)))
                enable_idle = 0;
-       else if (sample_valid(cic->ttime_samples)) {
-               if (cic->ttime_mean > cfqd->cfq_slice_idle)
+       else if (sample_valid(cic->ttime.ttime_samples)) {
+               if (cic->ttime.ttime_mean > cfqd->cfq_slice_idle)
                        enable_idle = 0;
                else
                        enable_idle = 1;
@@ -3331,13 +3356,6 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
            RB_EMPTY_ROOT(&cfqq->sort_list))
                return true;
 
-       /*
-        * So both queues are sync. Let the new request get disk time if
-        * it's a metadata request and the current queue is doing regular IO.
-        */
-       if ((rq->cmd_flags & REQ_META) && !cfqq->meta_pending)
-               return true;
-
        /*
         * Allow an RT request to pre-empt an ongoing non-RT cfqq timeslice.
         */
@@ -3402,10 +3420,8 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
        struct cfq_io_context *cic = RQ_CIC(rq);
 
        cfqd->rq_queued++;
-       if (rq->cmd_flags & REQ_META)
-               cfqq->meta_pending++;
 
-       cfq_update_io_thinktime(cfqd, cic);
+       cfq_update_io_thinktime(cfqd, cfqq, cic);
        cfq_update_io_seektime(cfqd, cfqq, rq);
        cfq_update_idle_window(cfqd, cfqq, cic);
 
@@ -3512,12 +3528,16 @@ static bool cfq_should_wait_busy(struct cfq_data *cfqd, struct cfq_queue *cfqq)
        if (cfqq->cfqg->nr_cfqq > 1)
                return false;
 
+       /* the only queue in the group, but think time is big */
+       if (cfq_io_thinktime_big(cfqd, &cfqq->cfqg->ttime, true))
+               return false;
+
        if (cfq_slice_used(cfqq))
                return true;
 
        /* if slice left is less than think time, wait busy */
-       if (cic && sample_valid(cic->ttime_samples)
-           && (cfqq->slice_end - jiffies < cic->ttime_mean))
+       if (cic && sample_valid(cic->ttime.ttime_samples)
+           && (cfqq->slice_end - jiffies < cic->ttime.ttime_mean))
                return true;
 
        /*
@@ -3558,11 +3578,24 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
        cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]--;
 
        if (sync) {
-               RQ_CIC(rq)->last_end_request = now;
+               struct cfq_rb_root *service_tree;
+
+               RQ_CIC(rq)->ttime.last_end_request = now;
+
+               if (cfq_cfqq_on_rr(cfqq))
+                       service_tree = cfqq->service_tree;
+               else
+                       service_tree = service_tree_for(cfqq->cfqg,
+                               cfqq_prio(cfqq), cfqq_type(cfqq));
+               service_tree->ttime.last_end_request = now;
                if (!time_after(rq->start_time + cfqd->cfq_fifo_expire[1], now))
                        cfqd->last_delayed_sync = now;
        }
 
+#ifdef CONFIG_CFQ_GROUP_IOSCHED
+       cfqq->cfqg->ttime.last_end_request = now;
+#endif
+
        /*
         * If this is the active queue, check if it needs to be expired,
         * or if we want to idle in case it has no pending requests.
@@ -3608,30 +3641,6 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
                cfq_schedule_dispatch(cfqd);
 }
 
-/*
- * we temporarily boost lower priority queues if they are holding fs exclusive
- * resources. they are boosted to normal prio (CLASS_BE/4)
- */
-static void cfq_prio_boost(struct cfq_queue *cfqq)
-{
-       if (has_fs_excl()) {
-               /*
-                * boost idle prio on transactions that would lock out other
-                * users of the filesystem
-                */
-               if (cfq_class_idle(cfqq))
-                       cfqq->ioprio_class = IOPRIO_CLASS_BE;
-               if (cfqq->ioprio > IOPRIO_NORM)
-                       cfqq->ioprio = IOPRIO_NORM;
-       } else {
-               /*
-                * unboost the queue (if needed)
-                */
-               cfqq->ioprio_class = cfqq->org_ioprio_class;
-               cfqq->ioprio = cfqq->org_ioprio;
-       }
-}
-
 static inline int __cfq_may_queue(struct cfq_queue *cfqq)
 {
        if (cfq_cfqq_wait_request(cfqq) && !cfq_cfqq_must_alloc_slice(cfqq)) {
@@ -3662,7 +3671,6 @@ static int cfq_may_queue(struct request_queue *q, int rw)
        cfqq = cic_to_cfqq(cic, rw_is_sync(rw));
        if (cfqq) {
                cfq_init_prio_data(cfqq, cic->ioc);
-               cfq_prio_boost(cfqq);
 
                return __cfq_may_queue(cfqq);
        }