#define CFQ_KEY_ASYNC (0)
-static DEFINE_RWLOCK(cfq_exit_lock);
+static DEFINE_SPINLOCK(cfq_exit_lock);
/*
* for the hash of cfqq inside the cfqd
mempool_t *crq_pool;
int rq_in_driver;
+ int hw_tag;
/*
* schedule slice state info
/*
* if queue was preempted, just add to front to be fair. busy_rr
- * isn't sorted.
+ * isn't sorted, but insert at the back for fairness.
*/
if (preempted || list == &cfqd->busy_rr) {
- list_add(&cfqq->cfq_list, list);
+ if (preempted)
+ list = list->prev;
+
+ list_add_tail(&cfqq->cfq_list, list);
return;
}
struct cfq_data *cfqd = q->elevator->elevator_data;
cfqd->rq_in_driver++;
+
+ /*
+ * If the depth is larger 1, it really could be queueing. But lets
+ * make the mark a little higher - idling could still be good for
+ * low queueing, and a low queueing number could also just indicate
+ * a SCSI mid layer like behaviour where limit+1 is often seen.
+ */
+ if (!cfqd->hw_tag && cfqd->rq_in_driver > 4)
+ cfqd->hw_tag = 1;
}
static void cfq_deactivate_request(request_queue_t *q, struct request *rq)
if (!list_empty(&cfqd->cur_rr) || cfq_get_next_prio_level(cfqd) != -1)
cfqq = list_entry_cfqq(cfqd->cur_rr.next);
+ /*
+ * If no new queues are available, check if the busy list has some
+ * before falling back to idle io.
+ */
+ if (!cfqq && !list_empty(&cfqd->busy_rr))
+ cfqq = list_entry_cfqq(cfqd->busy_rr.next);
+
/*
* if we have idle queues and no rt or be queues had pending
* requests, either allow immediate service if the grace period
/*
* put the reference this task is holding to the various queues
*/
- read_lock_irqsave(&cfq_exit_lock, flags);
+ spin_lock_irqsave(&cfq_exit_lock, flags);
n = rb_first(&ioc->cic_root);
while (n != NULL) {
n = rb_next(n);
}
- read_unlock_irqrestore(&cfq_exit_lock, flags);
+ spin_unlock_irqrestore(&cfq_exit_lock, flags);
}
static struct cfq_io_context *
struct cfq_io_context *cic = kmem_cache_alloc(cfq_ioc_pool, gfp_mask);
if (cic) {
- RB_CLEAR(&cic->rb_node);
- cic->key = NULL;
- cic->cfqq[ASYNC] = NULL;
- cic->cfqq[SYNC] = NULL;
+ memset(cic, 0, sizeof(*cic));
+ RB_CLEAR_COLOR(&cic->rb_node);
cic->last_end_request = jiffies;
- cic->ttime_total = 0;
- cic->ttime_samples = 0;
- cic->ttime_mean = 0;
+ INIT_LIST_HEAD(&cic->queue_list);
cic->dtor = cfq_free_io_context;
cic->exit = cfq_exit_io_context;
- INIT_LIST_HEAD(&cic->queue_list);
atomic_inc(&ioc_count);
}
struct cfq_io_context *cic;
struct rb_node *n;
- write_lock(&cfq_exit_lock);
+ spin_lock(&cfq_exit_lock);
n = rb_first(&ioc->cic_root);
while (n != NULL) {
cic = rb_entry(n, struct cfq_io_context, rb_node);
-
+
changed_ioprio(cic);
n = rb_next(n);
}
- write_unlock(&cfq_exit_lock);
+ spin_unlock(&cfq_exit_lock);
return 0;
}
* set ->slice_left to allow preemption for a new process
*/
cfqq->slice_left = 2 * cfqd->cfq_slice_idle;
- cfq_mark_cfqq_idle_window(cfqq);
+ if (!cfqd->hw_tag)
+ cfq_mark_cfqq_idle_window(cfqq);
cfq_mark_cfqq_prio_changed(cfqq);
cfq_init_prio_data(cfqq);
}
static void
cfq_drop_dead_cic(struct io_context *ioc, struct cfq_io_context *cic)
{
- read_lock(&cfq_exit_lock);
+ spin_lock(&cfq_exit_lock);
rb_erase(&cic->rb_node, &ioc->cic_root);
- read_unlock(&cfq_exit_lock);
+ list_del_init(&cic->queue_list);
+ spin_unlock(&cfq_exit_lock);
kmem_cache_free(cfq_ioc_pool, cic);
atomic_dec(&ioc_count);
}
BUG();
}
- read_lock(&cfq_exit_lock);
+ spin_lock(&cfq_exit_lock);
rb_link_node(&cic->rb_node, parent, p);
rb_insert_color(&cic->rb_node, &ioc->cic_root);
list_add(&cic->queue_list, &cfqd->cic_list);
- read_unlock(&cfq_exit_lock);
+ spin_unlock(&cfq_exit_lock);
}
/*
{
int enable_idle = cfq_cfqq_idle_window(cfqq);
- if (!cic->ioc->task || !cfqd->cfq_slice_idle)
+ if (!cic->ioc->task || !cfqd->cfq_slice_idle || cfqd->hw_tag)
enable_idle = 0;
else if (sample_valid(cic->ttime_samples)) {
if (cic->ttime_mean > cfqd->cfq_slice_idle)
cfqq->next_crq = cfq_choose_req(cfqd, cfqq->next_crq, crq);
+ cic = crq->io_context;
+
/*
* we never wait for an async request and we don't allow preemption
* of an async request. so just return early
*/
- if (!cfq_crq_is_sync(crq))
+ if (!cfq_crq_is_sync(crq)) {
+ /*
+ * sync process issued an async request, if it's waiting
+ * then expire it and kick rq handling.
+ */
+ if (cic == cfqd->active_cic &&
+ del_timer(&cfqd->idle_slice_timer)) {
+ cfq_slice_expired(cfqd, 0);
+ cfq_start_queueing(cfqd, cfqq);
+ }
return;
-
- cic = crq->io_context;
+ }
cfq_update_io_thinktime(cfqd, cic);
cfq_update_io_seektime(cfqd, cic, crq);
* race with a non-idle queue, reset timer
*/
end = cfqd->last_end_request + CFQ_IDLE_GRACE;
- if (!time_after_eq(jiffies, end)) {
- cfqd->idle_class_timer.expires = end;
- add_timer(&cfqd->idle_class_timer);
- } else
+ if (!time_after_eq(jiffies, end))
+ mod_timer(&cfqd->idle_class_timer, end);
+ else
cfq_schedule_dispatch(cfqd);
spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
cfq_shutdown_timer_wq(cfqd);
- write_lock(&cfq_exit_lock);
+ spin_lock(&cfq_exit_lock);
spin_lock_irq(q->queue_lock);
if (cfqd->active_queue)
}
spin_unlock_irq(q->queue_lock);
- write_unlock(&cfq_exit_lock);
+ spin_unlock(&cfq_exit_lock);
cfq_shutdown_timer_wq(cfqd);
kfree(cfqd);
}
-static int cfq_init_queue(request_queue_t *q, elevator_t *e)
+static void *cfq_init_queue(request_queue_t *q, elevator_t *e)
{
struct cfq_data *cfqd;
int i;
cfqd = kmalloc(sizeof(*cfqd), GFP_KERNEL);
if (!cfqd)
- return -ENOMEM;
+ return NULL;
memset(cfqd, 0, sizeof(*cfqd));
for (i = 0; i < CFQ_QHASH_ENTRIES; i++)
INIT_HLIST_HEAD(&cfqd->cfq_hash[i]);
- e->elevator_data = cfqd;
-
cfqd->queue = q;
cfqd->max_queued = q->nr_requests / 4;
cfqd->cfq_slice_async_rq = cfq_slice_async_rq;
cfqd->cfq_slice_idle = cfq_slice_idle;
- return 0;
+ return cfqd;
out_crqpool:
kfree(cfqd->cfq_hash);
out_cfqhash:
kfree(cfqd->crq_hash);
out_crqhash:
kfree(cfqd);
- return -ENOMEM;
+ return NULL;
}
static void cfq_slab_kill(void)