Merge branch 'for-3.2/core' of git://git.kernel.dk/linux-block

author Linus Torvalds <torvalds@linux-foundation.org>

Sat, 5 Nov 2011 00:06:58 +0000 (17:06 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Sat, 5 Nov 2011 00:06:58 +0000 (17:06 -0700)
author Linus Torvalds <torvalds@linux-foundation.org>
Sat, 5 Nov 2011 00:06:58 +0000 (17:06 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sat, 5 Nov 2011 00:06:58 +0000 (17:06 -0700)
diff --git a/arch/m68k/emu/nfblock.c b/arch/m68k/emu/nfblock.c

index 48e50f8..e301133 100644 (file)
--- a/arch/m68k/emu/nfblock.c
+++ b/arch/m68k/emu/nfblock.c
@@ -59,7 +59,7 @@ struct nfhd_device {
         struct gendisk *disk;
  };
  
-static int nfhd_make_request(struct request_queue *queue, struct bio *bio)
+static void nfhd_make_request(struct request_queue *queue, struct bio *bio)
  {
         struct nfhd_device *dev = queue->queuedata;
         struct bio_vec *bvec;
@@ -76,7 +76,6 @@ static int nfhd_make_request(struct request_queue *queue, struct bio *bio)
                 sec += len;
         }
         bio_endio(bio, 0);
-       return 0;
  }
  
  static int nfhd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c

index 265f0f0..ba42719 100644 (file)
--- a/arch/powerpc/sysdev/axonram.c
+++ b/arch/powerpc/sysdev/axonram.c
@@ -104,7 +104,7 @@ axon_ram_irq_handler(int irq, void *dev)
   * axon_ram_make_request - make_request() method for block device
   * @queue, @bio: see blk_queue_make_request()
   */
-static int
+static void
  axon_ram_make_request(struct request_queue *queue, struct bio *bio)
  {
         struct axon_ram_bank *bank = bio->bi_bdev->bd_disk->private_data;
@@ -113,7 +113,6 @@ axon_ram_make_request(struct request_queue *queue, struct bio *bio)
         struct bio_vec *vec;
         unsigned int transfered;
         unsigned short idx;
-       int rc = 0;
  
         phys_mem = bank->io_addr + (bio->bi_sector << AXON_RAM_SECTOR_SHIFT);
         phys_end = bank->io_addr + bank->size;
@@ -121,8 +120,7 @@ axon_ram_make_request(struct request_queue *queue, struct bio *bio)
         bio_for_each_segment(vec, bio, idx) {
                 if (unlikely(phys_mem + vec->bv_len > phys_end)) {
                         bio_io_error(bio);
-                       rc = -ERANGE;
-                       break;
+                       return;
                 }
  
                 user_mem = page_address(vec->bv_page) + vec->bv_offset;
@@ -135,8 +133,6 @@ axon_ram_make_request(struct request_queue *queue, struct bio *bio)
                 transfered += vec->bv_len;
         }
         bio_endio(bio, 0);
-
-       return rc;
  }
  
  /**
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c

index b596e54..8f630ce 100644 (file)
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -768,25 +768,14 @@ static uint64_t blkio_get_stat(struct blkio_group *blkg,
         return disk_total;
  }
  
-static int blkio_check_dev_num(dev_t dev)
-{
-       int part = 0;
-       struct gendisk *disk;
-
-       disk = get_gendisk(dev, &part);
-       if (!disk || part)
-               return -ENODEV;
-
-       return 0;
-}
-
  static int blkio_policy_parse_and_set(char *buf,
         struct blkio_policy_node *newpn, enum blkio_policy_id plid, int fileid)
  {
+       struct gendisk *disk = NULL;
         char *s[4], *p, *major_s = NULL, *minor_s = NULL;
-       int ret;
         unsigned long major, minor;
-       int i = 0;
+       int i = 0, ret = -EINVAL;
+       int part;
         dev_t dev;
         u64 temp;
  
@@ -804,37 +793,36 @@ static int blkio_policy_parse_and_set(char *buf,
         }
  
         if (i != 2)
-               return -EINVAL;
+               goto out;
  
         p = strsep(&s[0], ":");
         if (p != NULL)
                 major_s = p;
         else
-               return -EINVAL;
+               goto out;
  
         minor_s = s[0];
         if (!minor_s)
-               return -EINVAL;
+               goto out;
  
-       ret = strict_strtoul(major_s, 10, &major);
-       if (ret)
-               return -EINVAL;
+       if (strict_strtoul(major_s, 10, &major))
+               goto out;
  
-       ret = strict_strtoul(minor_s, 10, &minor);
-       if (ret)
-               return -EINVAL;
+       if (strict_strtoul(minor_s, 10, &minor))
+               goto out;
  
         dev = MKDEV(major, minor);
  
-       ret = strict_strtoull(s[1], 10, &temp);
-       if (ret)
-               return -EINVAL;
+       if (strict_strtoull(s[1], 10, &temp))
+               goto out;
  
         /* For rule removal, do not check for device presence. */
         if (temp) {
-               ret = blkio_check_dev_num(dev);
-               if (ret)
-                       return ret;
+               disk = get_gendisk(dev, &part);
+               if (!disk || part) {
+                       ret = -ENODEV;
+                       goto out;
+               }
         }
  
         newpn->dev = dev;
@@ -843,7 +831,7 @@ static int blkio_policy_parse_and_set(char *buf,
         case BLKIO_POLICY_PROP:
                 if ((temp < BLKIO_WEIGHT_MIN && temp > 0) ||
                      temp > BLKIO_WEIGHT_MAX)
-                       return -EINVAL;
+                       goto out;
  
                 newpn->plid = plid;
                 newpn->fileid = fileid;
@@ -860,7 +848,7 @@ static int blkio_policy_parse_and_set(char *buf,
                 case BLKIO_THROTL_read_iops_device:
                 case BLKIO_THROTL_write_iops_device:
                         if (temp > THROTL_IOPS_MAX)
-                               return -EINVAL;
+                               goto out;
  
                         newpn->plid = plid;
                         newpn->fileid = fileid;
@@ -871,68 +859,96 @@ static int blkio_policy_parse_and_set(char *buf,
         default:
                 BUG();
         }
-
-       return 0;
+       ret = 0;
+out:
+       put_disk(disk);
+       return ret;
  }
  
  unsigned int blkcg_get_weight(struct blkio_cgroup *blkcg,
                               dev_t dev)
  {
         struct blkio_policy_node *pn;
+       unsigned long flags;
+       unsigned int weight;
+
+       spin_lock_irqsave(&blkcg->lock, flags);
  
         pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_PROP,
                                 BLKIO_PROP_weight_device);
         if (pn)
-               return pn->val.weight;
+               weight = pn->val.weight;
         else
-               return blkcg->weight;
+               weight = blkcg->weight;
+
+       spin_unlock_irqrestore(&blkcg->lock, flags);
+
+       return weight;
  }
  EXPORT_SYMBOL_GPL(blkcg_get_weight);
  
  uint64_t blkcg_get_read_bps(struct blkio_cgroup *blkcg, dev_t dev)
  {
         struct blkio_policy_node *pn;
+       unsigned long flags;
+       uint64_t bps = -1;
  
+       spin_lock_irqsave(&blkcg->lock, flags);
         pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_THROTL,
                                 BLKIO_THROTL_read_bps_device);
         if (pn)
-               return pn->val.bps;
-       else
-               return -1;
+               bps = pn->val.bps;
+       spin_unlock_irqrestore(&blkcg->lock, flags);
+
+       return bps;
  }
  
  uint64_t blkcg_get_write_bps(struct blkio_cgroup *blkcg, dev_t dev)
  {
         struct blkio_policy_node *pn;
+       unsigned long flags;
+       uint64_t bps = -1;
+
+       spin_lock_irqsave(&blkcg->lock, flags);
         pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_THROTL,
                                 BLKIO_THROTL_write_bps_device);
         if (pn)
-               return pn->val.bps;
-       else
-               return -1;
+               bps = pn->val.bps;
+       spin_unlock_irqrestore(&blkcg->lock, flags);
+
+       return bps;
  }
  
  unsigned int blkcg_get_read_iops(struct blkio_cgroup *blkcg, dev_t dev)
  {
         struct blkio_policy_node *pn;
+       unsigned long flags;
+       unsigned int iops = -1;
  
+       spin_lock_irqsave(&blkcg->lock, flags);
         pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_THROTL,
                                 BLKIO_THROTL_read_iops_device);
         if (pn)
-               return pn->val.iops;
-       else
-               return -1;
+               iops = pn->val.iops;
+       spin_unlock_irqrestore(&blkcg->lock, flags);
+
+       return iops;
  }
  
  unsigned int blkcg_get_write_iops(struct blkio_cgroup *blkcg, dev_t dev)
  {
         struct blkio_policy_node *pn;
+       unsigned long flags;
+       unsigned int iops = -1;
+
+       spin_lock_irqsave(&blkcg->lock, flags);
         pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_THROTL,
                                 BLKIO_THROTL_write_iops_device);
         if (pn)
-               return pn->val.iops;
-       else
-               return -1;
+               iops = pn->val.iops;
+       spin_unlock_irqrestore(&blkcg->lock, flags);
+
+       return iops;
  }
  
  /* Checks whether user asked for deleting a policy rule */
@@ -1085,6 +1101,7 @@ static int blkiocg_file_write(struct cgroup *cgrp, struct cftype *cft,
  
         if (blkio_delete_rule_command(newpn)) {
                 blkio_policy_delete_node(pn);
+               kfree(pn);
                 spin_unlock_irq(&blkcg->lock);
                 goto update_io_group;
         }
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h

index a71d290..6f3ace7 100644 (file)
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -188,7 +188,7 @@ struct blkio_policy_node {
         union {
                 unsigned int weight;
                 /*
-                * Rate read/write in terms of byptes per second
+                * Rate read/write in terms of bytes per second
                  * Whether this rate represents read or write is determined
                  * by file type "fileid".
                  */
diff --git a/block/blk-core.c b/block/blk-core.c

index d34433a..f43c8a5 100644 (file)
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -28,6 +28,7 @@
  #include <linux/task_io_accounting_ops.h>
  #include <linux/fault-inject.h>
  #include <linux/list_sort.h>
+#include <linux/delay.h>
  
  #define CREATE_TRACE_POINTS
  #include <trace/events/block.h>
@@ -38,8 +39,6 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
  EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
  EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete);
  
-static int __make_request(struct request_queue *q, struct bio *bio);
-
  /*
   * For the allocated request tables
   */
@@ -347,30 +346,80 @@ void blk_put_queue(struct request_queue *q)
  }
  EXPORT_SYMBOL(blk_put_queue);
  
-/*
- * Note: If a driver supplied the queue lock, it is disconnected
- * by this function. The actual state of the lock doesn't matter
- * here as the request_queue isn't accessible after this point
- * (QUEUE_FLAG_DEAD is set) and no other requests will be queued.
+/**
+ * blk_drain_queue - drain requests from request_queue
+ * @q: queue to drain
+ * @drain_all: whether to drain all requests or only the ones w/ ELVPRIV
+ *
+ * Drain requests from @q.  If @drain_all is set, all requests are drained.
+ * If not, only ELVPRIV requests are drained.  The caller is responsible
+ * for ensuring that no new requests which need to be drained are queued.
+ */
+void blk_drain_queue(struct request_queue *q, bool drain_all)
+{
+       while (true) {
+               int nr_rqs;
+
+               spin_lock_irq(q->queue_lock);
+
+               elv_drain_elevator(q);
+               if (drain_all)
+                       blk_throtl_drain(q);
+
+               __blk_run_queue(q);
+
+               if (drain_all)
+                       nr_rqs = q->rq.count[0] + q->rq.count[1];
+               else
+                       nr_rqs = q->rq.elvpriv;
+
+               spin_unlock_irq(q->queue_lock);
+
+               if (!nr_rqs)
+                       break;
+               msleep(10);
+       }
+}
+
+/**
+ * blk_cleanup_queue - shutdown a request queue
+ * @q: request queue to shutdown
+ *
+ * Mark @q DEAD, drain all pending requests, destroy and put it.  All
+ * future requests will be failed immediately with -ENODEV.
   */
  void blk_cleanup_queue(struct request_queue *q)
  {
-       /*
-        * We know we have process context here, so we can be a little
-        * cautious and ensure that pending block actions on this device
-        * are done before moving on. Going into this function, we should
-        * not have processes doing IO to this device.
-        */
-       blk_sync_queue(q);
+       spinlock_t *lock = q->queue_lock;
  
-       del_timer_sync(&q->backing_dev_info.laptop_mode_wb_timer);
+       /* mark @q DEAD, no new request or merges will be allowed afterwards */
         mutex_lock(&q->sysfs_lock);
         queue_flag_set_unlocked(QUEUE_FLAG_DEAD, q);
-       mutex_unlock(&q->sysfs_lock);
+
+       spin_lock_irq(lock);
+       queue_flag_set(QUEUE_FLAG_NOMERGES, q);
+       queue_flag_set(QUEUE_FLAG_NOXMERGES, q);
+       queue_flag_set(QUEUE_FLAG_DEAD, q);
  
         if (q->queue_lock != &q->__queue_lock)
                 q->queue_lock = &q->__queue_lock;
  
+       spin_unlock_irq(lock);
+       mutex_unlock(&q->sysfs_lock);
+
+       /*
+        * Drain all requests queued before DEAD marking.  The caller might
+        * be trying to tear down @q before its elevator is initialized, in
+        * which case we don't want to call into draining.
+        */
+       if (q->elevator)
+               blk_drain_queue(q, true);
+
+       /* @q won't process any more request, flush async actions */
+       del_timer_sync(&q->backing_dev_info.laptop_mode_wb_timer);
+       blk_sync_queue(q);
+
+       /* @q is and will stay empty, shutdown and put */
         blk_put_queue(q);
  }
  EXPORT_SYMBOL(blk_cleanup_queue);
@@ -541,7 +590,7 @@ blk_init_allocated_queue_node(struct request_queue *q, request_fn_proc *rfn,
         /*
          * This also sets hw/phys segments, boundary and size
          */
-       blk_queue_make_request(q, __make_request);
+       blk_queue_make_request(q, blk_queue_bio);
  
         q->sg_reserved_size = INT_MAX;
  
@@ -576,7 +625,7 @@ static inline void blk_free_request(struct request_queue *q, struct request *rq)
  }
  
  static struct request *
-blk_alloc_request(struct request_queue *q, int flags, int priv, gfp_t gfp_mask)
+blk_alloc_request(struct request_queue *q, unsigned int flags, gfp_t gfp_mask)
  {
         struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
  
@@ -587,12 +636,10 @@ blk_alloc_request(struct request_queue *q, int flags, int priv, gfp_t gfp_mask)
  
         rq->cmd_flags = flags | REQ_ALLOCED;
  
-       if (priv) {
-               if (unlikely(elv_set_request(q, rq, gfp_mask))) {
-                       mempool_free(rq, q->rq.rq_pool);
-                       return NULL;
-               }
-               rq->cmd_flags |= REQ_ELVPRIV;
+       if ((flags & REQ_ELVPRIV) &&
+           unlikely(elv_set_request(q, rq, gfp_mask))) {
+               mempool_free(rq, q->rq.rq_pool);
+               return NULL;
         }
  
         return rq;
@@ -651,12 +698,13 @@ static void __freed_request(struct request_queue *q, int sync)
   * A request has just been released.  Account for it, update the full and
   * congestion status, wake up any waiters.   Called under q->queue_lock.
   */
-static void freed_request(struct request_queue *q, int sync, int priv)
+static void freed_request(struct request_queue *q, unsigned int flags)
  {
         struct request_list *rl = &q->rq;
+       int sync = rw_is_sync(flags);
  
         rl->count[sync]--;
-       if (priv)
+       if (flags & REQ_ELVPRIV)
                 rl->elvpriv--;
  
         __freed_request(q, sync);
@@ -684,10 +732,19 @@ static bool blk_rq_should_init_elevator(struct bio *bio)
         return true;
  }
  
-/*
- * Get a free request, queue_lock must be held.
- * Returns NULL on failure, with queue_lock held.
- * Returns !NULL on success, with queue_lock *not held*.
+/**
+ * get_request - get a free request
+ * @q: request_queue to allocate request from
+ * @rw_flags: RW and SYNC flags
+ * @bio: bio to allocate request for (can be %NULL)
+ * @gfp_mask: allocation mask
+ *
+ * Get a free request from @q.  This function may fail under memory
+ * pressure or if @q is dead.
+ *
+ * Must be called with @q->queue_lock held and,
+ * Returns %NULL on failure, with @q->queue_lock held.
+ * Returns !%NULL on success, with @q->queue_lock *not held*.
   */
  static struct request *get_request(struct request_queue *q, int rw_flags,
                                    struct bio *bio, gfp_t gfp_mask)
@@ -696,7 +753,10 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
         struct request_list *rl = &q->rq;
         struct io_context *ioc = NULL;
         const bool is_sync = rw_is_sync(rw_flags) != 0;
-       int may_queue, priv = 0;
+       int may_queue;
+
+       if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))
+               return NULL;
  
         may_queue = elv_may_queue(q, rw_flags);
         if (may_queue == ELV_MQUEUE_NO)
@@ -740,17 +800,17 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
         rl->count[is_sync]++;
         rl->starved[is_sync] = 0;
  
-       if (blk_rq_should_init_elevator(bio)) {
-               priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
-               if (priv)
-                       rl->elvpriv++;
+       if (blk_rq_should_init_elevator(bio) &&
+           !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags)) {
+               rw_flags |= REQ_ELVPRIV;
+               rl->elvpriv++;
         }
  
         if (blk_queue_io_stat(q))
                 rw_flags |= REQ_IO_STAT;
         spin_unlock_irq(q->queue_lock);
  
-       rq = blk_alloc_request(q, rw_flags, priv, gfp_mask);
+       rq = blk_alloc_request(q, rw_flags, gfp_mask);
         if (unlikely(!rq)) {
                 /*
                  * Allocation failed presumably due to memory. Undo anything
@@ -760,7 +820,7 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
                  * wait queue, but this is pretty rare.
                  */
                 spin_lock_irq(q->queue_lock);
-               freed_request(q, is_sync, priv);
+               freed_request(q, rw_flags);
  
                 /*
                  * in the very unlikely event that allocation failed and no
@@ -790,11 +850,18 @@ out:
         return rq;
  }
  
-/*
- * No available requests for this queue, wait for some requests to become
- * available.
+/**
+ * get_request_wait - get a free request with retry
+ * @q: request_queue to allocate request from
+ * @rw_flags: RW and SYNC flags
+ * @bio: bio to allocate request for (can be %NULL)
+ *
+ * Get a free request from @q.  This function keeps retrying under memory
+ * pressure and fails iff @q is dead.
   *
- * Called with q->queue_lock held, and returns with it unlocked.
+ * Must be called with @q->queue_lock held and,
+ * Returns %NULL on failure, with @q->queue_lock held.
+ * Returns !%NULL on success, with @q->queue_lock *not held*.
   */
  static struct request *get_request_wait(struct request_queue *q, int rw_flags,
                                         struct bio *bio)
@@ -808,6 +875,9 @@ static struct request *get_request_wait(struct request_queue *q, int rw_flags,
                 struct io_context *ioc;
                 struct request_list *rl = &q->rq;
  
+               if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))
+                       return NULL;
+
                 prepare_to_wait_exclusive(&rl->wait[is_sync], &wait,
                                 TASK_UNINTERRUPTIBLE);
  
@@ -838,19 +908,15 @@ struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
  {
         struct request *rq;
  
-       if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))
-               return NULL;
-
         BUG_ON(rw != READ && rw != WRITE);
  
         spin_lock_irq(q->queue_lock);
-       if (gfp_mask & __GFP_WAIT) {
+       if (gfp_mask & __GFP_WAIT)
                 rq = get_request_wait(q, rw, NULL);
-       } else {
+       else
                 rq = get_request(q, rw, NULL, gfp_mask);
-               if (!rq)
-                       spin_unlock_irq(q->queue_lock);
-       }
+       if (!rq)
+               spin_unlock_irq(q->queue_lock);
         /* q->queue_lock is unlocked at this point */
  
         return rq;
@@ -1052,14 +1118,13 @@ void __blk_put_request(struct request_queue *q, struct request *req)
          * it didn't come out of our reserved rq pools
          */
         if (req->cmd_flags & REQ_ALLOCED) {
-               int is_sync = rq_is_sync(req) != 0;
-               int priv = req->cmd_flags & REQ_ELVPRIV;
+               unsigned int flags = req->cmd_flags;
  
                 BUG_ON(!list_empty(&req->queuelist));
                 BUG_ON(!hlist_unhashed(&req->hash));
  
                 blk_free_request(q, req);
-               freed_request(q, is_sync, priv);
+               freed_request(q, flags);
         }
  }
  EXPORT_SYMBOL_GPL(__blk_put_request);
@@ -1161,18 +1226,32 @@ static bool bio_attempt_front_merge(struct request_queue *q,
         return true;
  }
  
-/*
- * Attempts to merge with the plugged list in the current process. Returns
- * true if merge was successful, otherwise false.
+/**
+ * attempt_plug_merge - try to merge with %current's plugged list
+ * @q: request_queue new bio is being queued at
+ * @bio: new bio being queued
+ * @request_count: out parameter for number of traversed plugged requests
+ *
+ * Determine whether @bio being queued on @q can be merged with a request
+ * on %current's plugged list.  Returns %true if merge was successful,
+ * otherwise %false.
+ *
+ * This function is called without @q->queue_lock; however, elevator is
+ * accessed iff there already are requests on the plugged list which in
+ * turn guarantees validity of the elevator.
+ *
+ * Note that, on successful merge, elevator operation
+ * elevator_bio_merged_fn() will be called without queue lock.  Elevator
+ * must be ready for this.
   */
-static bool attempt_plug_merge(struct task_struct *tsk, struct request_queue *q,
-                              struct bio *bio, unsigned int *request_count)
+static bool attempt_plug_merge(struct request_queue *q, struct bio *bio,
+                              unsigned int *request_count)
  {
         struct blk_plug *plug;
         struct request *rq;
         bool ret = false;
  
-       plug = tsk->plug;
+       plug = current->plug;
         if (!plug)
                 goto out;
         *request_count = 0;
@@ -1202,7 +1281,6 @@ out:
  
  void init_request_from_bio(struct request *req, struct bio *bio)
  {
-       req->cpu = bio->bi_comp_cpu;
         req->cmd_type = REQ_TYPE_FS;
  
         req->cmd_flags |= bio->bi_rw & REQ_COMMON_MASK;
@@ -1215,7 +1293,7 @@ void init_request_from_bio(struct request *req, struct bio *bio)
         blk_rq_bio_prep(req->q, req, bio);
  }
  
-static int __make_request(struct request_queue *q, struct bio *bio)
+void blk_queue_bio(struct request_queue *q, struct bio *bio)
  {
         const bool sync = !!(bio->bi_rw & REQ_SYNC);
         struct blk_plug *plug;
@@ -1240,8 +1318,8 @@ static int __make_request(struct request_queue *q, struct bio *bio)
          * Check if we can merge with the plugged list before grabbing
          * any locks.
          */
-       if (attempt_plug_merge(current, q, bio, &request_count))
-               goto out;
+       if (attempt_plug_merge(q, bio, &request_count))
+               return;
  
         spin_lock_irq(q->queue_lock);
  
@@ -1275,6 +1353,10 @@ get_rq:
          * Returns with the queue unlocked.
          */
         req = get_request_wait(q, rw_flags, bio);
+       if (unlikely(!req)) {
+               bio_endio(bio, -ENODEV);        /* @q is dead */
+               goto out_unlock;
+       }
  
         /*
          * After dropping the lock and possibly sleeping here, our request
@@ -1284,8 +1366,7 @@ get_rq:
          */
         init_request_from_bio(req, bio);
  
-       if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) ||
-           bio_flagged(bio, BIO_CPU_AFFINE))
+       if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags))
                 req->cpu = raw_smp_processor_id();
  
         plug = current->plug;
@@ -1316,9 +1397,8 @@ get_rq:
  out_unlock:
                 spin_unlock_irq(q->queue_lock);
         }
-out:
-       return 0;
  }
+EXPORT_SYMBOL_GPL(blk_queue_bio);      /* for device mapper only */
  
  /*
   * If bio->bi_dev is a partition, remap the location
@@ -1417,165 +1497,135 @@ static inline int bio_check_eod(struct bio *bio, unsigned int nr_sectors)
         return 0;
  }
  
-/**
- * generic_make_request - hand a buffer to its device driver for I/O
- * @bio:  The bio describing the location in memory and on the device.
- *
- * generic_make_request() is used to make I/O requests of block
- * devices. It is passed a &struct bio, which describes the I/O that needs
- * to be done.
- *
- * generic_make_request() does not return any status.  The
- * success/failure status of the request, along with notification of
- * completion, is delivered asynchronously through the bio->bi_end_io
- * function described (one day) else where.
- *
- * The caller of generic_make_request must make sure that bi_io_vec
- * are set to describe the memory buffer, and that bi_dev and bi_sector are
- * set to describe the device address, and the
- * bi_end_io and optionally bi_private are set to describe how
- * completion notification should be signaled.
- *
- * generic_make_request and the drivers it calls may use bi_next if this
- * bio happens to be merged with someone else, and may change bi_dev and
- * bi_sector for remaps as it sees fit.  So the values of these fields
- * should NOT be depended on after the call to generic_make_request.
- */
-static inline void __generic_make_request(struct bio *bio)
+static noinline_for_stack bool
+generic_make_request_checks(struct bio *bio)
  {
         struct request_queue *q;
-       sector_t old_sector;
-       int ret, nr_sectors = bio_sectors(bio);
-       dev_t old_dev;
+       int nr_sectors = bio_sectors(bio);
         int err = -EIO;
+       char b[BDEVNAME_SIZE];
+       struct hd_struct *part;
  
         might_sleep();
  
         if (bio_check_eod(bio, nr_sectors))
                 goto end_io;
  
-       /*
-        * Resolve the mapping until finished. (drivers are
-        * still free to implement/resolve their own stacking
-        * by explicitly returning 0)
-        *
-        * NOTE: we don't repeat the blk_size check for each new device.
-        * Stacking drivers are expected to know what they are doing.
-        */
-       old_sector = -1;
-       old_dev = 0;
-       do {
-               char b[BDEVNAME_SIZE];
-               struct hd_struct *part;
-
-               q = bdev_get_queue(bio->bi_bdev);
-               if (unlikely(!q)) {
-                       printk(KERN_ERR
-                              "generic_make_request: Trying to access "
-                               "nonexistent block-device %s (%Lu)\n",
-                               bdevname(bio->bi_bdev, b),
-                               (long long) bio->bi_sector);
-                       goto end_io;
-               }
-
-               if (unlikely(!(bio->bi_rw & REQ_DISCARD) &&
-                            nr_sectors > queue_max_hw_sectors(q))) {
-                       printk(KERN_ERR "bio too big device %s (%u > %u)\n",
-                              bdevname(bio->bi_bdev, b),
-                              bio_sectors(bio),
-                              queue_max_hw_sectors(q));
-                       goto end_io;
-               }
-
-               if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))
-                       goto end_io;
-
-               part = bio->bi_bdev->bd_part;
-               if (should_fail_request(part, bio->bi_size) ||
-                   should_fail_request(&part_to_disk(part)->part0,
-                                       bio->bi_size))
-                       goto end_io;
-
-               /*
-                * If this device has partitions, remap block n
-                * of partition p to block n+start(p) of the disk.
-                */
-               blk_partition_remap(bio);
+       q = bdev_get_queue(bio->bi_bdev);
+       if (unlikely(!q)) {
+               printk(KERN_ERR
+                      "generic_make_request: Trying to access "
+                       "nonexistent block-device %s (%Lu)\n",
+                       bdevname(bio->bi_bdev, b),
+                       (long long) bio->bi_sector);
+               goto end_io;
+       }
  
-               if (bio_integrity_enabled(bio) && bio_integrity_prep(bio))
-                       goto end_io;
+       if (unlikely(!(bio->bi_rw & REQ_DISCARD) &&
+                    nr_sectors > queue_max_hw_sectors(q))) {
+               printk(KERN_ERR "bio too big device %s (%u > %u)\n",
+                      bdevname(bio->bi_bdev, b),
+                      bio_sectors(bio),
+                      queue_max_hw_sectors(q));
+               goto end_io;
+       }
  
-               if (old_sector != -1)
-                       trace_block_bio_remap(q, bio, old_dev, old_sector);
+       part = bio->bi_bdev->bd_part;
+       if (should_fail_request(part, bio->bi_size) ||
+           should_fail_request(&part_to_disk(part)->part0,
+                               bio->bi_size))
+               goto end_io;
  
-               old_sector = bio->bi_sector;
-               old_dev = bio->bi_bdev->bd_dev;
+       /*
+        * If this device has partitions, remap block n
+        * of partition p to block n+start(p) of the disk.
+        */
+       blk_partition_remap(bio);
  
-               if (bio_check_eod(bio, nr_sectors))
-                       goto end_io;
+       if (bio_integrity_enabled(bio) && bio_integrity_prep(bio))
+               goto end_io;
  
-               /*
-                * Filter flush bio's early so that make_request based
-                * drivers without flush support don't have to worry
-                * about them.
-                */
-               if ((bio->bi_rw & (REQ_FLUSH | REQ_FUA)) && !q->flush_flags) {
-                       bio->bi_rw &= ~(REQ_FLUSH | REQ_FUA);
-                       if (!nr_sectors) {
-                               err = 0;
-                               goto end_io;
-                       }
-               }
+       if (bio_check_eod(bio, nr_sectors))
+               goto end_io;
  
-               if ((bio->bi_rw & REQ_DISCARD) &&
-                   (!blk_queue_discard(q) ||
-                    ((bio->bi_rw & REQ_SECURE) &&
-                     !blk_queue_secdiscard(q)))) {
-                       err = -EOPNOTSUPP;
+       /*
+        * Filter flush bio's early so that make_request based
+        * drivers without flush support don't have to worry
+        * about them.
+        */
+       if ((bio->bi_rw & (REQ_FLUSH | REQ_FUA)) && !q->flush_flags) {
+               bio->bi_rw &= ~(REQ_FLUSH | REQ_FUA);
+               if (!nr_sectors) {
+                       err = 0;
                         goto end_io;
                 }
+       }
  
-               if (blk_throtl_bio(q, &bio))
-                       goto end_io;
-
-               /*
-                * If bio = NULL, bio has been throttled and will be submitted
-                * later.
-                */
-               if (!bio)
-                       break;
-
-               trace_block_bio_queue(q, bio);
+       if ((bio->bi_rw & REQ_DISCARD) &&
+           (!blk_queue_discard(q) ||
+            ((bio->bi_rw & REQ_SECURE) &&
+             !blk_queue_secdiscard(q)))) {
+               err = -EOPNOTSUPP;
+               goto end_io;
+       }
  
-               ret = q->make_request_fn(q, bio);
-       } while (ret);
+       if (blk_throtl_bio(q, bio))
+               return false;   /* throttled, will be resubmitted later */
  
-       return;
+       trace_block_bio_queue(q, bio);
+       return true;
  
  end_io:
         bio_endio(bio, err);
+       return false;
  }
  
-/*
- * We only want one ->make_request_fn to be active at a time,
- * else stack usage with stacked devices could be a problem.
- * So use current->bio_list to keep a list of requests
- * submited by a make_request_fn function.
- * current->bio_list is also used as a flag to say if
- * generic_make_request is currently active in this task or not.
- * If it is NULL, then no make_request is active.  If it is non-NULL,
- * then a make_request is active, and new requests should be added
- * at the tail
+/**
+ * generic_make_request - hand a buffer to its device driver for I/O
+ * @bio:  The bio describing the location in memory and on the device.
+ *
+ * generic_make_request() is used to make I/O requests of block
+ * devices. It is passed a &struct bio, which describes the I/O that needs
+ * to be done.
+ *
+ * generic_make_request() does not return any status.  The
+ * success/failure status of the request, along with notification of
+ * completion, is delivered asynchronously through the bio->bi_end_io
+ * function described (one day) else where.
+ *
+ * The caller of generic_make_request must make sure that bi_io_vec
+ * are set to describe the memory buffer, and that bi_dev and bi_sector are
+ * set to describe the device address, and the
+ * bi_end_io and optionally bi_private are set to describe how
+ * completion notification should be signaled.
+ *
+ * generic_make_request and the drivers it calls may use bi_next if this
+ * bio happens to be merged with someone else, and may resubmit the bio to
+ * a lower device by calling into generic_make_request recursively, which
+ * means the bio should NOT be touched after the call to ->make_request_fn.
   */
  void generic_make_request(struct bio *bio)
  {
         struct bio_list bio_list_on_stack;
  
+       if (!generic_make_request_checks(bio))
+               return;
+
+       /*
+        * We only want one ->make_request_fn to be active at a time, else
+        * stack usage with stacked devices could be a problem.  So use
+        * current->bio_list to keep a list of requests submitted by a
+        * make_request_fn function.  current->bio_list is also used as a
+        * flag to say if generic_make_request is currently active in this
+        * task or not.  If it is NULL, then no make_request is active.  If
+        * it is non-NULL, then a make_request is active, and new requests
+        * should be added at the tail
+        */
         if (current->bio_list) {
-               /* make_request is active */
                 bio_list_add(current->bio_list, bio);
                 return;
         }
+
         /* following loop may be a bit non-obvious, and so deserves some
          * explanation.
          * Before entering the loop, bio->bi_next is NULL (as all callers
@@ -1583,22 +1633,21 @@ void generic_make_request(struct bio *bio)
          * We pretend that we have just taken it off a longer list, so
          * we assign bio_list to a pointer to the bio_list_on_stack,
          * thus initialising the bio_list of new bios to be
-        * added.  __generic_make_request may indeed add some more bios
+        * added.  ->make_request() may indeed add some more bios
          * through a recursive call to generic_make_request.  If it
          * did, we find a non-NULL value in bio_list and re-enter the loop
          * from the top.  In this case we really did just take the bio
          * of the top of the list (no pretending) and so remove it from
-        * bio_list, and call into __generic_make_request again.
-        *
-        * The loop was structured like this to make only one call to
-        * __generic_make_request (which is important as it is large and
-        * inlined) and to keep the structure simple.
+        * bio_list, and call into ->make_request() again.
          */
         BUG_ON(bio->bi_next);
         bio_list_init(&bio_list_on_stack);
         current->bio_list = &bio_list_on_stack;
         do {
-               __generic_make_request(bio);
+               struct request_queue *q = bdev_get_queue(bio->bi_bdev);
+
+               q->make_request_fn(q, bio);
+
                 bio = bio_list_pop(current->bio_list);
         } while (bio);
         current->bio_list = NULL; /* deactivate */
@@ -1725,6 +1774,8 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
                 where = ELEVATOR_INSERT_FLUSH;
  
         add_acct_request(q, rq, where);
+       if (where == ELEVATOR_INSERT_FLUSH)
+               __blk_run_queue(q);
         spin_unlock_irqrestore(q->queue_lock, flags);
  
         return 0;
@@ -2628,6 +2679,20 @@ EXPORT_SYMBOL(kblockd_schedule_delayed_work);
  
  #define PLUG_MAGIC     0x91827364
  
+/**
+ * blk_start_plug - initialize blk_plug and track it inside the task_struct
+ * @plug:      The &struct blk_plug that needs to be initialized
+ *
+ * Description:
+ *   Tracking blk_plug inside the task_struct will help with auto-flushing the
+ *   pending I/O should the task end up blocking between blk_start_plug() and
+ *   blk_finish_plug(). This is important from a performance perspective, but
+ *   also ensures that we don't deadlock. For instance, if the task is blocking
+ *   for a memory allocation, memory reclaim could end up wanting to free a
+ *   page belonging to that request that is currently residing in our private
+ *   plug. By flushing the pending I/O when the process goes to sleep, we avoid
+ *   this kind of deadlock.
+ */
  void blk_start_plug(struct blk_plug *plug)
  {
         struct task_struct *tsk = current;
diff --git a/block/blk-flush.c b/block/blk-flush.c

index 491eb30..720ad60 100644 (file)
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -320,7 +320,7 @@ void blk_insert_flush(struct request *rq)
                 return;
         }
  
-       BUG_ON(!rq->bio || rq->bio != rq->biotail);
+       BUG_ON(rq->bio != rq->biotail); /*assumes zero or single bio rq */
  
         /*
          * If there's data but flush is not necessary, the request can be
@@ -330,7 +330,6 @@ void blk_insert_flush(struct request *rq)
         if ((policy & REQ_FSEQ_DATA) &&
             !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) {
                 list_add_tail(&rq->queuelist, &q->queue_head);
-               blk_run_queue_async(q);
                 return;
         }
  
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c

index 60fda88..e7f9f65 100644 (file)
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -457,11 +457,11 @@ queue_attr_store(struct kobject *kobj, struct attribute *attr,
  }
  
  /**
- * blk_cleanup_queue: - release a &struct request_queue when it is no longer needed
- * @kobj:    the kobj belonging of the request queue to be released
+ * blk_release_queue: - release a &struct request_queue when it is no longer needed
+ * @kobj:    the kobj belonging to the request queue to be released
   *
   * Description:
- *     blk_cleanup_queue is the pair to blk_init_queue() or
+ *     blk_release_queue is the pair to blk_init_queue() or
   *     blk_queue_make_request().  It should be called when a request queue is
   *     being released; typically when a block device is being de-registered.
   *     Currently, its primary task it to free all the &struct request
@@ -490,6 +490,7 @@ static void blk_release_queue(struct kobject *kobj)
         if (q->queue_tags)
                 __blk_queue_free_tags(q);
  
+       blk_throtl_release(q);
         blk_trace_shutdown(q);
  
         bdi_destroy(&q->backing_dev_info);
diff --git a/block/blk-tag.c b/block/blk-tag.c

index ece65fc..e74d6d1 100644 (file)
--- a/block/blk-tag.c
+++ b/block/blk-tag.c
@@ -286,12 +286,14 @@ void blk_queue_end_tag(struct request_queue *q, struct request *rq)
  
         BUG_ON(tag == -1);
  
-       if (unlikely(tag >= bqt->real_max_depth))
+       if (unlikely(tag >= bqt->max_depth)) {
                 /*
                  * This can happen after tag depth has been reduced.
-                * FIXME: how about a warning or info message here?
+                * But tag shouldn't be larger than real_max_depth.
                  */
+               WARN_ON(tag >= bqt->real_max_depth);
                 return;
+       }
  
         list_del_init(&rq->queuelist);
         rq->cmd_flags &= ~REQ_QUEUED;
diff --git a/block/blk-throttle.c b/block/blk-throttle.c

index a19f58c..4553245 100644 (file)
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -10,6 +10,7 @@
  #include <linux/bio.h>
  #include <linux/blktrace_api.h>
  #include "blk-cgroup.h"
+#include "blk.h"
  
  /* Max dispatch from a group in 1 round */
  static int throtl_grp_quantum = 8;
@@ -302,16 +303,16 @@ throtl_grp *throtl_find_tg(struct throtl_data *td, struct blkio_cgroup *blkcg)
         return tg;
  }
  
-/*
- * This function returns with queue lock unlocked in case of error, like
- * request queue is no more
- */
  static struct throtl_grp * throtl_get_tg(struct throtl_data *td)
  {
         struct throtl_grp *tg = NULL, *__tg = NULL;
         struct blkio_cgroup *blkcg;
         struct request_queue *q = td->queue;
  
+       /* no throttling for dead queue */
+       if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))
+               return NULL;
+
         rcu_read_lock();
         blkcg = task_blkio_cgroup(current);
         tg = throtl_find_tg(td, blkcg);
@@ -323,32 +324,22 @@ static struct throtl_grp * throtl_get_tg(struct throtl_data *td)
         /*
          * Need to allocate a group. Allocation of group also needs allocation
          * of per cpu stats which in-turn takes a mutex() and can block. Hence
-        * we need to drop rcu lock and queue_lock before we call alloc
-        *
-        * Take the request queue reference to make sure queue does not
-        * go away once we return from allocation.
+        * we need to drop rcu lock and queue_lock before we call alloc.
          */
-       blk_get_queue(q);
         rcu_read_unlock();
         spin_unlock_irq(q->queue_lock);
  
         tg = throtl_alloc_tg(td);
-       /*
-        * We might have slept in group allocation. Make sure queue is not
-        * dead
-        */
-       if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) {
-               blk_put_queue(q);
-               if (tg)
-                       kfree(tg);
-
-               return ERR_PTR(-ENODEV);
-       }
-       blk_put_queue(q);
  
         /* Group allocated and queue is still alive. take the lock */
         spin_lock_irq(q->queue_lock);
  
+       /* Make sure @q is still alive */
+       if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) {
+               kfree(tg);
+               return NULL;
+       }
+
         /*
          * Initialize the new group. After sleeping, read the blkcg again.
          */
@@ -1014,11 +1005,6 @@ static void throtl_release_tgs(struct throtl_data *td)
         }
  }
  
-static void throtl_td_free(struct throtl_data *td)
-{
-       kfree(td);
-}
-
  /*
   * Blk cgroup controller notification saying that blkio_group object is being
   * delinked as associated cgroup object is going away. That also means that
@@ -1123,17 +1109,17 @@ static struct blkio_policy_type blkio_policy_throtl = {
         .plid = BLKIO_POLICY_THROTL,
  };
  
-int blk_throtl_bio(struct request_queue *q, struct bio **biop)
+bool blk_throtl_bio(struct request_queue *q, struct bio *bio)
  {
         struct throtl_data *td = q->td;
         struct throtl_grp *tg;
-       struct bio *bio = *biop;
         bool rw = bio_data_dir(bio), update_disptime = true;
         struct blkio_cgroup *blkcg;
+       bool throttled = false;
  
         if (bio->bi_rw & REQ_THROTTLED) {
                 bio->bi_rw &= ~REQ_THROTTLED;
-               return 0;
+               goto out;
         }
  
         /*
@@ -1152,7 +1138,7 @@ int blk_throtl_bio(struct request_queue *q, struct bio **biop)
                         blkiocg_update_dispatch_stats(&tg->blkg, bio->bi_size,
                                         rw, rw_is_sync(bio->bi_rw));
                         rcu_read_unlock();
-                       return 0;
+                       goto out;
                 }
         }
         rcu_read_unlock();
@@ -1161,18 +1147,10 @@ int blk_throtl_bio(struct request_queue *q, struct bio **biop)
          * Either group has not been allocated yet or it is not an unlimited
          * IO group
          */
-
         spin_lock_irq(q->queue_lock);
         tg = throtl_get_tg(td);
-
-       if (IS_ERR(tg)) {
-               if (PTR_ERR(tg) == -ENODEV) {
-                       /*
-                        * Queue is gone. No queue lock held here.
-                        */
-                       return -ENODEV;
-               }
-       }
+       if (unlikely(!tg))
+               goto out_unlock;
  
         if (tg->nr_queued[rw]) {
                 /*
@@ -1200,7 +1178,7 @@ int blk_throtl_bio(struct request_queue *q, struct bio **biop)
                  * So keep on trimming slice even if bio is not queued.
                  */
                 throtl_trim_slice(td, tg, rw);
-               goto out;
+               goto out_unlock;
         }
  
  queue_bio:
@@ -1212,16 +1190,52 @@ queue_bio:
                         tg->nr_queued[READ], tg->nr_queued[WRITE]);
  
         throtl_add_bio_tg(q->td, tg, bio);
-       *biop = NULL;
+       throttled = true;
  
         if (update_disptime) {
                 tg_update_disptime(td, tg);
                 throtl_schedule_next_dispatch(td);
         }
  
+out_unlock:
+       spin_unlock_irq(q->queue_lock);
  out:
+       return throttled;
+}
+
+/**
+ * blk_throtl_drain - drain throttled bios
+ * @q: request_queue to drain throttled bios for
+ *
+ * Dispatch all currently throttled bios on @q through ->make_request_fn().
+ */
+void blk_throtl_drain(struct request_queue *q)
+       __releases(q->queue_lock) __acquires(q->queue_lock)
+{
+       struct throtl_data *td = q->td;
+       struct throtl_rb_root *st = &td->tg_service_tree;
+       struct throtl_grp *tg;
+       struct bio_list bl;
+       struct bio *bio;
+
+       WARN_ON_ONCE(!queue_is_locked(q));
+
+       bio_list_init(&bl);
+
+       while ((tg = throtl_rb_first(st))) {
+               throtl_dequeue_tg(td, tg);
+
+               while ((bio = bio_list_peek(&tg->bio_lists[READ])))
+                       tg_dispatch_one_bio(td, tg, bio_data_dir(bio), &bl);
+               while ((bio = bio_list_peek(&tg->bio_lists[WRITE])))
+                       tg_dispatch_one_bio(td, tg, bio_data_dir(bio), &bl);
+       }
         spin_unlock_irq(q->queue_lock);
-       return 0;
+
+       while ((bio = bio_list_pop(&bl)))
+               generic_make_request(bio);
+
+       spin_lock_irq(q->queue_lock);
  }
  
  int blk_throtl_init(struct request_queue *q)
@@ -1296,7 +1310,11 @@ void blk_throtl_exit(struct request_queue *q)
          * it.
          */
         throtl_shutdown_wq(q);
-       throtl_td_free(td);
+}
+
+void blk_throtl_release(struct request_queue *q)
+{
+       kfree(q->td);
  }
  
  static int __init throtl_init(void)
diff --git a/block/blk.h b/block/blk.h

index 20b900a..3f6551b 100644 (file)
--- a/block/blk.h
+++ b/block/blk.h
@@ -15,6 +15,7 @@ void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
                         struct bio *bio);
  int blk_rq_append_bio(struct request_queue *q, struct request *rq,
                       struct bio *bio);
+void blk_drain_queue(struct request_queue *q, bool drain_all);
  void blk_dequeue_request(struct request *rq);
  void __blk_queue_free_tags(struct request_queue *q);
  bool __blk_end_bidi_request(struct request *rq, int error,
@@ -188,4 +189,21 @@ static inline int blk_do_io_stat(struct request *rq)
                 (rq->cmd_flags & REQ_DISCARD));
  }
  
-#endif
+#ifdef CONFIG_BLK_DEV_THROTTLING
+extern bool blk_throtl_bio(struct request_queue *q, struct bio *bio);
+extern void blk_throtl_drain(struct request_queue *q);
+extern int blk_throtl_init(struct request_queue *q);
+extern void blk_throtl_exit(struct request_queue *q);
+extern void blk_throtl_release(struct request_queue *q);
+#else /* CONFIG_BLK_DEV_THROTTLING */
+static inline bool blk_throtl_bio(struct request_queue *q, struct bio *bio)
+{
+       return false;
+}
+static inline void blk_throtl_drain(struct request_queue *q) { }
+static inline int blk_throtl_init(struct request_queue *q) { return 0; }
+static inline void blk_throtl_exit(struct request_queue *q) { }
+static inline void blk_throtl_release(struct request_queue *q) { }
+#endif /* CONFIG_BLK_DEV_THROTTLING */
+
+#endif /* BLK_INTERNAL_H */
diff --git a/block/elevator.c b/block/elevator.c

index a3b64bc..66343d6 100644 (file)
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -31,7 +31,6 @@
  #include <linux/slab.h>
  #include <linux/init.h>
  #include <linux/compiler.h>
-#include <linux/delay.h>
  #include <linux/blktrace_api.h>
  #include <linux/hash.h>
  #include <linux/uaccess.h>
@@ -182,7 +181,7 @@ static void elevator_attach(struct request_queue *q, struct elevator_queue *eq,
         eq->elevator_data = data;
  }
  
-static char chosen_elevator[16];
+static char chosen_elevator[ELV_NAME_MAX];
  
  static int __init elevator_setup(char *str)
  {
@@ -606,43 +605,35 @@ void elv_requeue_request(struct request_queue *q, struct request *rq)
  void elv_drain_elevator(struct request_queue *q)
  {
         static int printed;
+
+       lockdep_assert_held(q->queue_lock);
+
         while (q->elevator->ops->elevator_dispatch_fn(q, 1))
                 ;
-       if (q->nr_sorted == 0)
-               return;
-       if (printed++ < 10) {
+       if (q->nr_sorted && printed++ < 10) {
                 printk(KERN_ERR "%s: forced dispatching is broken "
                        "(nr_sorted=%u), please report this\n",
                        q->elevator->elevator_type->elevator_name, q->nr_sorted);
         }
  }
  
-/*
- * Call with queue lock held, interrupts disabled
- */
  void elv_quiesce_start(struct request_queue *q)
  {
         if (!q->elevator)
                 return;
  
+       spin_lock_irq(q->queue_lock);
         queue_flag_set(QUEUE_FLAG_ELVSWITCH, q);
+       spin_unlock_irq(q->queue_lock);
  
-       /*
-        * make sure we don't have any requests in flight
-        */
-       elv_drain_elevator(q);
-       while (q->rq.elvpriv) {
-               __blk_run_queue(q);
-               spin_unlock_irq(q->queue_lock);
-               msleep(10);
-               spin_lock_irq(q->queue_lock);
-               elv_drain_elevator(q);
-       }
+       blk_drain_queue(q, false);
  }
  
  void elv_quiesce_end(struct request_queue *q)
  {
+       spin_lock_irq(q->queue_lock);
         queue_flag_clear(QUEUE_FLAG_ELVSWITCH, q);
+       spin_unlock_irq(q->queue_lock);
  }
  
  void __elv_add_request(struct request_queue *q, struct request *rq, int where)
@@ -972,7 +963,6 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
         /*
          * Turn on BYPASS and drain all requests w/ elevator private data
          */
-       spin_lock_irq(q->queue_lock);
         elv_quiesce_start(q);
  
         /*
@@ -983,8 +973,8 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
         /*
          * attach and start new elevator
          */
+       spin_lock_irq(q->queue_lock);
         elevator_attach(q, e, data);
-
         spin_unlock_irq(q->queue_lock);
  
         if (old_elevator->registered) {
@@ -999,9 +989,7 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
          * finally exit old elevator and turn off BYPASS.
          */
         elevator_exit(old_elevator);
-       spin_lock_irq(q->queue_lock);
         elv_quiesce_end(q);
-       spin_unlock_irq(q->queue_lock);
  
         blk_add_trace_msg(q, "elv switch: %s", e->elevator_type->elevator_name);
  
@@ -1015,10 +1003,7 @@ fail_register:
         elevator_exit(e);
         q->elevator = old_elevator;
         elv_register_queue(q);
-
-       spin_lock_irq(q->queue_lock);
-       queue_flag_clear(QUEUE_FLAG_ELVSWITCH, q);
-       spin_unlock_irq(q->queue_lock);
+       elv_quiesce_end(q);
  
         return err;
  }
diff --git a/block/genhd.c b/block/genhd.c

index 94855a9..024fc39 100644 (file)
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -612,6 +612,12 @@ void add_disk(struct gendisk *disk)
         register_disk(disk);
         blk_register_queue(disk);
  
+       /*
+        * Take an extra ref on queue which will be put on disk_release()
+        * so that it sticks around as long as @disk is there.
+        */
+       WARN_ON_ONCE(blk_get_queue(disk->queue));
+
         retval = sysfs_create_link(&disk_to_dev(disk)->kobj, &bdi->dev->kobj,
                                    "bdi");
         WARN_ON(retval);
@@ -1166,6 +1172,8 @@ static void disk_release(struct device *dev)
         disk_replace_part_tbl(disk, NULL);
         free_part_stats(&disk->part0);
         free_part_info(&disk->part0);
+       if (disk->queue)
+               blk_put_queue(disk->queue);
         kfree(disk);
  }
  struct class block_class = {
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c

index 4f4230b..fbdf0d8 100644 (file)
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -565,7 +565,7 @@ int scsi_cmd_ioctl(struct request_queue *q, struct gendisk *bd_disk, fmode_t mod
  {
         int err;
  
-       if (!q || blk_get_queue(q))
+       if (!q)
                 return -ENXIO;
  
         switch (cmd) {
@@ -686,7 +686,6 @@ int scsi_cmd_ioctl(struct request_queue *q, struct gendisk *bd_disk, fmode_t mod
                         err = -ENOTTY;
         }
  
-       blk_put_queue(q);
         return err;
  }
  EXPORT_SYMBOL(scsi_cmd_ioctl);
diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c

index 528f631..167ba0a 100644 (file)
--- a/drivers/block/aoe/aoeblk.c
+++ b/drivers/block/aoe/aoeblk.c
@@ -159,7 +159,7 @@ aoeblk_release(struct gendisk *disk, fmode_t mode)
         return 0;
  }
  
-static int
+static void
  aoeblk_make_request(struct request_queue *q, struct bio *bio)
  {
         struct sk_buff_head queue;
@@ -172,25 +172,25 @@ aoeblk_make_request(struct request_queue *q, struct bio *bio)
         if (bio == NULL) {
                 printk(KERN_ERR "aoe: bio is NULL\n");
                 BUG();
-               return 0;
+               return;
         }
         d = bio->bi_bdev->bd_disk->private_data;
         if (d == NULL) {
                 printk(KERN_ERR "aoe: bd_disk->private_data is NULL\n");
                 BUG();
                 bio_endio(bio, -ENXIO);
-               return 0;
+               return;
         } else if (bio->bi_io_vec == NULL) {
                 printk(KERN_ERR "aoe: bi_io_vec is NULL\n");
                 BUG();
                 bio_endio(bio, -ENXIO);
-               return 0;
+               return;
         }
         buf = mempool_alloc(d->bufpool, GFP_NOIO);
         if (buf == NULL) {
                 printk(KERN_INFO "aoe: buf allocation failure\n");
                 bio_endio(bio, -ENOMEM);
-               return 0;
+               return;
         }
         memset(buf, 0, sizeof(*buf));
         INIT_LIST_HEAD(&buf->bufs);
@@ -211,7 +211,7 @@ aoeblk_make_request(struct request_queue *q, struct bio *bio)
                 spin_unlock_irqrestore(&d->lock, flags);
                 mempool_free(buf, d->bufpool);
                 bio_endio(bio, -ENXIO);
-               return 0;
+               return;
         }
  
         list_add_tail(&buf->bufs, &d->bufq);
@@ -222,8 +222,6 @@ aoeblk_make_request(struct request_queue *q, struct bio *bio)
  
         spin_unlock_irqrestore(&d->lock, flags);
         aoenet_xmit(&queue);
-
-       return 0;
  }
  
  static int
diff --git a/drivers/block/brd.c b/drivers/block/brd.c

index dba1c32..d22119d 100644 (file)
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -323,7 +323,7 @@ out:
         return err;
  }
  
-static int brd_make_request(struct request_queue *q, struct bio *bio)
+static void brd_make_request(struct request_queue *q, struct bio *bio)
  {
         struct block_device *bdev = bio->bi_bdev;
         struct brd_device *brd = bdev->bd_disk->private_data;
@@ -359,8 +359,6 @@ static int brd_make_request(struct request_queue *q, struct bio *bio)
  
  out:
         bio_endio(bio, err);
-
-       return 0;
  }
  
  #ifdef CONFIG_BLK_DEV_XIP
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h

index 1706d60..9cf2035 100644 (file)
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1506,7 +1506,7 @@ extern void drbd_free_mdev(struct drbd_conf *mdev);
  extern int proc_details;
  
  /* drbd_req */
-extern int drbd_make_request(struct request_queue *q, struct bio *bio);
+extern void drbd_make_request(struct request_queue *q, struct bio *bio);
  extern int drbd_read_remote(struct drbd_conf *mdev, struct drbd_request *req);
  extern int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct bio_vec *bvec);
  extern int is_valid_ar_handle(struct drbd_request *, sector_t);
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c

index 3424d67..4a0f314 100644 (file)
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -1073,7 +1073,7 @@ static int drbd_fail_request_early(struct drbd_conf *mdev, int is_write)
         return 0;
  }
  
-int drbd_make_request(struct request_queue *q, struct bio *bio)
+void drbd_make_request(struct request_queue *q, struct bio *bio)
  {
         unsigned int s_enr, e_enr;
         struct drbd_conf *mdev = (struct drbd_conf *) q->queuedata;
@@ -1081,7 +1081,7 @@ int drbd_make_request(struct request_queue *q, struct bio *bio)
  
         if (drbd_fail_request_early(mdev, bio_data_dir(bio) & WRITE)) {
                 bio_endio(bio, -EPERM);
-               return 0;
+               return;
         }
  
         start_time = jiffies;
@@ -1100,7 +1100,8 @@ int drbd_make_request(struct request_queue *q, struct bio *bio)
  
         if (likely(s_enr == e_enr)) {
                 inc_ap_bio(mdev, 1);
-               return drbd_make_request_common(mdev, bio, start_time);
+               drbd_make_request_common(mdev, bio, start_time);
+               return;
         }
  
         /* can this bio be split generically?
@@ -1148,7 +1149,6 @@ int drbd_make_request(struct request_queue *q, struct bio *bio)
  
                 bio_pair_release(bp);
         }
-       return 0;
  }
  
  /* This is called by bio_add_page().  With this function we reduce
diff --git a/drivers/block/loop.c b/drivers/block/loop.c

index 4720c7a..c77983e 100644 (file)
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -202,74 +202,6 @@ lo_do_transfer(struct loop_device *lo, int cmd,
         return lo->transfer(lo, cmd, rpage, roffs, lpage, loffs, size, rblock);
  }
  
-/**
- * do_lo_send_aops - helper for writing data to a loop device
- *
- * This is the fast version for backing filesystems which implement the address
- * space operations write_begin and write_end.
- */
-static int do_lo_send_aops(struct loop_device *lo, struct bio_vec *bvec,
-               loff_t pos, struct page *unused)
-{
-       struct file *file = lo->lo_backing_file; /* kudos to NFsckingS */
-       struct address_space *mapping = file->f_mapping;
-       pgoff_t index;
-       unsigned offset, bv_offs;
-       int len, ret;
-
-       mutex_lock(&mapping->host->i_mutex);
-       index = pos >> PAGE_CACHE_SHIFT;
-       offset = pos & ((pgoff_t)PAGE_CACHE_SIZE - 1);
-       bv_offs = bvec->bv_offset;
-       len = bvec->bv_len;
-       while (len > 0) {
-               sector_t IV;
-               unsigned size, copied;
-               int transfer_result;
-               struct page *page;
-               void *fsdata;
-
-               IV = ((sector_t)index << (PAGE_CACHE_SHIFT - 9))+(offset >> 9);
-               size = PAGE_CACHE_SIZE - offset;
-               if (size > len)
-                       size = len;
-
-               ret = pagecache_write_begin(file, mapping, pos, size, 0,
-                                                       &page, &fsdata);
-               if (ret)
-                       goto fail;
-
-               file_update_time(file);
-
-               transfer_result = lo_do_transfer(lo, WRITE, page, offset,
-                               bvec->bv_page, bv_offs, size, IV);
-               copied = size;
-               if (unlikely(transfer_result))
-                       copied = 0;
-
-               ret = pagecache_write_end(file, mapping, pos, size, copied,
-                                                       page, fsdata);
-               if (ret < 0 || ret != copied)
-                       goto fail;
-
-               if (unlikely(transfer_result))
-                       goto fail;
-
-               bv_offs += copied;
-               len -= copied;
-               offset = 0;
-               index++;
-               pos += copied;
-       }
-       ret = 0;
-out:
-       mutex_unlock(&mapping->host->i_mutex);
-       return ret;
-fail:
-       ret = -1;
-       goto out;
-}
-
  /**
   * __do_lo_send_write - helper for writing data to a loop device
   *
@@ -297,10 +229,8 @@ static int __do_lo_send_write(struct file *file,
  /**
   * do_lo_send_direct_write - helper for writing data to a loop device
   *
- * This is the fast, non-transforming version for backing filesystems which do
- * not implement the address space operations write_begin and write_end.
- * It uses the write file operation which should be present on all writeable
- * filesystems.
+ * This is the fast, non-transforming version that does not need double
+ * buffering.
   */
  static int do_lo_send_direct_write(struct loop_device *lo,
                 struct bio_vec *bvec, loff_t pos, struct page *page)
@@ -316,15 +246,9 @@ static int do_lo_send_direct_write(struct loop_device *lo,
  /**
   * do_lo_send_write - helper for writing data to a loop device
   *
- * This is the slow, transforming version for filesystems which do not
- * implement the address space operations write_begin and write_end.  It
- * uses the write file operation which should be present on all writeable
- * filesystems.
- *
- * Using fops->write is slower than using aops->{prepare,commit}_write in the
- * transforming case because we need to double buffer the data as we cannot do
- * the transformations in place as we do not have direct access to the
- * destination pages of the backing file.
+ * This is the slow, transforming version that needs to double buffer the
+ * data as it cannot do the transformations in place without having direct
+ * access to the destination pages of the backing file.
   */
  static int do_lo_send_write(struct loop_device *lo, struct bio_vec *bvec,
                 loff_t pos, struct page *page)
@@ -350,17 +274,16 @@ static int lo_send(struct loop_device *lo, struct bio *bio, loff_t pos)
         struct page *page = NULL;
         int i, ret = 0;
  
-       do_lo_send = do_lo_send_aops;
-       if (!(lo->lo_flags & LO_FLAGS_USE_AOPS)) {
+       if (lo->transfer != transfer_none) {
+               page = alloc_page(GFP_NOIO | __GFP_HIGHMEM);
+               if (unlikely(!page))
+                       goto fail;
+               kmap(page);
+               do_lo_send = do_lo_send_write;
+       } else {
                 do_lo_send = do_lo_send_direct_write;
-               if (lo->transfer != transfer_none) {
-                       page = alloc_page(GFP_NOIO | __GFP_HIGHMEM);
-                       if (unlikely(!page))
-                               goto fail;
-                       kmap(page);
-                       do_lo_send = do_lo_send_write;
-               }
         }
+
         bio_for_each_segment(bvec, bio, i) {
                 ret = do_lo_send(lo, bvec, pos, page);
                 if (ret < 0)
@@ -514,7 +437,7 @@ static struct bio *loop_get_bio(struct loop_device *lo)
         return bio_list_pop(&lo->lo_bio_list);
  }
  
-static int loop_make_request(struct request_queue *q, struct bio *old_bio)
+static void loop_make_request(struct request_queue *q, struct bio *old_bio)
  {
         struct loop_device *lo = q->queuedata;
         int rw = bio_rw(old_bio);
@@ -532,12 +455,11 @@ static int loop_make_request(struct request_queue *q, struct bio *old_bio)
         loop_add_bio(lo, old_bio);
         wake_up(&lo->lo_event);
         spin_unlock_irq(&lo->lo_lock);
-       return 0;
+       return;
  
  out:
         spin_unlock_irq(&lo->lo_lock);
         bio_io_error(old_bio);
-       return 0;
  }
  
  struct switch_request {
@@ -849,35 +771,23 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
         mapping = file->f_mapping;
         inode = mapping->host;
  
-       if (!(file->f_mode & FMODE_WRITE))
-               lo_flags |= LO_FLAGS_READ_ONLY;
-
         error = -EINVAL;
-       if (S_ISREG(inode->i_mode) || S_ISBLK(inode->i_mode)) {
-               const struct address_space_operations *aops = mapping->a_ops;
-
-               if (aops->write_begin)
-                       lo_flags |= LO_FLAGS_USE_AOPS;
-               if (!(lo_flags & LO_FLAGS_USE_AOPS) && !file->f_op->write)
-                       lo_flags |= LO_FLAGS_READ_ONLY;
+       if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode))
+               goto out_putf;
  
-               lo_blocksize = S_ISBLK(inode->i_mode) ?
-                       inode->i_bdev->bd_block_size : PAGE_SIZE;
+       if (!(file->f_mode & FMODE_WRITE) || !(mode & FMODE_WRITE) ||
+           !file->f_op->write)
+               lo_flags |= LO_FLAGS_READ_ONLY;
  
-               error = 0;
-       } else {
-               goto out_putf;
-       }
+       lo_blocksize = S_ISBLK(inode->i_mode) ?
+               inode->i_bdev->bd_block_size : PAGE_SIZE;
  
+       error = -EFBIG;
         size = get_loop_size(lo, file);
-
-       if ((loff_t)(sector_t)size != size) {
-               error = -EFBIG;
+       if ((loff_t)(sector_t)size != size)
                 goto out_putf;
-       }
  
-       if (!(mode & FMODE_WRITE))
-               lo_flags |= LO_FLAGS_READ_ONLY;
+       error = 0;
  
         set_device_ro(bdev, (lo_flags & LO_FLAGS_READ_ONLY) != 0);
  
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c

index e133f09..a63b0a2 100644 (file)
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -2444,7 +2444,7 @@ static void pkt_end_io_read_cloned(struct bio *bio, int err)
         pkt_bio_finished(pd);
  }
  
-static int pkt_make_request(struct request_queue *q, struct bio *bio)
+static void pkt_make_request(struct request_queue *q, struct bio *bio)
  {
         struct pktcdvd_device *pd;
         char b[BDEVNAME_SIZE];
@@ -2473,7 +2473,7 @@ static int pkt_make_request(struct request_queue *q, struct bio *bio)
                 cloned_bio->bi_end_io = pkt_end_io_read_cloned;
                 pd->stats.secs_r += bio->bi_size >> 9;
                 pkt_queue_bio(pd, cloned_bio);
-               return 0;
+               return;
         }
  
         if (!test_bit(PACKET_WRITABLE, &pd->flags)) {
@@ -2509,7 +2509,7 @@ static int pkt_make_request(struct request_queue *q, struct bio *bio)
                         pkt_make_request(q, &bp->bio1);
                         pkt_make_request(q, &bp->bio2);
                         bio_pair_release(bp);
-                       return 0;
+                       return;
                 }
         }
  
@@ -2533,7 +2533,7 @@ static int pkt_make_request(struct request_queue *q, struct bio *bio)
                                 }
                                 spin_unlock(&pkt->lock);
                                 spin_unlock(&pd->cdrw.active_list_lock);
-                               return 0;
+                               return;
                         } else {
                                 blocked_bio = 1;
                         }
@@ -2584,10 +2584,9 @@ static int pkt_make_request(struct request_queue *q, struct bio *bio)
                  */
                 wake_up(&pd->wqueue);
         }
-       return 0;
+       return;
  end_io:
         bio_io_error(bio);
-       return 0;
  }
  
  
diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c

index b3bdb8a..7fad7af 100644 (file)
--- a/drivers/block/ps3vram.c
+++ b/drivers/block/ps3vram.c
@@ -596,7 +596,7 @@ out:
         return next;
  }
  
-static int ps3vram_make_request(struct request_queue *q, struct bio *bio)
+static void ps3vram_make_request(struct request_queue *q, struct bio *bio)
  {
         struct ps3_system_bus_device *dev = q->queuedata;
         struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev);
@@ -610,13 +610,11 @@ static int ps3vram_make_request(struct request_queue *q, struct bio *bio)
         spin_unlock_irq(&priv->lock);
  
         if (busy)
-               return 0;
+               return;
  
         do {
                 bio = ps3vram_do_bio(dev, bio);
         } while (bio);
-
-       return 0;
  }
  
  static int __devinit ps3vram_probe(struct ps3_system_bus_device *dev)
diff --git a/drivers/block/umem.c b/drivers/block/umem.c

index 031ca72..aa27120 100644 (file)
--- a/drivers/block/umem.c
+++ b/drivers/block/umem.c
@@ -513,7 +513,7 @@ static void process_page(unsigned long data)
         }
  }
  
-static int mm_make_request(struct request_queue *q, struct bio *bio)
+static void mm_make_request(struct request_queue *q, struct bio *bio)
  {
         struct cardinfo *card = q->queuedata;
         pr_debug("mm_make_request %llu %u\n",
@@ -525,7 +525,7 @@ static int mm_make_request(struct request_queue *q, struct bio *bio)
         card->biotail = &bio->bi_next;
         spin_unlock_irq(&card->lock);
  
-       return 0;
+       return;
  }
  
  static irqreturn_t mm_interrupt(int irq, void *__card)
diff --git a/drivers/md/dm.c b/drivers/md/dm.c

index 6b6616a..4720f68 100644 (file)
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -192,9 +192,6 @@ struct mapped_device {
         /* forced geometry settings */
         struct hd_geometry geometry;
  
-       /* For saving the address of __make_request for request based dm */
-       make_request_fn *saved_make_request_fn;
-
         /* sysfs handle */
         struct kobject kobj;
  
@@ -1403,7 +1400,7 @@ out:
   * The request function that just remaps the bio built up by
   * dm_merge_bvec.
   */
-static int _dm_request(struct request_queue *q, struct bio *bio)
+static void _dm_request(struct request_queue *q, struct bio *bio)
  {
         int rw = bio_data_dir(bio);
         struct mapped_device *md = q->queuedata;
@@ -1424,19 +1421,12 @@ static int _dm_request(struct request_queue *q, struct bio *bio)
                         queue_io(md, bio);
                 else
                         bio_io_error(bio);
-               return 0;
+               return;
         }
  
         __split_and_process_bio(md, bio);
         up_read(&md->io_lock);
-       return 0;
-}
-
-static int dm_make_request(struct request_queue *q, struct bio *bio)
-{
-       struct mapped_device *md = q->queuedata;
-
-       return md->saved_make_request_fn(q, bio); /* call __make_request() */
+       return;
  }
  
  static int dm_request_based(struct mapped_device *md)
@@ -1444,14 +1434,14 @@ static int dm_request_based(struct mapped_device *md)
         return blk_queue_stackable(md->queue);
  }
  
-static int dm_request(struct request_queue *q, struct bio *bio)
+static void dm_request(struct request_queue *q, struct bio *bio)
  {
         struct mapped_device *md = q->queuedata;
  
         if (dm_request_based(md))
-               return dm_make_request(q, bio);
-
-       return _dm_request(q, bio);
+               blk_queue_bio(q, bio);
+       else
+               _dm_request(q, bio);
  }
  
  void dm_dispatch_request(struct request *rq)
@@ -2191,7 +2181,6 @@ static int dm_init_request_based_queue(struct mapped_device *md)
                 return 0;
  
         md->queue = q;
-       md->saved_make_request_fn = md->queue->make_request_fn;
         dm_init_md_queue(md);
         blk_queue_softirq_done(md->queue, dm_softirq_done);
         blk_queue_prep_rq(md->queue, dm_prep_fn);
diff --git a/drivers/md/faulty.c b/drivers/md/faulty.c

index 60816b1..918fb8a 100644 (file)
--- a/drivers/md/faulty.c
+++ b/drivers/md/faulty.c
@@ -169,7 +169,7 @@ static void add_sector(struct faulty_conf *conf, sector_t start, int mode)
                 conf->nfaults = n+1;
  }
  
-static int make_request(struct mddev *mddev, struct bio *bio)
+static void make_request(struct mddev *mddev, struct bio *bio)
  {
         struct faulty_conf *conf = mddev->private;
         int failit = 0;
@@ -181,7 +181,7 @@ static int make_request(struct mddev *mddev, struct bio *bio)
                          * just fail immediately
                          */
                         bio_endio(bio, -EIO);
-                       return 0;
+                       return;
                 }
  
                 if (check_sector(conf, bio->bi_sector, bio->bi_sector+(bio->bi_size>>9),
@@ -211,15 +211,15 @@ static int make_request(struct mddev *mddev, struct bio *bio)
         }
         if (failit) {
                 struct bio *b = bio_clone_mddev(bio, GFP_NOIO, mddev);
+
                 b->bi_bdev = conf->rdev->bdev;
                 b->bi_private = bio;
                 b->bi_end_io = faulty_fail;
-               generic_make_request(b);
-               return 0;
-       } else {
+               bio = b;
+       } else
                 bio->bi_bdev = conf->rdev->bdev;
-               return 1;
-       }
+
+       generic_make_request(bio);
  }
  
  static void status(struct seq_file *seq, struct mddev *mddev)
diff --git a/drivers/md/linear.c b/drivers/md/linear.c

index 10c5844..a820358 100644 (file)
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -264,14 +264,14 @@ static int linear_stop (struct mddev *mddev)
         return 0;
  }
  
-static int linear_make_request (struct mddev *mddev, struct bio *bio)
+static void linear_make_request(struct mddev *mddev, struct bio *bio)
  {
         struct dev_info *tmp_dev;
         sector_t start_sector;
  
         if (unlikely(bio->bi_rw & REQ_FLUSH)) {
                 md_flush_request(mddev, bio);
-               return 0;
+               return;
         }
  
         rcu_read_lock();
@@ -293,7 +293,7 @@ static int linear_make_request (struct mddev *mddev, struct bio *bio)
                        (unsigned long long)start_sector);
                 rcu_read_unlock();
                 bio_io_error(bio);
-               return 0;
+               return;
         }
         if (unlikely(bio->bi_sector + (bio->bi_size >> 9) >
                      tmp_dev->end_sector)) {
@@ -307,20 +307,17 @@ static int linear_make_request (struct mddev *mddev, struct bio *bio)
  
                 bp = bio_split(bio, end_sector - bio->bi_sector);
  
-               if (linear_make_request(mddev, &bp->bio1))
-                       generic_make_request(&bp->bio1);
-               if (linear_make_request(mddev, &bp->bio2))
-                       generic_make_request(&bp->bio2);
+               linear_make_request(mddev, &bp->bio1);
+               linear_make_request(mddev, &bp->bio2);
                 bio_pair_release(bp);
-               return 0;
+               return;
         }
                     
         bio->bi_bdev = tmp_dev->rdev->bdev;
         bio->bi_sector = bio->bi_sector - start_sector
                 + tmp_dev->rdev->data_offset;
         rcu_read_unlock();
-
-       return 1;
+       generic_make_request(bio);
  }
  
  static void linear_status (struct seq_file *seq, struct mddev *mddev)
diff --git a/drivers/md/md.c b/drivers/md/md.c

index 266e82e..2acb328 100644 (file)
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -332,18 +332,17 @@ static DEFINE_SPINLOCK(all_mddevs_lock);
   * call has finished, the bio has been linked into some internal structure
   * and so is visible to ->quiesce(), so we don't need the refcount any more.
   */
-static int md_make_request(struct request_queue *q, struct bio *bio)
+static void md_make_request(struct request_queue *q, struct bio *bio)
  {
         const int rw = bio_data_dir(bio);
         struct mddev *mddev = q->queuedata;
-       int rv;
         int cpu;
         unsigned int sectors;
  
         if (mddev == NULL || mddev->pers == NULL
             || !mddev->ready) {
                 bio_io_error(bio);
-               return 0;
+               return;
         }
         smp_rmb(); /* Ensure implications of  'active' are visible */
         rcu_read_lock();
@@ -368,7 +367,7 @@ static int md_make_request(struct request_queue *q, struct bio *bio)
          * go away inside make_request
          */
         sectors = bio_sectors(bio);
-       rv = mddev->pers->make_request(mddev, bio);
+       mddev->pers->make_request(mddev, bio);
  
         cpu = part_stat_lock();
         part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]);
@@ -377,8 +376,6 @@ static int md_make_request(struct request_queue *q, struct bio *bio)
  
         if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended)
                 wake_up(&mddev->sb_wait);
-
-       return rv;
  }
  
  /* mddev_suspend makes sure no new requests are submitted
@@ -477,8 +474,7 @@ static void md_submit_flush_data(struct work_struct *ws)
                 bio_endio(bio, 0);
         else {
                 bio->bi_rw &= ~REQ_FLUSH;
-               if (mddev->pers->make_request(mddev, bio))
-                       generic_make_request(bio);
+               mddev->pers->make_request(mddev, bio);
         }
  
         mddev->flush_bio = NULL;
diff --git a/drivers/md/md.h b/drivers/md/md.h

index 51c1d91..cf742d9 100644 (file)
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -419,7 +419,7 @@ struct md_personality
         int level;
         struct list_head list;
         struct module *owner;
-       int (*make_request)(struct mddev *mddev, struct bio *bio);
+       void (*make_request)(struct mddev *mddev, struct bio *bio);
         int (*run)(struct mddev *mddev);
         int (*stop)(struct mddev *mddev);
         void (*status)(struct seq_file *seq, struct mddev *mddev);
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c

index d32c785..ad20a28 100644 (file)
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -106,7 +106,7 @@ static void multipath_end_request(struct bio *bio, int error)
         rdev_dec_pending(rdev, conf->mddev);
  }
  
-static int multipath_make_request(struct mddev *mddev, struct bio * bio)
+static void multipath_make_request(struct mddev *mddev, struct bio * bio)
  {
         struct mpconf *conf = mddev->private;
         struct multipath_bh * mp_bh;
@@ -114,7 +114,7 @@ static int multipath_make_request(struct mddev *mddev, struct bio * bio)
  
         if (unlikely(bio->bi_rw & REQ_FLUSH)) {
                 md_flush_request(mddev, bio);
-               return 0;
+               return;
         }
  
         mp_bh = mempool_alloc(conf->pool, GFP_NOIO);
@@ -126,7 +126,7 @@ static int multipath_make_request(struct mddev *mddev, struct bio * bio)
         if (mp_bh->path < 0) {
                 bio_endio(bio, -EIO);
                 mempool_free(mp_bh, conf->pool);
-               return 0;
+               return;
         }
         multipath = conf->multipaths + mp_bh->path;
  
@@ -137,7 +137,7 @@ static int multipath_make_request(struct mddev *mddev, struct bio * bio)
         mp_bh->bio.bi_end_io = multipath_end_request;
         mp_bh->bio.bi_private = mp_bh;
         generic_make_request(&mp_bh->bio);
-       return 0;
+       return;
  }
  
  static void multipath_status (struct seq_file *seq, struct mddev *mddev)
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c

index 0eb08a4..27e19e2 100644 (file)
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -468,7 +468,7 @@ static inline int is_io_in_chunk_boundary(struct mddev *mddev,
         }
  }
  
-static int raid0_make_request(struct mddev *mddev, struct bio *bio)
+static void raid0_make_request(struct mddev *mddev, struct bio *bio)
  {
         unsigned int chunk_sects;
         sector_t sector_offset;
@@ -477,7 +477,7 @@ static int raid0_make_request(struct mddev *mddev, struct bio *bio)
  
         if (unlikely(bio->bi_rw & REQ_FLUSH)) {
                 md_flush_request(mddev, bio);
-               return 0;
+               return;
         }
  
         chunk_sects = mddev->chunk_sectors;
@@ -497,13 +497,10 @@ static int raid0_make_request(struct mddev *mddev, struct bio *bio)
                 else
                         bp = bio_split(bio, chunk_sects -
                                        sector_div(sector, chunk_sects));
-               if (raid0_make_request(mddev, &bp->bio1))
-                       generic_make_request(&bp->bio1);
-               if (raid0_make_request(mddev, &bp->bio2))
-                       generic_make_request(&bp->bio2);
-
+               raid0_make_request(mddev, &bp->bio1);
+               raid0_make_request(mddev, &bp->bio2);
                 bio_pair_release(bp);
-               return 0;
+               return;
         }
  
         sector_offset = bio->bi_sector;
@@ -513,10 +510,9 @@ static int raid0_make_request(struct mddev *mddev, struct bio *bio)
         bio->bi_bdev = tmp_dev->bdev;
         bio->bi_sector = sector_offset + zone->dev_start +
                 tmp_dev->data_offset;
-       /*
-        * Let the main block layer submit the IO and resolve recursion:
-        */
-       return 1;
+
+       generic_make_request(bio);
+       return;
  
  bad_map:
         printk("md/raid0:%s: make_request bug: can't convert block across chunks"
@@ -525,7 +521,7 @@ bad_map:
                (unsigned long long)bio->bi_sector, bio->bi_size >> 10);
  
         bio_io_error(bio);
-       return 0;
+       return;
  }
  
  static void raid0_status(struct seq_file *seq, struct mddev *mddev)
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c

index 4602fc5..cae8746 100644 (file)
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -807,7 +807,7 @@ do_sync_io:
         pr_debug("%dB behind alloc failed, doing sync I/O\n", bio->bi_size);
  }
  
-static int make_request(struct mddev *mddev, struct bio * bio)
+static void make_request(struct mddev *mddev, struct bio * bio)
  {
         struct r1conf *conf = mddev->private;
         struct mirror_info *mirror;
@@ -892,7 +892,7 @@ read_again:
                 if (rdisk < 0) {
                         /* couldn't find anywhere to read from */
                         raid_end_bio_io(r1_bio);
-                       return 0;
+                       return;
                 }
                 mirror = conf->mirrors + rdisk;
  
@@ -950,7 +950,7 @@ read_again:
                         goto read_again;
                 } else
                         generic_make_request(read_bio);
-               return 0;
+               return;
         }
  
         /*
@@ -1151,8 +1151,6 @@ read_again:
  
         if (do_sync || !bitmap || !plugged)
                 md_wakeup_thread(mddev->thread);
-
-       return 0;
  }
  
  static void status(struct seq_file *seq, struct mddev *mddev)
@@ -2193,7 +2191,6 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp
                 bio->bi_next = NULL;
                 bio->bi_flags &= ~(BIO_POOL_MASK-1);
                 bio->bi_flags |= 1 << BIO_UPTODATE;
-               bio->bi_comp_cpu = -1;
                 bio->bi_rw = READ;
                 bio->bi_vcnt = 0;
                 bio->bi_idx = 0;
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c

index c025a82..dde6dd4 100644 (file)
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -842,7 +842,7 @@ static void unfreeze_array(struct r10conf *conf)
         spin_unlock_irq(&conf->resync_lock);
  }
  
-static int make_request(struct mddev *mddev, struct bio * bio)
+static void make_request(struct mddev *mddev, struct bio * bio)
  {
         struct r10conf *conf = mddev->private;
         struct mirror_info *mirror;
@@ -861,7 +861,7 @@ static int make_request(struct mddev *mddev, struct bio * bio)
  
         if (unlikely(bio->bi_rw & REQ_FLUSH)) {
                 md_flush_request(mddev, bio);
-               return 0;
+               return;
         }
  
         /* If this request crosses a chunk boundary, we need to
@@ -893,10 +893,8 @@ static int make_request(struct mddev *mddev, struct bio * bio)
                 conf->nr_waiting++;
                 spin_unlock_irq(&conf->resync_lock);
  
-               if (make_request(mddev, &bp->bio1))
-                       generic_make_request(&bp->bio1);
-               if (make_request(mddev, &bp->bio2))
-                       generic_make_request(&bp->bio2);
+               make_request(mddev, &bp->bio1);
+               make_request(mddev, &bp->bio2);
  
                 spin_lock_irq(&conf->resync_lock);
                 conf->nr_waiting--;
@@ -904,14 +902,14 @@ static int make_request(struct mddev *mddev, struct bio * bio)
                 spin_unlock_irq(&conf->resync_lock);
  
                 bio_pair_release(bp);
-               return 0;
+               return;
         bad_map:
                 printk("md/raid10:%s: make_request bug: can't convert block across chunks"
                        " or bigger than %dk %llu %d\n", mdname(mddev), chunk_sects/2,
                        (unsigned long long)bio->bi_sector, bio->bi_size >> 10);
  
                 bio_io_error(bio);
-               return 0;
+               return;
         }
  
         md_write_start(mddev, bio);
@@ -954,7 +952,7 @@ read_again:
                 slot = r10_bio->read_slot;
                 if (disk < 0) {
                         raid_end_bio_io(r10_bio);
-                       return 0;
+                       return;
                 }
                 mirror = conf->mirrors + disk;
  
@@ -1002,7 +1000,7 @@ read_again:
                         goto read_again;
                 } else
                         generic_make_request(read_bio);
-               return 0;
+               return;
         }
  
         /*
@@ -1176,7 +1174,6 @@ retry_write:
  
         if (do_sync || !mddev->bitmap || !plugged)
                 md_wakeup_thread(mddev->thread);
-       return 0;
  }
  
  static void status(struct seq_file *seq, struct mddev *mddev)
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c

index f6fe053..bb1b461 100644 (file)
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -3688,7 +3688,7 @@ static struct stripe_head *__get_priority_stripe(struct r5conf *conf)
         return sh;
  }
  
-static int make_request(struct mddev *mddev, struct bio * bi)
+static void make_request(struct mddev *mddev, struct bio * bi)
  {
         struct r5conf *conf = mddev->private;
         int dd_idx;
@@ -3701,7 +3701,7 @@ static int make_request(struct mddev *mddev, struct bio * bi)
  
         if (unlikely(bi->bi_rw & REQ_FLUSH)) {
                 md_flush_request(mddev, bi);
-               return 0;
+               return;
         }
  
         md_write_start(mddev, bi);
@@ -3709,7 +3709,7 @@ static int make_request(struct mddev *mddev, struct bio * bi)
         if (rw == READ &&
              mddev->reshape_position == MaxSector &&
              chunk_aligned_read(mddev,bi))
-               return 0;
+               return;
  
         logical_sector = bi->bi_sector & ~((sector_t)STRIPE_SECTORS-1);
         last_sector = bi->bi_sector + (bi->bi_size>>9);
@@ -3844,8 +3844,6 @@ static int make_request(struct mddev *mddev, struct bio * bi)
  
                 bio_endio(bi, 0);
         }
-
-       return 0;
  }
  
  static sector_t raid5_size(struct mddev *mddev, sector_t sectors, int raid_disks);
diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c

index 9b43ae9..a5a55da 100644 (file)
--- a/drivers/s390/block/dcssblk.c
+++ b/drivers/s390/block/dcssblk.c
@@ -27,7 +27,7 @@
  
  static int dcssblk_open(struct block_device *bdev, fmode_t mode);
  static int dcssblk_release(struct gendisk *disk, fmode_t mode);
-static int dcssblk_make_request(struct request_queue *q, struct bio *bio);
+static void dcssblk_make_request(struct request_queue *q, struct bio *bio);
  static int dcssblk_direct_access(struct block_device *bdev, sector_t secnum,
                                  void **kaddr, unsigned long *pfn);
  
@@ -814,7 +814,7 @@ out:
         return rc;
  }
  
-static int
+static void
  dcssblk_make_request(struct request_queue *q, struct bio *bio)
  {
         struct dcssblk_dev_info *dev_info;
@@ -871,10 +871,9 @@ dcssblk_make_request(struct request_queue *q, struct bio *bio)
                 bytes_done += bvec->bv_len;
         }
         bio_endio(bio, 0);
-       return 0;
+       return;
  fail:
         bio_io_error(bio);
-       return 0;
  }
  
  static int
diff --git a/drivers/s390/block/xpram.c b/drivers/s390/block/xpram.c

index 1f6a4d8..98f3e4a 100644 (file)
--- a/drivers/s390/block/xpram.c
+++ b/drivers/s390/block/xpram.c
@@ -181,7 +181,7 @@ static unsigned long xpram_highest_page_index(void)
  /*
   * Block device make request function.
   */
-static int xpram_make_request(struct request_queue *q, struct bio *bio)
+static void xpram_make_request(struct request_queue *q, struct bio *bio)
  {
         xpram_device_t *xdev = bio->bi_bdev->bd_disk->private_data;
         struct bio_vec *bvec;
@@ -221,10 +221,9 @@ static int xpram_make_request(struct request_queue *q, struct bio *bio)
         }
         set_bit(BIO_UPTODATE, &bio->bi_flags);
         bio_endio(bio, 0);
-       return 0;
+       return;
  fail:
         bio_io_error(bio);
-       return 0;
  }
  
  static int xpram_getgeo(struct block_device *bdev, struct hd_geometry *geo)
diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c

index b9926ee..09de99f 100644 (file)
--- a/drivers/staging/zram/zram_drv.c
+++ b/drivers/staging/zram/zram_drv.c
@@ -556,7 +556,7 @@ static inline int valid_io_request(struct zram *zram, struct bio *bio)
  /*
   * Handler function for all zram I/O requests.
   */
-static int zram_make_request(struct request_queue *queue, struct bio *bio)
+static void zram_make_request(struct request_queue *queue, struct bio *bio)
  {
         struct zram *zram = queue->queuedata;
  
@@ -575,13 +575,12 @@ static int zram_make_request(struct request_queue *queue, struct bio *bio)
         __zram_make_request(zram, bio, bio_data_dir(bio));
         up_read(&zram->init_lock);
  
-       return 0;
+       return;
  
  error_unlock:
         up_read(&zram->init_lock);
  error:
         bio_io_error(bio);
-       return 0;
  }
  
  void __zram_reset_device(struct zram *zram)
diff --git a/fs/bio.c b/fs/bio.c

index 9bfade8..41c93c7 100644 (file)
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -255,7 +255,6 @@ void bio_init(struct bio *bio)
  {
         memset(bio, 0, sizeof(*bio));
         bio->bi_flags = 1 << BIO_UPTODATE;
-       bio->bi_comp_cpu = -1;
         atomic_set(&bio->bi_cnt, 1);
  }
  EXPORT_SYMBOL(bio_init);
diff --git a/fs/block_dev.c b/fs/block_dev.c

index 95f786e..1c44b8d 100644 (file)
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1085,6 +1085,7 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part);
  static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
  {
         struct gendisk *disk;
+       struct module *owner;
         int ret;
         int partno;
         int perm = 0;
@@ -1110,6 +1111,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
         disk = get_gendisk(bdev->bd_dev, &partno);
         if (!disk)
                 goto out;
+       owner = disk->fops->owner;
  
         disk_block_events(disk);
         mutex_lock_nested(&bdev->bd_mutex, for_part);
@@ -1137,8 +1139,8 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
                                         bdev->bd_disk = NULL;
                                         mutex_unlock(&bdev->bd_mutex);
                                         disk_unblock_events(disk);
-                                       module_put(disk->fops->owner);
                                         put_disk(disk);
+                                       module_put(owner);
                                         goto restart;
                                 }
                         }
@@ -1194,8 +1196,8 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
                                 goto out_unlock_bdev;
                 }
                 /* only one opener holds refs to the module and disk */
-               module_put(disk->fops->owner);
                 put_disk(disk);
+               module_put(owner);
         }
         bdev->bd_openers++;
         if (for_part)
@@ -1215,8 +1217,8 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
   out_unlock_bdev:
         mutex_unlock(&bdev->bd_mutex);
         disk_unblock_events(disk);
-       module_put(disk->fops->owner);
         put_disk(disk);
+       module_put(owner);
   out:
         bdput(bdev);
  
@@ -1442,14 +1444,15 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
         if (!bdev->bd_openers) {
                 struct module *owner = disk->fops->owner;
  
-               put_disk(disk);
-               module_put(owner);
                 disk_put_part(bdev->bd_part);
                 bdev->bd_part = NULL;
                 bdev->bd_disk = NULL;
                 if (bdev != bdev->bd_contains)
                         victim = bdev->bd_contains;
                 bdev->bd_contains = NULL;
+
+               put_disk(disk);
+               module_put(owner);
         }
         mutex_unlock(&bdev->bd_mutex);
         bdput(bdev);
diff --git a/include/linux/bio.h b/include/linux/bio.h

index ce33e68..a3c071c 100644 (file)
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -268,14 +268,6 @@ extern struct bio_vec *bvec_alloc_bs(gfp_t, int, unsigned long *, struct bio_set
  extern void bvec_free_bs(struct bio_set *, struct bio_vec *, unsigned int);
  extern unsigned int bvec_nr_vecs(unsigned short idx);
  
-/*
- * Allow queuer to specify a completion CPU for this bio
- */
-static inline void bio_set_completion_cpu(struct bio *bio, unsigned int cpu)
-{
-       bio->bi_comp_cpu = cpu;
-}
-
  /*
   * bio_set is used to allow other portions of the IO system to
   * allocate their own private memory pools for bio and iovec structures.
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h

index 71fc53b..4053cbd 100644 (file)
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -59,8 +59,6 @@ struct bio {
  
         unsigned int            bi_max_vecs;    /* max bvl_vecs we can hold */
  
-       unsigned int            bi_comp_cpu;    /* completion CPU */
-
         atomic_t                bi_cnt;         /* pin count */
  
         struct bio_vec          *bi_io_vec;     /* the actual vec list */
@@ -93,11 +91,10 @@ struct bio {
  #define BIO_BOUNCED    5       /* bio is a bounce bio */
  #define BIO_USER_MAPPED 6      /* contains user pages */
  #define BIO_EOPNOTSUPP 7       /* not supported */
-#define BIO_CPU_AFFINE 8       /* complete bio on same CPU as submitted */
-#define BIO_NULL_MAPPED 9      /* contains invalid user pages */
-#define BIO_FS_INTEGRITY 10    /* fs owns integrity data, not block layer */
-#define BIO_QUIET      11      /* Make BIO Quiet */
-#define BIO_MAPPED_INTEGRITY 12/* integrity metadata has been remapped */
+#define BIO_NULL_MAPPED 8      /* contains invalid user pages */
+#define BIO_FS_INTEGRITY 9     /* fs owns integrity data, not block layer */
+#define BIO_QUIET      10      /* Make BIO Quiet */
+#define BIO_MAPPED_INTEGRITY 11/* integrity metadata has been remapped */
  #define bio_flagged(bio, flag) ((bio)->bi_flags & (1 << (flag)))
  
  /*
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h

index 7fbaa91..5267cd2 100644 (file)
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -195,7 +195,7 @@ struct request_pm_state
  #include <linux/elevator.h>
  
  typedef void (request_fn_proc) (struct request_queue *q);
-typedef int (make_request_fn) (struct request_queue *q, struct bio *bio);
+typedef void (make_request_fn) (struct request_queue *q, struct bio *bio);
  typedef int (prep_rq_fn) (struct request_queue *, struct request *);
  typedef void (unprep_rq_fn) (struct request_queue *, struct request *);
  
@@ -680,6 +680,8 @@ extern int scsi_cmd_ioctl(struct request_queue *, struct gendisk *, fmode_t,
  extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t,
                          struct scsi_ioctl_command __user *);
  
+extern void blk_queue_bio(struct request_queue *q, struct bio *bio);
+
  /*
   * A queue has just exitted congestion.  Note this in the global counter of
   * congested queues, and wake up anyone who was waiting for requests to be
@@ -863,16 +865,22 @@ struct request_queue *blk_alloc_queue_node(gfp_t, int);
  extern void blk_put_queue(struct request_queue *);
  
  /*
- * Note: Code in between changing the blk_plug list/cb_list or element of such
- * lists is preemptable, but such code can't do sleep (or be very careful),
- * otherwise data is corrupted. For details, please check schedule() where
- * blk_schedule_flush_plug() is called.
+ * blk_plug permits building a queue of related requests by holding the I/O
+ * fragments for a short period. This allows merging of sequential requests
+ * into single larger request. As the requests are moved from a per-task list to
+ * the device's request_queue in a batch, this results in improved scalability
+ * as the lock contention for request_queue lock is reduced.
+ *
+ * It is ok not to disable preemption when adding the request to the plug list
+ * or when attempting a merge, because blk_schedule_flush_list() will only flush
+ * the plug list when the task sleeps by itself. For details, please see
+ * schedule() where blk_schedule_flush_plug() is called.
   */
  struct blk_plug {
-       unsigned long magic;
-       struct list_head list;
-       struct list_head cb_list;
-       unsigned int should_sort;
+       unsigned long magic; /* detect uninitialized use-cases */
+       struct list_head list; /* requests */
+       struct list_head cb_list; /* md requires an unplug callback */
+       unsigned int should_sort; /* list to be sorted before flushing? */
  };
  #define BLK_MAX_REQUEST_COUNT 16
  
@@ -1189,20 +1197,6 @@ static inline uint64_t rq_io_start_time_ns(struct request *req)
  }
  #endif
  
-#ifdef CONFIG_BLK_DEV_THROTTLING
-extern int blk_throtl_init(struct request_queue *q);
-extern void blk_throtl_exit(struct request_queue *q);
-extern int blk_throtl_bio(struct request_queue *q, struct bio **bio);
-#else /* CONFIG_BLK_DEV_THROTTLING */
-static inline int blk_throtl_bio(struct request_queue *q, struct bio **bio)
-{
-       return 0;
-}
-
-static inline int blk_throtl_init(struct request_queue *q) { return 0; }
-static inline int blk_throtl_exit(struct request_queue *q) { return 0; }
-#endif /* CONFIG_BLK_DEV_THROTTLING */
-
  #define MODULE_ALIAS_BLOCKDEV(major,minor) \
         MODULE_ALIAS("block-major-" __stringify(major) "-" __stringify(minor))
  #define MODULE_ALIAS_BLOCKDEV_MAJOR(major) \
diff --git a/include/linux/elevator.h b/include/linux/elevator.h

index d800d51..1d0f7a2 100644 (file)
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -38,6 +38,12 @@ struct elevator_ops
         elevator_merged_fn *elevator_merged_fn;
         elevator_merge_req_fn *elevator_merge_req_fn;
         elevator_allow_merge_fn *elevator_allow_merge_fn;
+
+       /*
+        * Used for both plugged list and elevator merging and in the
+        * former case called without queue_lock.  Read comment on top of
+        * attempt_plug_merge() for details.
+        */
         elevator_bio_merged_fn *elevator_bio_merged_fn;
  
         elevator_dispatch_fn *elevator_dispatch_fn;
diff --git a/include/linux/loop.h b/include/linux/loop.h

index 683d698..a068806 100644 (file)
--- a/include/linux/loop.h
+++ b/include/linux/loop.h
@@ -73,7 +73,6 @@ struct loop_device {
   */
  enum {
         LO_FLAGS_READ_ONLY      = 1,
-       LO_FLAGS_USE_AOPS       = 2,
         LO_FLAGS_AUTOCLEAR      = 4,
  };
  
diff --git a/mm/bounce.c b/mm/bounce.c

index 1481de6..434fb4f 100644 (file)
--- a/mm/bounce.c
+++ b/mm/bounce.c
@@ -14,6 +14,7 @@
  #include <linux/init.h>
  #include <linux/hash.h>
  #include <linux/highmem.h>
+#include <linux/bootmem.h>
  #include <asm/tlbflush.h>
  
  #include <trace/events/block.h>
@@ -26,12 +27,10 @@ static mempool_t *page_pool, *isa_page_pool;
  #ifdef CONFIG_HIGHMEM
  static __init int init_emergency_pool(void)
  {
-       struct sysinfo i;
-       si_meminfo(&i);
-       si_swapinfo(&i);
-
-       if (!i.totalhigh)
+#ifndef CONFIG_MEMORY_HOTPLUG
+       if (max_pfn <= max_low_pfn)
                 return 0;
+#endif
  
         page_pool = mempool_create_page_pool(POOL_SIZE, 0);
         BUG_ON(!page_pool);
author	Linus Torvalds <torvalds@linux-foundation.org>
	Sat, 5 Nov 2011 00:06:58 +0000 (17:06 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Sat, 5 Nov 2011 00:06:58 +0000 (17:06 -0700)
arch/m68k/emu/nfblock.c		patch \| blob \| history
arch/powerpc/sysdev/axonram.c		patch \| blob \| history
block/blk-cgroup.c		patch \| blob \| history
block/blk-cgroup.h		patch \| blob \| history
block/blk-core.c		patch \| blob \| history
block/blk-flush.c		patch \| blob \| history
block/blk-sysfs.c		patch \| blob \| history
block/blk-tag.c		patch \| blob \| history
block/blk-throttle.c		patch \| blob \| history
block/blk.h		patch \| blob \| history
block/elevator.c		patch \| blob \| history
block/genhd.c		patch \| blob \| history
block/scsi_ioctl.c		patch \| blob \| history
drivers/block/aoe/aoeblk.c		patch \| blob \| history
drivers/block/brd.c		patch \| blob \| history
drivers/block/drbd/drbd_int.h		patch \| blob \| history
drivers/block/drbd/drbd_req.c		patch \| blob \| history
drivers/block/loop.c		patch \| blob \| history
drivers/block/pktcdvd.c		patch \| blob \| history
drivers/block/ps3vram.c		patch \| blob \| history
drivers/block/umem.c		patch \| blob \| history
drivers/md/dm.c		patch \| blob \| history
drivers/md/faulty.c		patch \| blob \| history
drivers/md/linear.c		patch \| blob \| history
drivers/md/md.c		patch \| blob \| history
drivers/md/md.h		patch \| blob \| history
drivers/md/multipath.c		patch \| blob \| history
drivers/md/raid0.c		patch \| blob \| history
drivers/md/raid1.c		patch \| blob \| history
drivers/md/raid10.c		patch \| blob \| history
drivers/md/raid5.c		patch \| blob \| history
drivers/s390/block/dcssblk.c		patch \| blob \| history
drivers/s390/block/xpram.c		patch \| blob \| history
drivers/staging/zram/zram_drv.c		patch \| blob \| history
fs/bio.c		patch \| blob \| history
fs/block_dev.c		patch \| blob \| history
include/linux/bio.h		patch \| blob \| history
include/linux/blk_types.h		patch \| blob \| history
include/linux/blkdev.h		patch \| blob \| history
include/linux/elevator.h		patch \| blob \| history
include/linux/loop.h		patch \| blob \| history
mm/bounce.c		patch \| blob \| history