Merge git://git.infradead.org/users/willy/linux-nvme
author     Linus Torvalds <torvalds@linux-foundation.org>
           Mon, 16 Jun 2014 01:58:03 +0000 (15:58 -1000)
committer  Linus Torvalds <torvalds@linux-foundation.org>
           Mon, 16 Jun 2014 01:58:03 +0000 (15:58 -1000)
Pull NVMe update from Matthew Wilcox:
 "Mostly bugfixes again for the NVMe driver.  I'd like to call out the
  exported tracepoint in the block layer; I believe Keith has cleared
  this with Jens.

  We've had a few reports from people who're really pounding on NVMe
  devices at scale, hence the timeout changes (and new module
  parameters), hotplug cpu deadlock, tracepoints, and minor performance
  tweaks"

[ Jens hadn't seen that tracepoint thing, but is ok with it - it will
  end up going away when mq conversion happens ]
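
As background for the timeout changes and new module parameters mentioned
above, here is a minimal sketch of the pattern the driver adopts in the diff
below: a byte-sized module parameter, writable at runtime, expanded to
jiffies at the point of use.  The demo_* names are illustrative only and are
not part of the driver.

    /* Sketch of the timeout-as-module-parameter pattern; mirrors the
     * admin_timeout/io_timeout/retry_time parameters added further down. */
    #include <linux/module.h>
    #include <linux/moduleparam.h>
    #include <linux/kernel.h>
    #include <linux/jiffies.h>

    static unsigned char demo_timeout = 60;	/* seconds */
    module_param(demo_timeout, byte, 0644);
    MODULE_PARM_DESC(demo_timeout, "timeout in seconds for demo commands");

    /* Expanded where it is used, so later writes to
     * /sys/module/<module>/parameters/demo_timeout take effect. */
    #define DEMO_TIMEOUT	(demo_timeout * HZ)

    static int __init demo_init(void)
    {
            pr_info("demo: timeout %u s (%lu jiffies)\n",
                    demo_timeout, (unsigned long)DEMO_TIMEOUT);
            return 0;
    }
    module_init(demo_init);

    static void __exit demo_exit(void)
    {
    }
    module_exit(demo_exit);

    MODULE_LICENSE("GPL");

Because the real parameters are registered with mode 0644, they can be set at
load time (e.g. "modprobe nvme io_timeout=255") or adjusted afterwards through
/sys/module/nvme/parameters/; as byte parameters they cap out at 255 seconds.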

* git://git.infradead.org/users/willy/linux-nvme: (22 commits)
  NVMe: Fix START_STOP_UNIT Scsi->NVMe translation.
  NVMe: Use Log Page constants in SCSI emulation
  NVMe: Define Log Page constants
  NVMe: Fix hot cpu notification dead lock
  NVMe: Rename io_timeout to nvme_io_timeout
  NVMe: Use last bytes of f/w rev SCSI Inquiry
  NVMe: Adhere to request queue block accounting enable/disable
  NVMe: Fix nvme get/put queue semantics
  NVMe: Delete NVME_GET_FEAT_TEMP_THRESH
  NVMe: Make admin timeout a module parameter
  NVMe: Make iod bio timeout a parameter
  NVMe: Prevent possible NULL pointer dereference
  NVMe: Fix the buffer size passed in GetLogPage(CDW10.NUMD)
  NVMe: Update data structures for NVMe 1.2
  NVMe: Enable BUILD_BUG_ON checks
  NVMe: Update namespace and controller identify structures to the 1.1a spec
  NVMe: Flush with data support
  NVMe: Configure support for block flush
  NVMe: Add tracepoints
  NVMe: Protect against badly formatted CQEs
  ...
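
The tracepoint export called out above deserves a short illustration.  The
block layer defines block_split and block_bio_complete, but a bio-based
driver such as NVMe splits and completes bios itself, so the diff exports
block_split from blk-core.c and has nvme-core.c call the trace functions
directly.  A sketch of that usage (the demo_* helpers are illustrative, not
from the driver):

    #include <linux/bio.h>
    #include <linux/blkdev.h>
    #include <trace/events/block.h>

    /* Completion path: fire the tracepoint with the final error code and
     * then end the bio, as nvme-core.c's bio_completion() now does. */
    static void demo_end_bio(struct bio *bio, int error)
    {
            trace_block_bio_complete(bdev_get_queue(bio->bi_bdev), bio, error);
            bio_endio(bio, error);
    }

    /* Split path: record where the original bio was split before chaining
     * the halves, matching nvme_split_and_submit() below. */
    static void demo_split_bio(struct bio *bio, struct bio *split)
    {
            trace_block_split(bdev_get_queue(bio->bi_bdev), bio,
                              split->bi_iter.bi_sector);
            bio_chain(split, bio);
    }

Calling these from a module only works because the tracepoints are exported
with EXPORT_TRACEPOINT_SYMBOL_GPL in blk-core.c; as noted above, the NVMe-side
callers are expected to go away again once the driver is converted to blk-mq.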

block/blk-core.c
drivers/block/nvme-core.c

diff --combined block/blk-core.c
@@@ -43,6 -43,7 +43,7 @@@
  EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
  EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
  EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete);
+ EXPORT_TRACEPOINT_SYMBOL_GPL(block_split);
  EXPORT_TRACEPOINT_SYMBOL_GPL(block_unplug);
  
  DEFINE_IDA(blk_queue_ida);
@@@ -146,8 -147,8 +147,8 @@@ void blk_dump_rq_flags(struct request *
        printk(KERN_INFO "  sector %llu, nr/cnr %u/%u\n",
               (unsigned long long)blk_rq_pos(rq),
               blk_rq_sectors(rq), blk_rq_cur_sectors(rq));
 -      printk(KERN_INFO "  bio %p, biotail %p, buffer %p, len %u\n",
 -             rq->bio, rq->biotail, rq->buffer, blk_rq_bytes(rq));
 +      printk(KERN_INFO "  bio %p, biotail %p, len %u\n",
 +             rq->bio, rq->biotail, blk_rq_bytes(rq));
  
        if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
                printk(KERN_INFO "  cdb: ");
@@@ -251,10 -252,8 +252,10 @@@ void blk_sync_queue(struct request_queu
                struct blk_mq_hw_ctx *hctx;
                int i;
  
 -              queue_for_each_hw_ctx(q, hctx, i)
 -                      cancel_delayed_work_sync(&hctx->delayed_work);
 +              queue_for_each_hw_ctx(q, hctx, i) {
 +                      cancel_delayed_work_sync(&hctx->run_work);
 +                      cancel_delayed_work_sync(&hctx->delay_work);
 +              }
        } else {
                cancel_delayed_work_sync(&q->delay_work);
        }
@@@ -576,9 -575,12 +577,9 @@@ struct request_queue *blk_alloc_queue_n
        if (!q)
                return NULL;
  
 -      if (percpu_counter_init(&q->mq_usage_counter, 0))
 -              goto fail_q;
 -
        q->id = ida_simple_get(&blk_queue_ida, 0, 0, gfp_mask);
        if (q->id < 0)
 -              goto fail_c;
 +              goto fail_q;
  
        q->backing_dev_info.ra_pages =
                        (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
@@@ -636,6 -638,8 +637,6 @@@ fail_bdi
        bdi_destroy(&q->backing_dev_info);
  fail_id:
        ida_simple_remove(&blk_queue_ida, q->id);
 -fail_c:
 -      percpu_counter_destroy(&q->mq_usage_counter);
  fail_q:
        kmem_cache_free(blk_requestq_cachep, q);
        return NULL;
@@@ -843,47 -847,6 +844,47 @@@ static void freed_request(struct reques
                __freed_request(rl, sync ^ 1);
  }
  
 +int blk_update_nr_requests(struct request_queue *q, unsigned int nr)
 +{
 +      struct request_list *rl;
 +
 +      spin_lock_irq(q->queue_lock);
 +      q->nr_requests = nr;
 +      blk_queue_congestion_threshold(q);
 +
 +      /* congestion isn't cgroup aware and follows root blkcg for now */
 +      rl = &q->root_rl;
 +
 +      if (rl->count[BLK_RW_SYNC] >= queue_congestion_on_threshold(q))
 +              blk_set_queue_congested(q, BLK_RW_SYNC);
 +      else if (rl->count[BLK_RW_SYNC] < queue_congestion_off_threshold(q))
 +              blk_clear_queue_congested(q, BLK_RW_SYNC);
 +
 +      if (rl->count[BLK_RW_ASYNC] >= queue_congestion_on_threshold(q))
 +              blk_set_queue_congested(q, BLK_RW_ASYNC);
 +      else if (rl->count[BLK_RW_ASYNC] < queue_congestion_off_threshold(q))
 +              blk_clear_queue_congested(q, BLK_RW_ASYNC);
 +
 +      blk_queue_for_each_rl(rl, q) {
 +              if (rl->count[BLK_RW_SYNC] >= q->nr_requests) {
 +                      blk_set_rl_full(rl, BLK_RW_SYNC);
 +              } else {
 +                      blk_clear_rl_full(rl, BLK_RW_SYNC);
 +                      wake_up(&rl->wait[BLK_RW_SYNC]);
 +              }
 +
 +              if (rl->count[BLK_RW_ASYNC] >= q->nr_requests) {
 +                      blk_set_rl_full(rl, BLK_RW_ASYNC);
 +              } else {
 +                      blk_clear_rl_full(rl, BLK_RW_ASYNC);
 +                      wake_up(&rl->wait[BLK_RW_ASYNC]);
 +              }
 +      }
 +
 +      spin_unlock_irq(q->queue_lock);
 +      return 0;
 +}
 +
  /*
   * Determine if elevator data should be initialized when allocating the
   * request associated with @bio.
@@@ -1173,7 -1136,7 +1174,7 @@@ static struct request *blk_old_get_requ
  struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
  {
        if (q->mq_ops)
 -              return blk_mq_alloc_request(q, rw, gfp_mask);
 +              return blk_mq_alloc_request(q, rw, gfp_mask, false);
        else
                return blk_old_get_request(q, rw, gfp_mask);
  }
@@@ -1218,8 -1181,6 +1219,8 @@@ struct request *blk_make_request(struc
        if (unlikely(!rq))
                return ERR_PTR(-ENOMEM);
  
 +      blk_rq_set_block_pc(rq);
 +
        for_each_bio(bio) {
                struct bio *bounce_bio = bio;
                int ret;
  }
  EXPORT_SYMBOL(blk_make_request);
  
 +/**
 + * blk_rq_set_block_pc - initialize a request to type BLOCK_PC
 + * @rq:               request to be initialized
 + *
 + */
 +void blk_rq_set_block_pc(struct request *rq)
 +{
 +      rq->cmd_type = REQ_TYPE_BLOCK_PC;
 +      rq->__data_len = 0;
 +      rq->__sector = (sector_t) -1;
 +      rq->bio = rq->biotail = NULL;
 +      memset(rq->__cmd, 0, sizeof(rq->__cmd));
 +      rq->cmd = rq->__cmd;
 +}
 +EXPORT_SYMBOL(blk_rq_set_block_pc);
 +
  /**
   * blk_requeue_request - put a request back on queue
   * @q:                request queue where request should be inserted
@@@ -1287,15 -1232,12 +1288,15 @@@ static void add_acct_request(struct req
  static void part_round_stats_single(int cpu, struct hd_struct *part,
                                    unsigned long now)
  {
 +      int inflight;
 +
        if (now == part->stamp)
                return;
  
 -      if (part_in_flight(part)) {
 +      inflight = part_in_flight(part);
 +      if (inflight) {
                __part_stat_add(cpu, part, time_in_queue,
 -                              part_in_flight(part) * (now - part->stamp));
 +                              inflight * (now - part->stamp));
                __part_stat_add(cpu, part, io_ticks, (now - part->stamp));
        }
        part->stamp = now;
@@@ -1419,6 -1361,7 +1420,6 @@@ void blk_add_request_payload(struct req
  
        rq->__data_len = rq->resid_len = len;
        rq->nr_phys_segments = 1;
 -      rq->buffer = bio_data(bio);
  }
  EXPORT_SYMBOL_GPL(blk_add_request_payload);
  
@@@ -1460,6 -1403,12 +1461,6 @@@ bool bio_attempt_front_merge(struct req
        bio->bi_next = req->bio;
        req->bio = bio;
  
 -      /*
 -       * may not be valid. if the low level driver said
 -       * it didn't need a bounce buffer then it better
 -       * not touch req->buffer either...
 -       */
 -      req->buffer = bio_data(bio);
        req->__sector = bio->bi_iter.bi_sector;
        req->__data_len += bio->bi_iter.bi_size;
        req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));
   * added on the elevator at this point.  In addition, we don't have
   * reliable access to the elevator outside queue lock.  Only check basic
   * merging parameters without querying the elevator.
 + *
 + * Caller must ensure !blk_queue_nomerges(q) beforehand.
   */
  bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
                            unsigned int *request_count)
        bool ret = false;
        struct list_head *plug_list;
  
 -      if (blk_queue_nomerges(q))
 -              goto out;
 -
        plug = current->plug;
        if (!plug)
                goto out;
@@@ -1573,8 -1523,7 +1574,8 @@@ void blk_queue_bio(struct request_queu
         * Check if we can merge with the plugged list before grabbing
         * any locks.
         */
 -      if (blk_attempt_plug_merge(q, bio, &request_count))
 +      if (!blk_queue_nomerges(q) &&
 +          blk_attempt_plug_merge(q, bio, &request_count))
                return;
  
        spin_lock_irq(q->queue_lock);
@@@ -1706,7 -1655,7 +1707,7 @@@ static int __init fail_make_request_deb
        struct dentry *dir = fault_create_debugfs_attr("fail_make_request",
                                                NULL, &fail_make_request);
  
 -      return IS_ERR(dir) ? PTR_ERR(dir) : 0;
 +      return PTR_ERR_OR_ZERO(dir);
  }
  
  late_initcall(fail_make_request_debugfs);
@@@ -2486,6 -2435,7 +2487,6 @@@ bool blk_update_request(struct request 
        }
  
        req->__data_len -= total_bytes;
 -      req->buffer = bio_data(req->bio);
  
        /* update sector only for requests with clear definition of sector */
        if (req->cmd_type == REQ_TYPE_FS)
@@@ -2554,7 -2504,7 +2555,7 @@@ EXPORT_SYMBOL_GPL(blk_unprep_request)
  /*
   * queue lock must be held
   */
 -static void blk_finish_request(struct request *req, int error)
 +void blk_finish_request(struct request *req, int error)
  {
        if (blk_rq_tagged(req))
                blk_queue_end_tag(req->q, req);
                __blk_put_request(req->q, req);
        }
  }
 +EXPORT_SYMBOL(blk_finish_request);
  
  /**
   * blk_end_bidi_request - Complete a bidi request
@@@ -2804,9 -2753,10 +2805,9 @@@ void blk_rq_bio_prep(struct request_que
        /* Bit 0 (R/W) is identical in rq->cmd_flags and bio->bi_rw */
        rq->cmd_flags |= bio->bi_rw & REQ_WRITE;
  
 -      if (bio_has_data(bio)) {
 +      if (bio_has_data(bio))
                rq->nr_phys_segments = bio_phys_segments(q, bio);
 -              rq->buffer = bio_data(bio);
 -      }
 +
        rq->__data_len = bio->bi_iter.bi_size;
        rq->bio = rq->biotail = bio;
  
@@@ -2882,7 -2832,7 +2883,7 @@@ EXPORT_SYMBOL_GPL(blk_rq_unprep_clone)
  
  /*
   * Copy attributes of the original request to the clone request.
 - * The actual data parts (e.g. ->cmd, ->buffer, ->sense) are not copied.
 + * The actual data parts (e.g. ->cmd, ->sense) are not copied.
   */
  static void __blk_rq_prep_clone(struct request *dst, struct request *src)
  {
   *
   * Description:
   *     Clones bios in @rq_src to @rq, and copies attributes of @rq_src to @rq.
 - *     The actual data parts of @rq_src (e.g. ->cmd, ->buffer, ->sense)
 + *     The actual data parts of @rq_src (e.g. ->cmd, ->sense)
   *     are not copied, and copying such parts is the caller's responsibility.
   *     Also, pages which the original bios are pointing to are not copied
   *     and the cloned bios just point same pages.
@@@ -2955,25 -2905,20 +2956,25 @@@ free_and_out
  }
  EXPORT_SYMBOL_GPL(blk_rq_prep_clone);
  
 -int kblockd_schedule_work(struct request_queue *q, struct work_struct *work)
 +int kblockd_schedule_work(struct work_struct *work)
  {
        return queue_work(kblockd_workqueue, work);
  }
  EXPORT_SYMBOL(kblockd_schedule_work);
  
 -int kblockd_schedule_delayed_work(struct request_queue *q,
 -                      struct delayed_work *dwork, unsigned long delay)
 +int kblockd_schedule_delayed_work(struct delayed_work *dwork,
 +                                unsigned long delay)
  {
        return queue_delayed_work(kblockd_workqueue, dwork, delay);
  }
  EXPORT_SYMBOL(kblockd_schedule_delayed_work);
  
 -#define PLUG_MAGIC    0x91827364
 +int kblockd_schedule_delayed_work_on(int cpu, struct delayed_work *dwork,
 +                                   unsigned long delay)
 +{
 +      return queue_delayed_work_on(cpu, kblockd_workqueue, dwork, delay);
 +}
 +EXPORT_SYMBOL(kblockd_schedule_delayed_work_on);
  
  /**
   * blk_start_plug - initialize blk_plug and track it inside the task_struct
@@@ -2993,6 -2938,7 +2994,6 @@@ void blk_start_plug(struct blk_plug *pl
  {
        struct task_struct *tsk = current;
  
 -      plug->magic = PLUG_MAGIC;
        INIT_LIST_HEAD(&plug->list);
        INIT_LIST_HEAD(&plug->mq_list);
        INIT_LIST_HEAD(&plug->cb_list);
@@@ -3089,6 -3035,8 +3090,6 @@@ void blk_flush_plug_list(struct blk_plu
        LIST_HEAD(list);
        unsigned int depth;
  
 -      BUG_ON(plug->magic != PLUG_MAGIC);
 -
        flush_plug_callbacks(plug, from_schedule);
  
        if (!list_empty(&plug->mq_list))

diff --combined drivers/block/nvme-core.c
   * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   * more details.
-  *
-  * You should have received a copy of the GNU General Public License along with
-  * this program; if not, write to the Free Software Foundation, Inc.,
-  * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
   */
  
  #include <linux/nvme.h>
  #include <scsi/sg.h>
  #include <asm-generic/io-64-nonatomic-lo-hi.h>
  
- #define NVME_Q_DEPTH 1024
+ #include <trace/events/block.h>
+ #define NVME_Q_DEPTH          1024
  #define SQ_SIZE(depth)                (depth * sizeof(struct nvme_command))
  #define CQ_SIZE(depth)                (depth * sizeof(struct nvme_completion))
- #define ADMIN_TIMEOUT (60 * HZ)
- #define IOD_TIMEOUT   (4 * NVME_IO_TIMEOUT)
+ #define ADMIN_TIMEOUT         (admin_timeout * HZ)
+ #define IOD_TIMEOUT           (retry_time * HZ)
+ static unsigned char admin_timeout = 60;
+ module_param(admin_timeout, byte, 0644);
+ MODULE_PARM_DESC(admin_timeout, "timeout in seconds for admin commands");
  
- unsigned char io_timeout = 30;
- module_param(io_timeout, byte, 0644);
+ unsigned char nvme_io_timeout = 30;
+ module_param_named(io_timeout, nvme_io_timeout, byte, 0644);
  MODULE_PARM_DESC(io_timeout, "timeout in seconds for I/O");
  
+ static unsigned char retry_time = 30;
+ module_param(retry_time, byte, 0644);
+ MODULE_PARM_DESC(retry_time, "time in seconds to retry failed I/O");
  static int nvme_major;
  module_param(nvme_major, int, 0);
  
@@@ -67,6 -73,7 +73,7 @@@ static LIST_HEAD(dev_list)
  static struct task_struct *nvme_thread;
  static struct workqueue_struct *nvme_workq;
  static wait_queue_head_t nvme_kthread_wait;
+ static struct notifier_block nvme_nb;
  
  static void nvme_reset_failed_dev(struct work_struct *ws);
  
@@@ -199,16 -206,13 +206,13 @@@ static int alloc_cmdid_killable(struct 
  #define CMD_CTX_CANCELLED     (0x30C + CMD_CTX_BASE)
  #define CMD_CTX_COMPLETED     (0x310 + CMD_CTX_BASE)
  #define CMD_CTX_INVALID               (0x314 + CMD_CTX_BASE)
- #define CMD_CTX_FLUSH         (0x318 + CMD_CTX_BASE)
- #define CMD_CTX_ABORT         (0x31C + CMD_CTX_BASE)
+ #define CMD_CTX_ABORT         (0x318 + CMD_CTX_BASE)
  
  static void special_completion(struct nvme_queue *nvmeq, void *ctx,
                                                struct nvme_completion *cqe)
  {
        if (ctx == CMD_CTX_CANCELLED)
                return;
-       if (ctx == CMD_CTX_FLUSH)
-               return;
        if (ctx == CMD_CTX_ABORT) {
                ++nvmeq->dev->abort_limit;
                return;
@@@ -247,8 -251,9 +251,9 @@@ static void *free_cmdid(struct nvme_que
        void *ctx;
        struct nvme_cmd_info *info = nvme_cmd_info(nvmeq);
  
-       if (cmdid >= nvmeq->q_depth) {
-               *fn = special_completion;
+       if (cmdid >= nvmeq->q_depth || !info[cmdid].fn) {
+               if (fn)
+                       *fn = special_completion;
                return CMD_CTX_INVALID;
        }
        if (fn)
@@@ -281,9 -286,17 +286,17 @@@ static struct nvme_queue *raw_nvmeq(str
  
  static struct nvme_queue *get_nvmeq(struct nvme_dev *dev) __acquires(RCU)
  {
+       struct nvme_queue *nvmeq;
        unsigned queue_id = get_cpu_var(*dev->io_queue);
        rcu_read_lock();
-       return rcu_dereference(dev->queues[queue_id]);
+       nvmeq = rcu_dereference(dev->queues[queue_id]);
+       if (nvmeq)
+               return nvmeq;
+       rcu_read_unlock();
+       put_cpu_var(*dev->io_queue);
+       return NULL;
  }
  
  static void put_nvmeq(struct nvme_queue *nvmeq) __releases(RCU)
  static struct nvme_queue *lock_nvmeq(struct nvme_dev *dev, int q_idx)
                                                        __acquires(RCU)
  {
+       struct nvme_queue *nvmeq;
        rcu_read_lock();
-       return rcu_dereference(dev->queues[q_idx]);
+       nvmeq = rcu_dereference(dev->queues[q_idx]);
+       if (nvmeq)
+               return nvmeq;
+       rcu_read_unlock();
+       return NULL;
  }
  
  static void unlock_nvmeq(struct nvme_queue *nvmeq) __releases(RCU)
@@@ -387,25 -407,30 +407,30 @@@ void nvme_free_iod(struct nvme_dev *dev
  static void nvme_start_io_acct(struct bio *bio)
  {
        struct gendisk *disk = bio->bi_bdev->bd_disk;
-       const int rw = bio_data_dir(bio);
-       int cpu = part_stat_lock();
-       part_round_stats(cpu, &disk->part0);
-       part_stat_inc(cpu, &disk->part0, ios[rw]);
-       part_stat_add(cpu, &disk->part0, sectors[rw], bio_sectors(bio));
-       part_inc_in_flight(&disk->part0, rw);
-       part_stat_unlock();
+       if (blk_queue_io_stat(disk->queue)) {
+               const int rw = bio_data_dir(bio);
+               int cpu = part_stat_lock();
+               part_round_stats(cpu, &disk->part0);
+               part_stat_inc(cpu, &disk->part0, ios[rw]);
+               part_stat_add(cpu, &disk->part0, sectors[rw],
+                                                       bio_sectors(bio));
+               part_inc_in_flight(&disk->part0, rw);
+               part_stat_unlock();
+       }
  }
  
  static void nvme_end_io_acct(struct bio *bio, unsigned long start_time)
  {
        struct gendisk *disk = bio->bi_bdev->bd_disk;
-       const int rw = bio_data_dir(bio);
-       unsigned long duration = jiffies - start_time;
-       int cpu = part_stat_lock();
-       part_stat_add(cpu, &disk->part0, ticks[rw], duration);
-       part_round_stats(cpu, &disk->part0);
-       part_dec_in_flight(&disk->part0, rw);
-       part_stat_unlock();
+       if (blk_queue_io_stat(disk->queue)) {
+               const int rw = bio_data_dir(bio);
+               unsigned long duration = jiffies - start_time;
+               int cpu = part_stat_lock();
+               part_stat_add(cpu, &disk->part0, ticks[rw], duration);
+               part_round_stats(cpu, &disk->part0);
+               part_dec_in_flight(&disk->part0, rw);
+               part_stat_unlock();
+       }
  }
  
  static void bio_completion(struct nvme_queue *nvmeq, void *ctx,
        struct nvme_iod *iod = ctx;
        struct bio *bio = iod->private;
        u16 status = le16_to_cpup(&cqe->status) >> 1;
+       int error = 0;
  
        if (unlikely(status)) {
                if (!(status & NVME_SC_DNR ||
                        wake_up(&nvmeq->sq_full);
                        return;
                }
+               error = -EIO;
        }
        if (iod->nents) {
                dma_unmap_sg(nvmeq->q_dmadev, iod->sg, iod->nents,
                nvme_end_io_acct(bio, iod->start_time);
        }
        nvme_free_iod(nvmeq->dev, iod);
-       if (status)
-               bio_endio(bio, -EIO);
-       else
-               bio_endio(bio, 0);
+       trace_block_bio_complete(bdev_get_queue(bio->bi_bdev), bio, error);
+       bio_endio(bio, error);
  }
  
  /* length is in bytes.  gfp flags indicates whether we may sleep. */
@@@ -525,6 -551,8 +551,8 @@@ static int nvme_split_and_submit(struc
        if (!split)
                return -ENOMEM;
  
+       trace_block_split(bdev_get_queue(bio->bi_bdev), bio,
+                                       split->bi_iter.bi_sector);
        bio_chain(split, bio);
  
        if (!waitqueue_active(&nvmeq->sq_full))
@@@ -627,16 -655,6 +655,6 @@@ static int nvme_submit_flush(struct nvm
        return 0;
  }
  
- int nvme_submit_flush_data(struct nvme_queue *nvmeq, struct nvme_ns *ns)
- {
-       int cmdid = alloc_cmdid(nvmeq, (void *)CMD_CTX_FLUSH,
-                                       special_completion, NVME_IO_TIMEOUT);
-       if (unlikely(cmdid < 0))
-               return cmdid;
-       return nvme_submit_flush(nvmeq, ns, cmdid);
- }
  static int nvme_submit_iod(struct nvme_queue *nvmeq, struct nvme_iod *iod)
  {
        struct bio *bio = iod->private;
  
        if (bio->bi_rw & REQ_DISCARD)
                return nvme_submit_discard(nvmeq, ns, bio, iod, cmdid);
-       if ((bio->bi_rw & REQ_FLUSH) && !iod->nents)
+       if (bio->bi_rw & REQ_FLUSH)
                return nvme_submit_flush(nvmeq, ns, cmdid);
  
        control = 0;
        return 0;
  }
  
+ static int nvme_split_flush_data(struct nvme_queue *nvmeq, struct bio *bio)
+ {
+       struct bio *split = bio_clone(bio, GFP_ATOMIC);
+       if (!split)
+               return -ENOMEM;
+       split->bi_iter.bi_size = 0;
+       split->bi_phys_segments = 0;
+       bio->bi_rw &= ~REQ_FLUSH;
+       bio_chain(split, bio);
+       if (!waitqueue_active(&nvmeq->sq_full))
+               add_wait_queue(&nvmeq->sq_full, &nvmeq->sq_cong_wait);
+       bio_list_add(&nvmeq->sq_cong, split);
+       bio_list_add(&nvmeq->sq_cong, bio);
+       wake_up_process(nvme_thread);
+       return 0;
+ }
  /*
   * Called with local interrupts disabled and the q_lock held.  May not sleep.
   */
@@@ -696,11 -734,8 +734,8 @@@ static int nvme_submit_bio_queue(struc
        int psegs = bio_phys_segments(ns->queue, bio);
        int result;
  
-       if ((bio->bi_rw & REQ_FLUSH) && psegs) {
-               result = nvme_submit_flush_data(nvmeq, ns);
-               if (result)
-                       return result;
-       }
+       if ((bio->bi_rw & REQ_FLUSH) && psegs)
+               return nvme_split_flush_data(nvmeq, bio);
  
        iod = nvme_alloc_iod(psegs, bio->bi_iter.bi_size, GFP_ATOMIC);
        if (!iod)
@@@ -795,7 -830,6 +830,6 @@@ static void nvme_make_request(struct re
        int result = -EBUSY;
  
        if (!nvmeq) {
-               put_nvmeq(NULL);
                bio_endio(bio, -EIO);
                return;
        }
@@@ -870,10 -904,8 +904,8 @@@ static int nvme_submit_sync_cmd(struct 
        struct nvme_queue *nvmeq;
  
        nvmeq = lock_nvmeq(dev, q_idx);
-       if (!nvmeq) {
-               unlock_nvmeq(nvmeq);
+       if (!nvmeq)
                return -ENODEV;
-       }
  
        cmdinfo.task = current;
        cmdinfo.status = -EINTR;
  
        if (cmdinfo.status == -EINTR) {
                nvmeq = lock_nvmeq(dev, q_idx);
-               if (nvmeq)
+               if (nvmeq) {
                        nvme_abort_command(nvmeq, cmdid);
-               unlock_nvmeq(nvmeq);
+                       unlock_nvmeq(nvmeq);
+               }
                return -EINTR;
        }
  
@@@ -1358,7 -1391,8 +1391,8 @@@ static int nvme_wait_ready(struct nvme_
                        return -EINTR;
                if (time_after(jiffies, timeout)) {
                        dev_err(&dev->pci_dev->dev,
-                               "Device not ready; aborting initialisation\n");
+                               "Device not ready; aborting %s\n", enabled ?
+                                               "initialisation" : "reset");
                        return -ENODEV;
                }
        }
@@@ -1481,7 -1515,11 +1515,11 @@@ struct nvme_iod *nvme_map_user_pages(st
                goto put_pages;
        }
  
+       err = -ENOMEM;
        iod = nvme_alloc_iod(count, length, GFP_KERNEL);
+       if (!iod)
+               goto put_pages;
        sg = iod->sg;
        sg_init_table(sg, count);
        for (i = 0; i < count; i++) {
        sg_mark_end(&sg[i - 1]);
        iod->nents = count;
  
-       err = -ENOMEM;
        nents = dma_map_sg(&dev->pci_dev->dev, sg, count,
                                write ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
        if (!nents)
@@@ -1894,6 -1931,8 +1931,8 @@@ static struct nvme_ns *nvme_alloc_ns(st
        blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift);
        if (dev->max_hw_sectors)
                blk_queue_max_hw_sectors(ns->queue, dev->max_hw_sectors);
+       if (dev->vwc & NVME_CTRL_VWC_PRESENT)
+               blk_queue_flush(ns->queue, REQ_FLUSH | REQ_FUA);
  
        disk->major = nvme_major;
        disk->first_minor = 0;
@@@ -2062,8 -2101,13 +2101,13 @@@ static int set_queue_count(struct nvme_
  
        status = nvme_set_features(dev, NVME_FEAT_NUM_QUEUES, q_count, 0,
                                                                &result);
-       if (status)
-               return status < 0 ? -EIO : -EBUSY;
+       if (status < 0)
+               return status;
+       if (status > 0) {
+               dev_err(&dev->pci_dev->dev, "Could not set queue count (%d)\n",
+                                                                       status);
+               return -EBUSY;
+       }
        return min(result & 0xffff, result >> 16) + 1;
  }
  
@@@ -2072,14 -2116,25 +2116,25 @@@ static size_t db_bar_size(struct nvme_d
        return 4096 + ((nr_io_queues + 1) * 8 * dev->db_stride);
  }
  
+ static void nvme_cpu_workfn(struct work_struct *work)
+ {
+       struct nvme_dev *dev = container_of(work, struct nvme_dev, cpu_work);
+       if (dev->initialized)
+               nvme_assign_io_queues(dev);
+ }
  static int nvme_cpu_notify(struct notifier_block *self,
                                unsigned long action, void *hcpu)
  {
-       struct nvme_dev *dev = container_of(self, struct nvme_dev, nb);
+       struct nvme_dev *dev;
        switch (action) {
        case CPU_ONLINE:
        case CPU_DEAD:
-               nvme_assign_io_queues(dev);
+               spin_lock(&dev_list_lock);
+               list_for_each_entry(dev, &dev_list, node)
+                       schedule_work(&dev->cpu_work);
+               spin_unlock(&dev_list_lock);
                break;
        }
        return NOTIFY_OK;
@@@ -2148,11 -2203,6 +2203,6 @@@ static int nvme_setup_io_queues(struct 
        nvme_free_queues(dev, nr_io_queues + 1);
        nvme_assign_io_queues(dev);
  
-       dev->nb.notifier_call = &nvme_cpu_notify;
-       result = register_hotcpu_notifier(&dev->nb);
-       if (result)
-               goto free_queues;
        return 0;
  
   free_queues:
@@@ -2184,6 -2234,7 +2234,7 @@@ static int nvme_dev_add(struct nvme_de
  
        res = nvme_identify(dev, 0, 1, dma_addr);
        if (res) {
+               dev_err(&pdev->dev, "Identify Controller failed (%d)\n", res);
                res = -EIO;
                goto out;
        }
        nn = le32_to_cpup(&ctrl->nn);
        dev->oncs = le16_to_cpup(&ctrl->oncs);
        dev->abort_limit = ctrl->acl + 1;
+       dev->vwc = ctrl->vwc;
        memcpy(dev->serial, ctrl->sn, sizeof(ctrl->sn));
        memcpy(dev->model, ctrl->mn, sizeof(ctrl->mn));
        memcpy(dev->firmware_rev, ctrl->fr, sizeof(ctrl->fr));
@@@ -2450,8 -2502,6 +2502,6 @@@ static void nvme_dev_shutdown(struct nv
        int i;
  
        dev->initialized = 0;
-       unregister_hotcpu_notifier(&dev->nb);
        nvme_dev_list_remove(dev);
  
        if (!dev->bar || (dev->bar && readl(&dev->bar->csts) == -1)) {
@@@ -2722,6 -2772,7 +2772,7 @@@ static int nvme_probe(struct pci_dev *p
        INIT_LIST_HEAD(&dev->namespaces);
        dev->reset_workfn = nvme_reset_failed_dev;
        INIT_WORK(&dev->reset_work, nvme_reset_workfn);
+       INIT_WORK(&dev->cpu_work, nvme_cpu_workfn);
        dev->pci_dev = pdev;
        pci_set_drvdata(pdev, dev);
        result = nvme_set_instance(dev);
        return result;
  }
  
 +static void nvme_reset_notify(struct pci_dev *pdev, bool prepare)
 +{
 +       struct nvme_dev *dev = pci_get_drvdata(pdev);
 +
 +       if (prepare)
 +               nvme_dev_shutdown(dev);
 +       else
 +               nvme_dev_resume(dev);
 +}
 +
  static void nvme_shutdown(struct pci_dev *pdev)
  {
        struct nvme_dev *dev = pci_get_drvdata(pdev);
@@@ -2801,6 -2842,7 +2852,7 @@@ static void nvme_remove(struct pci_dev 
  
        pci_set_drvdata(pdev, NULL);
        flush_work(&dev->reset_work);
+       flush_work(&dev->cpu_work);
        misc_deregister(&dev->miscdev);
        nvme_dev_remove(dev);
        nvme_dev_shutdown(dev);
@@@ -2849,7 -2891,6 +2901,7 @@@ static const struct pci_error_handlers 
        .link_reset     = nvme_link_reset,
        .slot_reset     = nvme_slot_reset,
        .resume         = nvme_error_resume,
 +      .reset_notify   = nvme_reset_notify,
  };
  
  /* Move to pci_ids.h later */
@@@ -2889,11 -2930,18 +2941,18 @@@ static int __init nvme_init(void
        else if (result > 0)
                nvme_major = result;
  
-       result = pci_register_driver(&nvme_driver);
+       nvme_nb.notifier_call = &nvme_cpu_notify;
+       result = register_hotcpu_notifier(&nvme_nb);
        if (result)
                goto unregister_blkdev;
+       result = pci_register_driver(&nvme_driver);
+       if (result)
+               goto unregister_hotcpu;
        return 0;
  
+  unregister_hotcpu:
+       unregister_hotcpu_notifier(&nvme_nb);
   unregister_blkdev:
        unregister_blkdev(nvme_major, "nvme");
   kill_workq:
  static void __exit nvme_exit(void)
  {
        pci_unregister_driver(&nvme_driver);
+       unregister_hotcpu_notifier(&nvme_nb);
        unregister_blkdev(nvme_major, "nvme");
        destroy_workqueue(nvme_workq);
        BUG_ON(nvme_thread && !IS_ERR(nvme_thread));
+       _nvme_check_size();
  }
  
  MODULE_AUTHOR("Matthew Wilcox <willy@linux.intel.com>");