Merge branch 'for-linus' of git://git.kernel.dk/linux-2.6-block

author Linus Torvalds <torvalds@linux-foundation.org>

Fri, 17 Oct 2008 16:29:55 +0000 (09:29 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Fri, 17 Oct 2008 16:29:55 +0000 (09:29 -0700)
author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 17 Oct 2008 16:29:55 +0000 (09:29 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 17 Oct 2008 16:29:55 +0000 (09:29 -0700)
diff --git a/block/blk-core.c b/block/blk-core.c

index 9e79a48..c3df30c 100644 (file)
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -257,7 +257,6 @@ void __generic_unplug_device(struct request_queue *q)
  
         q->request_fn(q);
  }
-EXPORT_SYMBOL(__generic_unplug_device);
  
  /**
   * generic_unplug_device - fire a request queue
@@ -325,6 +324,9 @@ EXPORT_SYMBOL(blk_unplug);
  
  static void blk_invoke_request_fn(struct request_queue *q)
  {
+       if (unlikely(blk_queue_stopped(q)))
+               return;
+
         /*
          * one level of recursion is ok and is much faster than kicking
          * the unplug handling
@@ -399,8 +401,13 @@ void blk_sync_queue(struct request_queue *q)
  EXPORT_SYMBOL(blk_sync_queue);
  
  /**
- * blk_run_queue - run a single device queue
+ * __blk_run_queue - run a single device queue
   * @q: The queue to run
+ *
+ * Description:
+ *    See @blk_run_queue. This variant must be called with the queue lock
+ *    held and interrupts disabled.
+ *
   */
  void __blk_run_queue(struct request_queue *q)
  {
@@ -418,6 +425,12 @@ EXPORT_SYMBOL(__blk_run_queue);
  /**
   * blk_run_queue - run a single device queue
   * @q: The queue to run
+ *
+ * Description:
+ *    Invoke request handling on this queue, if it has pending work to do.
+ *    May be used to restart queueing when a request has completed. Also
+ *    see @blk_start_queueing.
+ *
   */
  void blk_run_queue(struct request_queue *q)
  {
@@ -501,6 +514,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
         init_timer(&q->unplug_timer);
         setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q);
         INIT_LIST_HEAD(&q->timeout_list);
+       INIT_WORK(&q->unplug_work, blk_unplug_work);
  
         kobject_init(&q->kobj, &blk_queue_ktype);
  
@@ -884,7 +898,8 @@ EXPORT_SYMBOL(blk_get_request);
   *
   * This is basically a helper to remove the need to know whether a queue
   * is plugged or not if someone just wants to initiate dispatch of requests
- * for this queue.
+ * for this queue. Should be used to start queueing on a device outside
+ * of ->request_fn() context. Also see @blk_run_queue.
   *
   * The queue lock must be held with interrupts disabled.
   */
@@ -1003,8 +1018,9 @@ static void part_round_stats_single(int cpu, struct hd_struct *part,
  }
  
  /**
- * part_round_stats()  - Round off the performance stats on a struct
- * disk_stats.
+ * part_round_stats() - Round off the performance stats on a struct disk_stats.
+ * @cpu: cpu number for stats access
+ * @part: target partition
   *
   * The average IO queue length and utilisation statistics are maintained
   * by observing the current state of the queue length and the amount of
diff --git a/block/blk-merge.c b/block/blk-merge.c

index 908d3e1..8681cd6 100644 (file)
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -77,12 +77,20 @@ void blk_recalc_rq_segments(struct request *rq)
                         continue;
                 }
  new_segment:
+               if (nr_phys_segs == 1 && seg_size > rq->bio->bi_seg_front_size)
+                       rq->bio->bi_seg_front_size = seg_size;
+
                 nr_phys_segs++;
                 bvprv = bv;
                 seg_size = bv->bv_len;
                 highprv = high;
         }
  
+       if (nr_phys_segs == 1 && seg_size > rq->bio->bi_seg_front_size)
+               rq->bio->bi_seg_front_size = seg_size;
+       if (seg_size > rq->biotail->bi_seg_back_size)
+               rq->biotail->bi_seg_back_size = seg_size;
+
         rq->nr_phys_segments = nr_phys_segs;
  }
  
@@ -106,7 +114,8 @@ static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio,
         if (!test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags))
                 return 0;
  
-       if (bio->bi_size + nxt->bi_size > q->max_segment_size)
+       if (bio->bi_seg_back_size + nxt->bi_seg_front_size >
+           q->max_segment_size)
                 return 0;
  
         if (!bio_has_data(bio))
@@ -309,6 +318,8 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
                                 struct request *next)
  {
         int total_phys_segments;
+       unsigned int seg_size =
+               req->biotail->bi_seg_back_size + next->bio->bi_seg_front_size;
  
         /*
          * First check if the either of the requests are re-queued
@@ -324,8 +335,13 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
                 return 0;
  
         total_phys_segments = req->nr_phys_segments + next->nr_phys_segments;
-       if (blk_phys_contig_segment(q, req->biotail, next->bio))
+       if (blk_phys_contig_segment(q, req->biotail, next->bio)) {
+               if (req->nr_phys_segments == 1)
+                       req->bio->bi_seg_front_size = seg_size;
+               if (next->nr_phys_segments == 1)
+                       next->biotail->bi_seg_back_size = seg_size;
                 total_phys_segments--;
+       }
  
         if (total_phys_segments > q->max_phys_segments)
                 return 0;
diff --git a/block/blk-settings.c b/block/blk-settings.c

index b21dcdb..41392fb 100644 (file)
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -141,8 +141,6 @@ void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn)
         if (q->unplug_delay == 0)
                 q->unplug_delay = 1;
  
-       INIT_WORK(&q->unplug_work, blk_unplug_work);
-
         q->unplug_timer.function = blk_unplug_timeout;
         q->unplug_timer.data = (unsigned long)q;
  
diff --git a/block/blk.h b/block/blk.h

index e5c5797..d2e49af 100644 (file)
--- a/block/blk.h
+++ b/block/blk.h
@@ -20,6 +20,7 @@ void blk_unplug_timeout(unsigned long data);
  void blk_rq_timed_out_timer(unsigned long data);
  void blk_delete_timer(struct request *);
  void blk_add_timer(struct request *);
+void __generic_unplug_device(struct request_queue *);
  
  /*
   * Internal atomic flags for request handling
diff --git a/block/elevator.c b/block/elevator.c

index 0451892..59173a6 100644 (file)
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -612,7 +612,7 @@ void elv_insert(struct request_queue *q, struct request *rq, int where)
                  *   processing.
                  */
                 blk_remove_plug(q);
-               q->request_fn(q);
+               blk_start_queueing(q);
                 break;
  
         case ELEVATOR_INSERT_SORT:
@@ -950,7 +950,7 @@ void elv_completed_request(struct request_queue *q, struct request *rq)
                     blk_ordered_cur_seq(q) == QUEUE_ORDSEQ_DRAIN &&
                     blk_ordered_req_seq(first_rq) > QUEUE_ORDSEQ_DRAIN) {
                         blk_ordered_complete_seq(q, QUEUE_ORDSEQ_DRAIN, 0);
-                       q->request_fn(q);
+                       blk_start_queueing(q);
                 }
         }
  }
@@ -1109,8 +1109,7 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
         elv_drain_elevator(q);
  
         while (q->rq.elvpriv) {
-               blk_remove_plug(q);
-               q->request_fn(q);
+               blk_start_queueing(q);
                 spin_unlock_irq(q->queue_lock);
                 msleep(10);
                 spin_lock_irq(q->queue_lock);
@@ -1166,15 +1165,10 @@ ssize_t elv_iosched_store(struct request_queue *q, const char *name,
                           size_t count)
  {
         char elevator_name[ELV_NAME_MAX];
-       size_t len;
         struct elevator_type *e;
  
-       elevator_name[sizeof(elevator_name) - 1] = '\0';
-       strncpy(elevator_name, name, sizeof(elevator_name) - 1);
-       len = strlen(elevator_name);
-
-       if (len && elevator_name[len - 1] == '\n')
-               elevator_name[len - 1] = '\0';
+       strlcpy(elevator_name, name, sizeof(elevator_name));
+       strstrip(elevator_name);
  
         e = elevator_get(elevator_name);
         if (!e) {
diff --git a/block/genhd.c b/block/genhd.c

index 4cd3433..646e1d2 100644 (file)
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -358,7 +358,6 @@ static int blk_mangle_minor(int minor)
  /**
   * blk_alloc_devt - allocate a dev_t for a partition
   * @part: partition to allocate dev_t for
- * @gfp_mask: memory allocation flag
   * @devt: out parameter for resulting dev_t
   *
   * Allocate a dev_t for block device.
@@ -535,7 +534,7 @@ void unlink_gendisk(struct gendisk *disk)
  /**
   * get_gendisk - get partitioning information for a given device
   * @devt: device to get partitioning information for
- * @part: returned partition index
+ * @partno: returned partition index
   *
   * This function gets the structure containing partitioning
   * information for the given device @devt.
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c

index 77c6eae..7162d67 100644 (file)
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -1493,8 +1493,8 @@ void ide_do_drive_cmd(ide_drive_t *drive, struct request *rq)
  
         spin_lock_irqsave(&ide_lock, flags);
         hwgroup->rq = NULL;
-       __elv_add_request(drive->queue, rq, ELEVATOR_INSERT_FRONT, 1);
-       __generic_unplug_device(drive->queue);
+       __elv_add_request(drive->queue, rq, ELEVATOR_INSERT_FRONT, 0);
+       blk_start_queueing(drive->queue);
         spin_unlock_irqrestore(&ide_lock, flags);
  }
  
diff --git a/drivers/s390/scsi/zfcp_def.h b/drivers/s390/scsi/zfcp_def.h

index 8a13071..9ce4c75 100644 (file)
--- a/drivers/s390/scsi/zfcp_def.h
+++ b/drivers/s390/scsi/zfcp_def.h
@@ -583,6 +583,8 @@ struct zfcp_fsf_req {
         unsigned long long     issued;         /* request sent time (STCK) */
         struct zfcp_unit       *unit;
         void                    (*handler)(struct zfcp_fsf_req *);
+       u16                     qdio_outb_usage;/* usage of outbound queue */
+       u16                     qdio_inb_usage; /* usage of inbound queue */
  };
  
  /* driver data */
diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c

index 739356a..5ae1d49 100644 (file)
--- a/drivers/s390/scsi/zfcp_fsf.c
+++ b/drivers/s390/scsi/zfcp_fsf.c
@@ -6,6 +6,7 @@
   * Copyright IBM Corporation 2002, 2008
   */
  
+#include <linux/blktrace_api.h>
  #include "zfcp_ext.h"
  
  static void zfcp_fsf_request_timeout_handler(unsigned long data)
@@ -777,6 +778,7 @@ static int zfcp_fsf_req_send(struct zfcp_fsf_req *req)
         list_add_tail(&req->list, &adapter->req_list[idx]);
         spin_unlock(&adapter->req_list_lock);
  
+       req->qdio_outb_usage = atomic_read(&req_q->count);
         req->issued = get_clock();
         if (zfcp_qdio_send(req)) {
                 /* Queues are down..... */
@@ -2082,6 +2084,36 @@ static void zfcp_fsf_req_latency(struct zfcp_fsf_req *req)
         spin_unlock_irqrestore(&unit->latencies.lock, flags);
  }
  
+#ifdef CONFIG_BLK_DEV_IO_TRACE
+static void zfcp_fsf_trace_latency(struct zfcp_fsf_req *fsf_req)
+{
+       struct fsf_qual_latency_info *lat_inf;
+       struct scsi_cmnd *scsi_cmnd = (struct scsi_cmnd *)fsf_req->data;
+       struct request *req = scsi_cmnd->request;
+       struct zfcp_blk_drv_data trace;
+       int ticks = fsf_req->adapter->timer_ticks;
+
+       trace.flags = 0;
+       trace.magic = ZFCP_BLK_DRV_DATA_MAGIC;
+       if (fsf_req->adapter->adapter_features & FSF_FEATURE_MEASUREMENT_DATA) {
+               trace.flags |= ZFCP_BLK_LAT_VALID;
+               lat_inf = &fsf_req->qtcb->prefix.prot_status_qual.latency_info;
+               trace.channel_lat = lat_inf->channel_lat * ticks;
+               trace.fabric_lat = lat_inf->fabric_lat * ticks;
+       }
+       if (fsf_req->status & ZFCP_STATUS_FSFREQ_ERROR)
+               trace.flags |= ZFCP_BLK_REQ_ERROR;
+       trace.inb_usage = fsf_req->qdio_inb_usage;
+       trace.outb_usage = fsf_req->qdio_outb_usage;
+
+       blk_add_driver_data(req->q, req, &trace, sizeof(trace));
+}
+#else
+static inline void zfcp_fsf_trace_latency(struct zfcp_fsf_req *fsf_req)
+{
+}
+#endif
+
  static void zfcp_fsf_send_fcp_command_task_handler(struct zfcp_fsf_req *req)
  {
         struct scsi_cmnd *scpnt = req->data;
@@ -2114,6 +2146,8 @@ static void zfcp_fsf_send_fcp_command_task_handler(struct zfcp_fsf_req *req)
         if (req->adapter->adapter_features & FSF_FEATURE_MEASUREMENT_DATA)
                 zfcp_fsf_req_latency(req);
  
+       zfcp_fsf_trace_latency(req);
+
         if (unlikely(fcp_rsp_iu->validity.bits.fcp_rsp_len_valid)) {
                 if (fcp_rsp_info[3] == RSP_CODE_GOOD)
                         set_host_byte(scpnt, DID_OK);
diff --git a/drivers/s390/scsi/zfcp_fsf.h b/drivers/s390/scsi/zfcp_fsf.h

index fd3a887..fa2a317 100644 (file)
--- a/drivers/s390/scsi/zfcp_fsf.h
+++ b/drivers/s390/scsi/zfcp_fsf.h
@@ -439,4 +439,16 @@ struct fsf_qtcb {
         u8 log[FSF_QTCB_LOG_SIZE];
  } __attribute__ ((packed));
  
+struct zfcp_blk_drv_data {
+#define ZFCP_BLK_DRV_DATA_MAGIC                        0x1
+       u32 magic;
+#define ZFCP_BLK_LAT_VALID                     0x1
+#define ZFCP_BLK_REQ_ERROR                     0x2
+       u16 flags;
+       u8 inb_usage;
+       u8 outb_usage;
+       u64 channel_lat;
+       u64 fabric_lat;
+} __attribute__ ((packed));
+
  #endif                         /* FSF_H */
diff --git a/drivers/s390/scsi/zfcp_qdio.c b/drivers/s390/scsi/zfcp_qdio.c

index 3e05080..664752f 100644 (file)
--- a/drivers/s390/scsi/zfcp_qdio.c
+++ b/drivers/s390/scsi/zfcp_qdio.c
@@ -115,6 +115,7 @@ static void zfcp_qdio_reqid_check(struct zfcp_adapter *adapter,
         spin_unlock_irqrestore(&adapter->req_list_lock, flags);
  
         fsf_req->sbal_response = sbal_idx;
+       fsf_req->qdio_inb_usage = atomic_read(&adapter->resp_q.count);
         zfcp_fsf_req_complete(fsf_req);
  }
  
diff --git a/fs/block_dev.c b/fs/block_dev.c

index d84f046..218408e 100644 (file)
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1262,7 +1262,7 @@ EXPORT_SYMBOL(ioctl_by_bdev);
  
  /**
   * lookup_bdev  - lookup a struct block_device by name
- * @pathname:  special file representing the block device
+ * @path:      special file representing the block device
   *
   * Get a reference to the blockdevice at @pathname in the current
   * namespace if possible and return it.  Return ERR_PTR(error)
diff --git a/fs/partitions/check.c b/fs/partitions/check.c

index fbeb2f3..cfb0c80 100644 (file)
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -195,6 +195,14 @@ check_partition(struct gendisk *hd, struct block_device *bdev)
         return ERR_PTR(res);
  }
  
+static ssize_t part_partition_show(struct device *dev,
+                                  struct device_attribute *attr, char *buf)
+{
+       struct hd_struct *p = dev_to_part(dev);
+
+       return sprintf(buf, "%d\n", p->partno);
+}
+
  static ssize_t part_start_show(struct device *dev,
                                struct device_attribute *attr, char *buf)
  {
@@ -260,6 +268,7 @@ ssize_t part_fail_store(struct device *dev,
  }
  #endif
  
+static DEVICE_ATTR(partition, S_IRUGO, part_partition_show, NULL);
  static DEVICE_ATTR(start, S_IRUGO, part_start_show, NULL);
  static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL);
  static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL);
@@ -269,6 +278,7 @@ static struct device_attribute dev_attr_fail =
  #endif
  
  static struct attribute *part_attrs[] = {
+       &dev_attr_partition.attr,
         &dev_attr_start.attr,
         &dev_attr_size.attr,
         &dev_attr_stat.attr,
diff --git a/include/linux/bio.h b/include/linux/bio.h

index 1beda20..1c91a17 100644 (file)
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -79,6 +79,13 @@ struct bio {
  
         unsigned int            bi_size;        /* residual I/O count */
  
+       /*
+        * To keep track of the max segment size, we account for the
+        * sizes of the first and last mergeable segments in this bio.
+        */
+       unsigned int            bi_seg_front_size;
+       unsigned int            bi_seg_back_size;
+
         unsigned int            bi_max_vecs;    /* max bvl_vecs we can hold */
  
         unsigned int            bi_comp_cpu;    /* completion CPU */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h

index f3491d2..b4fe68f 100644 (file)
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -865,7 +865,6 @@ extern void blk_ordered_complete_seq(struct request_queue *, unsigned, int);
  extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *);
  extern void blk_dump_rq_flags(struct request *, char *);
  extern void generic_unplug_device(struct request_queue *);
-extern void __generic_unplug_device(struct request_queue *);
  extern long nr_blockdev_pages(void);
  
  int blk_get_queue(struct request_queue *);
diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h

index 3a31eb5..bdf505d 100644 (file)
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -24,6 +24,7 @@ enum blktrace_cat {
         BLK_TC_AHEAD    = 1 << 11,      /* readahead */
         BLK_TC_META     = 1 << 12,      /* metadata */
         BLK_TC_DISCARD  = 1 << 13,      /* discard requests */
+       BLK_TC_DRV_DATA = 1 << 14,      /* binary per-driver data */
  
         BLK_TC_END      = 1 << 15,      /* only 16-bits, reminder */
  };
@@ -51,6 +52,7 @@ enum blktrace_act {
         __BLK_TA_BOUNCE,                /* bio was bounced */
         __BLK_TA_REMAP,                 /* bio was remapped */
         __BLK_TA_ABORT,                 /* request aborted */
+       __BLK_TA_DRV_DATA,              /* driver-specific binary data */
  };
  
  /*
@@ -82,6 +84,7 @@ enum blktrace_notify {
  #define BLK_TA_BOUNCE          (__BLK_TA_BOUNCE)
  #define BLK_TA_REMAP           (__BLK_TA_REMAP | BLK_TC_ACT(BLK_TC_QUEUE))
  #define BLK_TA_ABORT           (__BLK_TA_ABORT | BLK_TC_ACT(BLK_TC_QUEUE))
+#define BLK_TA_DRV_DATA        (__BLK_TA_DRV_DATA | BLK_TC_ACT(BLK_TC_DRV_DATA))
  
  #define BLK_TN_PROCESS         (__BLK_TN_PROCESS | BLK_TC_ACT(BLK_TC_NOTIFY))
  #define BLK_TN_TIMESTAMP       (__BLK_TN_TIMESTAMP | BLK_TC_ACT(BLK_TC_NOTIFY))
@@ -317,6 +320,34 @@ static inline void blk_add_trace_remap(struct request_queue *q, struct bio *bio,
         __blk_add_trace(bt, from, bio->bi_size, bio->bi_rw, BLK_TA_REMAP, !bio_flagged(bio, BIO_UPTODATE), sizeof(r), &r);
  }
  
+/**
+ * blk_add_driver_data - Add binary message with driver-specific data
+ * @q:         queue the io is for
+ * @rq:                io request
+ * @data:      driver-specific data
+ * @len:       length of driver-specific data
+ *
+ * Description:
+ *     Some drivers might want to write driver-specific data per request.
+ *
+ **/
+static inline void blk_add_driver_data(struct request_queue *q,
+                                      struct request *rq,
+                                      void *data, size_t len)
+{
+       struct blk_trace *bt = q->blk_trace;
+
+       if (likely(!bt))
+               return;
+
+       if (blk_pc_request(rq))
+               __blk_add_trace(bt, 0, rq->data_len, 0, BLK_TA_DRV_DATA,
+                               rq->errors, len, data);
+       else
+               __blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9,
+                               0, BLK_TA_DRV_DATA, rq->errors, len, data);
+}
+
  extern int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
                            char __user *arg);
  extern int blk_trace_startstop(struct request_queue *q, int start);
@@ -330,6 +361,7 @@ extern int blk_trace_remove(struct request_queue *q);
  #define blk_add_trace_generic(q, rq, rw, what) do { } while (0)
  #define blk_add_trace_pdu_int(q, what, bio, pdu)       do { } while (0)
  #define blk_add_trace_remap(q, bio, dev, f, t) do {} while (0)
+#define blk_add_driver_data(q, rq, data, len)  do {} while (0)
  #define do_blk_trace_setup(q, name, dev, buts) (-ENOTTY)
  #define blk_trace_setup(q, name, dev, arg)     (-ENOTTY)
  #define blk_trace_startstop(q, start)          (-ENOTTY)
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h

index 54b3623..35a61dc 100644 (file)
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -11,6 +11,8 @@
  #include <linux/hardirq.h>
  #include <linux/sched.h>
  #include <linux/irqflags.h>
+#include <linux/smp.h>
+#include <linux/percpu.h>
  #include <asm/atomic.h>
  #include <asm/ptrace.h>
  #include <asm/system.h>
@@ -273,6 +275,25 @@ extern void softirq_init(void);
  extern void raise_softirq_irqoff(unsigned int nr);
  extern void raise_softirq(unsigned int nr);
  
+/* This is the worklist that queues up per-cpu softirq work.
+ *
+ * send_remote_softirq() adds work to these lists, and
+ * the softirq handler itself dequeues from them.  The queues
+ * are protected by disabling local cpu interrupts and they must
+ * only be accessed by the local cpu that they are for.
+ */
+DECLARE_PER_CPU(struct list_head [NR_SOFTIRQS], softirq_work_list);
+
+/* Try to send a softirq to a remote cpu.  If this cannot be done, the
+ * work will be queued to the local cpu.
+ */
+extern void send_remote_softirq(struct call_single_data *cp, int cpu, int softirq);
+
+/* Like send_remote_softirq(), but the caller must disable local cpu interrupts
+ * and compute the current cpu, passed in as 'this_cpu'.
+ */
+extern void __send_remote_softirq(struct call_single_data *cp, int cpu,
+                                 int this_cpu, int softirq);
  
  /* Tasklets --- multithreaded analogue of BHs.
  
diff --git a/include/linux/smp.h b/include/linux/smp.h

index 66484d4..2e4d58b 100644 (file)
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -7,6 +7,7 @@
   */
  
  #include <linux/errno.h>
+#include <linux/types.h>
  #include <linux/list.h>
  #include <linux/cpumask.h>
  
@@ -16,7 +17,8 @@ struct call_single_data {
         struct list_head list;
         void (*func) (void *info);
         void *info;
-       unsigned int flags;
+       u16 flags;
+       u16 priv;
  };
  
  #ifdef CONFIG_SMP
diff --git a/kernel/softirq.c b/kernel/softirq.c

index 37d67aa..83ba21a 100644 (file)
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -6,6 +6,8 @@
   *     Distribute under GPLv2.
   *
   *     Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
+ *
+ *     Remote softirq infrastructure is by Jens Axboe.
   */
  
  #include <linux/module.h>
@@ -474,17 +476,144 @@ void tasklet_kill(struct tasklet_struct *t)
  
  EXPORT_SYMBOL(tasklet_kill);
  
+DEFINE_PER_CPU(struct list_head [NR_SOFTIRQS], softirq_work_list);
+EXPORT_PER_CPU_SYMBOL(softirq_work_list);
+
+static void __local_trigger(struct call_single_data *cp, int softirq)
+{
+       struct list_head *head = &__get_cpu_var(softirq_work_list[softirq]);
+
+       list_add_tail(&cp->list, head);
+
+       /* Trigger the softirq only if the list was previously empty.  */
+       if (head->next == &cp->list)
+               raise_softirq_irqoff(softirq);
+}
+
+#ifdef CONFIG_USE_GENERIC_SMP_HELPERS
+static void remote_softirq_receive(void *data)
+{
+       struct call_single_data *cp = data;
+       unsigned long flags;
+       int softirq;
+
+       softirq = cp->priv;
+
+       local_irq_save(flags);
+       __local_trigger(cp, softirq);
+       local_irq_restore(flags);
+}
+
+static int __try_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
+{
+       if (cpu_online(cpu)) {
+               cp->func = remote_softirq_receive;
+               cp->info = cp;
+               cp->flags = 0;
+               cp->priv = softirq;
+
+               __smp_call_function_single(cpu, cp);
+               return 0;
+       }
+       return 1;
+}
+#else /* CONFIG_USE_GENERIC_SMP_HELPERS */
+static int __try_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
+{
+       return 1;
+}
+#endif
+
+/**
+ * __send_remote_softirq - try to schedule softirq work on a remote cpu
+ * @cp: private SMP call function data area
+ * @cpu: the remote cpu
+ * @this_cpu: the currently executing cpu
+ * @softirq: the softirq for the work
+ *
+ * Attempt to schedule softirq work on a remote cpu.  If this cannot be
+ * done, the work is instead queued up on the local cpu.
+ *
+ * Interrupts must be disabled.
+ */
+void __send_remote_softirq(struct call_single_data *cp, int cpu, int this_cpu, int softirq)
+{
+       if (cpu == this_cpu || __try_remote_softirq(cp, cpu, softirq))
+               __local_trigger(cp, softirq);
+}
+EXPORT_SYMBOL(__send_remote_softirq);
+
+/**
+ * send_remote_softirq - try to schedule softirq work on a remote cpu
+ * @cp: private SMP call function data area
+ * @cpu: the remote cpu
+ * @softirq: the softirq for the work
+ *
+ * Like __send_remote_softirq() except that disabling interrupts and
+ * computing the current cpu are done for the caller.
+ */
+void send_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
+{
+       unsigned long flags;
+       int this_cpu;
+
+       local_irq_save(flags);
+       this_cpu = smp_processor_id();
+       __send_remote_softirq(cp, cpu, this_cpu, softirq);
+       local_irq_restore(flags);
+}
+EXPORT_SYMBOL(send_remote_softirq);
+
+static int __cpuinit remote_softirq_cpu_notify(struct notifier_block *self,
+                                              unsigned long action, void *hcpu)
+{
+       /*
+        * If a CPU goes away, splice its entries to the current CPU
+        * and trigger a run of the softirq
+        */
+       if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
+               int cpu = (unsigned long) hcpu;
+               int i;
+
+               local_irq_disable();
+               for (i = 0; i < NR_SOFTIRQS; i++) {
+                       struct list_head *head = &per_cpu(softirq_work_list[i], cpu);
+                       struct list_head *local_head;
+
+                       if (list_empty(head))
+                               continue;
+
+                       local_head = &__get_cpu_var(softirq_work_list[i]);
+                       list_splice_init(head, local_head);
+                       raise_softirq_irqoff(i);
+               }
+               local_irq_enable();
+       }
+
+       return NOTIFY_OK;
+}
+
+static struct notifier_block __cpuinitdata remote_softirq_cpu_notifier = {
+       .notifier_call  = remote_softirq_cpu_notify,
+};
+
  void __init softirq_init(void)
  {
         int cpu;
  
         for_each_possible_cpu(cpu) {
+               int i;
+
                 per_cpu(tasklet_vec, cpu).tail =
                         &per_cpu(tasklet_vec, cpu).head;
                 per_cpu(tasklet_hi_vec, cpu).tail =
                         &per_cpu(tasklet_hi_vec, cpu).head;
+               for (i = 0; i < NR_SOFTIRQS; i++)
+                       INIT_LIST_HEAD(&per_cpu(softirq_work_list[i], cpu));
         }
  
+       register_hotcpu_notifier(&remote_softirq_cpu_notifier);
+
         open_softirq(TASKLET_SOFTIRQ, tasklet_action);
         open_softirq(HI_SOFTIRQ, tasklet_hi_action);
  }
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug

index 31d784d..b0f239e 100644 (file)
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -652,6 +652,11 @@ config DEBUG_BLOCK_EXT_DEVT
         depends on BLOCK
         default n
         help
+         BIG FAT WARNING: ENABLING THIS OPTION MIGHT BREAK BOOTING ON
+         SOME DISTRIBUTIONS.  DO NOT ENABLE THIS UNLESS YOU KNOW WHAT
+         YOU ARE DOING.  Distros, please enable this and fix whatever
+         is broken.
+
           Conventionally, block device numbers are allocated from
           predetermined contiguous area.  However, extended block area
           may introduce non-contiguous block device numbers.  This
author	Linus Torvalds <torvalds@linux-foundation.org>
	Fri, 17 Oct 2008 16:29:55 +0000 (09:29 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Fri, 17 Oct 2008 16:29:55 +0000 (09:29 -0700)
block/blk-core.c		patch \| blob \| history
block/blk-merge.c		patch \| blob \| history
block/blk-settings.c		patch \| blob \| history
block/blk.h		patch \| blob \| history
block/elevator.c		patch \| blob \| history
block/genhd.c		patch \| blob \| history
drivers/ide/ide-io.c		patch \| blob \| history
drivers/s390/scsi/zfcp_def.h		patch \| blob \| history
drivers/s390/scsi/zfcp_fsf.c		patch \| blob \| history
drivers/s390/scsi/zfcp_fsf.h		patch \| blob \| history
drivers/s390/scsi/zfcp_qdio.c		patch \| blob \| history
fs/block_dev.c		patch \| blob \| history
fs/partitions/check.c		patch \| blob \| history
include/linux/bio.h		patch \| blob \| history
include/linux/blkdev.h		patch \| blob \| history
include/linux/blktrace_api.h		patch \| blob \| history
include/linux/interrupt.h		patch \| blob \| history
include/linux/smp.h		patch \| blob \| history
kernel/softirq.c		patch \| blob \| history
lib/Kconfig.debug		patch \| blob \| history