workqueue: fix checkpatch issues
[pandora-kernel.git] / kernel / workqueue.c
index 692d976..de429ba 100644
@@ -183,7 +183,8 @@ struct global_cwq {
        struct hlist_head       busy_hash[BUSY_WORKER_HASH_SIZE];
                                                /* L: hash of busy workers */
 
-       struct worker_pool      pools[2];       /* normal and highpri pools */
+       struct worker_pool      pools[NR_WORKER_POOLS];
+                                               /* normal and highpri pools */
 
        wait_queue_head_t       rebind_hold;    /* rebind hold wait */
 } ____cacheline_aligned_in_smp;
@@ -268,16 +269,18 @@ struct workqueue_struct {
 };
 
 struct workqueue_struct *system_wq __read_mostly;
-struct workqueue_struct *system_long_wq __read_mostly;
-struct workqueue_struct *system_nrt_wq __read_mostly;
-struct workqueue_struct *system_unbound_wq __read_mostly;
-struct workqueue_struct *system_freezable_wq __read_mostly;
-struct workqueue_struct *system_nrt_freezable_wq __read_mostly;
 EXPORT_SYMBOL_GPL(system_wq);
+struct workqueue_struct *system_highpri_wq __read_mostly;
+EXPORT_SYMBOL_GPL(system_highpri_wq);
+struct workqueue_struct *system_long_wq __read_mostly;
 EXPORT_SYMBOL_GPL(system_long_wq);
+struct workqueue_struct *system_nrt_wq __read_mostly;
 EXPORT_SYMBOL_GPL(system_nrt_wq);
+struct workqueue_struct *system_unbound_wq __read_mostly;
 EXPORT_SYMBOL_GPL(system_unbound_wq);
+struct workqueue_struct *system_freezable_wq __read_mostly;
 EXPORT_SYMBOL_GPL(system_freezable_wq);
+struct workqueue_struct *system_nrt_freezable_wq __read_mostly;
 EXPORT_SYMBOL_GPL(system_nrt_freezable_wq);
 
 #define CREATE_TRACE_POINTS
@@ -533,18 +536,24 @@ static int work_next_color(int color)
 }
 
 /*
- * A work's data points to the cwq with WORK_STRUCT_CWQ set while the
- * work is on queue.  Once execution starts, WORK_STRUCT_CWQ is
- * cleared and the work data contains the cpu number it was last on.
+ * While queued, %WORK_STRUCT_CWQ is set and the non-flag bits of a work's data
+ * contain the pointer to the queued cwq.  Once execution starts, the flag
+ * is cleared and the high bits contain OFFQ flags and CPU number.
  *
- * set_work_{cwq|cpu}() and clear_work_data() can be used to set the
- * cwq, cpu or clear work->data.  These functions should only be
- * called while the work is owned - ie. while the PENDING bit is set.
+ * set_work_cwq(), set_work_cpu_and_clear_pending(), mark_work_canceling()
+ * and clear_work_data() can be used to set the cwq, cpu or clear
+ * work->data.  These functions should only be called while the work is
+ * owned - ie. while the PENDING bit is set.
  *
- * get_work_[g]cwq() can be used to obtain the gcwq or cwq
- * corresponding to a work.  gcwq is available once the work has been
- * queued anywhere after initialization.  cwq is available only from
- * queueing until execution starts.
+ * get_work_[g]cwq() can be used to obtain the gcwq or cwq corresponding to
+ * a work.  gcwq is available once the work has been queued anywhere after
+ * initialization until it is sync canceled.  cwq is available only while
+ * the work item is queued.
+ *
+ * %WORK_OFFQ_CANCELING is used to mark a work item which is being
+ * canceled.  While being canceled, a work item may have its PENDING set
+ * but stay off timer and worklist for arbitrarily long and nobody should
+ * try to steal the PENDING bit.
  */
 static inline void set_work_data(struct work_struct *work, unsigned long data,
                                 unsigned long flags)
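
To make the encoding above concrete, here is a rough decode sketch; it mirrors get_work_cwq()/get_work_gcwq() further down and is an illustration, not code from the patch:

	static bool example_work_is_queued(struct work_struct *work)
	{
		/* the CWQ flag is set only while the work sits on a cwq */
		return atomic_long_read(&work->data) & WORK_STRUCT_CWQ;
	}

	static unsigned int example_work_last_cpu(struct work_struct *work)
	{
		/* meaningful only once WORK_STRUCT_CWQ has been cleared */
		return atomic_long_read(&work->data) >> WORK_OFFQ_CPU_SHIFT;
	}
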
@@ -561,13 +570,22 @@ static void set_work_cwq(struct work_struct *work,
                      WORK_STRUCT_PENDING | WORK_STRUCT_CWQ | extra_flags);
 }
 
-static void set_work_cpu(struct work_struct *work, unsigned int cpu)
+static void set_work_cpu_and_clear_pending(struct work_struct *work,
+                                          unsigned int cpu)
 {
-       set_work_data(work, cpu << WORK_STRUCT_FLAG_BITS, WORK_STRUCT_PENDING);
+       /*
+        * The following wmb is paired with the implied mb in
+        * test_and_set_bit(PENDING) and ensures all updates to @work made
+        * here are visible to and precede any updates by the next PENDING
+        * owner.
+        */
+       smp_wmb();
+       set_work_data(work, (unsigned long)cpu << WORK_OFFQ_CPU_SHIFT, 0);
 }
 
 static void clear_work_data(struct work_struct *work)
 {
+       smp_wmb();      /* see set_work_cpu_and_clear_pending() */
        set_work_data(work, WORK_STRUCT_NO_CPU, 0);
 }
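
The wmb/test_and_set_bit pairing described in the comment above can be pictured as follows (an informal ordering sketch, not code from the patch):

	CPU0 (current PENDING owner)              CPU1 (next PENDING owner)
	----------------------------              -------------------------
	update @work, worklist, timer
	smp_wmb()
	set_work_cpu_and_clear_pending()
	  /* PENDING dropped */
	                                          test_and_set_bit(PENDING)
	                                            /* implied full mb */
	                                          sees all of CPU0's earlier updates
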
 
@@ -590,7 +608,7 @@ static struct global_cwq *get_work_gcwq(struct work_struct *work)
                return ((struct cpu_workqueue_struct *)
                        (data & WORK_STRUCT_WQ_DATA_MASK))->pool->gcwq;
 
-       cpu = data >> WORK_STRUCT_FLAG_BITS;
+       cpu = data >> WORK_OFFQ_CPU_SHIFT;
        if (cpu == WORK_CPU_NONE)
                return NULL;
 
@@ -598,6 +616,22 @@ static struct global_cwq *get_work_gcwq(struct work_struct *work)
        return get_gcwq(cpu);
 }
 
+static void mark_work_canceling(struct work_struct *work)
+{
+       struct global_cwq *gcwq = get_work_gcwq(work);
+       unsigned long cpu = gcwq ? gcwq->cpu : WORK_CPU_NONE;
+
+       set_work_data(work, (cpu << WORK_OFFQ_CPU_SHIFT) | WORK_OFFQ_CANCELING,
+                     WORK_STRUCT_PENDING);
+}
+
+static bool work_is_canceling(struct work_struct *work)
+{
+       unsigned long data = atomic_long_read(&work->data);
+
+       return !(data & WORK_STRUCT_CWQ) && (data & WORK_OFFQ_CANCELING);
+}
+
 /*
  * Policy functions.  These define the policies on how the global worker
  * pools are managed.  Unless noted otherwise, these functions assume that
@@ -901,6 +935,191 @@ static struct worker *find_worker_executing_work(struct global_cwq *gcwq,
                                            work);
 }
 
+/**
+ * move_linked_works - move linked works to a list
+ * @work: start of series of works to be scheduled
+ * @head: target list to append @work to
+ * @nextp: out parameter for nested worklist walking
+ *
+ * Schedule linked works starting from @work to @head.  Work series to
+ * be scheduled starts at @work and includes any consecutive work with
+ * WORK_STRUCT_LINKED set in its predecessor.
+ *
+ * If @nextp is not NULL, it's updated to point to the next work of
+ * the last scheduled work.  This allows move_linked_works() to be
+ * nested inside outer list_for_each_entry_safe().
+ *
+ * CONTEXT:
+ * spin_lock_irq(gcwq->lock).
+ */
+static void move_linked_works(struct work_struct *work, struct list_head *head,
+                             struct work_struct **nextp)
+{
+       struct work_struct *n;
+
+       /*
+        * Linked worklist will always end before the end of the list,
+        * use NULL for list head.
+        */
+       list_for_each_entry_safe_from(work, n, NULL, entry) {
+               list_move_tail(&work->entry, head);
+               if (!(*work_data_bits(work) & WORK_STRUCT_LINKED))
+                       break;
+       }
+
+       /*
+        * If we're already inside safe list traversal and have moved
+        * multiple works to the scheduled queue, the next position
+        * needs to be updated.
+        */
+       if (nextp)
+               *nextp = n;
+}
+
+static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq)
+{
+       struct work_struct *work = list_first_entry(&cwq->delayed_works,
+                                                   struct work_struct, entry);
+
+       trace_workqueue_activate_work(work);
+       move_linked_works(work, &cwq->pool->worklist, NULL);
+       __clear_bit(WORK_STRUCT_DELAYED_BIT, work_data_bits(work));
+       cwq->nr_active++;
+}
+
+/**
+ * cwq_dec_nr_in_flight - decrement cwq's nr_in_flight
+ * @cwq: cwq of interest
+ * @color: color of work which left the queue
+ * @delayed: for a delayed work
+ *
+ * A work either has completed or is removed from pending queue,
+ * decrement nr_in_flight of its cwq and handle workqueue flushing.
+ *
+ * CONTEXT:
+ * spin_lock_irq(gcwq->lock).
+ */
+static void cwq_dec_nr_in_flight(struct cpu_workqueue_struct *cwq, int color,
+                                bool delayed)
+{
+       /* ignore uncolored works */
+       if (color == WORK_NO_COLOR)
+               return;
+
+       cwq->nr_in_flight[color]--;
+
+       if (!delayed) {
+               cwq->nr_active--;
+               if (!list_empty(&cwq->delayed_works)) {
+                       /* one down, submit a delayed one */
+                       if (cwq->nr_active < cwq->max_active)
+                               cwq_activate_first_delayed(cwq);
+               }
+       }
+
+       /* is flush in progress and are we at the flushing tip? */
+       if (likely(cwq->flush_color != color))
+               return;
+
+       /* are there still in-flight works? */
+       if (cwq->nr_in_flight[color])
+               return;
+
+       /* this cwq is done, clear flush_color */
+       cwq->flush_color = -1;
+
+       /*
+        * If this was the last cwq, wake up the first flusher.  It
+        * will handle the rest.
+        */
+       if (atomic_dec_and_test(&cwq->wq->nr_cwqs_to_flush))
+               complete(&cwq->wq->first_flusher->done);
+}
+
+/**
+ * try_to_grab_pending - steal work item from worklist and disable irq
+ * @work: work item to steal
+ * @is_dwork: @work is a delayed_work
+ * @flags: place to store irq state
+ *
+ * Try to grab PENDING bit of @work.  This function can handle @work in any
+ * stable state - idle, on timer or on worklist.  Return values are
+ *
+ *  1          if @work was pending and we successfully stole PENDING
+ *  0          if @work was idle and we claimed PENDING
+ *  -EAGAIN    if PENDING couldn't be grabbed at the moment, safe to busy-retry
+ *  -ENOENT    if someone else is canceling @work, this state may persist
+ *             for arbitrarily long
+ *
+ * On >= 0 return, the caller owns @work's PENDING bit.  To avoid getting
+ * preempted while holding PENDING and @work off queue, preemption must be
+ * disabled on entry.  This ensures that we don't return -EAGAIN while
+ * another task is preempted in this function.
+ *
+ * On successful return, >= 0, irq is disabled and the caller is
+ * responsible for releasing it using local_irq_restore(*@flags).
+ *
+ * This function is safe to call from any context other than IRQ handler.
+ * An IRQ handler may run on top of delayed_work_timer_fn() which can make
+ * this function return -EAGAIN perpetually.
+ */
+static int try_to_grab_pending(struct work_struct *work, bool is_dwork,
+                              unsigned long *flags)
+{
+       struct global_cwq *gcwq;
+
+       WARN_ON_ONCE(in_irq());
+
+       local_irq_save(*flags);
+
+       /* try to steal the timer if it exists */
+       if (is_dwork) {
+               struct delayed_work *dwork = to_delayed_work(work);
+
+               if (likely(del_timer(&dwork->timer)))
+                       return 1;
+       }
+
+       /* try to claim PENDING the normal way */
+       if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work)))
+               return 0;
+
+       /*
+        * The queueing is in progress, or it is already queued. Try to
+        * steal it from ->worklist without clearing WORK_STRUCT_PENDING.
+        */
+       gcwq = get_work_gcwq(work);
+       if (!gcwq)
+               goto fail;
+
+       spin_lock(&gcwq->lock);
+       if (!list_empty(&work->entry)) {
+               /*
+                * This work is queued, but perhaps we locked the wrong gcwq.
+                * In that case we must see the new value after rmb(), see
+                * insert_work()->wmb().
+                */
+               smp_rmb();
+               if (gcwq == get_work_gcwq(work)) {
+                       debug_work_deactivate(work);
+                       list_del_init(&work->entry);
+                       cwq_dec_nr_in_flight(get_work_cwq(work),
+                               get_work_color(work),
+                               *work_data_bits(work) & WORK_STRUCT_DELAYED);
+
+                       spin_unlock(&gcwq->lock);
+                       return 1;
+               }
+       }
+       spin_unlock(&gcwq->lock);
+fail:
+       local_irq_restore(*flags);
+       if (work_is_canceling(work))
+               return -ENOENT;
+       cpu_relax();
+       return -EAGAIN;
+}
+
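
The expected calling pattern, which mod_delayed_work_on() and __cancel_work_timer() below both follow, is roughly:

	unsigned long flags;
	int ret;

	do {
		ret = try_to_grab_pending(work, is_dwork, &flags);
	} while (unlikely(ret == -EAGAIN));

	if (likely(ret >= 0)) {
		/* PENDING is owned and IRQs are off; requeue or cancel here */
		local_irq_restore(flags);
	}
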
 /**
  * insert_work - insert a work into gcwq
  * @cwq: cwq @work belongs to
@@ -981,7 +1200,15 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
        struct cpu_workqueue_struct *cwq;
        struct list_head *worklist;
        unsigned int work_flags;
-       unsigned long flags;
+       unsigned int req_cpu = cpu;
+
+       /*
+        * While a work item is PENDING && off queue, a task trying to
+        * steal the PENDING will busy-loop waiting for it to either get
+        * queued or lose PENDING.  Grabbing PENDING and queueing should
+        * happen with IRQ disabled.
+        */
+       WARN_ON_ONCE(!irqs_disabled());
 
        debug_work_activate(work);
 
@@ -994,7 +1221,7 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
        if (!(wq->flags & WQ_UNBOUND)) {
                struct global_cwq *last_gcwq;
 
-               if (unlikely(cpu == WORK_CPU_UNBOUND))
+               if (cpu == WORK_CPU_UNBOUND)
                        cpu = raw_smp_processor_id();
 
                /*
@@ -1008,7 +1235,7 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
                    (last_gcwq = get_work_gcwq(work)) && last_gcwq != gcwq) {
                        struct worker *worker;
 
-                       spin_lock_irqsave(&last_gcwq->lock, flags);
+                       spin_lock(&last_gcwq->lock);
 
                        worker = find_worker_executing_work(last_gcwq, work);
 
@@ -1016,22 +1243,23 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
                                gcwq = last_gcwq;
                        else {
                                /* meh... not running there, queue here */
-                               spin_unlock_irqrestore(&last_gcwq->lock, flags);
-                               spin_lock_irqsave(&gcwq->lock, flags);
+                               spin_unlock(&last_gcwq->lock);
+                               spin_lock(&gcwq->lock);
                        }
-               } else
-                       spin_lock_irqsave(&gcwq->lock, flags);
+               } else {
+                       spin_lock(&gcwq->lock);
+               }
        } else {
                gcwq = get_gcwq(WORK_CPU_UNBOUND);
-               spin_lock_irqsave(&gcwq->lock, flags);
+               spin_lock(&gcwq->lock);
        }
 
        /* gcwq determined, get cwq and queue */
        cwq = get_cwq(gcwq->cpu, wq);
-       trace_workqueue_queue_work(cpu, cwq, work);
+       trace_workqueue_queue_work(req_cpu, cwq, work);
 
        if (WARN_ON(!list_empty(&work->entry))) {
-               spin_unlock_irqrestore(&gcwq->lock, flags);
+               spin_unlock(&gcwq->lock);
                return;
        }
 
@@ -1049,79 +1277,111 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
 
        insert_work(cwq, work, worklist, work_flags);
 
-       spin_unlock_irqrestore(&gcwq->lock, flags);
+       spin_unlock(&gcwq->lock);
 }
 
 /**
- * queue_work - queue work on a workqueue
+ * queue_work_on - queue work on specific cpu
+ * @cpu: CPU number to execute work on
  * @wq: workqueue to use
  * @work: work to queue
  *
- * Returns 0 if @work was already on a queue, non-zero otherwise.
+ * Returns %false if @work was already on a queue, %true otherwise.
  *
- * We queue the work to the CPU on which it was submitted, but if the CPU dies
- * it can be processed by another CPU.
+ * We queue the work to a specific CPU, the caller must ensure it
+ * can't go away.
  */
-int queue_work(struct workqueue_struct *wq, struct work_struct *work)
+bool queue_work_on(int cpu, struct workqueue_struct *wq,
+                  struct work_struct *work)
 {
-       int ret;
+       bool ret = false;
+       unsigned long flags;
 
-       ret = queue_work_on(get_cpu(), wq, work);
-       put_cpu();
+       local_irq_save(flags);
 
+       if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
+               __queue_work(cpu, wq, work);
+               ret = true;
+       }
+
+       local_irq_restore(flags);
        return ret;
 }
-EXPORT_SYMBOL_GPL(queue_work);
+EXPORT_SYMBOL_GPL(queue_work_on);
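
A minimal caller sketch for the bool-returning API; my_work_fn and the CPU number are illustrative, not part of the patch:

	static void my_work_fn(struct work_struct *work)
	{
		pr_info("ran on CPU %d\n", raw_smp_processor_id());
	}
	static DECLARE_WORK(my_work, my_work_fn);

	static void kick_example(void)
	{
		/* pin execution to CPU 1; the caller must keep that CPU online */
		if (!queue_work_on(1, system_wq, &my_work))
			pr_info("my_work was already pending\n");
	}
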
 
 /**
- * queue_work_on - queue work on specific cpu
- * @cpu: CPU number to execute work on
+ * queue_work - queue work on a workqueue
  * @wq: workqueue to use
  * @work: work to queue
  *
- * Returns 0 if @work was already on a queue, non-zero otherwise.
+ * Returns %false if @work was already on a queue, %true otherwise.
  *
- * We queue the work to a specific CPU, the caller must ensure it
- * can't go away.
+ * We queue the work to the CPU on which it was submitted, but if the CPU dies
+ * it can be processed by another CPU.
  */
-int
-queue_work_on(int cpu, struct workqueue_struct *wq, struct work_struct *work)
+bool queue_work(struct workqueue_struct *wq, struct work_struct *work)
 {
-       int ret = 0;
-
-       if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
-               __queue_work(cpu, wq, work);
-               ret = 1;
-       }
-       return ret;
+       return queue_work_on(WORK_CPU_UNBOUND, wq, work);
 }
-EXPORT_SYMBOL_GPL(queue_work_on);
+EXPORT_SYMBOL_GPL(queue_work);
 
-static void delayed_work_timer_fn(unsigned long __data)
+void delayed_work_timer_fn(unsigned long __data)
 {
        struct delayed_work *dwork = (struct delayed_work *)__data;
        struct cpu_workqueue_struct *cwq = get_work_cwq(&dwork->work);
 
-       __queue_work(smp_processor_id(), cwq->wq, &dwork->work);
+       local_irq_disable();
+       __queue_work(dwork->cpu, cwq->wq, &dwork->work);
+       local_irq_enable();
 }
+EXPORT_SYMBOL_GPL(delayed_work_timer_fn);
 
-/**
- * queue_delayed_work - queue work on a workqueue after delay
- * @wq: workqueue to use
- * @dwork: delayable work to queue
- * @delay: number of jiffies to wait before queueing
- *
- * Returns 0 if @work was already on a queue, non-zero otherwise.
- */
-int queue_delayed_work(struct workqueue_struct *wq,
-                       struct delayed_work *dwork, unsigned long delay)
+static void __queue_delayed_work(int cpu, struct workqueue_struct *wq,
+                               struct delayed_work *dwork, unsigned long delay)
 {
-       if (delay == 0)
-               return queue_work(wq, &dwork->work);
+       struct timer_list *timer = &dwork->timer;
+       struct work_struct *work = &dwork->work;
+       unsigned int lcpu;
+
+       WARN_ON_ONCE(timer->function != delayed_work_timer_fn ||
+                    timer->data != (unsigned long)dwork);
+       BUG_ON(timer_pending(timer));
+       BUG_ON(!list_empty(&work->entry));
+
+       timer_stats_timer_set_start_info(&dwork->timer);
+
+       /*
+        * This stores cwq for the moment, for the timer_fn.  Note that the
+        * work's gcwq is preserved to allow reentrance detection for
+        * delayed works.
+        */
+       if (!(wq->flags & WQ_UNBOUND)) {
+               struct global_cwq *gcwq = get_work_gcwq(work);
+
+               /*
+                * If we cannot get the last gcwq from @work directly,
+                * select the last CPU such that it avoids unnecessarily
+                * triggering non-reentrancy check in __queue_work().
+                */
+               lcpu = cpu;
+               if (gcwq)
+                       lcpu = gcwq->cpu;
+               if (lcpu == WORK_CPU_UNBOUND)
+                       lcpu = raw_smp_processor_id();
+       } else {
+               lcpu = WORK_CPU_UNBOUND;
+       }
+
+       set_work_cwq(work, get_cwq(lcpu, wq), 0);
+
+       dwork->cpu = cpu;
+       timer->expires = jiffies + delay;
 
-       return queue_delayed_work_on(-1, wq, dwork, delay);
+       if (unlikely(cpu != WORK_CPU_UNBOUND))
+               add_timer_on(timer, cpu);
+       else
+               add_timer(timer);
 }
-EXPORT_SYMBOL_GPL(queue_delayed_work);
 
 /**
  * queue_delayed_work_on - queue work on specific CPU after delay
@@ -1130,53 +1390,100 @@ EXPORT_SYMBOL_GPL(queue_delayed_work);
  * @dwork: work to queue
  * @delay: number of jiffies to wait before queueing
  *
- * Returns 0 if @work was already on a queue, non-zero otherwise.
+ * Returns %false if @work was already on a queue, %true otherwise.  If
+ * @delay is zero and @dwork is idle, it will be scheduled for immediate
+ * execution.
  */
-int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
-                       struct delayed_work *dwork, unsigned long delay)
+bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
+                          struct delayed_work *dwork, unsigned long delay)
 {
-       int ret = 0;
-       struct timer_list *timer = &dwork->timer;
        struct work_struct *work = &dwork->work;
+       bool ret = false;
+       unsigned long flags;
 
-       if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
-               unsigned int lcpu;
+       if (!delay)
+               return queue_work_on(cpu, wq, &dwork->work);
 
-               BUG_ON(timer_pending(timer));
-               BUG_ON(!list_empty(&work->entry));
+       /* read the comment in __queue_work() */
+       local_irq_save(flags);
 
-               timer_stats_timer_set_start_info(&dwork->timer);
+       if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
+               __queue_delayed_work(cpu, wq, dwork, delay);
+               ret = true;
+       }
 
-               /*
-                * This stores cwq for the moment, for the timer_fn.
-                * Note that the work's gcwq is preserved to allow
-                * reentrance detection for delayed works.
-                */
-               if (!(wq->flags & WQ_UNBOUND)) {
-                       struct global_cwq *gcwq = get_work_gcwq(work);
+       local_irq_restore(flags);
+       return ret;
+}
+EXPORT_SYMBOL_GPL(queue_delayed_work_on);
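
A matching delayed-work sketch; the names and the one-second delay are illustrative:

	static void my_dwork_fn(struct work_struct *work)
	{
		/* runs about one second after kick_delayed_example() */
	}
	static DECLARE_DELAYED_WORK(my_dwork, my_dwork_fn);

	static void kick_delayed_example(void)
	{
		/* run on CPU 0 roughly one second from now */
		queue_delayed_work_on(0, system_wq, &my_dwork, HZ);
	}
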
 
-                       if (gcwq && gcwq->cpu != WORK_CPU_UNBOUND)
-                               lcpu = gcwq->cpu;
-                       else
-                               lcpu = raw_smp_processor_id();
-               } else
-                       lcpu = WORK_CPU_UNBOUND;
+/**
+ * queue_delayed_work - queue work on a workqueue after delay
+ * @wq: workqueue to use
+ * @dwork: delayable work to queue
+ * @delay: number of jiffies to wait before queueing
+ *
+ * Equivalent to queue_delayed_work_on() but tries to use the local CPU.
+ */
+bool queue_delayed_work(struct workqueue_struct *wq,
+                       struct delayed_work *dwork, unsigned long delay)
+{
+       return queue_delayed_work_on(WORK_CPU_UNBOUND, wq, dwork, delay);
+}
+EXPORT_SYMBOL_GPL(queue_delayed_work);
 
-               set_work_cwq(work, get_cwq(lcpu, wq), 0);
+/**
+ * mod_delayed_work_on - modify delay of or queue a delayed work on specific CPU
+ * @cpu: CPU number to execute work on
+ * @wq: workqueue to use
+ * @dwork: work to queue
+ * @delay: number of jiffies to wait before queueing
+ *
+ * If @dwork is idle, equivalent to queue_delayed_work_on(); otherwise,
+ * modify @dwork's timer so that it expires after @delay.  If @delay is
+ * zero, @work is guaranteed to be scheduled immediately regardless of its
+ * current state.
+ *
+ * Returns %false if @dwork was idle and queued, %true if @dwork was
+ * pending and its timer was modified.
+ *
+ * This function is safe to call from any context other than IRQ handler.
+ * See try_to_grab_pending() for details.
+ */
+bool mod_delayed_work_on(int cpu, struct workqueue_struct *wq,
+                        struct delayed_work *dwork, unsigned long delay)
+{
+       unsigned long flags;
+       int ret;
 
-               timer->expires = jiffies + delay;
-               timer->data = (unsigned long)dwork;
-               timer->function = delayed_work_timer_fn;
+       do {
+               ret = try_to_grab_pending(&dwork->work, true, &flags);
+       } while (unlikely(ret == -EAGAIN));
 
-               if (unlikely(cpu >= 0))
-                       add_timer_on(timer, cpu);
-               else
-                       add_timer(timer);
-               ret = 1;
+       if (likely(ret >= 0)) {
+               __queue_delayed_work(cpu, wq, dwork, delay);
+               local_irq_restore(flags);
        }
+
+       /* -ENOENT from try_to_grab_pending() becomes %true */
        return ret;
 }
-EXPORT_SYMBOL_GPL(queue_delayed_work_on);
+EXPORT_SYMBOL_GPL(mod_delayed_work_on);
+
+/**
+ * mod_delayed_work - modify delay of or queue a delayed work
+ * @wq: workqueue to use
+ * @dwork: work to queue
+ * @delay: number of jiffies to wait before queueing
+ *
+ * mod_delayed_work_on() on local CPU.
+ */
+bool mod_delayed_work(struct workqueue_struct *wq, struct delayed_work *dwork,
+                     unsigned long delay)
+{
+       return mod_delayed_work_on(WORK_CPU_UNBOUND, wq, dwork, delay);
+}
+EXPORT_SYMBOL_GPL(mod_delayed_work);
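
The new mod_delayed_work() suits the common debounce pattern; a sketch with illustrative names:

	static void flush_events(struct work_struct *work)
	{
		/* process everything gathered since the last run */
	}
	static DECLARE_DELAYED_WORK(flush_dwork, flush_events);

	static void on_event(void)
	{
		/* each event pushes execution back; runs once, 100ms after the last event */
		mod_delayed_work(system_wq, &flush_dwork, msecs_to_jiffies(100));
	}
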
 
 /**
  * worker_enter_idle - enter idle state
@@ -1434,6 +1741,7 @@ retry:
        /* rebind busy workers */
        for_each_busy_worker(worker, i, pos, gcwq) {
                struct work_struct *rebind_work = &worker->rebind_work;
+               struct workqueue_struct *wq;
 
                /* morph UNBOUND to REBIND */
                worker->flags &= ~WORKER_UNBOUND;
@@ -1443,11 +1751,20 @@ retry:
                                     work_data_bits(rebind_work)))
                        continue;
 
-               /* wq doesn't matter, use the default one */
                debug_work_activate(rebind_work);
-               insert_work(get_cwq(gcwq->cpu, system_wq), rebind_work,
-                           worker->scheduled.next,
-                           work_color_to_flags(WORK_NO_COLOR));
+
+               /*
+                * wq doesn't really matter but let's keep @worker->pool
+                * and @cwq->pool consistent for sanity.
+                */
+               if (worker_pool_pri(worker->pool))
+                       wq = system_highpri_wq;
+               else
+                       wq = system_wq;
+
+               insert_work(get_cwq(gcwq->cpu, wq), rebind_work,
+                           worker->scheduled.next,
+                           work_color_to_flags(WORK_NO_COLOR));
        }
 }
 
@@ -1810,107 +2127,6 @@ static bool manage_workers(struct worker *worker)
        return ret;
 }
 
-/**
- * move_linked_works - move linked works to a list
- * @work: start of series of works to be scheduled
- * @head: target list to append @work to
- * @nextp: out parameter for nested worklist walking
- *
- * Schedule linked works starting from @work to @head.  Work series to
- * be scheduled starts at @work and includes any consecutive work with
- * WORK_STRUCT_LINKED set in its predecessor.
- *
- * If @nextp is not NULL, it's updated to point to the next work of
- * the last scheduled work.  This allows move_linked_works() to be
- * nested inside outer list_for_each_entry_safe().
- *
- * CONTEXT:
- * spin_lock_irq(gcwq->lock).
- */
-static void move_linked_works(struct work_struct *work, struct list_head *head,
-                             struct work_struct **nextp)
-{
-       struct work_struct *n;
-
-       /*
-        * Linked worklist will always end before the end of the list,
-        * use NULL for list head.
-        */
-       list_for_each_entry_safe_from(work, n, NULL, entry) {
-               list_move_tail(&work->entry, head);
-               if (!(*work_data_bits(work) & WORK_STRUCT_LINKED))
-                       break;
-       }
-
-       /*
-        * If we're already inside safe list traversal and have moved
-        * multiple works to the scheduled queue, the next position
-        * needs to be updated.
-        */
-       if (nextp)
-               *nextp = n;
-}
-
-static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq)
-{
-       struct work_struct *work = list_first_entry(&cwq->delayed_works,
-                                                   struct work_struct, entry);
-
-       trace_workqueue_activate_work(work);
-       move_linked_works(work, &cwq->pool->worklist, NULL);
-       __clear_bit(WORK_STRUCT_DELAYED_BIT, work_data_bits(work));
-       cwq->nr_active++;
-}
-
-/**
- * cwq_dec_nr_in_flight - decrement cwq's nr_in_flight
- * @cwq: cwq of interest
- * @color: color of work which left the queue
- * @delayed: for a delayed work
- *
- * A work either has completed or is removed from pending queue,
- * decrement nr_in_flight of its cwq and handle workqueue flushing.
- *
- * CONTEXT:
- * spin_lock_irq(gcwq->lock).
- */
-static void cwq_dec_nr_in_flight(struct cpu_workqueue_struct *cwq, int color,
-                                bool delayed)
-{
-       /* ignore uncolored works */
-       if (color == WORK_NO_COLOR)
-               return;
-
-       cwq->nr_in_flight[color]--;
-
-       if (!delayed) {
-               cwq->nr_active--;
-               if (!list_empty(&cwq->delayed_works)) {
-                       /* one down, submit a delayed one */
-                       if (cwq->nr_active < cwq->max_active)
-                               cwq_activate_first_delayed(cwq);
-               }
-       }
-
-       /* is flush in progress and are we at the flushing tip? */
-       if (likely(cwq->flush_color != color))
-               return;
-
-       /* are there still in-flight works? */
-       if (cwq->nr_in_flight[color])
-               return;
-
-       /* this cwq is done, clear flush_color */
-       cwq->flush_color = -1;
-
-       /*
-        * If this was the last cwq, wake up the first flusher.  It
-        * will handle the rest.
-        */
-       if (atomic_dec_and_test(&cwq->wq->nr_cwqs_to_flush))
-               complete(&cwq->wq->first_flusher->done);
-}
-
 /**
  * process_one_work - process single work
  * @worker: self
@@ -1970,15 +2186,13 @@ __acquires(&gcwq->lock)
                return;
        }
 
-       /* claim and process */
+       /* claim and dequeue */
        debug_work_deactivate(work);
        hlist_add_head(&worker->hentry, bwh);
        worker->current_work = work;
        worker->current_cwq = cwq;
        work_color = get_work_color(work);
 
-       /* record the current cpu number in the work data and dequeue */
-       set_work_cpu(work, gcwq->cpu);
        list_del_init(&work->entry);
 
        /*
@@ -1995,9 +2209,16 @@ __acquires(&gcwq->lock)
        if ((worker->flags & WORKER_UNBOUND) && need_more_worker(pool))
                wake_up_worker(pool);
 
+       /*
+        * Record the last CPU and clear PENDING which should be the last
+        * update to @work.  Also, do this inside @gcwq->lock so that
+        * PENDING and queued state changes happen together while IRQ is
+        * disabled.
+        */
+       set_work_cpu_and_clear_pending(work, gcwq->cpu);
+
        spin_unlock_irq(&gcwq->lock);
 
-       work_clear_pending(work);
        lock_map_acquire_read(&cwq->wq->lockdep_map);
        lock_map_acquire(&lockdep_map);
        trace_workqueue_execute_start(work);
@@ -2011,11 +2232,9 @@ __acquires(&gcwq->lock)
        lock_map_release(&cwq->wq->lockdep_map);
 
        if (unlikely(in_atomic() || lockdep_depth(current) > 0)) {
-               printk(KERN_ERR "BUG: workqueue leaked lock or atomic: "
-                      "%s/0x%08x/%d\n",
-                      current->comm, preempt_count(), task_pid_nr(current));
-               printk(KERN_ERR "    last function: ");
-               print_symbol("%s\n", (unsigned long)f);
+               pr_err("BUG: workqueue leaked lock or atomic: %s/0x%08x/%d\n"
+                      "     last function: %pf\n",
+                      current->comm, preempt_count(), task_pid_nr(current), f);
                debug_show_held_locks(current);
                dump_stack();
        }
@@ -2569,8 +2788,8 @@ reflush:
 
                if (++flush_cnt == 10 ||
                    (flush_cnt % 100 == 0 && flush_cnt <= 1000))
-                       pr_warning("workqueue %s: flush on destruction isn't complete after %u tries\n",
-                                  wq->name, flush_cnt);
+                       pr_warn("workqueue %s: flush on destruction isn't complete after %u tries\n",
+                               wq->name, flush_cnt);
                goto reflush;
        }
 
@@ -2738,60 +2957,26 @@ bool flush_work_sync(struct work_struct *work)
 }
 EXPORT_SYMBOL_GPL(flush_work_sync);
 
-/*
- * Upon a successful return (>= 0), the caller "owns" WORK_STRUCT_PENDING bit,
- * so this work can't be re-armed in any way.
- */
-static int try_to_grab_pending(struct work_struct *work)
-{
-       struct global_cwq *gcwq;
-       int ret = -1;
-
-       if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work)))
-               return 0;
-
-       /*
-        * The queueing is in progress, or it is already queued. Try to
-        * steal it from ->worklist without clearing WORK_STRUCT_PENDING.
-        */
-       gcwq = get_work_gcwq(work);
-       if (!gcwq)
-               return ret;
-
-       spin_lock_irq(&gcwq->lock);
-       if (!list_empty(&work->entry)) {
-               /*
-                * This work is queued, but perhaps we locked the wrong gcwq.
-                * In that case we must see the new value after rmb(), see
-                * insert_work()->wmb().
-                */
-               smp_rmb();
-               if (gcwq == get_work_gcwq(work)) {
-                       debug_work_deactivate(work);
-                       list_del_init(&work->entry);
-                       cwq_dec_nr_in_flight(get_work_cwq(work),
-                               get_work_color(work),
-                               *work_data_bits(work) & WORK_STRUCT_DELAYED);
-                       ret = 1;
-               }
-       }
-       spin_unlock_irq(&gcwq->lock);
-
-       return ret;
-}
-
-static bool __cancel_work_timer(struct work_struct *work,
-                               struct timer_list* timer)
+static bool __cancel_work_timer(struct work_struct *work, bool is_dwork)
 {
+       unsigned long flags;
        int ret;
 
        do {
-               ret = (timer && likely(del_timer(timer)));
-               if (!ret)
-                       ret = try_to_grab_pending(work);
-               wait_on_work(work);
+               ret = try_to_grab_pending(work, is_dwork, &flags);
+               /*
+                * If someone else is canceling, wait for the same event it
+                * would be waiting for before retrying.
+                */
+               if (unlikely(ret == -ENOENT))
+                       wait_on_work(work);
        } while (unlikely(ret < 0));
 
+       /* tell other tasks trying to grab @work to back off */
+       mark_work_canceling(work);
+       local_irq_restore(flags);
+
+       wait_on_work(work);
        clear_work_data(work);
        return ret;
 }
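
Typical teardown usage built on the reworked helper; the work items are the illustrative ones from the sketches above:

	static void my_driver_teardown(void)
	{
		/* keep the items from rearming and wait out any running instance */
		cancel_delayed_work_sync(&my_dwork);
		cancel_work_sync(&my_work);
	}
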
@@ -2816,7 +3001,7 @@ static bool __cancel_work_timer(struct work_struct *work,
  */
 bool cancel_work_sync(struct work_struct *work)
 {
-       return __cancel_work_timer(work, NULL);
+       return __cancel_work_timer(work, false);
 }
 EXPORT_SYMBOL_GPL(cancel_work_sync);
 
@@ -2834,9 +3019,11 @@ EXPORT_SYMBOL_GPL(cancel_work_sync);
  */
 bool flush_delayed_work(struct delayed_work *dwork)
 {
+       local_irq_disable();
        if (del_timer_sync(&dwork->timer))
-               __queue_work(raw_smp_processor_id(),
+               __queue_work(dwork->cpu,
                             get_work_cwq(&dwork->work)->wq, &dwork->work);
+       local_irq_enable();
        return flush_work(&dwork->work);
 }
 EXPORT_SYMBOL(flush_delayed_work);
@@ -2855,9 +3042,11 @@ EXPORT_SYMBOL(flush_delayed_work);
  */
 bool flush_delayed_work_sync(struct delayed_work *dwork)
 {
+       local_irq_disable();
        if (del_timer_sync(&dwork->timer))
-               __queue_work(raw_smp_processor_id(),
+               __queue_work(dwork->cpu,
                             get_work_cwq(&dwork->work)->wq, &dwork->work);
+       local_irq_enable();
        return flush_work_sync(&dwork->work);
 }
 EXPORT_SYMBOL(flush_delayed_work_sync);
@@ -2873,54 +3062,39 @@ EXPORT_SYMBOL(flush_delayed_work_sync);
  */
 bool cancel_delayed_work_sync(struct delayed_work *dwork)
 {
-       return __cancel_work_timer(&dwork->work, &dwork->timer);
+       return __cancel_work_timer(&dwork->work, true);
 }
 EXPORT_SYMBOL(cancel_delayed_work_sync);
 
 /**
- * schedule_work - put work task in global workqueue
- * @work: job to be done
- *
- * Returns zero if @work was already on the kernel-global workqueue and
- * non-zero otherwise.
- *
- * This puts a job in the kernel-global workqueue if it was not already
- * queued and leaves it in the same position on the kernel-global
- * workqueue otherwise.
- */
-int schedule_work(struct work_struct *work)
-{
-       return queue_work(system_wq, work);
-}
-EXPORT_SYMBOL(schedule_work);
-
-/*
  * schedule_work_on - put work task on a specific cpu
  * @cpu: cpu to put the work task on
  * @work: job to be done
  *
  * This puts a job on a specific cpu
  */
-int schedule_work_on(int cpu, struct work_struct *work)
+bool schedule_work_on(int cpu, struct work_struct *work)
 {
        return queue_work_on(cpu, system_wq, work);
 }
 EXPORT_SYMBOL(schedule_work_on);
 
 /**
- * schedule_delayed_work - put work task in global workqueue after delay
- * @dwork: job to be done
- * @delay: number of jiffies to wait or 0 for immediate execution
+ * schedule_work - put work task in global workqueue
+ * @work: job to be done
  *
- * After waiting for a given time this puts a job in the kernel-global
- * workqueue.
+ * Returns %false if @work was already on the kernel-global workqueue and
+ * %true otherwise.
+ *
+ * This puts a job in the kernel-global workqueue if it was not already
+ * queued and leaves it in the same position on the kernel-global
+ * workqueue otherwise.
  */
-int schedule_delayed_work(struct delayed_work *dwork,
-                                       unsigned long delay)
+bool schedule_work(struct work_struct *work)
 {
-       return queue_delayed_work(system_wq, dwork, delay);
+       return queue_work(system_wq, work);
 }
-EXPORT_SYMBOL(schedule_delayed_work);
+EXPORT_SYMBOL(schedule_work);
 
 /**
  * schedule_delayed_work_on - queue work in global workqueue on CPU after delay
@@ -2931,13 +3105,27 @@ EXPORT_SYMBOL(schedule_delayed_work);
  * After waiting for a given time this puts a job in the kernel-global
  * workqueue on the specified CPU.
  */
-int schedule_delayed_work_on(int cpu,
-                       struct delayed_work *dwork, unsigned long delay)
+bool schedule_delayed_work_on(int cpu, struct delayed_work *dwork,
+                             unsigned long delay)
 {
        return queue_delayed_work_on(cpu, system_wq, dwork, delay);
 }
 EXPORT_SYMBOL(schedule_delayed_work_on);
 
+/**
+ * schedule_delayed_work - put work task in global workqueue after delay
+ * @dwork: job to be done
+ * @delay: number of jiffies to wait or 0 for immediate execution
+ *
+ * After waiting for a given time this puts a job in the kernel-global
+ * workqueue.
+ */
+bool schedule_delayed_work(struct delayed_work *dwork, unsigned long delay)
+{
+       return queue_delayed_work(system_wq, dwork, delay);
+}
+EXPORT_SYMBOL(schedule_delayed_work);
+
 /**
  * schedule_on_each_cpu - execute a function synchronously on each online CPU
  * @func: the function to call
@@ -3085,9 +3273,8 @@ static int wq_clamp_max_active(int max_active, unsigned int flags,
        int lim = flags & WQ_UNBOUND ? WQ_UNBOUND_MAX_ACTIVE : WQ_MAX_ACTIVE;
 
        if (max_active < 1 || max_active > lim)
-               printk(KERN_WARNING "workqueue: max_active %d requested for %s "
-                      "is out of range, clamping between %d and %d\n",
-                      max_active, name, 1, lim);
+               pr_warn("workqueue: max_active %d requested for %s is out of range, clamping between %d and %d\n",
+                       max_active, name, 1, lim);
 
        return clamp_val(max_active, 1, lim);
 }
@@ -3490,7 +3677,7 @@ static int __devinit workqueue_cpu_down_callback(struct notifier_block *nfb,
        case CPU_DOWN_PREPARE:
                /* unbinding should happen on the local CPU */
                INIT_WORK_ONSTACK(&unbind_work, gcwq_unbind_fn);
-               schedule_work_on(cpu, &unbind_work);
+               queue_work_on(cpu, system_highpri_wq, &unbind_work);
                flush_work(&unbind_work);
                break;
        }
@@ -3692,6 +3879,10 @@ static int __init init_workqueues(void)
        unsigned int cpu;
        int i;
 
+       /* make sure we have enough bits for OFFQ CPU number */
+       BUILD_BUG_ON((1LU << (BITS_PER_LONG - WORK_OFFQ_CPU_SHIFT)) <
+                    WORK_CPU_LAST);
+
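
For a sense of the headroom this asserts, a worked example with an assumed shift (not the kernel's actual constant):

	/*
	 * With BITS_PER_LONG == 32 and a hypothetical WORK_OFFQ_CPU_SHIFT of 10,
	 * 1LU << 22 == 4194304 CPU ids fit above the OFFQ flag bits, so the
	 * BUILD_BUG_ON only fires if WORK_CPU_LAST ever exceeds that.
	 */
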
        cpu_notifier(workqueue_cpu_up_callback, CPU_PRI_WORKQUEUE_UP);
        cpu_notifier(workqueue_cpu_down_callback, CPU_PRI_WORKQUEUE_DOWN);
 
@@ -3746,6 +3937,7 @@ static int __init init_workqueues(void)
        }
 
        system_wq = alloc_workqueue("events", 0, 0);
+       system_highpri_wq = alloc_workqueue("events_highpri", WQ_HIGHPRI, 0);
        system_long_wq = alloc_workqueue("events_long", 0, 0);
        system_nrt_wq = alloc_workqueue("events_nrt", WQ_NON_REENTRANT, 0);
        system_unbound_wq = alloc_workqueue("events_unbound", WQ_UNBOUND,
@@ -3754,9 +3946,9 @@ static int __init init_workqueues(void)
                                              WQ_FREEZABLE, 0);
        system_nrt_freezable_wq = alloc_workqueue("events_nrt_freezable",
                        WQ_NON_REENTRANT | WQ_FREEZABLE, 0);
-       BUG_ON(!system_wq || !system_long_wq || !system_nrt_wq ||
-              !system_unbound_wq || !system_freezable_wq ||
-               !system_nrt_freezable_wq);
+       BUG_ON(!system_wq || !system_highpri_wq || !system_long_wq ||
+              !system_nrt_wq || !system_unbound_wq || !system_freezable_wq ||
+              !system_nrt_freezable_wq);
        return 0;
 }
 early_initcall(init_workqueues);