[pandora-kernel.git] / kernel / workqueue.c
index 7bf068a..563820c 100644 (file)
@@ -128,6 +128,7 @@ struct worker {
        };
 
        struct work_struct      *current_work;  /* L: work being processed */
+       work_func_t             current_func;   /* L: current_work's fn */
        struct cpu_workqueue_struct *current_cwq; /* L: current_work's cwq */
        struct list_head        scheduled;      /* L: scheduled works */
        struct task_struct      *task;          /* I: worker task */
@@ -843,7 +844,8 @@ static struct worker *__find_worker_executing_work(struct global_cwq *gcwq,
        struct hlist_node *tmp;
 
        hlist_for_each_entry(worker, tmp, bwh, hentry)
-               if (worker->current_work == work)
+               if (worker->current_work == work &&
+                   worker->current_func == work->func)
                        return worker;
        return NULL;
 }
@@ -853,9 +855,27 @@ static struct worker *__find_worker_executing_work(struct global_cwq *gcwq,
  * @gcwq: gcwq of interest
  * @work: work to find worker for
  *
- * Find a worker which is executing @work on @gcwq.  This function is
- * identical to __find_worker_executing_work() except that this
- * function calculates @bwh itself.
+ * Find a worker which is executing @work on @gcwq by searching
+ * @gcwq->busy_hash which is keyed by the address of @work.  For a worker
+ * to match, its current execution should match the address of @work and
+ * its work function.  This is to avoid unwanted dependency between
+ * unrelated work executions through a work item being recycled while still
+ * being executed.
+ *
+ * This is a bit tricky.  A work item may be freed once its execution
+ * starts and nothing prevents the freed area from being recycled for
+ * another work item.  If the same work item address ends up being reused
+ * before the original execution finishes, workqueue will identify the
+ * recycled work item as currently executing and make it wait until the
+ * current execution finishes, introducing an unwanted dependency.
+ *
+ * This function checks the work item address and the work function to
+ * avoid false positives.  Note that this isn't complete as one may
+ * construct a work function which can introduce dependency onto itself
+ * through a recycled work item.  Well, if somebody wants to shoot oneself
+ * in the foot that badly, there's only so much we can do, and if such
+ * deadlock actually occurs, it should be easy to locate the culprit work
+ * function.
  *
  * CONTEXT:
  * spin_lock_irq(gcwq->lock).
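
The recycling scenario described above is easiest to see in isolation.  Below is a
minimal userspace sketch (not kernel code; fake_work, fake_worker and the two
functions are made up for illustration) of why matching on the work item address
alone can produce a false positive, while matching on address plus work function,
as __find_worker_executing_work() now does, does not:

	#include <stdio.h>
	#include <stdlib.h>

	typedef void (*work_func_t)(void *);

	struct fake_work {
		work_func_t func;
	};

	struct fake_worker {
		struct fake_work *current_work;	/* address of the item being executed */
		work_func_t current_func;	/* its ->func, sampled when execution started */
	};

	static void func_a(void *p) { (void)p; }
	static void func_b(void *p) { (void)p; }

	/* Old check: address only -- a recycled item at the same address matches. */
	static int match_addr(struct fake_worker *w, struct fake_work *work)
	{
		return w->current_work == work;
	}

	/* New check: address and function -- the recycled item no longer matches. */
	static int match_addr_func(struct fake_worker *w, struct fake_work *work)
	{
		return w->current_work == work && w->current_func == work->func;
	}

	int main(void)
	{
		struct fake_work *item = malloc(sizeof(*item));
		struct fake_worker worker;

		item->func = func_a;
		worker.current_work = item;	/* worker starts executing the item */
		worker.current_func = item->func;

		/*
		 * The item is "freed" while still executing and the same address
		 * is handed out for an unrelated work item with a different
		 * function.  (A real allocator may or may not reuse the address;
		 * the reuse is forced here for illustration.)
		 */
		item->func = func_b;

		printf("address-only match:     %d  (false positive)\n",
		       match_addr(&worker, item));
		printf("address+function match: %d\n",
		       match_addr_func(&worker, item));

		free(item);
		return 0;
	}
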
@@ -1454,12 +1474,19 @@ static void destroy_worker(struct worker *worker)
        if (worker->flags & WORKER_IDLE)
                gcwq->nr_idle--;
 
+       /*
+        * Once WORKER_DIE is set, the kworker may destroy itself at any
+        * point.  Pin to ensure the task stays until we're done with it.
+        */
+       get_task_struct(worker->task);
+
        list_del_init(&worker->entry);
        worker->flags |= WORKER_DIE;
 
        spin_unlock_irq(&gcwq->lock);
 
        kthread_stop(worker->task);
+       put_task_struct(worker->task);
        kfree(worker);
 
        spin_lock_irq(&gcwq->lock);
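
The pinning added to destroy_worker() follows the general rule that the caller of
kthread_stop() must hold its own reference on the task whenever the thread may
exit on its own before kthread_stop() is reached.  A hedged sketch of that pattern
outside the workqueue code, with hypothetical helper names and an exit flag
standing in for WORKER_DIE:

	#include <linux/kthread.h>
	#include <linux/sched.h>	/* get_task_struct(), put_task_struct() */
	#include <linux/atomic.h>
	#include <linux/err.h>

	static struct task_struct *helper_task;
	static atomic_t helper_die = ATOMIC_INIT(0);	/* stands in for WORKER_DIE */

	static int helper_fn(void *unused)
	{
		/* May return (and the task may exit) as soon as helper_die is set. */
		while (!atomic_read(&helper_die) && !kthread_should_stop())
			schedule_timeout_interruptible(HZ);
		return 0;
	}

	static int helper_start(void)
	{
		helper_task = kthread_run(helper_fn, NULL, "helper");
		return IS_ERR(helper_task) ? PTR_ERR(helper_task) : 0;
	}

	static void helper_stop(void)
	{
		struct task_struct *t = helper_task;

		/*
		 * Once helper_die is set the thread can be gone before
		 * kthread_stop() ever looks at it; keep the task_struct alive
		 * across the call with a reference of our own, taken before
		 * the flag is set (same ordering as destroy_worker() above).
		 */
		get_task_struct(t);
		atomic_set(&helper_die, 1);
		wake_up_process(t);
		kthread_stop(t);
		put_task_struct(t);
	}
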
@@ -1816,7 +1843,6 @@ __acquires(&gcwq->lock)
        struct global_cwq *gcwq = cwq->gcwq;
        struct hlist_head *bwh = busy_worker_head(gcwq, work);
        bool cpu_intensive = cwq->wq->flags & WQ_CPU_INTENSIVE;
-       work_func_t f = work->func;
        int work_color;
        struct worker *collision;
 #ifdef CONFIG_LOCKDEP
@@ -1845,6 +1871,7 @@ __acquires(&gcwq->lock)
        debug_work_deactivate(work);
        hlist_add_head(&worker->hentry, bwh);
        worker->current_work = work;
+       worker->current_func = work->func;
        worker->current_cwq = cwq;
        work_color = get_work_color(work);
 
@@ -1882,7 +1909,7 @@ __acquires(&gcwq->lock)
        lock_map_acquire_read(&cwq->wq->lockdep_map);
        lock_map_acquire(&lockdep_map);
        trace_workqueue_execute_start(work);
-       f(work);
+       worker->current_func(work);
        /*
         * While we must be careful to not use "work" after this, the trace
         * point will only record its address.
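
As the comment notes, @work may already be gone by the time the function returns:
a work function is free to release the object that embeds its own work item.  A
small, hypothetical example of such a work function (struct reply and reply_fn are
made up for illustration):

	#include <linux/workqueue.h>
	#include <linux/slab.h>
	#include <linux/kernel.h>	/* container_of() */

	struct reply {
		struct work_struct work;
		int status;		/* ...payload... */
	};

	static void reply_fn(struct work_struct *work)
	{
		struct reply *r = container_of(work, struct reply, work);

		/* ...consume r->status... */

		kfree(r);	/* frees the embedded work item as well */
		/*
		 * From here on neither this function nor the workqueue core
		 * may dereference "work"; only its address remains meaningful,
		 * which is all the tracepoint above records.
		 */
	}
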
@@ -1892,15 +1919,23 @@ __acquires(&gcwq->lock)
        lock_map_release(&cwq->wq->lockdep_map);
 
        if (unlikely(in_atomic() || lockdep_depth(current) > 0)) {
-               printk(KERN_ERR "BUG: workqueue leaked lock or atomic: "
-                      "%s/0x%08x/%d\n",
-                      current->comm, preempt_count(), task_pid_nr(current));
-               printk(KERN_ERR "    last function: ");
-               print_symbol("%s\n", (unsigned long)f);
+               pr_err("BUG: workqueue leaked lock or atomic: %s/0x%08x/%d\n"
+                      "     last function: %pf\n",
+                      current->comm, preempt_count(), task_pid_nr(current),
+                      worker->current_func);
                debug_show_held_locks(current);
                dump_stack();
        }
 
+       /*
+        * The following prevents a kworker from hogging CPU on !PREEMPT
+        * kernels, where a requeueing work item waiting for something to
+        * happen could deadlock with stop_machine as such work item could
+        * indefinitely requeue itself while all other CPUs are trapped in
+        * stop_machine.
+        */
+       cond_resched();
+
        spin_lock_irq(&gcwq->lock);
 
        /* clear cpu intensive status */
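
The kind of work item the comment above is guarding against looks roughly like the
following (hypothetical names; the ready flag stands in for whatever condition the
work is polling on).  On a !PREEMPT kernel, a busy-looping requeue like this never
sleeps and never yields on its own, so the cond_resched() between items is what
lets other tasks, including stop_machine, make progress:

	#include <linux/workqueue.h>
	#include <linux/atomic.h>

	static atomic_t ready = ATOMIC_INIT(0);

	static void poll_fn(struct work_struct *work);
	static DECLARE_WORK(poll_work, poll_fn);

	static void poll_fn(struct work_struct *work)
	{
		if (!atomic_read(&ready)) {
			/*
			 * Not ready yet: requeue and return.  The kworker will
			 * run this item again immediately, so on !PREEMPT the
			 * CPU is only released because process_one_work() now
			 * calls cond_resched() between items.
			 */
			schedule_work(&poll_work);
			return;
		}

		/* ...the condition finally holds, do the real work... */
	}
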
@@ -1910,6 +1945,7 @@ __acquires(&gcwq->lock)
        /* we're done with it, release */
        hlist_del_init(&worker->hentry);
        worker->current_work = NULL;
+       worker->current_func = NULL;
        worker->current_cwq = NULL;
        cwq_dec_nr_in_flight(cwq, work_color, false);
 }