#include <asm/cache.h>
#include <asm/apic.h>
#include <asm/uv/uv.h>
+#include <asm/kaiser.h>
DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate)
= { &init_mm, 0, };
+static void load_new_mm_cr3(pgd_t *pgdir)
+{
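+ /* CR3 takes the physical address of the new mm's page global directory. */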
+ unsigned long new_mm_cr3 = __pa(pgdir);
+
+#ifdef CONFIG_KAISER
+ if (this_cpu_has(X86_FEATURE_PCID)) {
+ /*
+ * We reuse the same PCID for different tasks, so we must
+ * flush all the entries for the PCID out when we change tasks.
+ * Flush KERN below, flush USER when returning to userspace in
+ * kaiser's SWITCH_USER_CR3 (_SWITCH_TO_USER_CR3) macro.
+ *
+ * invpcid_flush_single_context(X86_CR3_PCID_ASID_USER) could
+ * do it here, but can only be used if X86_FEATURE_INVPCID is
+ * available - and many machines support pcid without invpcid.
+ *
+ * The line below is a no-op: X86_CR3_PCID_KERN_FLUSH is now 0;
+ * but keep it here in case that ever changes.
+ */
+ new_mm_cr3 |= X86_CR3_PCID_KERN_FLUSH;
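+ /* Defer the USER-PCID flush to kaiser's return-to-userspace path. */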
+ kaiser_flush_tlb_on_return_to_user();
+ }
+#endif /* CONFIG_KAISER */
+
+ /*
+ * Caution: many callers of this function expect
+ * that load_new_mm_cr3() is serializing and orders TLB
+ * fills with respect to the mm_cpumask writes.
+ */
+ write_cr3(new_mm_cr3);
+}
+
/*
- * Smarter SMP flushing macros.
+ * TLB flushing, formerly SMP-only
* c/o Linus Torvalds.
*
* These mean you can really definitely utterly forget about
BUG();
cpumask_clear_cpu(cpu,
mm_cpumask(percpu_read(cpu_tlbstate.active_mm)));
- load_cr3(swapper_pg_dir);
+ load_new_mm_cr3(swapper_pg_dir);
}
EXPORT_SYMBOL_GPL(leave_mm);
+void switch_mm(struct mm_struct *prev, struct mm_struct *next,
+ struct task_struct *tsk)
+{
+ unsigned long flags;
+
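+ /* switch_mm_irqs_off() must be called with interrupts disabled. */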
+ local_irq_save(flags);
+ switch_mm_irqs_off(prev, next, tsk);
+ local_irq_restore(flags);
+}
+
+void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
+ struct task_struct *tsk)
+{
+ unsigned cpu = smp_processor_id();
+
+ if (likely(prev != next)) {
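+ /* Publish this CPU as a user of 'next' before loading its page tables. */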
+ percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
+ percpu_write(cpu_tlbstate.active_mm, next);
+ cpumask_set_cpu(cpu, mm_cpumask(next));
+
+ /*
+ * Re-load page tables.
+ *
+ * This logic has an ordering constraint:
+ *
+ * CPU 0: Write to a PTE for 'next'
+ * CPU 0: load bit 1 in mm_cpumask. if nonzero, send IPI.
+ * CPU 1: set bit 1 in next's mm_cpumask
+ * CPU 1: load from the PTE that CPU 0 writes (implicit)
+ *
+ * We need to prevent an outcome in which CPU 1 observes
+ * the new PTE value and CPU 0 observes bit 1 clear in
+ * mm_cpumask. (If that occurs, then the IPI will never
+ * be sent, and CPU 0's TLB will contain a stale entry.)
+ *
+ * The bad outcome can occur if either CPU's load is
+ * reordered before that CPU's store, so both CPUs must
+ * execute full barriers to prevent this from happening.
+ *
+ * Thus, switch_mm needs a full barrier between the
+ * store to mm_cpumask and any operation that could load
+ * from next->pgd. TLB fills are special and can happen
+ * due to instruction fetches or for no reason at all,
+ * and neither LOCK nor MFENCE orders them.
+ * Fortunately, load_new_mm_cr3() is serializing
+ * and gives the ordering guarantee we need.
+ */
+ load_new_mm_cr3(next->pgd);
+
+ /* stop flush ipis for the previous mm */
+ cpumask_clear_cpu(cpu, mm_cpumask(prev));
+
+ /*
+ * load the LDT, if the LDT is different:
+ */
+ if (unlikely(prev->context.ldt != next->context.ldt))
+ load_mm_ldt(next);
+ } else {
+ percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
+ BUG_ON(percpu_read(cpu_tlbstate.active_mm) != next);
+
+ if (!cpumask_test_and_set_cpu(cpu, mm_cpumask(next))) {
+ /* We were in lazy tlb mode and leave_mm disabled
+ * tlb flush IPI delivery. We must reload CR3
+ * to make sure to use no freed page tables.
+ *
+ * As above, load_new_mm_cr3() is serializing and orders
+ * TLB fills with respect to the mm_cpumask write.
+ */
+ load_new_mm_cr3(next->pgd);
+ load_mm_ldt(next);
+ }
+ }
+}
+
/*
*
* The flush IPI assumes that a thread switch happens in this order:
static void flush_tlb_others_ipi(const struct cpumask *cpumask,
struct mm_struct *mm, unsigned long va)
{
+#ifdef CONFIG_SMP
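+ /* The remote-flush IPI machinery only exists on SMP; on UP builds this is a no-op. */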
unsigned int sender;
union smp_flush_state *f;
f->flush_va = 0;
if (nr_cpu_ids > NUM_INVALIDATE_TLB_VECTORS)
raw_spin_unlock(&f->tlbstate_lock);
+#endif /* CONFIG_SMP */
}
void native_flush_tlb_others(const struct cpumask *cpumask,
preempt_disable();
+ /* This is an implicit full barrier that synchronizes with switch_mm. */
local_flush_tlb();
+
if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
flush_tlb_others(mm_cpumask(mm), mm, TLB_FLUSH_ALL);
preempt_enable();
preempt_disable();
if (current->active_mm == mm) {
- if (current->mm)
+ if (current->mm) {
+ /*
+ * This is an implicit full barrier (MOV to CR) that
+ * synchronizes with switch_mm.
+ */
local_flush_tlb();
- else
+ } else {
leave_mm(smp_processor_id());
+ /* Synchronize with switch_mm. */
+ smp_mb();
+ }
+ } else {
+ /* Synchronize with switch_mm. */
+ smp_mb();
}
if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
flush_tlb_others(mm_cpumask(mm), mm, TLB_FLUSH_ALL);
preempt_disable();
if (current->active_mm == mm) {
- if (current->mm)
+ if (current->mm) {
+ /*
+ * Implicit full barrier (INVLPG) that synchronizes
+ * with switch_mm.
+ */
__flush_tlb_one(va);
- else
+ } else {
leave_mm(smp_processor_id());
+
+ /* Synchronize with switch_mm. */
+ smp_mb();
+ }
}
if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)