x86: use cpuid vector 0xb when available for detecting cpu topology
author Suresh Siddha <suresh.b.siddha@intel.com>
Sat, 23 Aug 2008 15:47:10 +0000 (17:47 +0200)
committer Ingo Molnar <mingo@elte.hu>
Sat, 23 Aug 2008 15:47:10 +0000 (17:47 +0200)
CPUID leaf 0xb provides extended topology enumeration. It reports the
32-bit x2APIC id of the logical processor and also offers a new mechanism
for detecting SMT and core siblings, with increased addressability
compared to the legacy cpuid leaves.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
arch/x86/kernel/cpu/addon_cpuid_features.c
arch/x86/kernel/cpu/common_64.c
arch/x86/kernel/cpu/intel.c
arch/x86/kernel/cpu/intel_64.c
include/asm-x86/cpufeature.h
include/asm-x86/processor.h

index 84a8220..aa9641a 100644 (file)
@@ -7,6 +7,8 @@
 #include <asm/pat.h>
 #include <asm/processor.h>
 
+#include <mach_apic.h>
+
 struct cpuid_bit {
        u16 feature;
        u8 reg;
@@ -48,6 +50,90 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c)
        }
 }
 
+/* cpuid leaf 0xb sub-leaf index that is queried for the SMT level */
+#define SMT_LEVEL      0
+
+/* leaf 0xb sub-leaf types, as extracted from ecx by LEAFB_SUBTYPE() */
+#define INVALID_TYPE   0
+#define SMT_TYPE       1
+#define CORE_TYPE      2
+
+#define LEAFB_SUBTYPE(ecx)             (((ecx) >> 8) & 0xff) /* ecx[15:8] */
+#define BITS_SHIFT_NEXT_LEVEL(eax)     ((eax) & 0x1f) /* eax[4:0] */
+#define LEVEL_MAX_SIBLINGS(ebx)                ((ebx) & 0xffff) /* ebx[15:0] */
+
+/*
+ * If cpuid leaf 0xb is implemented, set X86_FEATURE_XTOPOLOGY and fill
+ * c->initial_apicid, cpu_core_id, phys_proc_id, x86_max_cores and
+ * smp_num_siblings from it; otherwise return without modifying c.
+ */
+void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c)
+{
+       unsigned int eax, ebx, ecx, edx, sub_index;
+       unsigned int ht_mask_width, core_plus_mask_width;
+       unsigned int core_select_mask, core_level_siblings;
+
+       if (c->cpuid_level < 0xb)
+               return;
+
+       cpuid_count(0xb, SMT_LEVEL, &eax, &ebx, &ecx, &edx);
+
+       /*
+        * leaf 0xb is implemented only if ebx != 0 and sub-leaf 0 is SMT.
+        */
+       if (ebx == 0 || (LEAFB_SUBTYPE(ecx) != SMT_TYPE))
+               return;
+
+       set_cpu_cap(c, X86_FEATURE_XTOPOLOGY);
+
+       /*
+        * initial apic id, which also represents 32-bit extended x2apic id.
+        */
+       c->initial_apicid = edx;
+
+       /*
+        * Sub-leaf 0 gives the HT info and the default core-level info.
+        */
+       core_level_siblings = smp_num_siblings = LEVEL_MAX_SIBLINGS(ebx);
+       core_plus_mask_width = ht_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
+
+       sub_index = 1;
+       do {
+               cpuid_count(0xb, sub_index, &eax, &ebx, &ecx, &edx);
+
+               /*
+                * The first Core type sub-leaf overrides the defaults.
+                */
+               if (LEAFB_SUBTYPE(ecx) == CORE_TYPE) {
+                       core_level_siblings = LEVEL_MAX_SIBLINGS(ebx);
+                       core_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
+                       break;
+               }
+
+               sub_index++;
+       } while (LEAFB_SUBTYPE(ecx) != INVALID_TYPE);
+       /* use unsigned ~0U: left-shifting the signed -1 is undefined */
+       core_select_mask = (~(~0U << core_plus_mask_width)) >> ht_mask_width;
+
+#ifdef CONFIG_X86_32
+       c->cpu_core_id = phys_pkg_id(c->initial_apicid, ht_mask_width)
+                                                & core_select_mask;
+       c->phys_proc_id = phys_pkg_id(c->initial_apicid, core_plus_mask_width);
+#else
+       c->cpu_core_id = phys_pkg_id(ht_mask_width) & core_select_mask;
+       c->phys_proc_id = phys_pkg_id(core_plus_mask_width);
+#endif
+       c->x86_max_cores = (core_level_siblings / smp_num_siblings);
+
+
+       printk(KERN_INFO  "CPU: Physical Processor ID: %d\n",
+              c->phys_proc_id);
+       if (c->x86_max_cores > 1)
+               printk(KERN_INFO  "CPU: Processor Core ID: %d\n",
+                      c->cpu_core_id);
+       return;
+}
+
 #ifdef CONFIG_X86_PAT
 void __cpuinit validate_pat_support(struct cpuinfo_x86 *c)
 {
index af569a9..a2888c7 100644 (file)
@@ -128,6 +128,9 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c)
        u32 eax, ebx, ecx, edx;
        int index_msb, core_bits;
 
+       if (cpu_has(c, X86_FEATURE_XTOPOLOGY))
+               return;
+
        cpuid(1, &eax, &ebx, &ecx, &edx);
 
 
index 77618c7..58a6f1a 100644 (file)
@@ -176,9 +176,16 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
        if (p)
                strcpy(c->x86_model_id, p);
 
-       c->x86_max_cores = num_cpu_cores(c);
-
-       detect_ht(c);
+       detect_extended_topology(c);
+
+       if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) {
+               /*
+                * let's use the legacy cpuid vector 0x1 and 0x4 for topology
+                * detection.
+                */
+               c->x86_max_cores = num_cpu_cores(c);
+               detect_ht(c);
+       }
 
        /* Work around errata */
        Intel_errata_workarounds(c);
index 1019c58..42d501a 100644 (file)
@@ -80,7 +80,10 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
        if (c->x86 == 6)
                set_cpu_cap(c, X86_FEATURE_REP_GOOD);
        set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
-       c->x86_max_cores = intel_num_cpu_cores(c);
+
+       detect_extended_topology(c);
+       if (!cpu_has(c, X86_FEATURE_XTOPOLOGY))
+               c->x86_max_cores = intel_num_cpu_cores(c);
 
        srat_detect_node();
 }
index 5fc4d55..8d842af 100644 (file)
@@ -81,6 +81,7 @@
 #define X86_FEATURE_LFENCE_RDTSC (3*32+18) /* Lfence synchronizes RDTSC */
 #define X86_FEATURE_11AP       (3*32+19) /* Bad local APIC aka 11AP */
 #define X86_FEATURE_NOPL       (3*32+20) /* The NOPL (0F 1F) instructions */
+#define X86_FEATURE_XTOPOLOGY  (3*32+21) /* cpu topology enum extensions */
 
 /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
 #define X86_FEATURE_XMM3       (4*32+ 0) /* Streaming SIMD Extensions-3 */
index 5f58da4..79338fe 100644 (file)
@@ -161,6 +161,7 @@ extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c);
 extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
 extern unsigned short num_cache_leaves;
 
+extern void detect_extended_topology(struct cpuinfo_x86 *c);
 #if defined(CONFIG_X86_HT) || defined(CONFIG_X86_64)
 extern void detect_ht(struct cpuinfo_x86 *c);
 #else